CocoRoF commited on
Commit
4a09888
·
verified ·
1 Parent(s): 585f99f

Training in progress, step 25000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c23cf68abad32271631729127eb144e752956ff3904650e5132254b9fad4fa45
3
  size 306619286
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4ebc8d324592b24aa466cb1a17beb4eb518d5cd7415ad4d10867a1f113452a
3
  size 306619286
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02301192e164720633e799be07ca43015ab4e55ee46289fcc068979fca646eac
3
  size 919972410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9684e35dfc059389b032c609c2d17105dd7d52f3b875814129afa1ef90d3e36
3
  size 919972410
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06fea830cf5ad73ec00d500ea6fb952740ac936f18e93fa2d32abde1ea3ead92
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69ec6e3926fa071bede113523efa3dc6e630c3c7958c54a9ca321cf4d62ed145
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be561d1df19be227394d8ea607c54262a06c9bf880af0aa5e04a52596a2a6cb0
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6127ee4f0c13500ec5038fce65af8f7beec63c137c7d4b7c157aa6303cf5879
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03f3e24417a59435f5a8450a4aeb0f09cc92734b5c3b45a0701b2c043c415c05
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da01d1c5eb2cc3a323f97c1f590d13ccfac2a4c5b1479bd378b4e643304f5a4f
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bea02744c29f30024590ab1629a0e7b7dabbf1e8476456c2e7c5ce46dc35c28
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a3f04d76c0d3acc7d3dd95a04215f368f35a451ae8cba8a2fdba38cda9ca0a
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:041be966454b60c86af576fc1eb7f34189114689abff8f9622b947110f7334c8
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df7d2c9825dba80cb544920f8cc0c72122f96514e6cd259052a8765b034393e2
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b85766f6596d15a810177d77dd259d9b50588cf100ec5f8ebff5fed881d57957
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a20a42d44ff48cc162224010190e898fe28598ddad8cd1896d330a3bb1d8ec3
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8be75d04b1ebe614241b88fd010a5dda1b7bf703c00c6ebe310ca07975830fe7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18ac0dc4f09f25179860561fcea7c5c8f997aabdc46a170665f9dc5a72bc27c6
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4699833a7ab4cb692996ef7567f934c0bac79d6a067963a873f89a38e412bd48
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a16fcb5411ff961b47eff7378d85105fe9837e0492d19ea5ce3b7c4b77aa3b6
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2628d182b8ffc02b994fb2eed0e111e21ac10dadfa106370a9ce0523145ccd0
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6276b39eb0a6a4f547784c30a100b3eee72c8aefbe6f0f7bb1ca7dca8f60dc4b
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.3711461689596541,
5
  "eval_steps": 5000,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -14039,6 +14039,3514 @@
14039
  "eval_samples_per_second": 3203.381,
14040
  "eval_steps_per_second": 50.054,
14041
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14042
  }
14043
  ],
14044
  "logging_steps": 10,
@@ -14058,7 +17566,7 @@
14058
  "attributes": {}
14059
  }
14060
  },
14061
- "total_flos": 3.491045518082048e+18,
14062
  "train_batch_size": 8,
14063
  "trial_name": null,
14064
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.46393271119956764,
5
  "eval_steps": 5000,
6
+ "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
14039
  "eval_samples_per_second": 3203.381,
14040
  "eval_steps_per_second": 50.054,
14041
  "step": 20000
14042
+ },
14043
+ {
14044
+ "epoch": 0.37133174204413394,
14045
+ "grad_norm": 37.40625,
14046
+ "learning_rate": 9.94197943212913e-06,
14047
+ "loss": 21.1517,
14048
+ "step": 20010
14049
+ },
14050
+ {
14051
+ "epoch": 0.37151731512861375,
14052
+ "grad_norm": 35.5,
14053
+ "learning_rate": 9.941950436343087e-06,
14054
+ "loss": 21.8961,
14055
+ "step": 20020
14056
+ },
14057
+ {
14058
+ "epoch": 0.37170288821309355,
14059
+ "grad_norm": 33.875,
14060
+ "learning_rate": 9.941921440557045e-06,
14061
+ "loss": 21.297,
14062
+ "step": 20030
14063
+ },
14064
+ {
14065
+ "epoch": 0.3718884612975734,
14066
+ "grad_norm": 34.4375,
14067
+ "learning_rate": 9.941892444771002e-06,
14068
+ "loss": 21.2152,
14069
+ "step": 20040
14070
+ },
14071
+ {
14072
+ "epoch": 0.3720740343820532,
14073
+ "grad_norm": 33.75,
14074
+ "learning_rate": 9.94186344898496e-06,
14075
+ "loss": 21.5122,
14076
+ "step": 20050
14077
+ },
14078
+ {
14079
+ "epoch": 0.372259607466533,
14080
+ "grad_norm": 33.375,
14081
+ "learning_rate": 9.941834453198917e-06,
14082
+ "loss": 21.7115,
14083
+ "step": 20060
14084
+ },
14085
+ {
14086
+ "epoch": 0.3724451805510129,
14087
+ "grad_norm": 34.90625,
14088
+ "learning_rate": 9.941805457412876e-06,
14089
+ "loss": 21.4614,
14090
+ "step": 20070
14091
+ },
14092
+ {
14093
+ "epoch": 0.3726307536354927,
14094
+ "grad_norm": 34.28125,
14095
+ "learning_rate": 9.941776461626833e-06,
14096
+ "loss": 21.2661,
14097
+ "step": 20080
14098
+ },
14099
+ {
14100
+ "epoch": 0.37281632671997256,
14101
+ "grad_norm": 32.09375,
14102
+ "learning_rate": 9.94174746584079e-06,
14103
+ "loss": 21.056,
14104
+ "step": 20090
14105
+ },
14106
+ {
14107
+ "epoch": 0.37300189980445236,
14108
+ "grad_norm": 34.96875,
14109
+ "learning_rate": 9.941718470054748e-06,
14110
+ "loss": 21.249,
14111
+ "step": 20100
14112
+ },
14113
+ {
14114
+ "epoch": 0.37318747288893217,
14115
+ "grad_norm": 34.5,
14116
+ "learning_rate": 9.941689474268706e-06,
14117
+ "loss": 20.9543,
14118
+ "step": 20110
14119
+ },
14120
+ {
14121
+ "epoch": 0.37337304597341203,
14122
+ "grad_norm": 35.21875,
14123
+ "learning_rate": 9.941660478482663e-06,
14124
+ "loss": 21.4361,
14125
+ "step": 20120
14126
+ },
14127
+ {
14128
+ "epoch": 0.37355861905789184,
14129
+ "grad_norm": 34.875,
14130
+ "learning_rate": 9.94163148269662e-06,
14131
+ "loss": 21.5762,
14132
+ "step": 20130
14133
+ },
14134
+ {
14135
+ "epoch": 0.37374419214237165,
14136
+ "grad_norm": 37.21875,
14137
+ "learning_rate": 9.941602486910578e-06,
14138
+ "loss": 21.4474,
14139
+ "step": 20140
14140
+ },
14141
+ {
14142
+ "epoch": 0.3739297652268515,
14143
+ "grad_norm": 36.28125,
14144
+ "learning_rate": 9.941573491124535e-06,
14145
+ "loss": 21.4356,
14146
+ "step": 20150
14147
+ },
14148
+ {
14149
+ "epoch": 0.3741153383113313,
14150
+ "grad_norm": 33.8125,
14151
+ "learning_rate": 9.941544495338493e-06,
14152
+ "loss": 21.2281,
14153
+ "step": 20160
14154
+ },
14155
+ {
14156
+ "epoch": 0.3743009113958112,
14157
+ "grad_norm": 32.8125,
14158
+ "learning_rate": 9.94151549955245e-06,
14159
+ "loss": 21.3117,
14160
+ "step": 20170
14161
+ },
14162
+ {
14163
+ "epoch": 0.374486484480291,
14164
+ "grad_norm": 35.0,
14165
+ "learning_rate": 9.94148650376641e-06,
14166
+ "loss": 21.3198,
14167
+ "step": 20180
14168
+ },
14169
+ {
14170
+ "epoch": 0.3746720575647708,
14171
+ "grad_norm": 37.8125,
14172
+ "learning_rate": 9.941457507980367e-06,
14173
+ "loss": 21.4875,
14174
+ "step": 20190
14175
+ },
14176
+ {
14177
+ "epoch": 0.37485763064925065,
14178
+ "grad_norm": 35.03125,
14179
+ "learning_rate": 9.941428512194324e-06,
14180
+ "loss": 21.4072,
14181
+ "step": 20200
14182
+ },
14183
+ {
14184
+ "epoch": 0.37504320373373046,
14185
+ "grad_norm": 36.40625,
14186
+ "learning_rate": 9.941399516408281e-06,
14187
+ "loss": 21.2224,
14188
+ "step": 20210
14189
+ },
14190
+ {
14191
+ "epoch": 0.37522877681821026,
14192
+ "grad_norm": 36.03125,
14193
+ "learning_rate": 9.941370520622239e-06,
14194
+ "loss": 21.606,
14195
+ "step": 20220
14196
+ },
14197
+ {
14198
+ "epoch": 0.3754143499026901,
14199
+ "grad_norm": 35.28125,
14200
+ "learning_rate": 9.941341524836196e-06,
14201
+ "loss": 21.2329,
14202
+ "step": 20230
14203
+ },
14204
+ {
14205
+ "epoch": 0.37559992298716993,
14206
+ "grad_norm": 36.28125,
14207
+ "learning_rate": 9.941312529050154e-06,
14208
+ "loss": 20.9903,
14209
+ "step": 20240
14210
+ },
14211
+ {
14212
+ "epoch": 0.3757854960716498,
14213
+ "grad_norm": 34.59375,
14214
+ "learning_rate": 9.941283533264111e-06,
14215
+ "loss": 21.4073,
14216
+ "step": 20250
14217
+ },
14218
+ {
14219
+ "epoch": 0.3759710691561296,
14220
+ "grad_norm": 34.9375,
14221
+ "learning_rate": 9.941254537478069e-06,
14222
+ "loss": 21.637,
14223
+ "step": 20260
14224
+ },
14225
+ {
14226
+ "epoch": 0.3761566422406094,
14227
+ "grad_norm": 34.15625,
14228
+ "learning_rate": 9.941225541692026e-06,
14229
+ "loss": 21.1687,
14230
+ "step": 20270
14231
+ },
14232
+ {
14233
+ "epoch": 0.37634221532508927,
14234
+ "grad_norm": 36.84375,
14235
+ "learning_rate": 9.941196545905985e-06,
14236
+ "loss": 21.4842,
14237
+ "step": 20280
14238
+ },
14239
+ {
14240
+ "epoch": 0.3765277884095691,
14241
+ "grad_norm": 36.28125,
14242
+ "learning_rate": 9.941167550119942e-06,
14243
+ "loss": 21.6633,
14244
+ "step": 20290
14245
+ },
14246
+ {
14247
+ "epoch": 0.3767133614940489,
14248
+ "grad_norm": 37.9375,
14249
+ "learning_rate": 9.941138554333898e-06,
14250
+ "loss": 21.217,
14251
+ "step": 20300
14252
+ },
14253
+ {
14254
+ "epoch": 0.37689893457852874,
14255
+ "grad_norm": 34.75,
14256
+ "learning_rate": 9.941109558547857e-06,
14257
+ "loss": 21.5193,
14258
+ "step": 20310
14259
+ },
14260
+ {
14261
+ "epoch": 0.37708450766300855,
14262
+ "grad_norm": 33.09375,
14263
+ "learning_rate": 9.941080562761815e-06,
14264
+ "loss": 21.1617,
14265
+ "step": 20320
14266
+ },
14267
+ {
14268
+ "epoch": 0.37727008074748836,
14269
+ "grad_norm": 34.53125,
14270
+ "learning_rate": 9.941051566975772e-06,
14271
+ "loss": 21.3097,
14272
+ "step": 20330
14273
+ },
14274
+ {
14275
+ "epoch": 0.3774556538319682,
14276
+ "grad_norm": 33.90625,
14277
+ "learning_rate": 9.94102257118973e-06,
14278
+ "loss": 21.4043,
14279
+ "step": 20340
14280
+ },
14281
+ {
14282
+ "epoch": 0.377641226916448,
14283
+ "grad_norm": 35.625,
14284
+ "learning_rate": 9.940993575403689e-06,
14285
+ "loss": 21.6863,
14286
+ "step": 20350
14287
+ },
14288
+ {
14289
+ "epoch": 0.3778268000009279,
14290
+ "grad_norm": 33.78125,
14291
+ "learning_rate": 9.940964579617644e-06,
14292
+ "loss": 21.3448,
14293
+ "step": 20360
14294
+ },
14295
+ {
14296
+ "epoch": 0.3780123730854077,
14297
+ "grad_norm": 36.78125,
14298
+ "learning_rate": 9.940935583831602e-06,
14299
+ "loss": 21.0266,
14300
+ "step": 20370
14301
+ },
14302
+ {
14303
+ "epoch": 0.3781979461698875,
14304
+ "grad_norm": 34.84375,
14305
+ "learning_rate": 9.94090658804556e-06,
14306
+ "loss": 21.4672,
14307
+ "step": 20380
14308
+ },
14309
+ {
14310
+ "epoch": 0.37838351925436736,
14311
+ "grad_norm": 33.78125,
14312
+ "learning_rate": 9.940877592259518e-06,
14313
+ "loss": 21.7418,
14314
+ "step": 20390
14315
+ },
14316
+ {
14317
+ "epoch": 0.37856909233884717,
14318
+ "grad_norm": 35.65625,
14319
+ "learning_rate": 9.940848596473476e-06,
14320
+ "loss": 21.626,
14321
+ "step": 20400
14322
+ },
14323
+ {
14324
+ "epoch": 0.378754665423327,
14325
+ "grad_norm": 34.5,
14326
+ "learning_rate": 9.940819600687433e-06,
14327
+ "loss": 21.6884,
14328
+ "step": 20410
14329
+ },
14330
+ {
14331
+ "epoch": 0.37894023850780684,
14332
+ "grad_norm": 35.6875,
14333
+ "learning_rate": 9.94079060490139e-06,
14334
+ "loss": 21.1694,
14335
+ "step": 20420
14336
+ },
14337
+ {
14338
+ "epoch": 0.37912581159228664,
14339
+ "grad_norm": 35.28125,
14340
+ "learning_rate": 9.940761609115348e-06,
14341
+ "loss": 21.7092,
14342
+ "step": 20430
14343
+ },
14344
+ {
14345
+ "epoch": 0.3793113846767665,
14346
+ "grad_norm": 36.65625,
14347
+ "learning_rate": 9.940732613329305e-06,
14348
+ "loss": 21.0703,
14349
+ "step": 20440
14350
+ },
14351
+ {
14352
+ "epoch": 0.3794969577612463,
14353
+ "grad_norm": 36.6875,
14354
+ "learning_rate": 9.940703617543264e-06,
14355
+ "loss": 21.3644,
14356
+ "step": 20450
14357
+ },
14358
+ {
14359
+ "epoch": 0.3796825308457261,
14360
+ "grad_norm": 35.34375,
14361
+ "learning_rate": 9.94067462175722e-06,
14362
+ "loss": 21.7173,
14363
+ "step": 20460
14364
+ },
14365
+ {
14366
+ "epoch": 0.379868103930206,
14367
+ "grad_norm": 35.03125,
14368
+ "learning_rate": 9.940645625971177e-06,
14369
+ "loss": 21.8825,
14370
+ "step": 20470
14371
+ },
14372
+ {
14373
+ "epoch": 0.3800536770146858,
14374
+ "grad_norm": 35.3125,
14375
+ "learning_rate": 9.940616630185137e-06,
14376
+ "loss": 21.3377,
14377
+ "step": 20480
14378
+ },
14379
+ {
14380
+ "epoch": 0.3802392500991656,
14381
+ "grad_norm": 33.875,
14382
+ "learning_rate": 9.940587634399094e-06,
14383
+ "loss": 21.2556,
14384
+ "step": 20490
14385
+ },
14386
+ {
14387
+ "epoch": 0.38042482318364546,
14388
+ "grad_norm": 35.15625,
14389
+ "learning_rate": 9.940558638613051e-06,
14390
+ "loss": 21.1311,
14391
+ "step": 20500
14392
+ },
14393
+ {
14394
+ "epoch": 0.38061039626812526,
14395
+ "grad_norm": 34.78125,
14396
+ "learning_rate": 9.940529642827009e-06,
14397
+ "loss": 21.4698,
14398
+ "step": 20510
14399
+ },
14400
+ {
14401
+ "epoch": 0.3807959693526051,
14402
+ "grad_norm": 35.8125,
14403
+ "learning_rate": 9.940500647040966e-06,
14404
+ "loss": 21.5138,
14405
+ "step": 20520
14406
+ },
14407
+ {
14408
+ "epoch": 0.38098154243708493,
14409
+ "grad_norm": 36.46875,
14410
+ "learning_rate": 9.940471651254924e-06,
14411
+ "loss": 21.1583,
14412
+ "step": 20530
14413
+ },
14414
+ {
14415
+ "epoch": 0.38116711552156474,
14416
+ "grad_norm": 33.375,
14417
+ "learning_rate": 9.940442655468881e-06,
14418
+ "loss": 21.1859,
14419
+ "step": 20540
14420
+ },
14421
+ {
14422
+ "epoch": 0.3813526886060446,
14423
+ "grad_norm": 35.03125,
14424
+ "learning_rate": 9.94041365968284e-06,
14425
+ "loss": 20.9751,
14426
+ "step": 20550
14427
+ },
14428
+ {
14429
+ "epoch": 0.3815382616905244,
14430
+ "grad_norm": 34.96875,
14431
+ "learning_rate": 9.940384663896798e-06,
14432
+ "loss": 21.1197,
14433
+ "step": 20560
14434
+ },
14435
+ {
14436
+ "epoch": 0.3817238347750042,
14437
+ "grad_norm": 35.9375,
14438
+ "learning_rate": 9.940355668110753e-06,
14439
+ "loss": 21.0839,
14440
+ "step": 20570
14441
+ },
14442
+ {
14443
+ "epoch": 0.3819094078594841,
14444
+ "grad_norm": 36.96875,
14445
+ "learning_rate": 9.940326672324712e-06,
14446
+ "loss": 21.6738,
14447
+ "step": 20580
14448
+ },
14449
+ {
14450
+ "epoch": 0.3820949809439639,
14451
+ "grad_norm": 35.71875,
14452
+ "learning_rate": 9.94029767653867e-06,
14453
+ "loss": 21.366,
14454
+ "step": 20590
14455
+ },
14456
+ {
14457
+ "epoch": 0.38228055402844374,
14458
+ "grad_norm": 36.84375,
14459
+ "learning_rate": 9.940268680752627e-06,
14460
+ "loss": 21.195,
14461
+ "step": 20600
14462
+ },
14463
+ {
14464
+ "epoch": 0.38246612711292355,
14465
+ "grad_norm": 34.84375,
14466
+ "learning_rate": 9.940239684966585e-06,
14467
+ "loss": 21.5918,
14468
+ "step": 20610
14469
+ },
14470
+ {
14471
+ "epoch": 0.38265170019740335,
14472
+ "grad_norm": 36.1875,
14473
+ "learning_rate": 9.940210689180542e-06,
14474
+ "loss": 20.7986,
14475
+ "step": 20620
14476
+ },
14477
+ {
14478
+ "epoch": 0.3828372732818832,
14479
+ "grad_norm": 34.09375,
14480
+ "learning_rate": 9.9401816933945e-06,
14481
+ "loss": 21.4164,
14482
+ "step": 20630
14483
+ },
14484
+ {
14485
+ "epoch": 0.383022846366363,
14486
+ "grad_norm": 34.96875,
14487
+ "learning_rate": 9.940152697608457e-06,
14488
+ "loss": 21.2902,
14489
+ "step": 20640
14490
+ },
14491
+ {
14492
+ "epoch": 0.38320841945084283,
14493
+ "grad_norm": 32.65625,
14494
+ "learning_rate": 9.940123701822416e-06,
14495
+ "loss": 21.2691,
14496
+ "step": 20650
14497
+ },
14498
+ {
14499
+ "epoch": 0.3833939925353227,
14500
+ "grad_norm": 36.75,
14501
+ "learning_rate": 9.940094706036373e-06,
14502
+ "loss": 20.983,
14503
+ "step": 20660
14504
+ },
14505
+ {
14506
+ "epoch": 0.3835795656198025,
14507
+ "grad_norm": 35.15625,
14508
+ "learning_rate": 9.94006571025033e-06,
14509
+ "loss": 20.9418,
14510
+ "step": 20670
14511
+ },
14512
+ {
14513
+ "epoch": 0.3837651387042823,
14514
+ "grad_norm": 37.3125,
14515
+ "learning_rate": 9.940036714464288e-06,
14516
+ "loss": 21.948,
14517
+ "step": 20680
14518
+ },
14519
+ {
14520
+ "epoch": 0.38395071178876217,
14521
+ "grad_norm": 34.78125,
14522
+ "learning_rate": 9.940007718678245e-06,
14523
+ "loss": 22.0029,
14524
+ "step": 20690
14525
+ },
14526
+ {
14527
+ "epoch": 0.384136284873242,
14528
+ "grad_norm": 34.90625,
14529
+ "learning_rate": 9.939978722892203e-06,
14530
+ "loss": 21.1508,
14531
+ "step": 20700
14532
+ },
14533
+ {
14534
+ "epoch": 0.38432185795772184,
14535
+ "grad_norm": 33.90625,
14536
+ "learning_rate": 9.93994972710616e-06,
14537
+ "loss": 21.2856,
14538
+ "step": 20710
14539
+ },
14540
+ {
14541
+ "epoch": 0.38450743104220164,
14542
+ "grad_norm": 35.0,
14543
+ "learning_rate": 9.939920731320118e-06,
14544
+ "loss": 21.1426,
14545
+ "step": 20720
14546
+ },
14547
+ {
14548
+ "epoch": 0.38469300412668145,
14549
+ "grad_norm": 35.28125,
14550
+ "learning_rate": 9.939891735534075e-06,
14551
+ "loss": 21.1099,
14552
+ "step": 20730
14553
+ },
14554
+ {
14555
+ "epoch": 0.3848785772111613,
14556
+ "grad_norm": 35.375,
14557
+ "learning_rate": 9.939862739748033e-06,
14558
+ "loss": 21.4204,
14559
+ "step": 20740
14560
+ },
14561
+ {
14562
+ "epoch": 0.3850641502956411,
14563
+ "grad_norm": 34.28125,
14564
+ "learning_rate": 9.93983374396199e-06,
14565
+ "loss": 21.0373,
14566
+ "step": 20750
14567
+ },
14568
+ {
14569
+ "epoch": 0.3852497233801209,
14570
+ "grad_norm": 34.90625,
14571
+ "learning_rate": 9.939804748175949e-06,
14572
+ "loss": 22.1613,
14573
+ "step": 20760
14574
+ },
14575
+ {
14576
+ "epoch": 0.3854352964646008,
14577
+ "grad_norm": 35.53125,
14578
+ "learning_rate": 9.939775752389906e-06,
14579
+ "loss": 21.1425,
14580
+ "step": 20770
14581
+ },
14582
+ {
14583
+ "epoch": 0.3856208695490806,
14584
+ "grad_norm": 35.3125,
14585
+ "learning_rate": 9.939746756603864e-06,
14586
+ "loss": 21.195,
14587
+ "step": 20780
14588
+ },
14589
+ {
14590
+ "epoch": 0.38580644263356045,
14591
+ "grad_norm": 35.8125,
14592
+ "learning_rate": 9.939717760817821e-06,
14593
+ "loss": 21.2977,
14594
+ "step": 20790
14595
+ },
14596
+ {
14597
+ "epoch": 0.38599201571804026,
14598
+ "grad_norm": 36.46875,
14599
+ "learning_rate": 9.939688765031779e-06,
14600
+ "loss": 21.1699,
14601
+ "step": 20800
14602
+ },
14603
+ {
14604
+ "epoch": 0.38617758880252007,
14605
+ "grad_norm": 35.3125,
14606
+ "learning_rate": 9.939659769245736e-06,
14607
+ "loss": 20.9866,
14608
+ "step": 20810
14609
+ },
14610
+ {
14611
+ "epoch": 0.38636316188699993,
14612
+ "grad_norm": 36.65625,
14613
+ "learning_rate": 9.939630773459693e-06,
14614
+ "loss": 21.6496,
14615
+ "step": 20820
14616
+ },
14617
+ {
14618
+ "epoch": 0.38654873497147973,
14619
+ "grad_norm": 34.71875,
14620
+ "learning_rate": 9.939601777673653e-06,
14621
+ "loss": 21.3789,
14622
+ "step": 20830
14623
+ },
14624
+ {
14625
+ "epoch": 0.38673430805595954,
14626
+ "grad_norm": 33.28125,
14627
+ "learning_rate": 9.939572781887608e-06,
14628
+ "loss": 21.2779,
14629
+ "step": 20840
14630
+ },
14631
+ {
14632
+ "epoch": 0.3869198811404394,
14633
+ "grad_norm": 34.15625,
14634
+ "learning_rate": 9.939543786101566e-06,
14635
+ "loss": 21.5408,
14636
+ "step": 20850
14637
+ },
14638
+ {
14639
+ "epoch": 0.3871054542249192,
14640
+ "grad_norm": 35.0,
14641
+ "learning_rate": 9.939514790315525e-06,
14642
+ "loss": 21.4127,
14643
+ "step": 20860
14644
+ },
14645
+ {
14646
+ "epoch": 0.38729102730939907,
14647
+ "grad_norm": 34.46875,
14648
+ "learning_rate": 9.939485794529482e-06,
14649
+ "loss": 21.4833,
14650
+ "step": 20870
14651
+ },
14652
+ {
14653
+ "epoch": 0.3874766003938789,
14654
+ "grad_norm": 35.625,
14655
+ "learning_rate": 9.93945679874344e-06,
14656
+ "loss": 21.41,
14657
+ "step": 20880
14658
+ },
14659
+ {
14660
+ "epoch": 0.3876621734783587,
14661
+ "grad_norm": 33.6875,
14662
+ "learning_rate": 9.939427802957397e-06,
14663
+ "loss": 21.1335,
14664
+ "step": 20890
14665
+ },
14666
+ {
14667
+ "epoch": 0.38784774656283855,
14668
+ "grad_norm": 34.8125,
14669
+ "learning_rate": 9.939398807171354e-06,
14670
+ "loss": 21.3565,
14671
+ "step": 20900
14672
+ },
14673
+ {
14674
+ "epoch": 0.38803331964731835,
14675
+ "grad_norm": 35.40625,
14676
+ "learning_rate": 9.939369811385312e-06,
14677
+ "loss": 20.9849,
14678
+ "step": 20910
14679
+ },
14680
+ {
14681
+ "epoch": 0.38821889273179816,
14682
+ "grad_norm": 35.0,
14683
+ "learning_rate": 9.93934081559927e-06,
14684
+ "loss": 20.8741,
14685
+ "step": 20920
14686
+ },
14687
+ {
14688
+ "epoch": 0.388404465816278,
14689
+ "grad_norm": 33.625,
14690
+ "learning_rate": 9.939311819813228e-06,
14691
+ "loss": 20.8931,
14692
+ "step": 20930
14693
+ },
14694
+ {
14695
+ "epoch": 0.38859003890075783,
14696
+ "grad_norm": 36.5,
14697
+ "learning_rate": 9.939282824027186e-06,
14698
+ "loss": 21.2333,
14699
+ "step": 20940
14700
+ },
14701
+ {
14702
+ "epoch": 0.38877561198523763,
14703
+ "grad_norm": 35.96875,
14704
+ "learning_rate": 9.939253828241141e-06,
14705
+ "loss": 21.4989,
14706
+ "step": 20950
14707
+ },
14708
+ {
14709
+ "epoch": 0.3889611850697175,
14710
+ "grad_norm": 34.75,
14711
+ "learning_rate": 9.9392248324551e-06,
14712
+ "loss": 20.8834,
14713
+ "step": 20960
14714
+ },
14715
+ {
14716
+ "epoch": 0.3891467581541973,
14717
+ "grad_norm": 33.9375,
14718
+ "learning_rate": 9.939195836669058e-06,
14719
+ "loss": 21.2821,
14720
+ "step": 20970
14721
+ },
14722
+ {
14723
+ "epoch": 0.38933233123867717,
14724
+ "grad_norm": 35.03125,
14725
+ "learning_rate": 9.939166840883015e-06,
14726
+ "loss": 21.2434,
14727
+ "step": 20980
14728
+ },
14729
+ {
14730
+ "epoch": 0.38951790432315697,
14731
+ "grad_norm": 35.71875,
14732
+ "learning_rate": 9.939137845096973e-06,
14733
+ "loss": 21.0523,
14734
+ "step": 20990
14735
+ },
14736
+ {
14737
+ "epoch": 0.3897034774076368,
14738
+ "grad_norm": 35.25,
14739
+ "learning_rate": 9.93910884931093e-06,
14740
+ "loss": 21.281,
14741
+ "step": 21000
14742
+ },
14743
+ {
14744
+ "epoch": 0.38988905049211664,
14745
+ "grad_norm": 35.46875,
14746
+ "learning_rate": 9.939079853524888e-06,
14747
+ "loss": 21.435,
14748
+ "step": 21010
14749
+ },
14750
+ {
14751
+ "epoch": 0.39007462357659645,
14752
+ "grad_norm": 33.46875,
14753
+ "learning_rate": 9.939050857738845e-06,
14754
+ "loss": 21.3981,
14755
+ "step": 21020
14756
+ },
14757
+ {
14758
+ "epoch": 0.39026019666107625,
14759
+ "grad_norm": 34.6875,
14760
+ "learning_rate": 9.939021861952804e-06,
14761
+ "loss": 21.0117,
14762
+ "step": 21030
14763
+ },
14764
+ {
14765
+ "epoch": 0.3904457697455561,
14766
+ "grad_norm": 35.40625,
14767
+ "learning_rate": 9.938992866166762e-06,
14768
+ "loss": 21.1832,
14769
+ "step": 21040
14770
+ },
14771
+ {
14772
+ "epoch": 0.3906313428300359,
14773
+ "grad_norm": 34.8125,
14774
+ "learning_rate": 9.938963870380717e-06,
14775
+ "loss": 21.2241,
14776
+ "step": 21050
14777
+ },
14778
+ {
14779
+ "epoch": 0.3908169159145158,
14780
+ "grad_norm": 33.0,
14781
+ "learning_rate": 9.938934874594676e-06,
14782
+ "loss": 21.5346,
14783
+ "step": 21060
14784
+ },
14785
+ {
14786
+ "epoch": 0.3910024889989956,
14787
+ "grad_norm": 34.6875,
14788
+ "learning_rate": 9.938905878808634e-06,
14789
+ "loss": 21.1235,
14790
+ "step": 21070
14791
+ },
14792
+ {
14793
+ "epoch": 0.3911880620834754,
14794
+ "grad_norm": 34.0625,
14795
+ "learning_rate": 9.938876883022591e-06,
14796
+ "loss": 21.1906,
14797
+ "step": 21080
14798
+ },
14799
+ {
14800
+ "epoch": 0.39137363516795526,
14801
+ "grad_norm": 33.40625,
14802
+ "learning_rate": 9.938847887236549e-06,
14803
+ "loss": 21.4451,
14804
+ "step": 21090
14805
+ },
14806
+ {
14807
+ "epoch": 0.39155920825243506,
14808
+ "grad_norm": 34.875,
14809
+ "learning_rate": 9.938818891450508e-06,
14810
+ "loss": 20.9447,
14811
+ "step": 21100
14812
+ },
14813
+ {
14814
+ "epoch": 0.39174478133691487,
14815
+ "grad_norm": 35.71875,
14816
+ "learning_rate": 9.938789895664463e-06,
14817
+ "loss": 21.0956,
14818
+ "step": 21110
14819
+ },
14820
+ {
14821
+ "epoch": 0.39193035442139473,
14822
+ "grad_norm": 33.125,
14823
+ "learning_rate": 9.93876089987842e-06,
14824
+ "loss": 20.8059,
14825
+ "step": 21120
14826
+ },
14827
+ {
14828
+ "epoch": 0.39211592750587454,
14829
+ "grad_norm": 32.8125,
14830
+ "learning_rate": 9.93873190409238e-06,
14831
+ "loss": 21.3003,
14832
+ "step": 21130
14833
+ },
14834
+ {
14835
+ "epoch": 0.3923015005903544,
14836
+ "grad_norm": 34.53125,
14837
+ "learning_rate": 9.938702908306337e-06,
14838
+ "loss": 21.3167,
14839
+ "step": 21140
14840
+ },
14841
+ {
14842
+ "epoch": 0.3924870736748342,
14843
+ "grad_norm": 35.03125,
14844
+ "learning_rate": 9.938673912520295e-06,
14845
+ "loss": 21.307,
14846
+ "step": 21150
14847
+ },
14848
+ {
14849
+ "epoch": 0.392672646759314,
14850
+ "grad_norm": 35.71875,
14851
+ "learning_rate": 9.938644916734252e-06,
14852
+ "loss": 21.1579,
14853
+ "step": 21160
14854
+ },
14855
+ {
14856
+ "epoch": 0.3928582198437939,
14857
+ "grad_norm": 34.59375,
14858
+ "learning_rate": 9.93861592094821e-06,
14859
+ "loss": 21.4428,
14860
+ "step": 21170
14861
+ },
14862
+ {
14863
+ "epoch": 0.3930437929282737,
14864
+ "grad_norm": 33.59375,
14865
+ "learning_rate": 9.938586925162167e-06,
14866
+ "loss": 21.4122,
14867
+ "step": 21180
14868
+ },
14869
+ {
14870
+ "epoch": 0.3932293660127535,
14871
+ "grad_norm": 36.0625,
14872
+ "learning_rate": 9.938557929376124e-06,
14873
+ "loss": 21.1277,
14874
+ "step": 21190
14875
+ },
14876
+ {
14877
+ "epoch": 0.39341493909723335,
14878
+ "grad_norm": 36.03125,
14879
+ "learning_rate": 9.938528933590082e-06,
14880
+ "loss": 21.1074,
14881
+ "step": 21200
14882
+ },
14883
+ {
14884
+ "epoch": 0.39360051218171316,
14885
+ "grad_norm": 32.875,
14886
+ "learning_rate": 9.938499937804039e-06,
14887
+ "loss": 21.2574,
14888
+ "step": 21210
14889
+ },
14890
+ {
14891
+ "epoch": 0.39378608526619296,
14892
+ "grad_norm": 33.65625,
14893
+ "learning_rate": 9.938470942017997e-06,
14894
+ "loss": 21.3344,
14895
+ "step": 21220
14896
+ },
14897
+ {
14898
+ "epoch": 0.3939716583506728,
14899
+ "grad_norm": 33.46875,
14900
+ "learning_rate": 9.938441946231954e-06,
14901
+ "loss": 21.6166,
14902
+ "step": 21230
14903
+ },
14904
+ {
14905
+ "epoch": 0.39415723143515263,
14906
+ "grad_norm": 32.65625,
14907
+ "learning_rate": 9.938412950445913e-06,
14908
+ "loss": 21.6185,
14909
+ "step": 21240
14910
+ },
14911
+ {
14912
+ "epoch": 0.3943428045196325,
14913
+ "grad_norm": 36.875,
14914
+ "learning_rate": 9.93838395465987e-06,
14915
+ "loss": 21.2259,
14916
+ "step": 21250
14917
+ },
14918
+ {
14919
+ "epoch": 0.3945283776041123,
14920
+ "grad_norm": 34.5,
14921
+ "learning_rate": 9.938354958873828e-06,
14922
+ "loss": 21.6313,
14923
+ "step": 21260
14924
+ },
14925
+ {
14926
+ "epoch": 0.3947139506885921,
14927
+ "grad_norm": 32.90625,
14928
+ "learning_rate": 9.938325963087785e-06,
14929
+ "loss": 21.2726,
14930
+ "step": 21270
14931
+ },
14932
+ {
14933
+ "epoch": 0.39489952377307197,
14934
+ "grad_norm": 35.03125,
14935
+ "learning_rate": 9.938296967301743e-06,
14936
+ "loss": 21.2475,
14937
+ "step": 21280
14938
+ },
14939
+ {
14940
+ "epoch": 0.3950850968575518,
14941
+ "grad_norm": 37.65625,
14942
+ "learning_rate": 9.9382679715157e-06,
14943
+ "loss": 21.1077,
14944
+ "step": 21290
14945
+ },
14946
+ {
14947
+ "epoch": 0.3952706699420316,
14948
+ "grad_norm": 35.0,
14949
+ "learning_rate": 9.938238975729657e-06,
14950
+ "loss": 21.5344,
14951
+ "step": 21300
14952
+ },
14953
+ {
14954
+ "epoch": 0.39545624302651144,
14955
+ "grad_norm": 35.78125,
14956
+ "learning_rate": 9.938209979943617e-06,
14957
+ "loss": 21.3853,
14958
+ "step": 21310
14959
+ },
14960
+ {
14961
+ "epoch": 0.39564181611099125,
14962
+ "grad_norm": 34.96875,
14963
+ "learning_rate": 9.938180984157572e-06,
14964
+ "loss": 21.4214,
14965
+ "step": 21320
14966
+ },
14967
+ {
14968
+ "epoch": 0.3958273891954711,
14969
+ "grad_norm": 34.8125,
14970
+ "learning_rate": 9.93815198837153e-06,
14971
+ "loss": 21.111,
14972
+ "step": 21330
14973
+ },
14974
+ {
14975
+ "epoch": 0.3960129622799509,
14976
+ "grad_norm": 34.8125,
14977
+ "learning_rate": 9.938122992585489e-06,
14978
+ "loss": 21.2438,
14979
+ "step": 21340
14980
+ },
14981
+ {
14982
+ "epoch": 0.3961985353644307,
14983
+ "grad_norm": 34.84375,
14984
+ "learning_rate": 9.938093996799446e-06,
14985
+ "loss": 20.9801,
14986
+ "step": 21350
14987
+ },
14988
+ {
14989
+ "epoch": 0.3963841084489106,
14990
+ "grad_norm": 36.125,
14991
+ "learning_rate": 9.938065001013404e-06,
14992
+ "loss": 20.6129,
14993
+ "step": 21360
14994
+ },
14995
+ {
14996
+ "epoch": 0.3965696815333904,
14997
+ "grad_norm": 32.96875,
14998
+ "learning_rate": 9.938036005227361e-06,
14999
+ "loss": 21.0012,
15000
+ "step": 21370
15001
+ },
15002
+ {
15003
+ "epoch": 0.3967552546178702,
15004
+ "grad_norm": 33.9375,
15005
+ "learning_rate": 9.938007009441318e-06,
15006
+ "loss": 21.3073,
15007
+ "step": 21380
15008
+ },
15009
+ {
15010
+ "epoch": 0.39694082770235006,
15011
+ "grad_norm": 37.4375,
15012
+ "learning_rate": 9.937978013655276e-06,
15013
+ "loss": 21.4929,
15014
+ "step": 21390
15015
+ },
15016
+ {
15017
+ "epoch": 0.39712640078682987,
15018
+ "grad_norm": 33.4375,
15019
+ "learning_rate": 9.937949017869233e-06,
15020
+ "loss": 21.2244,
15021
+ "step": 21400
15022
+ },
15023
+ {
15024
+ "epoch": 0.39731197387130973,
15025
+ "grad_norm": 35.5625,
15026
+ "learning_rate": 9.937920022083192e-06,
15027
+ "loss": 21.2312,
15028
+ "step": 21410
15029
+ },
15030
+ {
15031
+ "epoch": 0.39749754695578954,
15032
+ "grad_norm": 35.375,
15033
+ "learning_rate": 9.93789102629715e-06,
15034
+ "loss": 21.3317,
15035
+ "step": 21420
15036
+ },
15037
+ {
15038
+ "epoch": 0.39768312004026934,
15039
+ "grad_norm": 35.375,
15040
+ "learning_rate": 9.937862030511105e-06,
15041
+ "loss": 21.4387,
15042
+ "step": 21430
15043
+ },
15044
+ {
15045
+ "epoch": 0.3978686931247492,
15046
+ "grad_norm": 35.59375,
15047
+ "learning_rate": 9.937833034725065e-06,
15048
+ "loss": 21.0185,
15049
+ "step": 21440
15050
+ },
15051
+ {
15052
+ "epoch": 0.398054266209229,
15053
+ "grad_norm": 34.5,
15054
+ "learning_rate": 9.937804038939022e-06,
15055
+ "loss": 21.4504,
15056
+ "step": 21450
15057
+ },
15058
+ {
15059
+ "epoch": 0.3982398392937088,
15060
+ "grad_norm": 34.53125,
15061
+ "learning_rate": 9.93777504315298e-06,
15062
+ "loss": 21.5877,
15063
+ "step": 21460
15064
+ },
15065
+ {
15066
+ "epoch": 0.3984254123781887,
15067
+ "grad_norm": 34.75,
15068
+ "learning_rate": 9.937746047366937e-06,
15069
+ "loss": 21.2731,
15070
+ "step": 21470
15071
+ },
15072
+ {
15073
+ "epoch": 0.3986109854626685,
15074
+ "grad_norm": 34.59375,
15075
+ "learning_rate": 9.937717051580894e-06,
15076
+ "loss": 21.3098,
15077
+ "step": 21480
15078
+ },
15079
+ {
15080
+ "epoch": 0.3987965585471483,
15081
+ "grad_norm": 36.78125,
15082
+ "learning_rate": 9.937688055794852e-06,
15083
+ "loss": 21.2444,
15084
+ "step": 21490
15085
+ },
15086
+ {
15087
+ "epoch": 0.39898213163162816,
15088
+ "grad_norm": 35.53125,
15089
+ "learning_rate": 9.937659060008809e-06,
15090
+ "loss": 21.0413,
15091
+ "step": 21500
15092
+ },
15093
+ {
15094
+ "epoch": 0.39916770471610796,
15095
+ "grad_norm": 35.75,
15096
+ "learning_rate": 9.937630064222768e-06,
15097
+ "loss": 21.0791,
15098
+ "step": 21510
15099
+ },
15100
+ {
15101
+ "epoch": 0.3993532778005878,
15102
+ "grad_norm": 34.1875,
15103
+ "learning_rate": 9.937601068436726e-06,
15104
+ "loss": 21.4088,
15105
+ "step": 21520
15106
+ },
15107
+ {
15108
+ "epoch": 0.39953885088506763,
15109
+ "grad_norm": 35.78125,
15110
+ "learning_rate": 9.937572072650683e-06,
15111
+ "loss": 21.4432,
15112
+ "step": 21530
15113
+ },
15114
+ {
15115
+ "epoch": 0.39972442396954744,
15116
+ "grad_norm": 34.53125,
15117
+ "learning_rate": 9.93754307686464e-06,
15118
+ "loss": 20.6209,
15119
+ "step": 21540
15120
+ },
15121
+ {
15122
+ "epoch": 0.3999099970540273,
15123
+ "grad_norm": 35.5625,
15124
+ "learning_rate": 9.937514081078598e-06,
15125
+ "loss": 20.9569,
15126
+ "step": 21550
15127
+ },
15128
+ {
15129
+ "epoch": 0.4000955701385071,
15130
+ "grad_norm": 35.59375,
15131
+ "learning_rate": 9.937485085292555e-06,
15132
+ "loss": 21.1618,
15133
+ "step": 21560
15134
+ },
15135
+ {
15136
+ "epoch": 0.4002811432229869,
15137
+ "grad_norm": 34.65625,
15138
+ "learning_rate": 9.937456089506513e-06,
15139
+ "loss": 21.1128,
15140
+ "step": 21570
15141
+ },
15142
+ {
15143
+ "epoch": 0.4004667163074668,
15144
+ "grad_norm": 34.28125,
15145
+ "learning_rate": 9.937427093720472e-06,
15146
+ "loss": 21.432,
15147
+ "step": 21580
15148
+ },
15149
+ {
15150
+ "epoch": 0.4006522893919466,
15151
+ "grad_norm": 33.9375,
15152
+ "learning_rate": 9.937398097934427e-06,
15153
+ "loss": 20.9126,
15154
+ "step": 21590
15155
+ },
15156
+ {
15157
+ "epoch": 0.40083786247642644,
15158
+ "grad_norm": 36.15625,
15159
+ "learning_rate": 9.937369102148385e-06,
15160
+ "loss": 21.2893,
15161
+ "step": 21600
15162
+ },
15163
+ {
15164
+ "epoch": 0.40102343556090625,
15165
+ "grad_norm": 36.09375,
15166
+ "learning_rate": 9.937340106362344e-06,
15167
+ "loss": 20.9199,
15168
+ "step": 21610
15169
+ },
15170
+ {
15171
+ "epoch": 0.40120900864538606,
15172
+ "grad_norm": 36.375,
15173
+ "learning_rate": 9.937311110576301e-06,
15174
+ "loss": 21.4188,
15175
+ "step": 21620
15176
+ },
15177
+ {
15178
+ "epoch": 0.4013945817298659,
15179
+ "grad_norm": 35.90625,
15180
+ "learning_rate": 9.937282114790259e-06,
15181
+ "loss": 21.0579,
15182
+ "step": 21630
15183
+ },
15184
+ {
15185
+ "epoch": 0.4015801548143457,
15186
+ "grad_norm": 37.46875,
15187
+ "learning_rate": 9.937253119004216e-06,
15188
+ "loss": 21.2522,
15189
+ "step": 21640
15190
+ },
15191
+ {
15192
+ "epoch": 0.40176572789882553,
15193
+ "grad_norm": 35.0625,
15194
+ "learning_rate": 9.937224123218174e-06,
15195
+ "loss": 21.4908,
15196
+ "step": 21650
15197
+ },
15198
+ {
15199
+ "epoch": 0.4019513009833054,
15200
+ "grad_norm": 33.1875,
15201
+ "learning_rate": 9.937195127432131e-06,
15202
+ "loss": 21.2079,
15203
+ "step": 21660
15204
+ },
15205
+ {
15206
+ "epoch": 0.4021368740677852,
15207
+ "grad_norm": 36.125,
15208
+ "learning_rate": 9.937166131646088e-06,
15209
+ "loss": 21.2321,
15210
+ "step": 21670
15211
+ },
15212
+ {
15213
+ "epoch": 0.40232244715226506,
15214
+ "grad_norm": 33.0,
15215
+ "learning_rate": 9.937137135860046e-06,
15216
+ "loss": 20.916,
15217
+ "step": 21680
15218
+ },
15219
+ {
15220
+ "epoch": 0.40250802023674487,
15221
+ "grad_norm": 34.125,
15222
+ "learning_rate": 9.937108140074005e-06,
15223
+ "loss": 21.2056,
15224
+ "step": 21690
15225
+ },
15226
+ {
15227
+ "epoch": 0.4026935933212247,
15228
+ "grad_norm": 34.78125,
15229
+ "learning_rate": 9.93707914428796e-06,
15230
+ "loss": 20.9596,
15231
+ "step": 21700
15232
+ },
15233
+ {
15234
+ "epoch": 0.40287916640570454,
15235
+ "grad_norm": 33.90625,
15236
+ "learning_rate": 9.93705014850192e-06,
15237
+ "loss": 20.9652,
15238
+ "step": 21710
15239
+ },
15240
+ {
15241
+ "epoch": 0.40306473949018434,
15242
+ "grad_norm": 35.9375,
15243
+ "learning_rate": 9.937021152715877e-06,
15244
+ "loss": 21.0897,
15245
+ "step": 21720
15246
+ },
15247
+ {
15248
+ "epoch": 0.40325031257466415,
15249
+ "grad_norm": 34.875,
15250
+ "learning_rate": 9.936992156929834e-06,
15251
+ "loss": 20.8345,
15252
+ "step": 21730
15253
+ },
15254
+ {
15255
+ "epoch": 0.403435885659144,
15256
+ "grad_norm": 35.9375,
15257
+ "learning_rate": 9.936963161143792e-06,
15258
+ "loss": 21.1519,
15259
+ "step": 21740
15260
+ },
15261
+ {
15262
+ "epoch": 0.4036214587436238,
15263
+ "grad_norm": 33.15625,
15264
+ "learning_rate": 9.93693416535775e-06,
15265
+ "loss": 20.8396,
15266
+ "step": 21750
15267
+ },
15268
+ {
15269
+ "epoch": 0.4038070318281036,
15270
+ "grad_norm": 34.3125,
15271
+ "learning_rate": 9.936905169571707e-06,
15272
+ "loss": 21.4036,
15273
+ "step": 21760
15274
+ },
15275
+ {
15276
+ "epoch": 0.4039926049125835,
15277
+ "grad_norm": 35.21875,
15278
+ "learning_rate": 9.936876173785664e-06,
15279
+ "loss": 21.1442,
15280
+ "step": 21770
15281
+ },
15282
+ {
15283
+ "epoch": 0.4041781779970633,
15284
+ "grad_norm": 35.40625,
15285
+ "learning_rate": 9.936847177999622e-06,
15286
+ "loss": 20.9068,
15287
+ "step": 21780
15288
+ },
15289
+ {
15290
+ "epoch": 0.40436375108154315,
15291
+ "grad_norm": 36.1875,
15292
+ "learning_rate": 9.93681818221358e-06,
15293
+ "loss": 21.2187,
15294
+ "step": 21790
15295
+ },
15296
+ {
15297
+ "epoch": 0.40454932416602296,
15298
+ "grad_norm": 35.9375,
15299
+ "learning_rate": 9.936789186427536e-06,
15300
+ "loss": 21.6709,
15301
+ "step": 21800
15302
+ },
15303
+ {
15304
+ "epoch": 0.40473489725050277,
15305
+ "grad_norm": 35.65625,
15306
+ "learning_rate": 9.936760190641494e-06,
15307
+ "loss": 20.8711,
15308
+ "step": 21810
15309
+ },
15310
+ {
15311
+ "epoch": 0.40492047033498263,
15312
+ "grad_norm": 33.875,
15313
+ "learning_rate": 9.936731194855453e-06,
15314
+ "loss": 21.3662,
15315
+ "step": 21820
15316
+ },
15317
+ {
15318
+ "epoch": 0.40510604341946244,
15319
+ "grad_norm": 36.71875,
15320
+ "learning_rate": 9.93670219906941e-06,
15321
+ "loss": 21.4065,
15322
+ "step": 21830
15323
+ },
15324
+ {
15325
+ "epoch": 0.40529161650394224,
15326
+ "grad_norm": 33.0,
15327
+ "learning_rate": 9.936673203283368e-06,
15328
+ "loss": 21.2187,
15329
+ "step": 21840
15330
+ },
15331
+ {
15332
+ "epoch": 0.4054771895884221,
15333
+ "grad_norm": 34.78125,
15334
+ "learning_rate": 9.936644207497325e-06,
15335
+ "loss": 21.1473,
15336
+ "step": 21850
15337
+ },
15338
+ {
15339
+ "epoch": 0.4056627626729019,
15340
+ "grad_norm": 34.78125,
15341
+ "learning_rate": 9.936615211711282e-06,
15342
+ "loss": 20.9902,
15343
+ "step": 21860
15344
+ },
15345
+ {
15346
+ "epoch": 0.4058483357573818,
15347
+ "grad_norm": 35.34375,
15348
+ "learning_rate": 9.93658621592524e-06,
15349
+ "loss": 21.4328,
15350
+ "step": 21870
15351
+ },
15352
+ {
15353
+ "epoch": 0.4060339088418616,
15354
+ "grad_norm": 35.25,
15355
+ "learning_rate": 9.936557220139197e-06,
15356
+ "loss": 21.0445,
15357
+ "step": 21880
15358
+ },
15359
+ {
15360
+ "epoch": 0.4062194819263414,
15361
+ "grad_norm": 35.375,
15362
+ "learning_rate": 9.936528224353156e-06,
15363
+ "loss": 21.3687,
15364
+ "step": 21890
15365
+ },
15366
+ {
15367
+ "epoch": 0.40640505501082125,
15368
+ "grad_norm": 35.8125,
15369
+ "learning_rate": 9.936499228567114e-06,
15370
+ "loss": 20.9341,
15371
+ "step": 21900
15372
+ },
15373
+ {
15374
+ "epoch": 0.40659062809530105,
15375
+ "grad_norm": 33.6875,
15376
+ "learning_rate": 9.93647023278107e-06,
15377
+ "loss": 21.1997,
15378
+ "step": 21910
15379
+ },
15380
+ {
15381
+ "epoch": 0.40677620117978086,
15382
+ "grad_norm": 33.6875,
15383
+ "learning_rate": 9.936441236995029e-06,
15384
+ "loss": 21.0207,
15385
+ "step": 21920
15386
+ },
15387
+ {
15388
+ "epoch": 0.4069617742642607,
15389
+ "grad_norm": 34.15625,
15390
+ "learning_rate": 9.936412241208986e-06,
15391
+ "loss": 21.2807,
15392
+ "step": 21930
15393
+ },
15394
+ {
15395
+ "epoch": 0.40714734734874053,
15396
+ "grad_norm": 36.78125,
15397
+ "learning_rate": 9.936383245422943e-06,
15398
+ "loss": 21.2499,
15399
+ "step": 21940
15400
+ },
15401
+ {
15402
+ "epoch": 0.4073329204332204,
15403
+ "grad_norm": 34.3125,
15404
+ "learning_rate": 9.9363542496369e-06,
15405
+ "loss": 21.1466,
15406
+ "step": 21950
15407
+ },
15408
+ {
15409
+ "epoch": 0.4075184935177002,
15410
+ "grad_norm": 35.125,
15411
+ "learning_rate": 9.93632525385086e-06,
15412
+ "loss": 20.8732,
15413
+ "step": 21960
15414
+ },
15415
+ {
15416
+ "epoch": 0.40770406660218,
15417
+ "grad_norm": 34.5,
15418
+ "learning_rate": 9.936296258064816e-06,
15419
+ "loss": 21.2597,
15420
+ "step": 21970
15421
+ },
15422
+ {
15423
+ "epoch": 0.40788963968665987,
15424
+ "grad_norm": 33.40625,
15425
+ "learning_rate": 9.936267262278773e-06,
15426
+ "loss": 21.0341,
15427
+ "step": 21980
15428
+ },
15429
+ {
15430
+ "epoch": 0.4080752127711397,
15431
+ "grad_norm": 34.90625,
15432
+ "learning_rate": 9.936238266492732e-06,
15433
+ "loss": 20.9766,
15434
+ "step": 21990
15435
+ },
15436
+ {
15437
+ "epoch": 0.4082607858556195,
15438
+ "grad_norm": 33.9375,
15439
+ "learning_rate": 9.93620927070669e-06,
15440
+ "loss": 21.0238,
15441
+ "step": 22000
15442
+ },
15443
+ {
15444
+ "epoch": 0.40844635894009934,
15445
+ "grad_norm": 36.1875,
15446
+ "learning_rate": 9.936180274920647e-06,
15447
+ "loss": 21.1887,
15448
+ "step": 22010
15449
+ },
15450
+ {
15451
+ "epoch": 0.40863193202457915,
15452
+ "grad_norm": 35.78125,
15453
+ "learning_rate": 9.936151279134604e-06,
15454
+ "loss": 21.0594,
15455
+ "step": 22020
15456
+ },
15457
+ {
15458
+ "epoch": 0.408817505109059,
15459
+ "grad_norm": 36.59375,
15460
+ "learning_rate": 9.936122283348562e-06,
15461
+ "loss": 21.4135,
15462
+ "step": 22030
15463
+ },
15464
+ {
15465
+ "epoch": 0.4090030781935388,
15466
+ "grad_norm": 35.03125,
15467
+ "learning_rate": 9.93609328756252e-06,
15468
+ "loss": 21.2114,
15469
+ "step": 22040
15470
+ },
15471
+ {
15472
+ "epoch": 0.4091886512780186,
15473
+ "grad_norm": 38.0,
15474
+ "learning_rate": 9.936064291776477e-06,
15475
+ "loss": 21.0613,
15476
+ "step": 22050
15477
+ },
15478
+ {
15479
+ "epoch": 0.4093742243624985,
15480
+ "grad_norm": 38.5625,
15481
+ "learning_rate": 9.936035295990436e-06,
15482
+ "loss": 21.36,
15483
+ "step": 22060
15484
+ },
15485
+ {
15486
+ "epoch": 0.4095597974469783,
15487
+ "grad_norm": 35.90625,
15488
+ "learning_rate": 9.936006300204391e-06,
15489
+ "loss": 21.3268,
15490
+ "step": 22070
15491
+ },
15492
+ {
15493
+ "epoch": 0.4097453705314581,
15494
+ "grad_norm": 37.0625,
15495
+ "learning_rate": 9.935977304418349e-06,
15496
+ "loss": 21.247,
15497
+ "step": 22080
15498
+ },
15499
+ {
15500
+ "epoch": 0.40993094361593796,
15501
+ "grad_norm": 35.8125,
15502
+ "learning_rate": 9.935948308632308e-06,
15503
+ "loss": 21.2327,
15504
+ "step": 22090
15505
+ },
15506
+ {
15507
+ "epoch": 0.41011651670041777,
15508
+ "grad_norm": 34.53125,
15509
+ "learning_rate": 9.935919312846265e-06,
15510
+ "loss": 21.3096,
15511
+ "step": 22100
15512
+ },
15513
+ {
15514
+ "epoch": 0.4103020897848976,
15515
+ "grad_norm": 35.0,
15516
+ "learning_rate": 9.935890317060223e-06,
15517
+ "loss": 21.2457,
15518
+ "step": 22110
15519
+ },
15520
+ {
15521
+ "epoch": 0.41048766286937743,
15522
+ "grad_norm": 34.65625,
15523
+ "learning_rate": 9.93586132127418e-06,
15524
+ "loss": 21.1049,
15525
+ "step": 22120
15526
+ },
15527
+ {
15528
+ "epoch": 0.41067323595385724,
15529
+ "grad_norm": 37.65625,
15530
+ "learning_rate": 9.935832325488138e-06,
15531
+ "loss": 21.487,
15532
+ "step": 22130
15533
+ },
15534
+ {
15535
+ "epoch": 0.4108588090383371,
15536
+ "grad_norm": 35.125,
15537
+ "learning_rate": 9.935803329702095e-06,
15538
+ "loss": 21.1487,
15539
+ "step": 22140
15540
+ },
15541
+ {
15542
+ "epoch": 0.4110443821228169,
15543
+ "grad_norm": 34.90625,
15544
+ "learning_rate": 9.935774333916052e-06,
15545
+ "loss": 20.6907,
15546
+ "step": 22150
15547
+ },
15548
+ {
15549
+ "epoch": 0.4112299552072967,
15550
+ "grad_norm": 34.1875,
15551
+ "learning_rate": 9.935745338130011e-06,
15552
+ "loss": 21.3479,
15553
+ "step": 22160
15554
+ },
15555
+ {
15556
+ "epoch": 0.4114155282917766,
15557
+ "grad_norm": 36.125,
15558
+ "learning_rate": 9.935716342343969e-06,
15559
+ "loss": 20.9197,
15560
+ "step": 22170
15561
+ },
15562
+ {
15563
+ "epoch": 0.4116011013762564,
15564
+ "grad_norm": 33.6875,
15565
+ "learning_rate": 9.935687346557925e-06,
15566
+ "loss": 21.1271,
15567
+ "step": 22180
15568
+ },
15569
+ {
15570
+ "epoch": 0.4117866744607362,
15571
+ "grad_norm": 34.4375,
15572
+ "learning_rate": 9.935658350771884e-06,
15573
+ "loss": 21.4446,
15574
+ "step": 22190
15575
+ },
15576
+ {
15577
+ "epoch": 0.41197224754521605,
15578
+ "grad_norm": 33.125,
15579
+ "learning_rate": 9.935629354985841e-06,
15580
+ "loss": 20.7713,
15581
+ "step": 22200
15582
+ },
15583
+ {
15584
+ "epoch": 0.41215782062969586,
15585
+ "grad_norm": 34.78125,
15586
+ "learning_rate": 9.935600359199798e-06,
15587
+ "loss": 21.1482,
15588
+ "step": 22210
15589
+ },
15590
+ {
15591
+ "epoch": 0.4123433937141757,
15592
+ "grad_norm": 35.09375,
15593
+ "learning_rate": 9.935571363413756e-06,
15594
+ "loss": 20.867,
15595
+ "step": 22220
15596
+ },
15597
+ {
15598
+ "epoch": 0.4125289667986555,
15599
+ "grad_norm": 34.1875,
15600
+ "learning_rate": 9.935542367627713e-06,
15601
+ "loss": 21.1484,
15602
+ "step": 22230
15603
+ },
15604
+ {
15605
+ "epoch": 0.41271453988313533,
15606
+ "grad_norm": 34.25,
15607
+ "learning_rate": 9.93551337184167e-06,
15608
+ "loss": 21.3381,
15609
+ "step": 22240
15610
+ },
15611
+ {
15612
+ "epoch": 0.4129001129676152,
15613
+ "grad_norm": 34.84375,
15614
+ "learning_rate": 9.935484376055628e-06,
15615
+ "loss": 21.6229,
15616
+ "step": 22250
15617
+ },
15618
+ {
15619
+ "epoch": 0.413085686052095,
15620
+ "grad_norm": 34.21875,
15621
+ "learning_rate": 9.935455380269586e-06,
15622
+ "loss": 20.8437,
15623
+ "step": 22260
15624
+ },
15625
+ {
15626
+ "epoch": 0.4132712591365748,
15627
+ "grad_norm": 32.3125,
15628
+ "learning_rate": 9.935426384483545e-06,
15629
+ "loss": 21.2779,
15630
+ "step": 22270
15631
+ },
15632
+ {
15633
+ "epoch": 0.41345683222105467,
15634
+ "grad_norm": 34.59375,
15635
+ "learning_rate": 9.935397388697502e-06,
15636
+ "loss": 21.1612,
15637
+ "step": 22280
15638
+ },
15639
+ {
15640
+ "epoch": 0.4136424053055345,
15641
+ "grad_norm": 33.78125,
15642
+ "learning_rate": 9.93536839291146e-06,
15643
+ "loss": 20.7929,
15644
+ "step": 22290
15645
+ },
15646
+ {
15647
+ "epoch": 0.41382797839001434,
15648
+ "grad_norm": 33.5625,
15649
+ "learning_rate": 9.935339397125417e-06,
15650
+ "loss": 21.1799,
15651
+ "step": 22300
15652
+ },
15653
+ {
15654
+ "epoch": 0.41401355147449415,
15655
+ "grad_norm": 34.75,
15656
+ "learning_rate": 9.935310401339374e-06,
15657
+ "loss": 20.9251,
15658
+ "step": 22310
15659
+ },
15660
+ {
15661
+ "epoch": 0.41419912455897395,
15662
+ "grad_norm": 36.5,
15663
+ "learning_rate": 9.935281405553332e-06,
15664
+ "loss": 20.9843,
15665
+ "step": 22320
15666
+ },
15667
+ {
15668
+ "epoch": 0.4143846976434538,
15669
+ "grad_norm": 33.1875,
15670
+ "learning_rate": 9.935252409767289e-06,
15671
+ "loss": 20.8824,
15672
+ "step": 22330
15673
+ },
15674
+ {
15675
+ "epoch": 0.4145702707279336,
15676
+ "grad_norm": 35.90625,
15677
+ "learning_rate": 9.935223413981246e-06,
15678
+ "loss": 21.3414,
15679
+ "step": 22340
15680
+ },
15681
+ {
15682
+ "epoch": 0.4147558438124134,
15683
+ "grad_norm": 36.375,
15684
+ "learning_rate": 9.935194418195204e-06,
15685
+ "loss": 20.9215,
15686
+ "step": 22350
15687
+ },
15688
+ {
15689
+ "epoch": 0.4149414168968933,
15690
+ "grad_norm": 35.6875,
15691
+ "learning_rate": 9.935165422409161e-06,
15692
+ "loss": 21.0936,
15693
+ "step": 22360
15694
+ },
15695
+ {
15696
+ "epoch": 0.4151269899813731,
15697
+ "grad_norm": 33.8125,
15698
+ "learning_rate": 9.93513642662312e-06,
15699
+ "loss": 20.9315,
15700
+ "step": 22370
15701
+ },
15702
+ {
15703
+ "epoch": 0.4153125630658529,
15704
+ "grad_norm": 34.4375,
15705
+ "learning_rate": 9.935107430837078e-06,
15706
+ "loss": 20.7623,
15707
+ "step": 22380
15708
+ },
15709
+ {
15710
+ "epoch": 0.41549813615033276,
15711
+ "grad_norm": 36.625,
15712
+ "learning_rate": 9.935078435051034e-06,
15713
+ "loss": 21.2462,
15714
+ "step": 22390
15715
+ },
15716
+ {
15717
+ "epoch": 0.41568370923481257,
15718
+ "grad_norm": 35.5,
15719
+ "learning_rate": 9.935049439264993e-06,
15720
+ "loss": 20.9974,
15721
+ "step": 22400
15722
+ },
15723
+ {
15724
+ "epoch": 0.41586928231929243,
15725
+ "grad_norm": 35.65625,
15726
+ "learning_rate": 9.93502044347895e-06,
15727
+ "loss": 21.6205,
15728
+ "step": 22410
15729
+ },
15730
+ {
15731
+ "epoch": 0.41605485540377224,
15732
+ "grad_norm": 33.625,
15733
+ "learning_rate": 9.934991447692907e-06,
15734
+ "loss": 20.8627,
15735
+ "step": 22420
15736
+ },
15737
+ {
15738
+ "epoch": 0.41624042848825205,
15739
+ "grad_norm": 35.125,
15740
+ "learning_rate": 9.934962451906865e-06,
15741
+ "loss": 21.0586,
15742
+ "step": 22430
15743
+ },
15744
+ {
15745
+ "epoch": 0.4164260015727319,
15746
+ "grad_norm": 36.3125,
15747
+ "learning_rate": 9.934933456120824e-06,
15748
+ "loss": 20.8756,
15749
+ "step": 22440
15750
+ },
15751
+ {
15752
+ "epoch": 0.4166115746572117,
15753
+ "grad_norm": 34.21875,
15754
+ "learning_rate": 9.93490446033478e-06,
15755
+ "loss": 21.1442,
15756
+ "step": 22450
15757
+ },
15758
+ {
15759
+ "epoch": 0.4167971477416915,
15760
+ "grad_norm": 34.8125,
15761
+ "learning_rate": 9.934875464548737e-06,
15762
+ "loss": 21.3179,
15763
+ "step": 22460
15764
+ },
15765
+ {
15766
+ "epoch": 0.4169827208261714,
15767
+ "grad_norm": 35.0625,
15768
+ "learning_rate": 9.934846468762696e-06,
15769
+ "loss": 21.1509,
15770
+ "step": 22470
15771
+ },
15772
+ {
15773
+ "epoch": 0.4171682939106512,
15774
+ "grad_norm": 35.09375,
15775
+ "learning_rate": 9.934817472976654e-06,
15776
+ "loss": 20.9087,
15777
+ "step": 22480
15778
+ },
15779
+ {
15780
+ "epoch": 0.41735386699513105,
15781
+ "grad_norm": 36.9375,
15782
+ "learning_rate": 9.934788477190611e-06,
15783
+ "loss": 21.0097,
15784
+ "step": 22490
15785
+ },
15786
+ {
15787
+ "epoch": 0.41753944007961086,
15788
+ "grad_norm": 37.125,
15789
+ "learning_rate": 9.934759481404568e-06,
15790
+ "loss": 21.1743,
15791
+ "step": 22500
15792
+ },
15793
+ {
15794
+ "epoch": 0.41772501316409066,
15795
+ "grad_norm": 34.4375,
15796
+ "learning_rate": 9.934730485618526e-06,
15797
+ "loss": 21.2852,
15798
+ "step": 22510
15799
+ },
15800
+ {
15801
+ "epoch": 0.4179105862485705,
15802
+ "grad_norm": 32.96875,
15803
+ "learning_rate": 9.934701489832483e-06,
15804
+ "loss": 20.5226,
15805
+ "step": 22520
15806
+ },
15807
+ {
15808
+ "epoch": 0.41809615933305033,
15809
+ "grad_norm": 33.6875,
15810
+ "learning_rate": 9.93467249404644e-06,
15811
+ "loss": 21.4159,
15812
+ "step": 22530
15813
+ },
15814
+ {
15815
+ "epoch": 0.41828173241753014,
15816
+ "grad_norm": 34.40625,
15817
+ "learning_rate": 9.9346434982604e-06,
15818
+ "loss": 20.9896,
15819
+ "step": 22540
15820
+ },
15821
+ {
15822
+ "epoch": 0.41846730550201,
15823
+ "grad_norm": 33.9375,
15824
+ "learning_rate": 9.934614502474355e-06,
15825
+ "loss": 21.1083,
15826
+ "step": 22550
15827
+ },
15828
+ {
15829
+ "epoch": 0.4186528785864898,
15830
+ "grad_norm": 34.34375,
15831
+ "learning_rate": 9.934585506688313e-06,
15832
+ "loss": 21.1427,
15833
+ "step": 22560
15834
+ },
15835
+ {
15836
+ "epoch": 0.41883845167096967,
15837
+ "grad_norm": 35.53125,
15838
+ "learning_rate": 9.934556510902272e-06,
15839
+ "loss": 21.1074,
15840
+ "step": 22570
15841
+ },
15842
+ {
15843
+ "epoch": 0.4190240247554495,
15844
+ "grad_norm": 36.4375,
15845
+ "learning_rate": 9.93452751511623e-06,
15846
+ "loss": 21.1957,
15847
+ "step": 22580
15848
+ },
15849
+ {
15850
+ "epoch": 0.4192095978399293,
15851
+ "grad_norm": 35.71875,
15852
+ "learning_rate": 9.934498519330187e-06,
15853
+ "loss": 21.3737,
15854
+ "step": 22590
15855
+ },
15856
+ {
15857
+ "epoch": 0.41939517092440914,
15858
+ "grad_norm": 35.28125,
15859
+ "learning_rate": 9.934469523544144e-06,
15860
+ "loss": 21.1098,
15861
+ "step": 22600
15862
+ },
15863
+ {
15864
+ "epoch": 0.41958074400888895,
15865
+ "grad_norm": 34.84375,
15866
+ "learning_rate": 9.934440527758102e-06,
15867
+ "loss": 21.206,
15868
+ "step": 22610
15869
+ },
15870
+ {
15871
+ "epoch": 0.41976631709336876,
15872
+ "grad_norm": 35.03125,
15873
+ "learning_rate": 9.934411531972059e-06,
15874
+ "loss": 21.6308,
15875
+ "step": 22620
15876
+ },
15877
+ {
15878
+ "epoch": 0.4199518901778486,
15879
+ "grad_norm": 34.8125,
15880
+ "learning_rate": 9.934382536186016e-06,
15881
+ "loss": 20.8924,
15882
+ "step": 22630
15883
+ },
15884
+ {
15885
+ "epoch": 0.4201374632623284,
15886
+ "grad_norm": 35.78125,
15887
+ "learning_rate": 9.934353540399975e-06,
15888
+ "loss": 20.9669,
15889
+ "step": 22640
15890
+ },
15891
+ {
15892
+ "epoch": 0.42032303634680823,
15893
+ "grad_norm": 35.46875,
15894
+ "learning_rate": 9.934324544613933e-06,
15895
+ "loss": 21.2526,
15896
+ "step": 22650
15897
+ },
15898
+ {
15899
+ "epoch": 0.4205086094312881,
15900
+ "grad_norm": 33.1875,
15901
+ "learning_rate": 9.934295548827889e-06,
15902
+ "loss": 20.8902,
15903
+ "step": 22660
15904
+ },
15905
+ {
15906
+ "epoch": 0.4206941825157679,
15907
+ "grad_norm": 35.8125,
15908
+ "learning_rate": 9.934266553041848e-06,
15909
+ "loss": 21.2633,
15910
+ "step": 22670
15911
+ },
15912
+ {
15913
+ "epoch": 0.42087975560024776,
15914
+ "grad_norm": 37.9375,
15915
+ "learning_rate": 9.934237557255805e-06,
15916
+ "loss": 21.1654,
15917
+ "step": 22680
15918
+ },
15919
+ {
15920
+ "epoch": 0.42106532868472757,
15921
+ "grad_norm": 34.78125,
15922
+ "learning_rate": 9.934208561469762e-06,
15923
+ "loss": 20.9407,
15924
+ "step": 22690
15925
+ },
15926
+ {
15927
+ "epoch": 0.4212509017692074,
15928
+ "grad_norm": 33.09375,
15929
+ "learning_rate": 9.93417956568372e-06,
15930
+ "loss": 21.1454,
15931
+ "step": 22700
15932
+ },
15933
+ {
15934
+ "epoch": 0.42143647485368724,
15935
+ "grad_norm": 34.03125,
15936
+ "learning_rate": 9.934150569897677e-06,
15937
+ "loss": 21.0258,
15938
+ "step": 22710
15939
+ },
15940
+ {
15941
+ "epoch": 0.42162204793816704,
15942
+ "grad_norm": 34.65625,
15943
+ "learning_rate": 9.934121574111635e-06,
15944
+ "loss": 21.0839,
15945
+ "step": 22720
15946
+ },
15947
+ {
15948
+ "epoch": 0.42180762102264685,
15949
+ "grad_norm": 34.09375,
15950
+ "learning_rate": 9.934092578325592e-06,
15951
+ "loss": 21.1634,
15952
+ "step": 22730
15953
+ },
15954
+ {
15955
+ "epoch": 0.4219931941071267,
15956
+ "grad_norm": 35.46875,
15957
+ "learning_rate": 9.934063582539551e-06,
15958
+ "loss": 21.1216,
15959
+ "step": 22740
15960
+ },
15961
+ {
15962
+ "epoch": 0.4221787671916065,
15963
+ "grad_norm": 34.40625,
15964
+ "learning_rate": 9.934034586753509e-06,
15965
+ "loss": 21.0727,
15966
+ "step": 22750
15967
+ },
15968
+ {
15969
+ "epoch": 0.4223643402760864,
15970
+ "grad_norm": 35.625,
15971
+ "learning_rate": 9.934005590967466e-06,
15972
+ "loss": 20.6274,
15973
+ "step": 22760
15974
+ },
15975
+ {
15976
+ "epoch": 0.4225499133605662,
15977
+ "grad_norm": 33.09375,
15978
+ "learning_rate": 9.933976595181423e-06,
15979
+ "loss": 21.0867,
15980
+ "step": 22770
15981
+ },
15982
+ {
15983
+ "epoch": 0.422735486445046,
15984
+ "grad_norm": 36.0625,
15985
+ "learning_rate": 9.933947599395381e-06,
15986
+ "loss": 21.0255,
15987
+ "step": 22780
15988
+ },
15989
+ {
15990
+ "epoch": 0.42292105952952586,
15991
+ "grad_norm": 34.84375,
15992
+ "learning_rate": 9.933918603609338e-06,
15993
+ "loss": 21.1949,
15994
+ "step": 22790
15995
+ },
15996
+ {
15997
+ "epoch": 0.42310663261400566,
15998
+ "grad_norm": 35.5625,
15999
+ "learning_rate": 9.933889607823296e-06,
16000
+ "loss": 20.9742,
16001
+ "step": 22800
16002
+ },
16003
+ {
16004
+ "epoch": 0.42329220569848547,
16005
+ "grad_norm": 34.5625,
16006
+ "learning_rate": 9.933860612037253e-06,
16007
+ "loss": 21.0635,
16008
+ "step": 22810
16009
+ },
16010
+ {
16011
+ "epoch": 0.42347777878296533,
16012
+ "grad_norm": 33.71875,
16013
+ "learning_rate": 9.93383161625121e-06,
16014
+ "loss": 20.6119,
16015
+ "step": 22820
16016
+ },
16017
+ {
16018
+ "epoch": 0.42366335186744514,
16019
+ "grad_norm": 36.5625,
16020
+ "learning_rate": 9.933802620465168e-06,
16021
+ "loss": 20.8544,
16022
+ "step": 22830
16023
+ },
16024
+ {
16025
+ "epoch": 0.423848924951925,
16026
+ "grad_norm": 35.28125,
16027
+ "learning_rate": 9.933773624679125e-06,
16028
+ "loss": 21.1555,
16029
+ "step": 22840
16030
+ },
16031
+ {
16032
+ "epoch": 0.4240344980364048,
16033
+ "grad_norm": 35.84375,
16034
+ "learning_rate": 9.933744628893084e-06,
16035
+ "loss": 20.7869,
16036
+ "step": 22850
16037
+ },
16038
+ {
16039
+ "epoch": 0.4242200711208846,
16040
+ "grad_norm": 33.5625,
16041
+ "learning_rate": 9.933715633107042e-06,
16042
+ "loss": 21.0017,
16043
+ "step": 22860
16044
+ },
16045
+ {
16046
+ "epoch": 0.4244056442053645,
16047
+ "grad_norm": 35.125,
16048
+ "learning_rate": 9.933686637321e-06,
16049
+ "loss": 21.0555,
16050
+ "step": 22870
16051
+ },
16052
+ {
16053
+ "epoch": 0.4245912172898443,
16054
+ "grad_norm": 33.90625,
16055
+ "learning_rate": 9.933657641534957e-06,
16056
+ "loss": 21.1488,
16057
+ "step": 22880
16058
+ },
16059
+ {
16060
+ "epoch": 0.4247767903743241,
16061
+ "grad_norm": 34.40625,
16062
+ "learning_rate": 9.933628645748914e-06,
16063
+ "loss": 20.8459,
16064
+ "step": 22890
16065
+ },
16066
+ {
16067
+ "epoch": 0.42496236345880395,
16068
+ "grad_norm": 33.96875,
16069
+ "learning_rate": 9.933599649962871e-06,
16070
+ "loss": 20.7651,
16071
+ "step": 22900
16072
+ },
16073
+ {
16074
+ "epoch": 0.42514793654328376,
16075
+ "grad_norm": 33.1875,
16076
+ "learning_rate": 9.933570654176829e-06,
16077
+ "loss": 20.9696,
16078
+ "step": 22910
16079
+ },
16080
+ {
16081
+ "epoch": 0.42533350962776356,
16082
+ "grad_norm": 35.96875,
16083
+ "learning_rate": 9.933541658390788e-06,
16084
+ "loss": 21.2031,
16085
+ "step": 22920
16086
+ },
16087
+ {
16088
+ "epoch": 0.4255190827122434,
16089
+ "grad_norm": 35.0,
16090
+ "learning_rate": 9.933512662604744e-06,
16091
+ "loss": 21.2462,
16092
+ "step": 22930
16093
+ },
16094
+ {
16095
+ "epoch": 0.42570465579672323,
16096
+ "grad_norm": 35.15625,
16097
+ "learning_rate": 9.933483666818701e-06,
16098
+ "loss": 20.9242,
16099
+ "step": 22940
16100
+ },
16101
+ {
16102
+ "epoch": 0.4258902288812031,
16103
+ "grad_norm": 35.84375,
16104
+ "learning_rate": 9.93345467103266e-06,
16105
+ "loss": 20.7291,
16106
+ "step": 22950
16107
+ },
16108
+ {
16109
+ "epoch": 0.4260758019656829,
16110
+ "grad_norm": 36.75,
16111
+ "learning_rate": 9.933425675246618e-06,
16112
+ "loss": 21.4801,
16113
+ "step": 22960
16114
+ },
16115
+ {
16116
+ "epoch": 0.4262613750501627,
16117
+ "grad_norm": 36.34375,
16118
+ "learning_rate": 9.933396679460575e-06,
16119
+ "loss": 21.3721,
16120
+ "step": 22970
16121
+ },
16122
+ {
16123
+ "epoch": 0.42644694813464257,
16124
+ "grad_norm": 35.1875,
16125
+ "learning_rate": 9.933367683674532e-06,
16126
+ "loss": 20.9337,
16127
+ "step": 22980
16128
+ },
16129
+ {
16130
+ "epoch": 0.4266325212191224,
16131
+ "grad_norm": 35.28125,
16132
+ "learning_rate": 9.93333868788849e-06,
16133
+ "loss": 21.2105,
16134
+ "step": 22990
16135
+ },
16136
+ {
16137
+ "epoch": 0.4268180943036022,
16138
+ "grad_norm": 33.78125,
16139
+ "learning_rate": 9.933309692102447e-06,
16140
+ "loss": 20.954,
16141
+ "step": 23000
16142
+ },
16143
+ {
16144
+ "epoch": 0.42700366738808204,
16145
+ "grad_norm": 37.15625,
16146
+ "learning_rate": 9.933280696316405e-06,
16147
+ "loss": 21.0472,
16148
+ "step": 23010
16149
+ },
16150
+ {
16151
+ "epoch": 0.42718924047256185,
16152
+ "grad_norm": 36.25,
16153
+ "learning_rate": 9.933251700530364e-06,
16154
+ "loss": 20.8671,
16155
+ "step": 23020
16156
+ },
16157
+ {
16158
+ "epoch": 0.4273748135570417,
16159
+ "grad_norm": 34.03125,
16160
+ "learning_rate": 9.933222704744321e-06,
16161
+ "loss": 20.8912,
16162
+ "step": 23030
16163
+ },
16164
+ {
16165
+ "epoch": 0.4275603866415215,
16166
+ "grad_norm": 33.90625,
16167
+ "learning_rate": 9.933193708958277e-06,
16168
+ "loss": 21.3617,
16169
+ "step": 23040
16170
+ },
16171
+ {
16172
+ "epoch": 0.4277459597260013,
16173
+ "grad_norm": 34.6875,
16174
+ "learning_rate": 9.933164713172236e-06,
16175
+ "loss": 21.0914,
16176
+ "step": 23050
16177
+ },
16178
+ {
16179
+ "epoch": 0.4279315328104812,
16180
+ "grad_norm": 35.65625,
16181
+ "learning_rate": 9.933135717386193e-06,
16182
+ "loss": 21.1626,
16183
+ "step": 23060
16184
+ },
16185
+ {
16186
+ "epoch": 0.428117105894961,
16187
+ "grad_norm": 33.53125,
16188
+ "learning_rate": 9.93310672160015e-06,
16189
+ "loss": 21.385,
16190
+ "step": 23070
16191
+ },
16192
+ {
16193
+ "epoch": 0.4283026789794408,
16194
+ "grad_norm": 34.90625,
16195
+ "learning_rate": 9.933077725814108e-06,
16196
+ "loss": 21.1424,
16197
+ "step": 23080
16198
+ },
16199
+ {
16200
+ "epoch": 0.42848825206392066,
16201
+ "grad_norm": 34.125,
16202
+ "learning_rate": 9.933048730028066e-06,
16203
+ "loss": 20.7616,
16204
+ "step": 23090
16205
+ },
16206
+ {
16207
+ "epoch": 0.42867382514840047,
16208
+ "grad_norm": 34.6875,
16209
+ "learning_rate": 9.933019734242023e-06,
16210
+ "loss": 20.5844,
16211
+ "step": 23100
16212
+ },
16213
+ {
16214
+ "epoch": 0.42885939823288033,
16215
+ "grad_norm": 35.0625,
16216
+ "learning_rate": 9.93299073845598e-06,
16217
+ "loss": 21.0437,
16218
+ "step": 23110
16219
+ },
16220
+ {
16221
+ "epoch": 0.42904497131736014,
16222
+ "grad_norm": 34.34375,
16223
+ "learning_rate": 9.93296174266994e-06,
16224
+ "loss": 20.9484,
16225
+ "step": 23120
16226
+ },
16227
+ {
16228
+ "epoch": 0.42923054440183994,
16229
+ "grad_norm": 33.125,
16230
+ "learning_rate": 9.932932746883897e-06,
16231
+ "loss": 21.0415,
16232
+ "step": 23130
16233
+ },
16234
+ {
16235
+ "epoch": 0.4294161174863198,
16236
+ "grad_norm": 34.15625,
16237
+ "learning_rate": 9.932903751097853e-06,
16238
+ "loss": 21.0687,
16239
+ "step": 23140
16240
+ },
16241
+ {
16242
+ "epoch": 0.4296016905707996,
16243
+ "grad_norm": 34.0625,
16244
+ "learning_rate": 9.932874755311812e-06,
16245
+ "loss": 20.5685,
16246
+ "step": 23150
16247
+ },
16248
+ {
16249
+ "epoch": 0.4297872636552794,
16250
+ "grad_norm": 36.09375,
16251
+ "learning_rate": 9.932845759525769e-06,
16252
+ "loss": 21.2205,
16253
+ "step": 23160
16254
+ },
16255
+ {
16256
+ "epoch": 0.4299728367397593,
16257
+ "grad_norm": 33.71875,
16258
+ "learning_rate": 9.932816763739727e-06,
16259
+ "loss": 21.0816,
16260
+ "step": 23170
16261
+ },
16262
+ {
16263
+ "epoch": 0.4301584098242391,
16264
+ "grad_norm": 36.15625,
16265
+ "learning_rate": 9.932787767953684e-06,
16266
+ "loss": 20.8513,
16267
+ "step": 23180
16268
+ },
16269
+ {
16270
+ "epoch": 0.4303439829087189,
16271
+ "grad_norm": 34.28125,
16272
+ "learning_rate": 9.932758772167643e-06,
16273
+ "loss": 20.9409,
16274
+ "step": 23190
16275
+ },
16276
+ {
16277
+ "epoch": 0.43052955599319875,
16278
+ "grad_norm": 34.1875,
16279
+ "learning_rate": 9.932729776381599e-06,
16280
+ "loss": 20.9464,
16281
+ "step": 23200
16282
+ },
16283
+ {
16284
+ "epoch": 0.43071512907767856,
16285
+ "grad_norm": 35.25,
16286
+ "learning_rate": 9.932700780595556e-06,
16287
+ "loss": 21.2037,
16288
+ "step": 23210
16289
+ },
16290
+ {
16291
+ "epoch": 0.4309007021621584,
16292
+ "grad_norm": 34.6875,
16293
+ "learning_rate": 9.932671784809515e-06,
16294
+ "loss": 20.9092,
16295
+ "step": 23220
16296
+ },
16297
+ {
16298
+ "epoch": 0.43108627524663823,
16299
+ "grad_norm": 36.53125,
16300
+ "learning_rate": 9.932642789023473e-06,
16301
+ "loss": 20.9398,
16302
+ "step": 23230
16303
+ },
16304
+ {
16305
+ "epoch": 0.43127184833111804,
16306
+ "grad_norm": 35.5625,
16307
+ "learning_rate": 9.93261379323743e-06,
16308
+ "loss": 20.7783,
16309
+ "step": 23240
16310
+ },
16311
+ {
16312
+ "epoch": 0.4314574214155979,
16313
+ "grad_norm": 35.34375,
16314
+ "learning_rate": 9.932584797451387e-06,
16315
+ "loss": 20.5817,
16316
+ "step": 23250
16317
+ },
16318
+ {
16319
+ "epoch": 0.4316429945000777,
16320
+ "grad_norm": 35.125,
16321
+ "learning_rate": 9.932555801665345e-06,
16322
+ "loss": 20.9545,
16323
+ "step": 23260
16324
+ },
16325
+ {
16326
+ "epoch": 0.4318285675845575,
16327
+ "grad_norm": 36.59375,
16328
+ "learning_rate": 9.932526805879302e-06,
16329
+ "loss": 20.4197,
16330
+ "step": 23270
16331
+ },
16332
+ {
16333
+ "epoch": 0.43201414066903737,
16334
+ "grad_norm": 34.59375,
16335
+ "learning_rate": 9.93249781009326e-06,
16336
+ "loss": 21.3306,
16337
+ "step": 23280
16338
+ },
16339
+ {
16340
+ "epoch": 0.4321997137535172,
16341
+ "grad_norm": 34.34375,
16342
+ "learning_rate": 9.932468814307217e-06,
16343
+ "loss": 20.4233,
16344
+ "step": 23290
16345
+ },
16346
+ {
16347
+ "epoch": 0.43238528683799704,
16348
+ "grad_norm": 35.59375,
16349
+ "learning_rate": 9.932439818521176e-06,
16350
+ "loss": 20.9082,
16351
+ "step": 23300
16352
+ },
16353
+ {
16354
+ "epoch": 0.43257085992247685,
16355
+ "grad_norm": 35.375,
16356
+ "learning_rate": 9.932410822735132e-06,
16357
+ "loss": 21.0376,
16358
+ "step": 23310
16359
+ },
16360
+ {
16361
+ "epoch": 0.43275643300695665,
16362
+ "grad_norm": 37.09375,
16363
+ "learning_rate": 9.93238182694909e-06,
16364
+ "loss": 20.735,
16365
+ "step": 23320
16366
+ },
16367
+ {
16368
+ "epoch": 0.4329420060914365,
16369
+ "grad_norm": 34.65625,
16370
+ "learning_rate": 9.932352831163048e-06,
16371
+ "loss": 20.7816,
16372
+ "step": 23330
16373
+ },
16374
+ {
16375
+ "epoch": 0.4331275791759163,
16376
+ "grad_norm": 35.03125,
16377
+ "learning_rate": 9.932323835377006e-06,
16378
+ "loss": 21.1626,
16379
+ "step": 23340
16380
+ },
16381
+ {
16382
+ "epoch": 0.43331315226039613,
16383
+ "grad_norm": 33.21875,
16384
+ "learning_rate": 9.932294839590963e-06,
16385
+ "loss": 21.2156,
16386
+ "step": 23350
16387
+ },
16388
+ {
16389
+ "epoch": 0.433498725344876,
16390
+ "grad_norm": 34.71875,
16391
+ "learning_rate": 9.93226584380492e-06,
16392
+ "loss": 20.5191,
16393
+ "step": 23360
16394
+ },
16395
+ {
16396
+ "epoch": 0.4336842984293558,
16397
+ "grad_norm": 34.0625,
16398
+ "learning_rate": 9.932236848018878e-06,
16399
+ "loss": 20.6801,
16400
+ "step": 23370
16401
+ },
16402
+ {
16403
+ "epoch": 0.43386987151383566,
16404
+ "grad_norm": 34.25,
16405
+ "learning_rate": 9.932207852232835e-06,
16406
+ "loss": 20.9476,
16407
+ "step": 23380
16408
+ },
16409
+ {
16410
+ "epoch": 0.43405544459831547,
16411
+ "grad_norm": 35.0625,
16412
+ "learning_rate": 9.932178856446793e-06,
16413
+ "loss": 21.1848,
16414
+ "step": 23390
16415
+ },
16416
+ {
16417
+ "epoch": 0.43424101768279527,
16418
+ "grad_norm": 34.125,
16419
+ "learning_rate": 9.932149860660752e-06,
16420
+ "loss": 20.944,
16421
+ "step": 23400
16422
+ },
16423
+ {
16424
+ "epoch": 0.43442659076727513,
16425
+ "grad_norm": 35.71875,
16426
+ "learning_rate": 9.932120864874708e-06,
16427
+ "loss": 20.6635,
16428
+ "step": 23410
16429
+ },
16430
+ {
16431
+ "epoch": 0.43461216385175494,
16432
+ "grad_norm": 36.25,
16433
+ "learning_rate": 9.932091869088665e-06,
16434
+ "loss": 20.9886,
16435
+ "step": 23420
16436
+ },
16437
+ {
16438
+ "epoch": 0.43479773693623475,
16439
+ "grad_norm": 34.28125,
16440
+ "learning_rate": 9.932062873302624e-06,
16441
+ "loss": 21.3632,
16442
+ "step": 23430
16443
+ },
16444
+ {
16445
+ "epoch": 0.4349833100207146,
16446
+ "grad_norm": 35.15625,
16447
+ "learning_rate": 9.932033877516582e-06,
16448
+ "loss": 21.2067,
16449
+ "step": 23440
16450
+ },
16451
+ {
16452
+ "epoch": 0.4351688831051944,
16453
+ "grad_norm": 34.84375,
16454
+ "learning_rate": 9.932004881730539e-06,
16455
+ "loss": 20.9518,
16456
+ "step": 23450
16457
+ },
16458
+ {
16459
+ "epoch": 0.4353544561896743,
16460
+ "grad_norm": 35.34375,
16461
+ "learning_rate": 9.931975885944496e-06,
16462
+ "loss": 20.9001,
16463
+ "step": 23460
16464
+ },
16465
+ {
16466
+ "epoch": 0.4355400292741541,
16467
+ "grad_norm": 34.0625,
16468
+ "learning_rate": 9.931946890158454e-06,
16469
+ "loss": 20.7958,
16470
+ "step": 23470
16471
+ },
16472
+ {
16473
+ "epoch": 0.4357256023586339,
16474
+ "grad_norm": 34.1875,
16475
+ "learning_rate": 9.931917894372411e-06,
16476
+ "loss": 20.5711,
16477
+ "step": 23480
16478
+ },
16479
+ {
16480
+ "epoch": 0.43591117544311375,
16481
+ "grad_norm": 36.40625,
16482
+ "learning_rate": 9.931888898586369e-06,
16483
+ "loss": 20.796,
16484
+ "step": 23490
16485
+ },
16486
+ {
16487
+ "epoch": 0.43609674852759356,
16488
+ "grad_norm": 34.25,
16489
+ "learning_rate": 9.931859902800328e-06,
16490
+ "loss": 20.722,
16491
+ "step": 23500
16492
+ },
16493
+ {
16494
+ "epoch": 0.43628232161207336,
16495
+ "grad_norm": 37.15625,
16496
+ "learning_rate": 9.931830907014285e-06,
16497
+ "loss": 20.9826,
16498
+ "step": 23510
16499
+ },
16500
+ {
16501
+ "epoch": 0.4364678946965532,
16502
+ "grad_norm": 34.375,
16503
+ "learning_rate": 9.93180191122824e-06,
16504
+ "loss": 21.0188,
16505
+ "step": 23520
16506
+ },
16507
+ {
16508
+ "epoch": 0.43665346778103303,
16509
+ "grad_norm": 37.125,
16510
+ "learning_rate": 9.9317729154422e-06,
16511
+ "loss": 21.2313,
16512
+ "step": 23530
16513
+ },
16514
+ {
16515
+ "epoch": 0.43683904086551284,
16516
+ "grad_norm": 34.71875,
16517
+ "learning_rate": 9.931743919656157e-06,
16518
+ "loss": 20.5476,
16519
+ "step": 23540
16520
+ },
16521
+ {
16522
+ "epoch": 0.4370246139499927,
16523
+ "grad_norm": 35.125,
16524
+ "learning_rate": 9.931714923870115e-06,
16525
+ "loss": 21.0134,
16526
+ "step": 23550
16527
+ },
16528
+ {
16529
+ "epoch": 0.4372101870344725,
16530
+ "grad_norm": 35.6875,
16531
+ "learning_rate": 9.931685928084072e-06,
16532
+ "loss": 20.7415,
16533
+ "step": 23560
16534
+ },
16535
+ {
16536
+ "epoch": 0.43739576011895237,
16537
+ "grad_norm": 36.125,
16538
+ "learning_rate": 9.93165693229803e-06,
16539
+ "loss": 21.2975,
16540
+ "step": 23570
16541
+ },
16542
+ {
16543
+ "epoch": 0.4375813332034322,
16544
+ "grad_norm": 35.34375,
16545
+ "learning_rate": 9.931627936511987e-06,
16546
+ "loss": 20.9121,
16547
+ "step": 23580
16548
+ },
16549
+ {
16550
+ "epoch": 0.437766906287912,
16551
+ "grad_norm": 36.78125,
16552
+ "learning_rate": 9.931598940725944e-06,
16553
+ "loss": 20.9767,
16554
+ "step": 23590
16555
+ },
16556
+ {
16557
+ "epoch": 0.43795247937239185,
16558
+ "grad_norm": 35.15625,
16559
+ "learning_rate": 9.931569944939903e-06,
16560
+ "loss": 21.0601,
16561
+ "step": 23600
16562
+ },
16563
+ {
16564
+ "epoch": 0.43813805245687165,
16565
+ "grad_norm": 36.625,
16566
+ "learning_rate": 9.931540949153861e-06,
16567
+ "loss": 21.2006,
16568
+ "step": 23610
16569
+ },
16570
+ {
16571
+ "epoch": 0.43832362554135146,
16572
+ "grad_norm": 33.21875,
16573
+ "learning_rate": 9.931511953367818e-06,
16574
+ "loss": 20.9193,
16575
+ "step": 23620
16576
+ },
16577
+ {
16578
+ "epoch": 0.4385091986258313,
16579
+ "grad_norm": 35.5625,
16580
+ "learning_rate": 9.931482957581776e-06,
16581
+ "loss": 21.2494,
16582
+ "step": 23630
16583
+ },
16584
+ {
16585
+ "epoch": 0.4386947717103111,
16586
+ "grad_norm": 35.5,
16587
+ "learning_rate": 9.931453961795733e-06,
16588
+ "loss": 20.8032,
16589
+ "step": 23640
16590
+ },
16591
+ {
16592
+ "epoch": 0.438880344794791,
16593
+ "grad_norm": 34.09375,
16594
+ "learning_rate": 9.93142496600969e-06,
16595
+ "loss": 20.7432,
16596
+ "step": 23650
16597
+ },
16598
+ {
16599
+ "epoch": 0.4390659178792708,
16600
+ "grad_norm": 34.53125,
16601
+ "learning_rate": 9.931395970223648e-06,
16602
+ "loss": 21.1021,
16603
+ "step": 23660
16604
+ },
16605
+ {
16606
+ "epoch": 0.4392514909637506,
16607
+ "grad_norm": 33.625,
16608
+ "learning_rate": 9.931366974437607e-06,
16609
+ "loss": 21.2628,
16610
+ "step": 23670
16611
+ },
16612
+ {
16613
+ "epoch": 0.43943706404823046,
16614
+ "grad_norm": 34.4375,
16615
+ "learning_rate": 9.931337978651563e-06,
16616
+ "loss": 20.8008,
16617
+ "step": 23680
16618
+ },
16619
+ {
16620
+ "epoch": 0.43962263713271027,
16621
+ "grad_norm": 35.875,
16622
+ "learning_rate": 9.93130898286552e-06,
16623
+ "loss": 20.8276,
16624
+ "step": 23690
16625
+ },
16626
+ {
16627
+ "epoch": 0.4398082102171901,
16628
+ "grad_norm": 33.90625,
16629
+ "learning_rate": 9.93127998707948e-06,
16630
+ "loss": 20.5879,
16631
+ "step": 23700
16632
+ },
16633
+ {
16634
+ "epoch": 0.43999378330166994,
16635
+ "grad_norm": 36.25,
16636
+ "learning_rate": 9.931250991293437e-06,
16637
+ "loss": 20.905,
16638
+ "step": 23710
16639
+ },
16640
+ {
16641
+ "epoch": 0.44017935638614974,
16642
+ "grad_norm": 35.8125,
16643
+ "learning_rate": 9.931221995507394e-06,
16644
+ "loss": 21.0795,
16645
+ "step": 23720
16646
+ },
16647
+ {
16648
+ "epoch": 0.4403649294706296,
16649
+ "grad_norm": 35.40625,
16650
+ "learning_rate": 9.931192999721351e-06,
16651
+ "loss": 20.6996,
16652
+ "step": 23730
16653
+ },
16654
+ {
16655
+ "epoch": 0.4405505025551094,
16656
+ "grad_norm": 33.625,
16657
+ "learning_rate": 9.931164003935309e-06,
16658
+ "loss": 20.894,
16659
+ "step": 23740
16660
+ },
16661
+ {
16662
+ "epoch": 0.4407360756395892,
16663
+ "grad_norm": 35.0625,
16664
+ "learning_rate": 9.931135008149266e-06,
16665
+ "loss": 20.8419,
16666
+ "step": 23750
16667
+ },
16668
+ {
16669
+ "epoch": 0.4409216487240691,
16670
+ "grad_norm": 34.9375,
16671
+ "learning_rate": 9.931106012363224e-06,
16672
+ "loss": 20.6036,
16673
+ "step": 23760
16674
+ },
16675
+ {
16676
+ "epoch": 0.4411072218085489,
16677
+ "grad_norm": 34.34375,
16678
+ "learning_rate": 9.931077016577181e-06,
16679
+ "loss": 20.7,
16680
+ "step": 23770
16681
+ },
16682
+ {
16683
+ "epoch": 0.4412927948930287,
16684
+ "grad_norm": 35.84375,
16685
+ "learning_rate": 9.93104802079114e-06,
16686
+ "loss": 21.0251,
16687
+ "step": 23780
16688
+ },
16689
+ {
16690
+ "epoch": 0.44147836797750856,
16691
+ "grad_norm": 35.4375,
16692
+ "learning_rate": 9.931019025005096e-06,
16693
+ "loss": 20.7861,
16694
+ "step": 23790
16695
+ },
16696
+ {
16697
+ "epoch": 0.44166394106198836,
16698
+ "grad_norm": 34.53125,
16699
+ "learning_rate": 9.930990029219055e-06,
16700
+ "loss": 21.0714,
16701
+ "step": 23800
16702
+ },
16703
+ {
16704
+ "epoch": 0.44184951414646817,
16705
+ "grad_norm": 35.1875,
16706
+ "learning_rate": 9.930961033433012e-06,
16707
+ "loss": 21.0033,
16708
+ "step": 23810
16709
+ },
16710
+ {
16711
+ "epoch": 0.44203508723094803,
16712
+ "grad_norm": 34.75,
16713
+ "learning_rate": 9.93093203764697e-06,
16714
+ "loss": 20.7072,
16715
+ "step": 23820
16716
+ },
16717
+ {
16718
+ "epoch": 0.44222066031542784,
16719
+ "grad_norm": 35.625,
16720
+ "learning_rate": 9.930903041860927e-06,
16721
+ "loss": 20.7695,
16722
+ "step": 23830
16723
+ },
16724
+ {
16725
+ "epoch": 0.4424062333999077,
16726
+ "grad_norm": 36.40625,
16727
+ "learning_rate": 9.930874046074885e-06,
16728
+ "loss": 20.7267,
16729
+ "step": 23840
16730
+ },
16731
+ {
16732
+ "epoch": 0.4425918064843875,
16733
+ "grad_norm": 36.03125,
16734
+ "learning_rate": 9.930845050288842e-06,
16735
+ "loss": 20.6114,
16736
+ "step": 23850
16737
+ },
16738
+ {
16739
+ "epoch": 0.4427773795688673,
16740
+ "grad_norm": 35.4375,
16741
+ "learning_rate": 9.9308160545028e-06,
16742
+ "loss": 20.9765,
16743
+ "step": 23860
16744
+ },
16745
+ {
16746
+ "epoch": 0.4429629526533472,
16747
+ "grad_norm": 34.625,
16748
+ "learning_rate": 9.930787058716757e-06,
16749
+ "loss": 20.9792,
16750
+ "step": 23870
16751
+ },
16752
+ {
16753
+ "epoch": 0.443148525737827,
16754
+ "grad_norm": 34.25,
16755
+ "learning_rate": 9.930758062930716e-06,
16756
+ "loss": 21.1687,
16757
+ "step": 23880
16758
+ },
16759
+ {
16760
+ "epoch": 0.4433340988223068,
16761
+ "grad_norm": 33.6875,
16762
+ "learning_rate": 9.930729067144673e-06,
16763
+ "loss": 20.8174,
16764
+ "step": 23890
16765
+ },
16766
+ {
16767
+ "epoch": 0.44351967190678665,
16768
+ "grad_norm": 33.03125,
16769
+ "learning_rate": 9.930700071358629e-06,
16770
+ "loss": 20.5752,
16771
+ "step": 23900
16772
+ },
16773
+ {
16774
+ "epoch": 0.44370524499126646,
16775
+ "grad_norm": 37.15625,
16776
+ "learning_rate": 9.930671075572588e-06,
16777
+ "loss": 20.5927,
16778
+ "step": 23910
16779
+ },
16780
+ {
16781
+ "epoch": 0.4438908180757463,
16782
+ "grad_norm": 36.375,
16783
+ "learning_rate": 9.930642079786546e-06,
16784
+ "loss": 21.0733,
16785
+ "step": 23920
16786
+ },
16787
+ {
16788
+ "epoch": 0.4440763911602261,
16789
+ "grad_norm": 35.375,
16790
+ "learning_rate": 9.930613084000503e-06,
16791
+ "loss": 20.7896,
16792
+ "step": 23930
16793
+ },
16794
+ {
16795
+ "epoch": 0.44426196424470593,
16796
+ "grad_norm": 33.34375,
16797
+ "learning_rate": 9.93058408821446e-06,
16798
+ "loss": 20.5514,
16799
+ "step": 23940
16800
+ },
16801
+ {
16802
+ "epoch": 0.4444475373291858,
16803
+ "grad_norm": 33.25,
16804
+ "learning_rate": 9.930555092428418e-06,
16805
+ "loss": 21.1296,
16806
+ "step": 23950
16807
+ },
16808
+ {
16809
+ "epoch": 0.4446331104136656,
16810
+ "grad_norm": 35.96875,
16811
+ "learning_rate": 9.930526096642375e-06,
16812
+ "loss": 20.7448,
16813
+ "step": 23960
16814
+ },
16815
+ {
16816
+ "epoch": 0.4448186834981454,
16817
+ "grad_norm": 34.59375,
16818
+ "learning_rate": 9.930497100856333e-06,
16819
+ "loss": 21.14,
16820
+ "step": 23970
16821
+ },
16822
+ {
16823
+ "epoch": 0.44500425658262527,
16824
+ "grad_norm": 34.28125,
16825
+ "learning_rate": 9.930468105070292e-06,
16826
+ "loss": 20.7303,
16827
+ "step": 23980
16828
+ },
16829
+ {
16830
+ "epoch": 0.4451898296671051,
16831
+ "grad_norm": 34.09375,
16832
+ "learning_rate": 9.930439109284249e-06,
16833
+ "loss": 21.1835,
16834
+ "step": 23990
16835
+ },
16836
+ {
16837
+ "epoch": 0.44537540275158494,
16838
+ "grad_norm": 35.4375,
16839
+ "learning_rate": 9.930410113498205e-06,
16840
+ "loss": 20.9196,
16841
+ "step": 24000
16842
+ },
16843
+ {
16844
+ "epoch": 0.44556097583606474,
16845
+ "grad_norm": 33.75,
16846
+ "learning_rate": 9.930381117712164e-06,
16847
+ "loss": 21.0292,
16848
+ "step": 24010
16849
+ },
16850
+ {
16851
+ "epoch": 0.44574654892054455,
16852
+ "grad_norm": 33.78125,
16853
+ "learning_rate": 9.930352121926121e-06,
16854
+ "loss": 20.8839,
16855
+ "step": 24020
16856
+ },
16857
+ {
16858
+ "epoch": 0.4459321220050244,
16859
+ "grad_norm": 36.25,
16860
+ "learning_rate": 9.930323126140079e-06,
16861
+ "loss": 21.0878,
16862
+ "step": 24030
16863
+ },
16864
+ {
16865
+ "epoch": 0.4461176950895042,
16866
+ "grad_norm": 34.84375,
16867
+ "learning_rate": 9.930294130354036e-06,
16868
+ "loss": 20.8566,
16869
+ "step": 24040
16870
+ },
16871
+ {
16872
+ "epoch": 0.446303268173984,
16873
+ "grad_norm": 34.8125,
16874
+ "learning_rate": 9.930265134567995e-06,
16875
+ "loss": 20.5984,
16876
+ "step": 24050
16877
+ },
16878
+ {
16879
+ "epoch": 0.4464888412584639,
16880
+ "grad_norm": 34.59375,
16881
+ "learning_rate": 9.930236138781951e-06,
16882
+ "loss": 20.4023,
16883
+ "step": 24060
16884
+ },
16885
+ {
16886
+ "epoch": 0.4466744143429437,
16887
+ "grad_norm": 33.5625,
16888
+ "learning_rate": 9.930207142995908e-06,
16889
+ "loss": 20.9537,
16890
+ "step": 24070
16891
+ },
16892
+ {
16893
+ "epoch": 0.4468599874274235,
16894
+ "grad_norm": 34.5625,
16895
+ "learning_rate": 9.930178147209868e-06,
16896
+ "loss": 20.6655,
16897
+ "step": 24080
16898
+ },
16899
+ {
16900
+ "epoch": 0.44704556051190336,
16901
+ "grad_norm": 34.53125,
16902
+ "learning_rate": 9.930149151423825e-06,
16903
+ "loss": 20.7582,
16904
+ "step": 24090
16905
+ },
16906
+ {
16907
+ "epoch": 0.44723113359638317,
16908
+ "grad_norm": 34.53125,
16909
+ "learning_rate": 9.930120155637782e-06,
16910
+ "loss": 20.7693,
16911
+ "step": 24100
16912
+ },
16913
+ {
16914
+ "epoch": 0.44741670668086303,
16915
+ "grad_norm": 34.28125,
16916
+ "learning_rate": 9.93009115985174e-06,
16917
+ "loss": 21.0091,
16918
+ "step": 24110
16919
+ },
16920
+ {
16921
+ "epoch": 0.44760227976534284,
16922
+ "grad_norm": 33.71875,
16923
+ "learning_rate": 9.930062164065697e-06,
16924
+ "loss": 20.9896,
16925
+ "step": 24120
16926
+ },
16927
+ {
16928
+ "epoch": 0.44778785284982264,
16929
+ "grad_norm": 36.53125,
16930
+ "learning_rate": 9.930033168279655e-06,
16931
+ "loss": 20.8672,
16932
+ "step": 24130
16933
+ },
16934
+ {
16935
+ "epoch": 0.4479734259343025,
16936
+ "grad_norm": 36.28125,
16937
+ "learning_rate": 9.930004172493612e-06,
16938
+ "loss": 20.6846,
16939
+ "step": 24140
16940
+ },
16941
+ {
16942
+ "epoch": 0.4481589990187823,
16943
+ "grad_norm": 35.71875,
16944
+ "learning_rate": 9.929975176707571e-06,
16945
+ "loss": 20.8901,
16946
+ "step": 24150
16947
+ },
16948
+ {
16949
+ "epoch": 0.4483445721032621,
16950
+ "grad_norm": 33.65625,
16951
+ "learning_rate": 9.929946180921527e-06,
16952
+ "loss": 21.0096,
16953
+ "step": 24160
16954
+ },
16955
+ {
16956
+ "epoch": 0.448530145187742,
16957
+ "grad_norm": 34.28125,
16958
+ "learning_rate": 9.929917185135484e-06,
16959
+ "loss": 20.8181,
16960
+ "step": 24170
16961
+ },
16962
+ {
16963
+ "epoch": 0.4487157182722218,
16964
+ "grad_norm": 35.3125,
16965
+ "learning_rate": 9.929888189349443e-06,
16966
+ "loss": 20.9108,
16967
+ "step": 24180
16968
+ },
16969
+ {
16970
+ "epoch": 0.44890129135670165,
16971
+ "grad_norm": 37.625,
16972
+ "learning_rate": 9.9298591935634e-06,
16973
+ "loss": 20.9324,
16974
+ "step": 24190
16975
+ },
16976
+ {
16977
+ "epoch": 0.44908686444118145,
16978
+ "grad_norm": 36.09375,
16979
+ "learning_rate": 9.929830197777358e-06,
16980
+ "loss": 20.987,
16981
+ "step": 24200
16982
+ },
16983
+ {
16984
+ "epoch": 0.44927243752566126,
16985
+ "grad_norm": 35.1875,
16986
+ "learning_rate": 9.929801201991315e-06,
16987
+ "loss": 20.7505,
16988
+ "step": 24210
16989
+ },
16990
+ {
16991
+ "epoch": 0.4494580106101411,
16992
+ "grad_norm": 36.125,
16993
+ "learning_rate": 9.929772206205273e-06,
16994
+ "loss": 20.9636,
16995
+ "step": 24220
16996
+ },
16997
+ {
16998
+ "epoch": 0.44964358369462093,
16999
+ "grad_norm": 35.28125,
17000
+ "learning_rate": 9.92974321041923e-06,
17001
+ "loss": 21.0674,
17002
+ "step": 24230
17003
+ },
17004
+ {
17005
+ "epoch": 0.44982915677910074,
17006
+ "grad_norm": 34.3125,
17007
+ "learning_rate": 9.929714214633188e-06,
17008
+ "loss": 20.6893,
17009
+ "step": 24240
17010
+ },
17011
+ {
17012
+ "epoch": 0.4500147298635806,
17013
+ "grad_norm": 36.625,
17014
+ "learning_rate": 9.929685218847147e-06,
17015
+ "loss": 20.9585,
17016
+ "step": 24250
17017
+ },
17018
+ {
17019
+ "epoch": 0.4502003029480604,
17020
+ "grad_norm": 35.5,
17021
+ "learning_rate": 9.929656223061104e-06,
17022
+ "loss": 20.3475,
17023
+ "step": 24260
17024
+ },
17025
+ {
17026
+ "epoch": 0.45038587603254027,
17027
+ "grad_norm": 34.6875,
17028
+ "learning_rate": 9.92962722727506e-06,
17029
+ "loss": 20.5898,
17030
+ "step": 24270
17031
+ },
17032
+ {
17033
+ "epoch": 0.4505714491170201,
17034
+ "grad_norm": 36.21875,
17035
+ "learning_rate": 9.929598231489019e-06,
17036
+ "loss": 20.5172,
17037
+ "step": 24280
17038
+ },
17039
+ {
17040
+ "epoch": 0.4507570222014999,
17041
+ "grad_norm": 34.53125,
17042
+ "learning_rate": 9.929569235702976e-06,
17043
+ "loss": 20.8779,
17044
+ "step": 24290
17045
+ },
17046
+ {
17047
+ "epoch": 0.45094259528597974,
17048
+ "grad_norm": 34.25,
17049
+ "learning_rate": 9.929540239916934e-06,
17050
+ "loss": 20.8615,
17051
+ "step": 24300
17052
+ },
17053
+ {
17054
+ "epoch": 0.45112816837045955,
17055
+ "grad_norm": 34.78125,
17056
+ "learning_rate": 9.929511244130891e-06,
17057
+ "loss": 21.23,
17058
+ "step": 24310
17059
+ },
17060
+ {
17061
+ "epoch": 0.45131374145493935,
17062
+ "grad_norm": 35.0625,
17063
+ "learning_rate": 9.929482248344849e-06,
17064
+ "loss": 21.1603,
17065
+ "step": 24320
17066
+ },
17067
+ {
17068
+ "epoch": 0.4514993145394192,
17069
+ "grad_norm": 34.875,
17070
+ "learning_rate": 9.929453252558806e-06,
17071
+ "loss": 21.007,
17072
+ "step": 24330
17073
+ },
17074
+ {
17075
+ "epoch": 0.451684887623899,
17076
+ "grad_norm": 33.5625,
17077
+ "learning_rate": 9.929424256772763e-06,
17078
+ "loss": 20.4047,
17079
+ "step": 24340
17080
+ },
17081
+ {
17082
+ "epoch": 0.45187046070837883,
17083
+ "grad_norm": 34.90625,
17084
+ "learning_rate": 9.929395260986721e-06,
17085
+ "loss": 21.1434,
17086
+ "step": 24350
17087
+ },
17088
+ {
17089
+ "epoch": 0.4520560337928587,
17090
+ "grad_norm": 34.8125,
17091
+ "learning_rate": 9.92936626520068e-06,
17092
+ "loss": 21.1945,
17093
+ "step": 24360
17094
+ },
17095
+ {
17096
+ "epoch": 0.4522416068773385,
17097
+ "grad_norm": 34.625,
17098
+ "learning_rate": 9.929337269414637e-06,
17099
+ "loss": 20.697,
17100
+ "step": 24370
17101
+ },
17102
+ {
17103
+ "epoch": 0.45242717996181836,
17104
+ "grad_norm": 34.96875,
17105
+ "learning_rate": 9.929308273628595e-06,
17106
+ "loss": 20.7853,
17107
+ "step": 24380
17108
+ },
17109
+ {
17110
+ "epoch": 0.45261275304629817,
17111
+ "grad_norm": 34.375,
17112
+ "learning_rate": 9.929279277842552e-06,
17113
+ "loss": 20.2789,
17114
+ "step": 24390
17115
+ },
17116
+ {
17117
+ "epoch": 0.452798326130778,
17118
+ "grad_norm": 35.90625,
17119
+ "learning_rate": 9.92925028205651e-06,
17120
+ "loss": 20.8062,
17121
+ "step": 24400
17122
+ },
17123
+ {
17124
+ "epoch": 0.45298389921525783,
17125
+ "grad_norm": 34.75,
17126
+ "learning_rate": 9.929221286270467e-06,
17127
+ "loss": 20.4341,
17128
+ "step": 24410
17129
+ },
17130
+ {
17131
+ "epoch": 0.45316947229973764,
17132
+ "grad_norm": 35.34375,
17133
+ "learning_rate": 9.929192290484424e-06,
17134
+ "loss": 20.4916,
17135
+ "step": 24420
17136
+ },
17137
+ {
17138
+ "epoch": 0.45335504538421745,
17139
+ "grad_norm": 35.53125,
17140
+ "learning_rate": 9.929163294698382e-06,
17141
+ "loss": 20.987,
17142
+ "step": 24430
17143
+ },
17144
+ {
17145
+ "epoch": 0.4535406184686973,
17146
+ "grad_norm": 34.03125,
17147
+ "learning_rate": 9.92913429891234e-06,
17148
+ "loss": 21.2552,
17149
+ "step": 24440
17150
+ },
17151
+ {
17152
+ "epoch": 0.4537261915531771,
17153
+ "grad_norm": 33.28125,
17154
+ "learning_rate": 9.929105303126297e-06,
17155
+ "loss": 20.5123,
17156
+ "step": 24450
17157
+ },
17158
+ {
17159
+ "epoch": 0.453911764637657,
17160
+ "grad_norm": 36.25,
17161
+ "learning_rate": 9.929076307340256e-06,
17162
+ "loss": 20.896,
17163
+ "step": 24460
17164
+ },
17165
+ {
17166
+ "epoch": 0.4540973377221368,
17167
+ "grad_norm": 34.5,
17168
+ "learning_rate": 9.929047311554213e-06,
17169
+ "loss": 20.9112,
17170
+ "step": 24470
17171
+ },
17172
+ {
17173
+ "epoch": 0.4542829108066166,
17174
+ "grad_norm": 36.375,
17175
+ "learning_rate": 9.92901831576817e-06,
17176
+ "loss": 20.9512,
17177
+ "step": 24480
17178
+ },
17179
+ {
17180
+ "epoch": 0.45446848389109645,
17181
+ "grad_norm": 35.21875,
17182
+ "learning_rate": 9.928989319982128e-06,
17183
+ "loss": 20.469,
17184
+ "step": 24490
17185
+ },
17186
+ {
17187
+ "epoch": 0.45465405697557626,
17188
+ "grad_norm": 37.09375,
17189
+ "learning_rate": 9.928960324196085e-06,
17190
+ "loss": 20.8936,
17191
+ "step": 24500
17192
+ },
17193
+ {
17194
+ "epoch": 0.45483963006005607,
17195
+ "grad_norm": 35.125,
17196
+ "learning_rate": 9.928931328410043e-06,
17197
+ "loss": 20.4707,
17198
+ "step": 24510
17199
+ },
17200
+ {
17201
+ "epoch": 0.45502520314453593,
17202
+ "grad_norm": 35.46875,
17203
+ "learning_rate": 9.928902332624e-06,
17204
+ "loss": 20.5323,
17205
+ "step": 24520
17206
+ },
17207
+ {
17208
+ "epoch": 0.45521077622901573,
17209
+ "grad_norm": 33.90625,
17210
+ "learning_rate": 9.92887333683796e-06,
17211
+ "loss": 21.0152,
17212
+ "step": 24530
17213
+ },
17214
+ {
17215
+ "epoch": 0.4553963493134956,
17216
+ "grad_norm": 34.65625,
17217
+ "learning_rate": 9.928844341051915e-06,
17218
+ "loss": 20.7327,
17219
+ "step": 24540
17220
+ },
17221
+ {
17222
+ "epoch": 0.4555819223979754,
17223
+ "grad_norm": 35.34375,
17224
+ "learning_rate": 9.928815345265872e-06,
17225
+ "loss": 20.7267,
17226
+ "step": 24550
17227
+ },
17228
+ {
17229
+ "epoch": 0.4557674954824552,
17230
+ "grad_norm": 33.34375,
17231
+ "learning_rate": 9.928786349479832e-06,
17232
+ "loss": 20.4997,
17233
+ "step": 24560
17234
+ },
17235
+ {
17236
+ "epoch": 0.45595306856693507,
17237
+ "grad_norm": 37.34375,
17238
+ "learning_rate": 9.928757353693789e-06,
17239
+ "loss": 20.8639,
17240
+ "step": 24570
17241
+ },
17242
+ {
17243
+ "epoch": 0.4561386416514149,
17244
+ "grad_norm": 34.15625,
17245
+ "learning_rate": 9.928728357907746e-06,
17246
+ "loss": 21.0693,
17247
+ "step": 24580
17248
+ },
17249
+ {
17250
+ "epoch": 0.4563242147358947,
17251
+ "grad_norm": 33.75,
17252
+ "learning_rate": 9.928699362121704e-06,
17253
+ "loss": 21.0186,
17254
+ "step": 24590
17255
+ },
17256
+ {
17257
+ "epoch": 0.45650978782037455,
17258
+ "grad_norm": 37.59375,
17259
+ "learning_rate": 9.928670366335661e-06,
17260
+ "loss": 21.1377,
17261
+ "step": 24600
17262
+ },
17263
+ {
17264
+ "epoch": 0.45669536090485435,
17265
+ "grad_norm": 35.125,
17266
+ "learning_rate": 9.928641370549619e-06,
17267
+ "loss": 20.7898,
17268
+ "step": 24610
17269
+ },
17270
+ {
17271
+ "epoch": 0.45688093398933416,
17272
+ "grad_norm": 35.0,
17273
+ "learning_rate": 9.928612374763576e-06,
17274
+ "loss": 20.8915,
17275
+ "step": 24620
17276
+ },
17277
+ {
17278
+ "epoch": 0.457066507073814,
17279
+ "grad_norm": 34.96875,
17280
+ "learning_rate": 9.928583378977535e-06,
17281
+ "loss": 20.677,
17282
+ "step": 24630
17283
+ },
17284
+ {
17285
+ "epoch": 0.4572520801582938,
17286
+ "grad_norm": 34.78125,
17287
+ "learning_rate": 9.928554383191492e-06,
17288
+ "loss": 20.5464,
17289
+ "step": 24640
17290
+ },
17291
+ {
17292
+ "epoch": 0.4574376532427737,
17293
+ "grad_norm": 35.0625,
17294
+ "learning_rate": 9.928525387405448e-06,
17295
+ "loss": 20.7197,
17296
+ "step": 24650
17297
+ },
17298
+ {
17299
+ "epoch": 0.4576232263272535,
17300
+ "grad_norm": 35.28125,
17301
+ "learning_rate": 9.928496391619407e-06,
17302
+ "loss": 21.186,
17303
+ "step": 24660
17304
+ },
17305
+ {
17306
+ "epoch": 0.4578087994117333,
17307
+ "grad_norm": 35.71875,
17308
+ "learning_rate": 9.928467395833365e-06,
17309
+ "loss": 21.1732,
17310
+ "step": 24670
17311
+ },
17312
+ {
17313
+ "epoch": 0.45799437249621316,
17314
+ "grad_norm": 34.3125,
17315
+ "learning_rate": 9.928438400047322e-06,
17316
+ "loss": 20.2372,
17317
+ "step": 24680
17318
+ },
17319
+ {
17320
+ "epoch": 0.45817994558069297,
17321
+ "grad_norm": 34.1875,
17322
+ "learning_rate": 9.92840940426128e-06,
17323
+ "loss": 20.5822,
17324
+ "step": 24690
17325
+ },
17326
+ {
17327
+ "epoch": 0.4583655186651728,
17328
+ "grad_norm": 34.34375,
17329
+ "learning_rate": 9.928380408475237e-06,
17330
+ "loss": 20.9295,
17331
+ "step": 24700
17332
+ },
17333
+ {
17334
+ "epoch": 0.45855109174965264,
17335
+ "grad_norm": 34.6875,
17336
+ "learning_rate": 9.928351412689194e-06,
17337
+ "loss": 20.7293,
17338
+ "step": 24710
17339
+ },
17340
+ {
17341
+ "epoch": 0.45873666483413245,
17342
+ "grad_norm": 34.40625,
17343
+ "learning_rate": 9.928322416903152e-06,
17344
+ "loss": 21.0629,
17345
+ "step": 24720
17346
+ },
17347
+ {
17348
+ "epoch": 0.4589222379186123,
17349
+ "grad_norm": 37.03125,
17350
+ "learning_rate": 9.92829342111711e-06,
17351
+ "loss": 20.6274,
17352
+ "step": 24730
17353
+ },
17354
+ {
17355
+ "epoch": 0.4591078110030921,
17356
+ "grad_norm": 34.625,
17357
+ "learning_rate": 9.928264425331068e-06,
17358
+ "loss": 20.7442,
17359
+ "step": 24740
17360
+ },
17361
+ {
17362
+ "epoch": 0.4592933840875719,
17363
+ "grad_norm": 34.28125,
17364
+ "learning_rate": 9.928235429545024e-06,
17365
+ "loss": 20.7463,
17366
+ "step": 24750
17367
+ },
17368
+ {
17369
+ "epoch": 0.4594789571720518,
17370
+ "grad_norm": 36.5625,
17371
+ "learning_rate": 9.928206433758983e-06,
17372
+ "loss": 21.1036,
17373
+ "step": 24760
17374
+ },
17375
+ {
17376
+ "epoch": 0.4596645302565316,
17377
+ "grad_norm": 35.25,
17378
+ "learning_rate": 9.92817743797294e-06,
17379
+ "loss": 20.6891,
17380
+ "step": 24770
17381
+ },
17382
+ {
17383
+ "epoch": 0.4598501033410114,
17384
+ "grad_norm": 33.6875,
17385
+ "learning_rate": 9.928148442186898e-06,
17386
+ "loss": 20.3999,
17387
+ "step": 24780
17388
+ },
17389
+ {
17390
+ "epoch": 0.46003567642549126,
17391
+ "grad_norm": 36.6875,
17392
+ "learning_rate": 9.928119446400855e-06,
17393
+ "loss": 21.0503,
17394
+ "step": 24790
17395
+ },
17396
+ {
17397
+ "epoch": 0.46022124950997106,
17398
+ "grad_norm": 36.15625,
17399
+ "learning_rate": 9.928090450614813e-06,
17400
+ "loss": 20.7396,
17401
+ "step": 24800
17402
+ },
17403
+ {
17404
+ "epoch": 0.4604068225944509,
17405
+ "grad_norm": 37.03125,
17406
+ "learning_rate": 9.92806145482877e-06,
17407
+ "loss": 20.8848,
17408
+ "step": 24810
17409
+ },
17410
+ {
17411
+ "epoch": 0.46059239567893073,
17412
+ "grad_norm": 33.375,
17413
+ "learning_rate": 9.928032459042727e-06,
17414
+ "loss": 20.9374,
17415
+ "step": 24820
17416
+ },
17417
+ {
17418
+ "epoch": 0.46077796876341054,
17419
+ "grad_norm": 35.75,
17420
+ "learning_rate": 9.928003463256685e-06,
17421
+ "loss": 20.8332,
17422
+ "step": 24830
17423
+ },
17424
+ {
17425
+ "epoch": 0.4609635418478904,
17426
+ "grad_norm": 35.09375,
17427
+ "learning_rate": 9.927974467470644e-06,
17428
+ "loss": 20.6518,
17429
+ "step": 24840
17430
+ },
17431
+ {
17432
+ "epoch": 0.4611491149323702,
17433
+ "grad_norm": 35.78125,
17434
+ "learning_rate": 9.927945471684601e-06,
17435
+ "loss": 20.633,
17436
+ "step": 24850
17437
+ },
17438
+ {
17439
+ "epoch": 0.46133468801685,
17440
+ "grad_norm": 34.3125,
17441
+ "learning_rate": 9.927916475898559e-06,
17442
+ "loss": 21.1518,
17443
+ "step": 24860
17444
+ },
17445
+ {
17446
+ "epoch": 0.4615202611013299,
17447
+ "grad_norm": 34.625,
17448
+ "learning_rate": 9.927887480112516e-06,
17449
+ "loss": 20.6709,
17450
+ "step": 24870
17451
+ },
17452
+ {
17453
+ "epoch": 0.4617058341858097,
17454
+ "grad_norm": 35.34375,
17455
+ "learning_rate": 9.927858484326474e-06,
17456
+ "loss": 20.86,
17457
+ "step": 24880
17458
+ },
17459
+ {
17460
+ "epoch": 0.46189140727028954,
17461
+ "grad_norm": 34.875,
17462
+ "learning_rate": 9.927829488540431e-06,
17463
+ "loss": 21.1381,
17464
+ "step": 24890
17465
+ },
17466
+ {
17467
+ "epoch": 0.46207698035476935,
17468
+ "grad_norm": 32.96875,
17469
+ "learning_rate": 9.927800492754388e-06,
17470
+ "loss": 20.7075,
17471
+ "step": 24900
17472
+ },
17473
+ {
17474
+ "epoch": 0.46226255343924916,
17475
+ "grad_norm": 35.59375,
17476
+ "learning_rate": 9.927771496968346e-06,
17477
+ "loss": 20.5517,
17478
+ "step": 24910
17479
+ },
17480
+ {
17481
+ "epoch": 0.462448126523729,
17482
+ "grad_norm": 36.0625,
17483
+ "learning_rate": 9.927742501182303e-06,
17484
+ "loss": 20.7381,
17485
+ "step": 24920
17486
+ },
17487
+ {
17488
+ "epoch": 0.4626336996082088,
17489
+ "grad_norm": 37.375,
17490
+ "learning_rate": 9.92771350539626e-06,
17491
+ "loss": 20.8368,
17492
+ "step": 24930
17493
+ },
17494
+ {
17495
+ "epoch": 0.46281927269268863,
17496
+ "grad_norm": 35.25,
17497
+ "learning_rate": 9.92768450961022e-06,
17498
+ "loss": 21.0123,
17499
+ "step": 24940
17500
+ },
17501
+ {
17502
+ "epoch": 0.4630048457771685,
17503
+ "grad_norm": 35.03125,
17504
+ "learning_rate": 9.927655513824177e-06,
17505
+ "loss": 20.7466,
17506
+ "step": 24950
17507
+ },
17508
+ {
17509
+ "epoch": 0.4631904188616483,
17510
+ "grad_norm": 35.90625,
17511
+ "learning_rate": 9.927626518038135e-06,
17512
+ "loss": 20.4304,
17513
+ "step": 24960
17514
+ },
17515
+ {
17516
+ "epoch": 0.4633759919461281,
17517
+ "grad_norm": 34.46875,
17518
+ "learning_rate": 9.927597522252092e-06,
17519
+ "loss": 20.482,
17520
+ "step": 24970
17521
+ },
17522
+ {
17523
+ "epoch": 0.46356156503060797,
17524
+ "grad_norm": 37.25,
17525
+ "learning_rate": 9.92756852646605e-06,
17526
+ "loss": 20.8669,
17527
+ "step": 24980
17528
+ },
17529
+ {
17530
+ "epoch": 0.4637471381150878,
17531
+ "grad_norm": 36.25,
17532
+ "learning_rate": 9.927539530680007e-06,
17533
+ "loss": 20.7218,
17534
+ "step": 24990
17535
+ },
17536
+ {
17537
+ "epoch": 0.46393271119956764,
17538
+ "grad_norm": 33.96875,
17539
+ "learning_rate": 9.927510534893964e-06,
17540
+ "loss": 20.7564,
17541
+ "step": 25000
17542
+ },
17543
+ {
17544
+ "epoch": 0.46393271119956764,
17545
+ "eval_loss": 2.5967071056365967,
17546
+ "eval_runtime": 453.4834,
17547
+ "eval_samples_per_second": 3202.139,
17548
+ "eval_steps_per_second": 50.035,
17549
+ "step": 25000
17550
  }
17551
  ],
17552
  "logging_steps": 10,
 
17566
  "attributes": {}
17567
  }
17568
  },
17569
+ "total_flos": 4.36380689760256e+18,
17570
  "train_batch_size": 8,
17571
  "trial_name": null,
17572
  "trial_params": null