CocoRoF commited on
Commit
af4ddd7
·
verified ·
1 Parent(s): 6a7e8ec

Training in progress, step 20000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f373582976d1956b2adacaa9790d35a15e2ac8bfa5e191ca43010f1c2968b03
3
  size 306619286
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c23cf68abad32271631729127eb144e752956ff3904650e5132254b9fad4fa45
3
  size 306619286
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c0bb19542ae52096c047421eb4fe0a1f9474910bfbb60e796399a92ff1ef2e8
3
  size 919972410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02301192e164720633e799be07ca43015ab4e55ee46289fcc068979fca646eac
3
  size 919972410
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c461c9d337dfc684e9352ec72bfa344e2f5d377f7cfc4475de9acae294dca89
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06fea830cf5ad73ec00d500ea6fb952740ac936f18e93fa2d32abde1ea3ead92
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fae392ec6232cbf9da21d6ed12bc8247d0d24e7f3a3606acd23be00f3e8bbfc5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be561d1df19be227394d8ea607c54262a06c9bf880af0aa5e04a52596a2a6cb0
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbf3e7ca9991a58b0b16574a3c653483c551c270aa05aba06c162ea593f7b0f2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03f3e24417a59435f5a8450a4aeb0f09cc92734b5c3b45a0701b2c043c415c05
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c695bebf6bcb75cbe26378bfe0ab7e2a33c49f713b9d6e4d10632b24322977e7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bea02744c29f30024590ab1629a0e7b7dabbf1e8476456c2e7c5ce46dc35c28
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5ebb13c71265c5464c9aa9bb9b66f07764d73befe6cd63a2aaf8e781bf0a374
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:041be966454b60c86af576fc1eb7f34189114689abff8f9622b947110f7334c8
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12cc6e245e189be568c8dfd43a4dd8f04bb3dbd9f17f41458107935d2c2a6a9d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b85766f6596d15a810177d77dd259d9b50588cf100ec5f8ebff5fed881d57957
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36086646e9a8f76fea69f8a227112e83bb63524964ccdfb82f4cdad88b90e5e4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8be75d04b1ebe614241b88fd010a5dda1b7bf703c00c6ebe310ca07975830fe7
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b44153bacf860d0ca6ce4c6b9380a199feab8a72ca613e6745bfb671b02c4e4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4699833a7ab4cb692996ef7567f934c0bac79d6a067963a873f89a38e412bd48
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f2df6d70b5170ad5f7fbfdd1443cce32f9e75727c4f2973f33ec9362abb0dd0
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2628d182b8ffc02b994fb2eed0e111e21ac10dadfa106370a9ce0523145ccd0
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.27835962671974057,
5
  "eval_steps": 5000,
6
- "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -10531,6 +10531,3514 @@
10531
  "eval_samples_per_second": 3195.28,
10532
  "eval_steps_per_second": 49.928,
10533
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10534
  }
10535
  ],
10536
  "logging_steps": 10,
@@ -10550,7 +14058,7 @@
10550
  "attributes": {}
10551
  }
10552
  },
10553
- "total_flos": 2.618284138561536e+18,
10554
  "train_batch_size": 8,
10555
  "trial_name": null,
10556
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3711461689596541,
5
  "eval_steps": 5000,
6
+ "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
10531
  "eval_samples_per_second": 3195.28,
10532
  "eval_steps_per_second": 49.928,
10533
  "step": 15000
10534
+ },
10535
+ {
10536
+ "epoch": 0.2785451998042204,
10537
+ "grad_norm": 34.0625,
10538
+ "learning_rate": 9.956477325150337e-06,
10539
+ "loss": 22.2863,
10540
+ "step": 15010
10541
+ },
10542
+ {
10543
+ "epoch": 0.27873077288870024,
10544
+ "grad_norm": 34.96875,
10545
+ "learning_rate": 9.956448329364294e-06,
10546
+ "loss": 22.5122,
10547
+ "step": 15020
10548
+ },
10549
+ {
10550
+ "epoch": 0.27891634597318005,
10551
+ "grad_norm": 33.625,
10552
+ "learning_rate": 9.956419333578251e-06,
10553
+ "loss": 22.3634,
10554
+ "step": 15030
10555
+ },
10556
+ {
10557
+ "epoch": 0.27910191905765985,
10558
+ "grad_norm": 35.5625,
10559
+ "learning_rate": 9.95639033779221e-06,
10560
+ "loss": 22.5542,
10561
+ "step": 15040
10562
+ },
10563
+ {
10564
+ "epoch": 0.2792874921421397,
10565
+ "grad_norm": 35.25,
10566
+ "learning_rate": 9.956361342006166e-06,
10567
+ "loss": 22.201,
10568
+ "step": 15050
10569
+ },
10570
+ {
10571
+ "epoch": 0.2794730652266195,
10572
+ "grad_norm": 34.59375,
10573
+ "learning_rate": 9.956332346220124e-06,
10574
+ "loss": 22.3541,
10575
+ "step": 15060
10576
+ },
10577
+ {
10578
+ "epoch": 0.2796586383110994,
10579
+ "grad_norm": 33.875,
10580
+ "learning_rate": 9.956303350434083e-06,
10581
+ "loss": 21.9755,
10582
+ "step": 15070
10583
+ },
10584
+ {
10585
+ "epoch": 0.2798442113955792,
10586
+ "grad_norm": 34.03125,
10587
+ "learning_rate": 9.95627435464804e-06,
10588
+ "loss": 22.5443,
10589
+ "step": 15080
10590
+ },
10591
+ {
10592
+ "epoch": 0.280029784480059,
10593
+ "grad_norm": 35.46875,
10594
+ "learning_rate": 9.956245358861998e-06,
10595
+ "loss": 22.1045,
10596
+ "step": 15090
10597
+ },
10598
+ {
10599
+ "epoch": 0.28021535756453886,
10600
+ "grad_norm": 34.125,
10601
+ "learning_rate": 9.956216363075955e-06,
10602
+ "loss": 22.3805,
10603
+ "step": 15100
10604
+ },
10605
+ {
10606
+ "epoch": 0.28040093064901866,
10607
+ "grad_norm": 34.875,
10608
+ "learning_rate": 9.956187367289912e-06,
10609
+ "loss": 22.2453,
10610
+ "step": 15110
10611
+ },
10612
+ {
10613
+ "epoch": 0.28058650373349847,
10614
+ "grad_norm": 35.75,
10615
+ "learning_rate": 9.95615837150387e-06,
10616
+ "loss": 22.4519,
10617
+ "step": 15120
10618
+ },
10619
+ {
10620
+ "epoch": 0.28077207681797833,
10621
+ "grad_norm": 35.0,
10622
+ "learning_rate": 9.956129375717827e-06,
10623
+ "loss": 22.2457,
10624
+ "step": 15130
10625
+ },
10626
+ {
10627
+ "epoch": 0.28095764990245814,
10628
+ "grad_norm": 33.90625,
10629
+ "learning_rate": 9.956100379931786e-06,
10630
+ "loss": 22.1788,
10631
+ "step": 15140
10632
+ },
10633
+ {
10634
+ "epoch": 0.281143222986938,
10635
+ "grad_norm": 36.21875,
10636
+ "learning_rate": 9.956071384145742e-06,
10637
+ "loss": 22.0945,
10638
+ "step": 15150
10639
+ },
10640
+ {
10641
+ "epoch": 0.2813287960714178,
10642
+ "grad_norm": 35.90625,
10643
+ "learning_rate": 9.9560423883597e-06,
10644
+ "loss": 22.1752,
10645
+ "step": 15160
10646
+ },
10647
+ {
10648
+ "epoch": 0.2815143691558976,
10649
+ "grad_norm": 36.15625,
10650
+ "learning_rate": 9.956013392573658e-06,
10651
+ "loss": 22.5829,
10652
+ "step": 15170
10653
+ },
10654
+ {
10655
+ "epoch": 0.2816999422403775,
10656
+ "grad_norm": 37.0625,
10657
+ "learning_rate": 9.955984396787616e-06,
10658
+ "loss": 22.6663,
10659
+ "step": 15180
10660
+ },
10661
+ {
10662
+ "epoch": 0.2818855153248573,
10663
+ "grad_norm": 35.5,
10664
+ "learning_rate": 9.955955401001573e-06,
10665
+ "loss": 22.5561,
10666
+ "step": 15190
10667
+ },
10668
+ {
10669
+ "epoch": 0.2820710884093371,
10670
+ "grad_norm": 36.0,
10671
+ "learning_rate": 9.95592640521553e-06,
10672
+ "loss": 22.5047,
10673
+ "step": 15200
10674
+ },
10675
+ {
10676
+ "epoch": 0.28225666149381695,
10677
+ "grad_norm": 36.28125,
10678
+ "learning_rate": 9.955897409429488e-06,
10679
+ "loss": 22.4836,
10680
+ "step": 15210
10681
+ },
10682
+ {
10683
+ "epoch": 0.28244223457829676,
10684
+ "grad_norm": 35.96875,
10685
+ "learning_rate": 9.955868413643446e-06,
10686
+ "loss": 22.1041,
10687
+ "step": 15220
10688
+ },
10689
+ {
10690
+ "epoch": 0.28262780766277656,
10691
+ "grad_norm": 36.0,
10692
+ "learning_rate": 9.955839417857403e-06,
10693
+ "loss": 22.592,
10694
+ "step": 15230
10695
+ },
10696
+ {
10697
+ "epoch": 0.2828133807472564,
10698
+ "grad_norm": 35.375,
10699
+ "learning_rate": 9.955810422071362e-06,
10700
+ "loss": 22.2083,
10701
+ "step": 15240
10702
+ },
10703
+ {
10704
+ "epoch": 0.28299895383173623,
10705
+ "grad_norm": 34.53125,
10706
+ "learning_rate": 9.95578142628532e-06,
10707
+ "loss": 22.371,
10708
+ "step": 15250
10709
+ },
10710
+ {
10711
+ "epoch": 0.2831845269162161,
10712
+ "grad_norm": 34.25,
10713
+ "learning_rate": 9.955752430499275e-06,
10714
+ "loss": 22.5102,
10715
+ "step": 15260
10716
+ },
10717
+ {
10718
+ "epoch": 0.2833701000006959,
10719
+ "grad_norm": 34.125,
10720
+ "learning_rate": 9.955723434713234e-06,
10721
+ "loss": 22.0664,
10722
+ "step": 15270
10723
+ },
10724
+ {
10725
+ "epoch": 0.2835556730851757,
10726
+ "grad_norm": 36.1875,
10727
+ "learning_rate": 9.955694438927192e-06,
10728
+ "loss": 22.2947,
10729
+ "step": 15280
10730
+ },
10731
+ {
10732
+ "epoch": 0.28374124616965557,
10733
+ "grad_norm": 34.96875,
10734
+ "learning_rate": 9.955665443141149e-06,
10735
+ "loss": 22.093,
10736
+ "step": 15290
10737
+ },
10738
+ {
10739
+ "epoch": 0.2839268192541354,
10740
+ "grad_norm": 36.375,
10741
+ "learning_rate": 9.955636447355106e-06,
10742
+ "loss": 22.4046,
10743
+ "step": 15300
10744
+ },
10745
+ {
10746
+ "epoch": 0.2841123923386152,
10747
+ "grad_norm": 36.4375,
10748
+ "learning_rate": 9.955607451569066e-06,
10749
+ "loss": 22.7986,
10750
+ "step": 15310
10751
+ },
10752
+ {
10753
+ "epoch": 0.28429796542309504,
10754
+ "grad_norm": 37.40625,
10755
+ "learning_rate": 9.955578455783021e-06,
10756
+ "loss": 22.5896,
10757
+ "step": 15320
10758
+ },
10759
+ {
10760
+ "epoch": 0.28448353850757485,
10761
+ "grad_norm": 37.375,
10762
+ "learning_rate": 9.955549459996979e-06,
10763
+ "loss": 22.2348,
10764
+ "step": 15330
10765
+ },
10766
+ {
10767
+ "epoch": 0.2846691115920547,
10768
+ "grad_norm": 35.96875,
10769
+ "learning_rate": 9.955520464210938e-06,
10770
+ "loss": 22.1714,
10771
+ "step": 15340
10772
+ },
10773
+ {
10774
+ "epoch": 0.2848546846765345,
10775
+ "grad_norm": 35.53125,
10776
+ "learning_rate": 9.955491468424895e-06,
10777
+ "loss": 22.097,
10778
+ "step": 15350
10779
+ },
10780
+ {
10781
+ "epoch": 0.2850402577610143,
10782
+ "grad_norm": 38.03125,
10783
+ "learning_rate": 9.955462472638853e-06,
10784
+ "loss": 22.6676,
10785
+ "step": 15360
10786
+ },
10787
+ {
10788
+ "epoch": 0.2852258308454942,
10789
+ "grad_norm": 36.0625,
10790
+ "learning_rate": 9.95543347685281e-06,
10791
+ "loss": 21.9045,
10792
+ "step": 15370
10793
+ },
10794
+ {
10795
+ "epoch": 0.285411403929974,
10796
+ "grad_norm": 34.59375,
10797
+ "learning_rate": 9.955404481066767e-06,
10798
+ "loss": 22.1789,
10799
+ "step": 15380
10800
+ },
10801
+ {
10802
+ "epoch": 0.2855969770144538,
10803
+ "grad_norm": 34.4375,
10804
+ "learning_rate": 9.955375485280725e-06,
10805
+ "loss": 22.5378,
10806
+ "step": 15390
10807
+ },
10808
+ {
10809
+ "epoch": 0.28578255009893366,
10810
+ "grad_norm": 33.96875,
10811
+ "learning_rate": 9.955346489494682e-06,
10812
+ "loss": 22.4062,
10813
+ "step": 15400
10814
+ },
10815
+ {
10816
+ "epoch": 0.28596812318341347,
10817
+ "grad_norm": 35.53125,
10818
+ "learning_rate": 9.955317493708641e-06,
10819
+ "loss": 22.1425,
10820
+ "step": 15410
10821
+ },
10822
+ {
10823
+ "epoch": 0.28615369626789333,
10824
+ "grad_norm": 35.25,
10825
+ "learning_rate": 9.955288497922597e-06,
10826
+ "loss": 22.2226,
10827
+ "step": 15420
10828
+ },
10829
+ {
10830
+ "epoch": 0.28633926935237314,
10831
+ "grad_norm": 36.03125,
10832
+ "learning_rate": 9.955259502136554e-06,
10833
+ "loss": 22.172,
10834
+ "step": 15430
10835
+ },
10836
+ {
10837
+ "epoch": 0.28652484243685294,
10838
+ "grad_norm": 33.59375,
10839
+ "learning_rate": 9.955230506350514e-06,
10840
+ "loss": 21.8491,
10841
+ "step": 15440
10842
+ },
10843
+ {
10844
+ "epoch": 0.2867104155213328,
10845
+ "grad_norm": 34.59375,
10846
+ "learning_rate": 9.955201510564471e-06,
10847
+ "loss": 22.2386,
10848
+ "step": 15450
10849
+ },
10850
+ {
10851
+ "epoch": 0.2868959886058126,
10852
+ "grad_norm": 34.71875,
10853
+ "learning_rate": 9.955172514778428e-06,
10854
+ "loss": 22.1716,
10855
+ "step": 15460
10856
+ },
10857
+ {
10858
+ "epoch": 0.2870815616902924,
10859
+ "grad_norm": 35.75,
10860
+ "learning_rate": 9.955143518992386e-06,
10861
+ "loss": 21.7604,
10862
+ "step": 15470
10863
+ },
10864
+ {
10865
+ "epoch": 0.2872671347747723,
10866
+ "grad_norm": 35.21875,
10867
+ "learning_rate": 9.955114523206343e-06,
10868
+ "loss": 21.8419,
10869
+ "step": 15480
10870
+ },
10871
+ {
10872
+ "epoch": 0.2874527078592521,
10873
+ "grad_norm": 35.96875,
10874
+ "learning_rate": 9.9550855274203e-06,
10875
+ "loss": 21.8912,
10876
+ "step": 15490
10877
+ },
10878
+ {
10879
+ "epoch": 0.2876382809437319,
10880
+ "grad_norm": 35.71875,
10881
+ "learning_rate": 9.955056531634258e-06,
10882
+ "loss": 22.5902,
10883
+ "step": 15500
10884
+ },
10885
+ {
10886
+ "epoch": 0.28782385402821176,
10887
+ "grad_norm": 36.15625,
10888
+ "learning_rate": 9.955027535848215e-06,
10889
+ "loss": 22.1192,
10890
+ "step": 15510
10891
+ },
10892
+ {
10893
+ "epoch": 0.28800942711269156,
10894
+ "grad_norm": 35.25,
10895
+ "learning_rate": 9.954998540062175e-06,
10896
+ "loss": 21.5665,
10897
+ "step": 15520
10898
+ },
10899
+ {
10900
+ "epoch": 0.2881950001971714,
10901
+ "grad_norm": 33.46875,
10902
+ "learning_rate": 9.95496954427613e-06,
10903
+ "loss": 21.8936,
10904
+ "step": 15530
10905
+ },
10906
+ {
10907
+ "epoch": 0.28838057328165123,
10908
+ "grad_norm": 34.5625,
10909
+ "learning_rate": 9.95494054849009e-06,
10910
+ "loss": 22.6283,
10911
+ "step": 15540
10912
+ },
10913
+ {
10914
+ "epoch": 0.28856614636613104,
10915
+ "grad_norm": 35.3125,
10916
+ "learning_rate": 9.954911552704047e-06,
10917
+ "loss": 22.3834,
10918
+ "step": 15550
10919
+ },
10920
+ {
10921
+ "epoch": 0.2887517194506109,
10922
+ "grad_norm": 37.84375,
10923
+ "learning_rate": 9.954882556918004e-06,
10924
+ "loss": 21.9524,
10925
+ "step": 15560
10926
+ },
10927
+ {
10928
+ "epoch": 0.2889372925350907,
10929
+ "grad_norm": 33.75,
10930
+ "learning_rate": 9.954853561131962e-06,
10931
+ "loss": 22.4104,
10932
+ "step": 15570
10933
+ },
10934
+ {
10935
+ "epoch": 0.2891228656195705,
10936
+ "grad_norm": 34.53125,
10937
+ "learning_rate": 9.954824565345919e-06,
10938
+ "loss": 22.7281,
10939
+ "step": 15580
10940
+ },
10941
+ {
10942
+ "epoch": 0.2893084387040504,
10943
+ "grad_norm": 34.9375,
10944
+ "learning_rate": 9.954795569559876e-06,
10945
+ "loss": 21.9499,
10946
+ "step": 15590
10947
+ },
10948
+ {
10949
+ "epoch": 0.2894940117885302,
10950
+ "grad_norm": 35.53125,
10951
+ "learning_rate": 9.954766573773834e-06,
10952
+ "loss": 21.9476,
10953
+ "step": 15600
10954
+ },
10955
+ {
10956
+ "epoch": 0.28967958487301004,
10957
+ "grad_norm": 33.3125,
10958
+ "learning_rate": 9.954737577987791e-06,
10959
+ "loss": 21.7338,
10960
+ "step": 15610
10961
+ },
10962
+ {
10963
+ "epoch": 0.28986515795748985,
10964
+ "grad_norm": 35.6875,
10965
+ "learning_rate": 9.95470858220175e-06,
10966
+ "loss": 21.9634,
10967
+ "step": 15620
10968
+ },
10969
+ {
10970
+ "epoch": 0.29005073104196966,
10971
+ "grad_norm": 35.875,
10972
+ "learning_rate": 9.954679586415708e-06,
10973
+ "loss": 21.9819,
10974
+ "step": 15630
10975
+ },
10976
+ {
10977
+ "epoch": 0.2902363041264495,
10978
+ "grad_norm": 36.6875,
10979
+ "learning_rate": 9.954650590629663e-06,
10980
+ "loss": 22.4904,
10981
+ "step": 15640
10982
+ },
10983
+ {
10984
+ "epoch": 0.2904218772109293,
10985
+ "grad_norm": 34.78125,
10986
+ "learning_rate": 9.954621594843623e-06,
10987
+ "loss": 22.7612,
10988
+ "step": 15650
10989
+ },
10990
+ {
10991
+ "epoch": 0.29060745029540913,
10992
+ "grad_norm": 35.3125,
10993
+ "learning_rate": 9.95459259905758e-06,
10994
+ "loss": 22.3808,
10995
+ "step": 15660
10996
+ },
10997
+ {
10998
+ "epoch": 0.290793023379889,
10999
+ "grad_norm": 36.09375,
11000
+ "learning_rate": 9.954563603271537e-06,
11001
+ "loss": 22.5321,
11002
+ "step": 15670
11003
+ },
11004
+ {
11005
+ "epoch": 0.2909785964643688,
11006
+ "grad_norm": 35.46875,
11007
+ "learning_rate": 9.954534607485495e-06,
11008
+ "loss": 22.4891,
11009
+ "step": 15680
11010
+ },
11011
+ {
11012
+ "epoch": 0.29116416954884866,
11013
+ "grad_norm": 34.5625,
11014
+ "learning_rate": 9.954505611699452e-06,
11015
+ "loss": 22.188,
11016
+ "step": 15690
11017
+ },
11018
+ {
11019
+ "epoch": 0.29134974263332847,
11020
+ "grad_norm": 36.5625,
11021
+ "learning_rate": 9.95447661591341e-06,
11022
+ "loss": 22.2892,
11023
+ "step": 15700
11024
+ },
11025
+ {
11026
+ "epoch": 0.2915353157178083,
11027
+ "grad_norm": 37.6875,
11028
+ "learning_rate": 9.954447620127367e-06,
11029
+ "loss": 22.1511,
11030
+ "step": 15710
11031
+ },
11032
+ {
11033
+ "epoch": 0.29172088880228814,
11034
+ "grad_norm": 36.15625,
11035
+ "learning_rate": 9.954418624341326e-06,
11036
+ "loss": 22.2926,
11037
+ "step": 15720
11038
+ },
11039
+ {
11040
+ "epoch": 0.29190646188676794,
11041
+ "grad_norm": 33.8125,
11042
+ "learning_rate": 9.954389628555283e-06,
11043
+ "loss": 22.241,
11044
+ "step": 15730
11045
+ },
11046
+ {
11047
+ "epoch": 0.29209203497124775,
11048
+ "grad_norm": 34.71875,
11049
+ "learning_rate": 9.95436063276924e-06,
11050
+ "loss": 22.334,
11051
+ "step": 15740
11052
+ },
11053
+ {
11054
+ "epoch": 0.2922776080557276,
11055
+ "grad_norm": 36.53125,
11056
+ "learning_rate": 9.954331636983198e-06,
11057
+ "loss": 22.4288,
11058
+ "step": 15750
11059
+ },
11060
+ {
11061
+ "epoch": 0.2924631811402074,
11062
+ "grad_norm": 35.25,
11063
+ "learning_rate": 9.954302641197156e-06,
11064
+ "loss": 22.411,
11065
+ "step": 15760
11066
+ },
11067
+ {
11068
+ "epoch": 0.2926487542246872,
11069
+ "grad_norm": 35.90625,
11070
+ "learning_rate": 9.954273645411113e-06,
11071
+ "loss": 21.8901,
11072
+ "step": 15770
11073
+ },
11074
+ {
11075
+ "epoch": 0.2928343273091671,
11076
+ "grad_norm": 36.59375,
11077
+ "learning_rate": 9.95424464962507e-06,
11078
+ "loss": 22.2214,
11079
+ "step": 15780
11080
+ },
11081
+ {
11082
+ "epoch": 0.2930199003936469,
11083
+ "grad_norm": 33.90625,
11084
+ "learning_rate": 9.95421565383903e-06,
11085
+ "loss": 21.8987,
11086
+ "step": 15790
11087
+ },
11088
+ {
11089
+ "epoch": 0.29320547347812675,
11090
+ "grad_norm": 35.90625,
11091
+ "learning_rate": 9.954186658052985e-06,
11092
+ "loss": 22.1996,
11093
+ "step": 15800
11094
+ },
11095
+ {
11096
+ "epoch": 0.29339104656260656,
11097
+ "grad_norm": 34.6875,
11098
+ "learning_rate": 9.954157662266943e-06,
11099
+ "loss": 22.1059,
11100
+ "step": 15810
11101
+ },
11102
+ {
11103
+ "epoch": 0.29357661964708637,
11104
+ "grad_norm": 35.3125,
11105
+ "learning_rate": 9.954128666480902e-06,
11106
+ "loss": 22.3283,
11107
+ "step": 15820
11108
+ },
11109
+ {
11110
+ "epoch": 0.29376219273156623,
11111
+ "grad_norm": 36.59375,
11112
+ "learning_rate": 9.95409967069486e-06,
11113
+ "loss": 22.438,
11114
+ "step": 15830
11115
+ },
11116
+ {
11117
+ "epoch": 0.29394776581604604,
11118
+ "grad_norm": 37.15625,
11119
+ "learning_rate": 9.954070674908817e-06,
11120
+ "loss": 22.409,
11121
+ "step": 15840
11122
+ },
11123
+ {
11124
+ "epoch": 0.29413333890052584,
11125
+ "grad_norm": 34.8125,
11126
+ "learning_rate": 9.954041679122774e-06,
11127
+ "loss": 22.524,
11128
+ "step": 15850
11129
+ },
11130
+ {
11131
+ "epoch": 0.2943189119850057,
11132
+ "grad_norm": 37.0625,
11133
+ "learning_rate": 9.954012683336731e-06,
11134
+ "loss": 22.6223,
11135
+ "step": 15860
11136
+ },
11137
+ {
11138
+ "epoch": 0.2945044850694855,
11139
+ "grad_norm": 35.0,
11140
+ "learning_rate": 9.953983687550689e-06,
11141
+ "loss": 22.2213,
11142
+ "step": 15870
11143
+ },
11144
+ {
11145
+ "epoch": 0.2946900581539654,
11146
+ "grad_norm": 33.8125,
11147
+ "learning_rate": 9.953954691764646e-06,
11148
+ "loss": 21.861,
11149
+ "step": 15880
11150
+ },
11151
+ {
11152
+ "epoch": 0.2948756312384452,
11153
+ "grad_norm": 33.59375,
11154
+ "learning_rate": 9.953925695978605e-06,
11155
+ "loss": 22.1122,
11156
+ "step": 15890
11157
+ },
11158
+ {
11159
+ "epoch": 0.295061204322925,
11160
+ "grad_norm": 35.90625,
11161
+ "learning_rate": 9.953896700192563e-06,
11162
+ "loss": 21.8675,
11163
+ "step": 15900
11164
+ },
11165
+ {
11166
+ "epoch": 0.29524677740740485,
11167
+ "grad_norm": 32.65625,
11168
+ "learning_rate": 9.953867704406518e-06,
11169
+ "loss": 22.0003,
11170
+ "step": 15910
11171
+ },
11172
+ {
11173
+ "epoch": 0.29543235049188465,
11174
+ "grad_norm": 33.84375,
11175
+ "learning_rate": 9.953838708620478e-06,
11176
+ "loss": 22.2325,
11177
+ "step": 15920
11178
+ },
11179
+ {
11180
+ "epoch": 0.29561792357636446,
11181
+ "grad_norm": 36.15625,
11182
+ "learning_rate": 9.953809712834435e-06,
11183
+ "loss": 22.2477,
11184
+ "step": 15930
11185
+ },
11186
+ {
11187
+ "epoch": 0.2958034966608443,
11188
+ "grad_norm": 35.28125,
11189
+ "learning_rate": 9.953780717048392e-06,
11190
+ "loss": 22.0163,
11191
+ "step": 15940
11192
+ },
11193
+ {
11194
+ "epoch": 0.29598906974532413,
11195
+ "grad_norm": 35.84375,
11196
+ "learning_rate": 9.95375172126235e-06,
11197
+ "loss": 21.9324,
11198
+ "step": 15950
11199
+ },
11200
+ {
11201
+ "epoch": 0.296174642829804,
11202
+ "grad_norm": 36.90625,
11203
+ "learning_rate": 9.953722725476307e-06,
11204
+ "loss": 22.0944,
11205
+ "step": 15960
11206
+ },
11207
+ {
11208
+ "epoch": 0.2963602159142838,
11209
+ "grad_norm": 36.9375,
11210
+ "learning_rate": 9.953693729690265e-06,
11211
+ "loss": 22.2074,
11212
+ "step": 15970
11213
+ },
11214
+ {
11215
+ "epoch": 0.2965457889987636,
11216
+ "grad_norm": 35.84375,
11217
+ "learning_rate": 9.953664733904222e-06,
11218
+ "loss": 22.2047,
11219
+ "step": 15980
11220
+ },
11221
+ {
11222
+ "epoch": 0.29673136208324347,
11223
+ "grad_norm": 36.09375,
11224
+ "learning_rate": 9.953635738118181e-06,
11225
+ "loss": 22.3672,
11226
+ "step": 15990
11227
+ },
11228
+ {
11229
+ "epoch": 0.2969169351677233,
11230
+ "grad_norm": 33.78125,
11231
+ "learning_rate": 9.953606742332139e-06,
11232
+ "loss": 22.2694,
11233
+ "step": 16000
11234
+ },
11235
+ {
11236
+ "epoch": 0.2971025082522031,
11237
+ "grad_norm": 36.46875,
11238
+ "learning_rate": 9.953577746546094e-06,
11239
+ "loss": 21.7142,
11240
+ "step": 16010
11241
+ },
11242
+ {
11243
+ "epoch": 0.29728808133668294,
11244
+ "grad_norm": 34.09375,
11245
+ "learning_rate": 9.953548750760053e-06,
11246
+ "loss": 21.9962,
11247
+ "step": 16020
11248
+ },
11249
+ {
11250
+ "epoch": 0.29747365442116275,
11251
+ "grad_norm": 36.59375,
11252
+ "learning_rate": 9.95351975497401e-06,
11253
+ "loss": 22.0277,
11254
+ "step": 16030
11255
+ },
11256
+ {
11257
+ "epoch": 0.29765922750564255,
11258
+ "grad_norm": 33.15625,
11259
+ "learning_rate": 9.953490759187968e-06,
11260
+ "loss": 22.0372,
11261
+ "step": 16040
11262
+ },
11263
+ {
11264
+ "epoch": 0.2978448005901224,
11265
+ "grad_norm": 34.8125,
11266
+ "learning_rate": 9.953461763401926e-06,
11267
+ "loss": 22.3242,
11268
+ "step": 16050
11269
+ },
11270
+ {
11271
+ "epoch": 0.2980303736746022,
11272
+ "grad_norm": 36.15625,
11273
+ "learning_rate": 9.953432767615883e-06,
11274
+ "loss": 21.9268,
11275
+ "step": 16060
11276
+ },
11277
+ {
11278
+ "epoch": 0.2982159467590821,
11279
+ "grad_norm": 34.8125,
11280
+ "learning_rate": 9.95340377182984e-06,
11281
+ "loss": 21.7896,
11282
+ "step": 16070
11283
+ },
11284
+ {
11285
+ "epoch": 0.2984015198435619,
11286
+ "grad_norm": 35.625,
11287
+ "learning_rate": 9.953374776043798e-06,
11288
+ "loss": 21.933,
11289
+ "step": 16080
11290
+ },
11291
+ {
11292
+ "epoch": 0.2985870929280417,
11293
+ "grad_norm": 34.5625,
11294
+ "learning_rate": 9.953345780257755e-06,
11295
+ "loss": 22.3957,
11296
+ "step": 16090
11297
+ },
11298
+ {
11299
+ "epoch": 0.29877266601252156,
11300
+ "grad_norm": 34.3125,
11301
+ "learning_rate": 9.953316784471714e-06,
11302
+ "loss": 22.267,
11303
+ "step": 16100
11304
+ },
11305
+ {
11306
+ "epoch": 0.29895823909700137,
11307
+ "grad_norm": 34.21875,
11308
+ "learning_rate": 9.953287788685672e-06,
11309
+ "loss": 22.1046,
11310
+ "step": 16110
11311
+ },
11312
+ {
11313
+ "epoch": 0.2991438121814812,
11314
+ "grad_norm": 32.5625,
11315
+ "learning_rate": 9.953258792899627e-06,
11316
+ "loss": 22.5077,
11317
+ "step": 16120
11318
+ },
11319
+ {
11320
+ "epoch": 0.29932938526596103,
11321
+ "grad_norm": 35.28125,
11322
+ "learning_rate": 9.953229797113587e-06,
11323
+ "loss": 22.0254,
11324
+ "step": 16130
11325
+ },
11326
+ {
11327
+ "epoch": 0.29951495835044084,
11328
+ "grad_norm": 35.53125,
11329
+ "learning_rate": 9.953200801327544e-06,
11330
+ "loss": 21.9783,
11331
+ "step": 16140
11332
+ },
11333
+ {
11334
+ "epoch": 0.2997005314349207,
11335
+ "grad_norm": 35.96875,
11336
+ "learning_rate": 9.953171805541501e-06,
11337
+ "loss": 22.4283,
11338
+ "step": 16150
11339
+ },
11340
+ {
11341
+ "epoch": 0.2998861045194005,
11342
+ "grad_norm": 35.34375,
11343
+ "learning_rate": 9.953142809755459e-06,
11344
+ "loss": 21.9178,
11345
+ "step": 16160
11346
+ },
11347
+ {
11348
+ "epoch": 0.3000716776038803,
11349
+ "grad_norm": 35.5625,
11350
+ "learning_rate": 9.953113813969416e-06,
11351
+ "loss": 21.8685,
11352
+ "step": 16170
11353
+ },
11354
+ {
11355
+ "epoch": 0.3002572506883602,
11356
+ "grad_norm": 33.65625,
11357
+ "learning_rate": 9.953084818183374e-06,
11358
+ "loss": 22.4529,
11359
+ "step": 16180
11360
+ },
11361
+ {
11362
+ "epoch": 0.30044282377284,
11363
+ "grad_norm": 32.5,
11364
+ "learning_rate": 9.953055822397331e-06,
11365
+ "loss": 21.9859,
11366
+ "step": 16190
11367
+ },
11368
+ {
11369
+ "epoch": 0.3006283968573198,
11370
+ "grad_norm": 35.65625,
11371
+ "learning_rate": 9.95302682661129e-06,
11372
+ "loss": 21.9968,
11373
+ "step": 16200
11374
+ },
11375
+ {
11376
+ "epoch": 0.30081396994179965,
11377
+ "grad_norm": 35.90625,
11378
+ "learning_rate": 9.952997830825247e-06,
11379
+ "loss": 22.1074,
11380
+ "step": 16210
11381
+ },
11382
+ {
11383
+ "epoch": 0.30099954302627946,
11384
+ "grad_norm": 37.0625,
11385
+ "learning_rate": 9.952968835039205e-06,
11386
+ "loss": 21.8784,
11387
+ "step": 16220
11388
+ },
11389
+ {
11390
+ "epoch": 0.3011851161107593,
11391
+ "grad_norm": 36.71875,
11392
+ "learning_rate": 9.952939839253162e-06,
11393
+ "loss": 22.3373,
11394
+ "step": 16230
11395
+ },
11396
+ {
11397
+ "epoch": 0.3013706891952391,
11398
+ "grad_norm": 35.8125,
11399
+ "learning_rate": 9.95291084346712e-06,
11400
+ "loss": 21.973,
11401
+ "step": 16240
11402
+ },
11403
+ {
11404
+ "epoch": 0.30155626227971893,
11405
+ "grad_norm": 32.625,
11406
+ "learning_rate": 9.952881847681077e-06,
11407
+ "loss": 22.0185,
11408
+ "step": 16250
11409
+ },
11410
+ {
11411
+ "epoch": 0.3017418353641988,
11412
+ "grad_norm": 37.875,
11413
+ "learning_rate": 9.952852851895035e-06,
11414
+ "loss": 21.9603,
11415
+ "step": 16260
11416
+ },
11417
+ {
11418
+ "epoch": 0.3019274084486786,
11419
+ "grad_norm": 37.28125,
11420
+ "learning_rate": 9.952823856108994e-06,
11421
+ "loss": 22.3666,
11422
+ "step": 16270
11423
+ },
11424
+ {
11425
+ "epoch": 0.3021129815331584,
11426
+ "grad_norm": 35.65625,
11427
+ "learning_rate": 9.95279486032295e-06,
11428
+ "loss": 21.8065,
11429
+ "step": 16280
11430
+ },
11431
+ {
11432
+ "epoch": 0.30229855461763827,
11433
+ "grad_norm": 35.90625,
11434
+ "learning_rate": 9.952765864536907e-06,
11435
+ "loss": 22.0894,
11436
+ "step": 16290
11437
+ },
11438
+ {
11439
+ "epoch": 0.3024841277021181,
11440
+ "grad_norm": 36.25,
11441
+ "learning_rate": 9.952736868750866e-06,
11442
+ "loss": 22.6013,
11443
+ "step": 16300
11444
+ },
11445
+ {
11446
+ "epoch": 0.30266970078659794,
11447
+ "grad_norm": 36.34375,
11448
+ "learning_rate": 9.952707872964823e-06,
11449
+ "loss": 22.0674,
11450
+ "step": 16310
11451
+ },
11452
+ {
11453
+ "epoch": 0.30285527387107775,
11454
+ "grad_norm": 35.28125,
11455
+ "learning_rate": 9.95267887717878e-06,
11456
+ "loss": 21.6102,
11457
+ "step": 16320
11458
+ },
11459
+ {
11460
+ "epoch": 0.30304084695555755,
11461
+ "grad_norm": 35.53125,
11462
+ "learning_rate": 9.952649881392738e-06,
11463
+ "loss": 22.01,
11464
+ "step": 16330
11465
+ },
11466
+ {
11467
+ "epoch": 0.3032264200400374,
11468
+ "grad_norm": 36.21875,
11469
+ "learning_rate": 9.952620885606695e-06,
11470
+ "loss": 22.1756,
11471
+ "step": 16340
11472
+ },
11473
+ {
11474
+ "epoch": 0.3034119931245172,
11475
+ "grad_norm": 36.0625,
11476
+ "learning_rate": 9.952591889820653e-06,
11477
+ "loss": 22.1455,
11478
+ "step": 16350
11479
+ },
11480
+ {
11481
+ "epoch": 0.303597566208997,
11482
+ "grad_norm": 34.65625,
11483
+ "learning_rate": 9.95256289403461e-06,
11484
+ "loss": 22.1752,
11485
+ "step": 16360
11486
+ },
11487
+ {
11488
+ "epoch": 0.3037831392934769,
11489
+ "grad_norm": 34.75,
11490
+ "learning_rate": 9.95253389824857e-06,
11491
+ "loss": 22.277,
11492
+ "step": 16370
11493
+ },
11494
+ {
11495
+ "epoch": 0.3039687123779567,
11496
+ "grad_norm": 34.15625,
11497
+ "learning_rate": 9.952504902462527e-06,
11498
+ "loss": 22.5551,
11499
+ "step": 16380
11500
+ },
11501
+ {
11502
+ "epoch": 0.3041542854624365,
11503
+ "grad_norm": 34.0,
11504
+ "learning_rate": 9.952475906676482e-06,
11505
+ "loss": 21.9491,
11506
+ "step": 16390
11507
+ },
11508
+ {
11509
+ "epoch": 0.30433985854691636,
11510
+ "grad_norm": 35.21875,
11511
+ "learning_rate": 9.952446910890442e-06,
11512
+ "loss": 21.8338,
11513
+ "step": 16400
11514
+ },
11515
+ {
11516
+ "epoch": 0.30452543163139617,
11517
+ "grad_norm": 33.625,
11518
+ "learning_rate": 9.952417915104399e-06,
11519
+ "loss": 21.8369,
11520
+ "step": 16410
11521
+ },
11522
+ {
11523
+ "epoch": 0.30471100471587603,
11524
+ "grad_norm": 35.21875,
11525
+ "learning_rate": 9.952388919318356e-06,
11526
+ "loss": 22.0681,
11527
+ "step": 16420
11528
+ },
11529
+ {
11530
+ "epoch": 0.30489657780035584,
11531
+ "grad_norm": 34.59375,
11532
+ "learning_rate": 9.952359923532314e-06,
11533
+ "loss": 21.8606,
11534
+ "step": 16430
11535
+ },
11536
+ {
11537
+ "epoch": 0.30508215088483565,
11538
+ "grad_norm": 35.34375,
11539
+ "learning_rate": 9.952330927746271e-06,
11540
+ "loss": 21.8315,
11541
+ "step": 16440
11542
+ },
11543
+ {
11544
+ "epoch": 0.3052677239693155,
11545
+ "grad_norm": 35.09375,
11546
+ "learning_rate": 9.952301931960229e-06,
11547
+ "loss": 22.2084,
11548
+ "step": 16450
11549
+ },
11550
+ {
11551
+ "epoch": 0.3054532970537953,
11552
+ "grad_norm": 37.125,
11553
+ "learning_rate": 9.952272936174186e-06,
11554
+ "loss": 21.4615,
11555
+ "step": 16460
11556
+ },
11557
+ {
11558
+ "epoch": 0.3056388701382751,
11559
+ "grad_norm": 35.28125,
11560
+ "learning_rate": 9.952243940388145e-06,
11561
+ "loss": 21.9661,
11562
+ "step": 16470
11563
+ },
11564
+ {
11565
+ "epoch": 0.305824443222755,
11566
+ "grad_norm": 37.3125,
11567
+ "learning_rate": 9.952214944602103e-06,
11568
+ "loss": 21.8837,
11569
+ "step": 16480
11570
+ },
11571
+ {
11572
+ "epoch": 0.3060100163072348,
11573
+ "grad_norm": 34.4375,
11574
+ "learning_rate": 9.95218594881606e-06,
11575
+ "loss": 22.3747,
11576
+ "step": 16490
11577
+ },
11578
+ {
11579
+ "epoch": 0.30619558939171465,
11580
+ "grad_norm": 34.125,
11581
+ "learning_rate": 9.952156953030017e-06,
11582
+ "loss": 21.9814,
11583
+ "step": 16500
11584
+ },
11585
+ {
11586
+ "epoch": 0.30638116247619446,
11587
+ "grad_norm": 34.78125,
11588
+ "learning_rate": 9.952127957243975e-06,
11589
+ "loss": 22.081,
11590
+ "step": 16510
11591
+ },
11592
+ {
11593
+ "epoch": 0.30656673556067426,
11594
+ "grad_norm": 34.0625,
11595
+ "learning_rate": 9.952098961457932e-06,
11596
+ "loss": 22.3669,
11597
+ "step": 16520
11598
+ },
11599
+ {
11600
+ "epoch": 0.3067523086451541,
11601
+ "grad_norm": 36.40625,
11602
+ "learning_rate": 9.95206996567189e-06,
11603
+ "loss": 21.9809,
11604
+ "step": 16530
11605
+ },
11606
+ {
11607
+ "epoch": 0.30693788172963393,
11608
+ "grad_norm": 34.09375,
11609
+ "learning_rate": 9.952040969885847e-06,
11610
+ "loss": 21.7875,
11611
+ "step": 16540
11612
+ },
11613
+ {
11614
+ "epoch": 0.30712345481411374,
11615
+ "grad_norm": 35.71875,
11616
+ "learning_rate": 9.952011974099804e-06,
11617
+ "loss": 21.9626,
11618
+ "step": 16550
11619
+ },
11620
+ {
11621
+ "epoch": 0.3073090278985936,
11622
+ "grad_norm": 38.78125,
11623
+ "learning_rate": 9.951982978313762e-06,
11624
+ "loss": 21.6948,
11625
+ "step": 16560
11626
+ },
11627
+ {
11628
+ "epoch": 0.3074946009830734,
11629
+ "grad_norm": 33.6875,
11630
+ "learning_rate": 9.95195398252772e-06,
11631
+ "loss": 21.1979,
11632
+ "step": 16570
11633
+ },
11634
+ {
11635
+ "epoch": 0.30768017406755327,
11636
+ "grad_norm": 33.4375,
11637
+ "learning_rate": 9.951924986741678e-06,
11638
+ "loss": 22.1714,
11639
+ "step": 16580
11640
+ },
11641
+ {
11642
+ "epoch": 0.3078657471520331,
11643
+ "grad_norm": 34.78125,
11644
+ "learning_rate": 9.951895990955636e-06,
11645
+ "loss": 21.804,
11646
+ "step": 16590
11647
+ },
11648
+ {
11649
+ "epoch": 0.3080513202365129,
11650
+ "grad_norm": 34.09375,
11651
+ "learning_rate": 9.951866995169593e-06,
11652
+ "loss": 21.6856,
11653
+ "step": 16600
11654
+ },
11655
+ {
11656
+ "epoch": 0.30823689332099274,
11657
+ "grad_norm": 33.34375,
11658
+ "learning_rate": 9.95183799938355e-06,
11659
+ "loss": 22.2583,
11660
+ "step": 16610
11661
+ },
11662
+ {
11663
+ "epoch": 0.30842246640547255,
11664
+ "grad_norm": 34.15625,
11665
+ "learning_rate": 9.951809003597508e-06,
11666
+ "loss": 22.07,
11667
+ "step": 16620
11668
+ },
11669
+ {
11670
+ "epoch": 0.30860803948995236,
11671
+ "grad_norm": 35.84375,
11672
+ "learning_rate": 9.951780007811465e-06,
11673
+ "loss": 21.9016,
11674
+ "step": 16630
11675
+ },
11676
+ {
11677
+ "epoch": 0.3087936125744322,
11678
+ "grad_norm": 33.75,
11679
+ "learning_rate": 9.951751012025423e-06,
11680
+ "loss": 21.72,
11681
+ "step": 16640
11682
+ },
11683
+ {
11684
+ "epoch": 0.308979185658912,
11685
+ "grad_norm": 35.28125,
11686
+ "learning_rate": 9.951722016239382e-06,
11687
+ "loss": 21.76,
11688
+ "step": 16650
11689
+ },
11690
+ {
11691
+ "epoch": 0.30916475874339183,
11692
+ "grad_norm": 33.96875,
11693
+ "learning_rate": 9.951693020453338e-06,
11694
+ "loss": 22.1734,
11695
+ "step": 16660
11696
+ },
11697
+ {
11698
+ "epoch": 0.3093503318278717,
11699
+ "grad_norm": 34.9375,
11700
+ "learning_rate": 9.951664024667295e-06,
11701
+ "loss": 21.7641,
11702
+ "step": 16670
11703
+ },
11704
+ {
11705
+ "epoch": 0.3095359049123515,
11706
+ "grad_norm": 33.5625,
11707
+ "learning_rate": 9.951635028881254e-06,
11708
+ "loss": 21.7486,
11709
+ "step": 16680
11710
+ },
11711
+ {
11712
+ "epoch": 0.30972147799683136,
11713
+ "grad_norm": 35.65625,
11714
+ "learning_rate": 9.951606033095211e-06,
11715
+ "loss": 22.0392,
11716
+ "step": 16690
11717
+ },
11718
+ {
11719
+ "epoch": 0.30990705108131117,
11720
+ "grad_norm": 35.84375,
11721
+ "learning_rate": 9.951577037309169e-06,
11722
+ "loss": 21.8191,
11723
+ "step": 16700
11724
+ },
11725
+ {
11726
+ "epoch": 0.310092624165791,
11727
+ "grad_norm": 32.84375,
11728
+ "learning_rate": 9.951548041523126e-06,
11729
+ "loss": 21.7801,
11730
+ "step": 16710
11731
+ },
11732
+ {
11733
+ "epoch": 0.31027819725027084,
11734
+ "grad_norm": 38.625,
11735
+ "learning_rate": 9.951519045737084e-06,
11736
+ "loss": 22.5178,
11737
+ "step": 16720
11738
+ },
11739
+ {
11740
+ "epoch": 0.31046377033475064,
11741
+ "grad_norm": 35.28125,
11742
+ "learning_rate": 9.951490049951041e-06,
11743
+ "loss": 21.7387,
11744
+ "step": 16730
11745
+ },
11746
+ {
11747
+ "epoch": 0.31064934341923045,
11748
+ "grad_norm": 34.9375,
11749
+ "learning_rate": 9.951461054164999e-06,
11750
+ "loss": 22.385,
11751
+ "step": 16740
11752
+ },
11753
+ {
11754
+ "epoch": 0.3108349165037103,
11755
+ "grad_norm": 35.0,
11756
+ "learning_rate": 9.951432058378958e-06,
11757
+ "loss": 22.0111,
11758
+ "step": 16750
11759
+ },
11760
+ {
11761
+ "epoch": 0.3110204895881901,
11762
+ "grad_norm": 34.25,
11763
+ "learning_rate": 9.951403062592913e-06,
11764
+ "loss": 21.9642,
11765
+ "step": 16760
11766
+ },
11767
+ {
11768
+ "epoch": 0.31120606267267,
11769
+ "grad_norm": 35.59375,
11770
+ "learning_rate": 9.95137406680687e-06,
11771
+ "loss": 21.8199,
11772
+ "step": 16770
11773
+ },
11774
+ {
11775
+ "epoch": 0.3113916357571498,
11776
+ "grad_norm": 36.8125,
11777
+ "learning_rate": 9.95134507102083e-06,
11778
+ "loss": 21.8454,
11779
+ "step": 16780
11780
+ },
11781
+ {
11782
+ "epoch": 0.3115772088416296,
11783
+ "grad_norm": 34.875,
11784
+ "learning_rate": 9.951316075234787e-06,
11785
+ "loss": 21.7867,
11786
+ "step": 16790
11787
+ },
11788
+ {
11789
+ "epoch": 0.31176278192610946,
11790
+ "grad_norm": 34.875,
11791
+ "learning_rate": 9.951287079448745e-06,
11792
+ "loss": 21.7425,
11793
+ "step": 16800
11794
+ },
11795
+ {
11796
+ "epoch": 0.31194835501058926,
11797
+ "grad_norm": 33.875,
11798
+ "learning_rate": 9.951258083662702e-06,
11799
+ "loss": 22.1539,
11800
+ "step": 16810
11801
+ },
11802
+ {
11803
+ "epoch": 0.31213392809506907,
11804
+ "grad_norm": 33.5625,
11805
+ "learning_rate": 9.95122908787666e-06,
11806
+ "loss": 22.1815,
11807
+ "step": 16820
11808
+ },
11809
+ {
11810
+ "epoch": 0.31231950117954893,
11811
+ "grad_norm": 34.25,
11812
+ "learning_rate": 9.951200092090617e-06,
11813
+ "loss": 21.9751,
11814
+ "step": 16830
11815
+ },
11816
+ {
11817
+ "epoch": 0.31250507426402874,
11818
+ "grad_norm": 35.46875,
11819
+ "learning_rate": 9.951171096304574e-06,
11820
+ "loss": 22.1181,
11821
+ "step": 16840
11822
+ },
11823
+ {
11824
+ "epoch": 0.3126906473485086,
11825
+ "grad_norm": 35.75,
11826
+ "learning_rate": 9.951142100518533e-06,
11827
+ "loss": 21.8169,
11828
+ "step": 16850
11829
+ },
11830
+ {
11831
+ "epoch": 0.3128762204329884,
11832
+ "grad_norm": 37.78125,
11833
+ "learning_rate": 9.95111310473249e-06,
11834
+ "loss": 22.324,
11835
+ "step": 16860
11836
+ },
11837
+ {
11838
+ "epoch": 0.3130617935174682,
11839
+ "grad_norm": 36.25,
11840
+ "learning_rate": 9.951084108946447e-06,
11841
+ "loss": 22.0339,
11842
+ "step": 16870
11843
+ },
11844
+ {
11845
+ "epoch": 0.3132473666019481,
11846
+ "grad_norm": 33.53125,
11847
+ "learning_rate": 9.951055113160406e-06,
11848
+ "loss": 22.3883,
11849
+ "step": 16880
11850
+ },
11851
+ {
11852
+ "epoch": 0.3134329396864279,
11853
+ "grad_norm": 35.71875,
11854
+ "learning_rate": 9.951026117374363e-06,
11855
+ "loss": 21.4248,
11856
+ "step": 16890
11857
+ },
11858
+ {
11859
+ "epoch": 0.3136185127709077,
11860
+ "grad_norm": 34.0625,
11861
+ "learning_rate": 9.95099712158832e-06,
11862
+ "loss": 21.7226,
11863
+ "step": 16900
11864
+ },
11865
+ {
11866
+ "epoch": 0.31380408585538755,
11867
+ "grad_norm": 34.71875,
11868
+ "learning_rate": 9.950968125802278e-06,
11869
+ "loss": 22.1848,
11870
+ "step": 16910
11871
+ },
11872
+ {
11873
+ "epoch": 0.31398965893986736,
11874
+ "grad_norm": 35.6875,
11875
+ "learning_rate": 9.950939130016235e-06,
11876
+ "loss": 22.076,
11877
+ "step": 16920
11878
+ },
11879
+ {
11880
+ "epoch": 0.31417523202434716,
11881
+ "grad_norm": 36.5625,
11882
+ "learning_rate": 9.950910134230193e-06,
11883
+ "loss": 22.3584,
11884
+ "step": 16930
11885
+ },
11886
+ {
11887
+ "epoch": 0.314360805108827,
11888
+ "grad_norm": 35.78125,
11889
+ "learning_rate": 9.95088113844415e-06,
11890
+ "loss": 22.2669,
11891
+ "step": 16940
11892
+ },
11893
+ {
11894
+ "epoch": 0.31454637819330683,
11895
+ "grad_norm": 33.875,
11896
+ "learning_rate": 9.95085214265811e-06,
11897
+ "loss": 22.3504,
11898
+ "step": 16950
11899
+ },
11900
+ {
11901
+ "epoch": 0.3147319512777867,
11902
+ "grad_norm": 35.03125,
11903
+ "learning_rate": 9.950823146872067e-06,
11904
+ "loss": 21.7265,
11905
+ "step": 16960
11906
+ },
11907
+ {
11908
+ "epoch": 0.3149175243622665,
11909
+ "grad_norm": 33.75,
11910
+ "learning_rate": 9.950794151086024e-06,
11911
+ "loss": 22.3763,
11912
+ "step": 16970
11913
+ },
11914
+ {
11915
+ "epoch": 0.3151030974467463,
11916
+ "grad_norm": 33.25,
11917
+ "learning_rate": 9.950765155299981e-06,
11918
+ "loss": 21.5872,
11919
+ "step": 16980
11920
+ },
11921
+ {
11922
+ "epoch": 0.31528867053122617,
11923
+ "grad_norm": 35.1875,
11924
+ "learning_rate": 9.950736159513939e-06,
11925
+ "loss": 22.0631,
11926
+ "step": 16990
11927
+ },
11928
+ {
11929
+ "epoch": 0.315474243615706,
11930
+ "grad_norm": 35.46875,
11931
+ "learning_rate": 9.950707163727896e-06,
11932
+ "loss": 22.1039,
11933
+ "step": 17000
11934
+ },
11935
+ {
11936
+ "epoch": 0.3156598167001858,
11937
+ "grad_norm": 35.5625,
11938
+ "learning_rate": 9.950678167941854e-06,
11939
+ "loss": 21.9955,
11940
+ "step": 17010
11941
+ },
11942
+ {
11943
+ "epoch": 0.31584538978466564,
11944
+ "grad_norm": 35.1875,
11945
+ "learning_rate": 9.950649172155811e-06,
11946
+ "loss": 21.5005,
11947
+ "step": 17020
11948
+ },
11949
+ {
11950
+ "epoch": 0.31603096286914545,
11951
+ "grad_norm": 36.375,
11952
+ "learning_rate": 9.950620176369768e-06,
11953
+ "loss": 21.925,
11954
+ "step": 17030
11955
+ },
11956
+ {
11957
+ "epoch": 0.3162165359536253,
11958
+ "grad_norm": 34.5,
11959
+ "learning_rate": 9.950591180583726e-06,
11960
+ "loss": 21.8868,
11961
+ "step": 17040
11962
+ },
11963
+ {
11964
+ "epoch": 0.3164021090381051,
11965
+ "grad_norm": 35.03125,
11966
+ "learning_rate": 9.950562184797685e-06,
11967
+ "loss": 22.0574,
11968
+ "step": 17050
11969
+ },
11970
+ {
11971
+ "epoch": 0.3165876821225849,
11972
+ "grad_norm": 36.71875,
11973
+ "learning_rate": 9.950533189011642e-06,
11974
+ "loss": 22.0364,
11975
+ "step": 17060
11976
+ },
11977
+ {
11978
+ "epoch": 0.3167732552070648,
11979
+ "grad_norm": 33.875,
11980
+ "learning_rate": 9.9505041932256e-06,
11981
+ "loss": 21.8544,
11982
+ "step": 17070
11983
+ },
11984
+ {
11985
+ "epoch": 0.3169588282915446,
11986
+ "grad_norm": 33.75,
11987
+ "learning_rate": 9.950475197439557e-06,
11988
+ "loss": 21.9953,
11989
+ "step": 17080
11990
+ },
11991
+ {
11992
+ "epoch": 0.3171444013760244,
11993
+ "grad_norm": 34.78125,
11994
+ "learning_rate": 9.950446201653515e-06,
11995
+ "loss": 22.0436,
11996
+ "step": 17090
11997
+ },
11998
+ {
11999
+ "epoch": 0.31732997446050426,
12000
+ "grad_norm": 36.59375,
12001
+ "learning_rate": 9.950417205867472e-06,
12002
+ "loss": 21.6638,
12003
+ "step": 17100
12004
+ },
12005
+ {
12006
+ "epoch": 0.31751554754498407,
12007
+ "grad_norm": 34.0625,
12008
+ "learning_rate": 9.95038821008143e-06,
12009
+ "loss": 21.9026,
12010
+ "step": 17110
12011
+ },
12012
+ {
12013
+ "epoch": 0.31770112062946393,
12014
+ "grad_norm": 35.375,
12015
+ "learning_rate": 9.950359214295387e-06,
12016
+ "loss": 21.7069,
12017
+ "step": 17120
12018
+ },
12019
+ {
12020
+ "epoch": 0.31788669371394374,
12021
+ "grad_norm": 34.84375,
12022
+ "learning_rate": 9.950330218509346e-06,
12023
+ "loss": 21.673,
12024
+ "step": 17130
12025
+ },
12026
+ {
12027
+ "epoch": 0.31807226679842354,
12028
+ "grad_norm": 35.78125,
12029
+ "learning_rate": 9.950301222723302e-06,
12030
+ "loss": 21.9466,
12031
+ "step": 17140
12032
+ },
12033
+ {
12034
+ "epoch": 0.3182578398829034,
12035
+ "grad_norm": 37.34375,
12036
+ "learning_rate": 9.950272226937259e-06,
12037
+ "loss": 21.7084,
12038
+ "step": 17150
12039
+ },
12040
+ {
12041
+ "epoch": 0.3184434129673832,
12042
+ "grad_norm": 34.03125,
12043
+ "learning_rate": 9.950243231151218e-06,
12044
+ "loss": 21.572,
12045
+ "step": 17160
12046
+ },
12047
+ {
12048
+ "epoch": 0.318628986051863,
12049
+ "grad_norm": 35.9375,
12050
+ "learning_rate": 9.950214235365176e-06,
12051
+ "loss": 22.2523,
12052
+ "step": 17170
12053
+ },
12054
+ {
12055
+ "epoch": 0.3188145591363429,
12056
+ "grad_norm": 34.34375,
12057
+ "learning_rate": 9.950185239579133e-06,
12058
+ "loss": 22.0218,
12059
+ "step": 17180
12060
+ },
12061
+ {
12062
+ "epoch": 0.3190001322208227,
12063
+ "grad_norm": 33.78125,
12064
+ "learning_rate": 9.95015624379309e-06,
12065
+ "loss": 21.2069,
12066
+ "step": 17190
12067
+ },
12068
+ {
12069
+ "epoch": 0.3191857053053025,
12070
+ "grad_norm": 34.90625,
12071
+ "learning_rate": 9.950127248007048e-06,
12072
+ "loss": 22.0038,
12073
+ "step": 17200
12074
+ },
12075
+ {
12076
+ "epoch": 0.31937127838978235,
12077
+ "grad_norm": 33.375,
12078
+ "learning_rate": 9.950098252221005e-06,
12079
+ "loss": 21.5058,
12080
+ "step": 17210
12081
+ },
12082
+ {
12083
+ "epoch": 0.31955685147426216,
12084
+ "grad_norm": 34.34375,
12085
+ "learning_rate": 9.950069256434963e-06,
12086
+ "loss": 21.8254,
12087
+ "step": 17220
12088
+ },
12089
+ {
12090
+ "epoch": 0.319742424558742,
12091
+ "grad_norm": 36.15625,
12092
+ "learning_rate": 9.950040260648922e-06,
12093
+ "loss": 21.5007,
12094
+ "step": 17230
12095
+ },
12096
+ {
12097
+ "epoch": 0.31992799764322183,
12098
+ "grad_norm": 33.84375,
12099
+ "learning_rate": 9.950011264862879e-06,
12100
+ "loss": 22.6218,
12101
+ "step": 17240
12102
+ },
12103
+ {
12104
+ "epoch": 0.32011357072770164,
12105
+ "grad_norm": 34.96875,
12106
+ "learning_rate": 9.949982269076835e-06,
12107
+ "loss": 21.892,
12108
+ "step": 17250
12109
+ },
12110
+ {
12111
+ "epoch": 0.3202991438121815,
12112
+ "grad_norm": 35.15625,
12113
+ "learning_rate": 9.949953273290794e-06,
12114
+ "loss": 21.8203,
12115
+ "step": 17260
12116
+ },
12117
+ {
12118
+ "epoch": 0.3204847168966613,
12119
+ "grad_norm": 32.78125,
12120
+ "learning_rate": 9.949924277504751e-06,
12121
+ "loss": 22.0928,
12122
+ "step": 17270
12123
+ },
12124
+ {
12125
+ "epoch": 0.3206702899811411,
12126
+ "grad_norm": 34.09375,
12127
+ "learning_rate": 9.949895281718709e-06,
12128
+ "loss": 22.1172,
12129
+ "step": 17280
12130
+ },
12131
+ {
12132
+ "epoch": 0.32085586306562097,
12133
+ "grad_norm": 37.1875,
12134
+ "learning_rate": 9.949866285932666e-06,
12135
+ "loss": 21.4919,
12136
+ "step": 17290
12137
+ },
12138
+ {
12139
+ "epoch": 0.3210414361501008,
12140
+ "grad_norm": 36.09375,
12141
+ "learning_rate": 9.949837290146623e-06,
12142
+ "loss": 21.8468,
12143
+ "step": 17300
12144
+ },
12145
+ {
12146
+ "epoch": 0.32122700923458064,
12147
+ "grad_norm": 36.21875,
12148
+ "learning_rate": 9.949808294360581e-06,
12149
+ "loss": 21.9655,
12150
+ "step": 17310
12151
+ },
12152
+ {
12153
+ "epoch": 0.32141258231906045,
12154
+ "grad_norm": 35.8125,
12155
+ "learning_rate": 9.949779298574538e-06,
12156
+ "loss": 21.8789,
12157
+ "step": 17320
12158
+ },
12159
+ {
12160
+ "epoch": 0.32159815540354025,
12161
+ "grad_norm": 35.9375,
12162
+ "learning_rate": 9.949750302788497e-06,
12163
+ "loss": 21.9964,
12164
+ "step": 17330
12165
+ },
12166
+ {
12167
+ "epoch": 0.3217837284880201,
12168
+ "grad_norm": 35.40625,
12169
+ "learning_rate": 9.949721307002455e-06,
12170
+ "loss": 22.188,
12171
+ "step": 17340
12172
+ },
12173
+ {
12174
+ "epoch": 0.3219693015724999,
12175
+ "grad_norm": 35.0,
12176
+ "learning_rate": 9.94969231121641e-06,
12177
+ "loss": 21.9757,
12178
+ "step": 17350
12179
+ },
12180
+ {
12181
+ "epoch": 0.32215487465697973,
12182
+ "grad_norm": 35.34375,
12183
+ "learning_rate": 9.94966331543037e-06,
12184
+ "loss": 21.7447,
12185
+ "step": 17360
12186
+ },
12187
+ {
12188
+ "epoch": 0.3223404477414596,
12189
+ "grad_norm": 36.0625,
12190
+ "learning_rate": 9.949634319644327e-06,
12191
+ "loss": 21.7478,
12192
+ "step": 17370
12193
+ },
12194
+ {
12195
+ "epoch": 0.3225260208259394,
12196
+ "grad_norm": 36.125,
12197
+ "learning_rate": 9.949605323858284e-06,
12198
+ "loss": 22.5053,
12199
+ "step": 17380
12200
+ },
12201
+ {
12202
+ "epoch": 0.32271159391041926,
12203
+ "grad_norm": 35.09375,
12204
+ "learning_rate": 9.949576328072242e-06,
12205
+ "loss": 21.8131,
12206
+ "step": 17390
12207
+ },
12208
+ {
12209
+ "epoch": 0.32289716699489907,
12210
+ "grad_norm": 36.75,
12211
+ "learning_rate": 9.949547332286201e-06,
12212
+ "loss": 22.0129,
12213
+ "step": 17400
12214
+ },
12215
+ {
12216
+ "epoch": 0.32308274007937887,
12217
+ "grad_norm": 33.84375,
12218
+ "learning_rate": 9.949518336500157e-06,
12219
+ "loss": 21.799,
12220
+ "step": 17410
12221
+ },
12222
+ {
12223
+ "epoch": 0.32326831316385873,
12224
+ "grad_norm": 34.875,
12225
+ "learning_rate": 9.949489340714114e-06,
12226
+ "loss": 21.6667,
12227
+ "step": 17420
12228
+ },
12229
+ {
12230
+ "epoch": 0.32345388624833854,
12231
+ "grad_norm": 34.375,
12232
+ "learning_rate": 9.949460344928073e-06,
12233
+ "loss": 22.099,
12234
+ "step": 17430
12235
+ },
12236
+ {
12237
+ "epoch": 0.32363945933281835,
12238
+ "grad_norm": 35.375,
12239
+ "learning_rate": 9.94943134914203e-06,
12240
+ "loss": 21.9377,
12241
+ "step": 17440
12242
+ },
12243
+ {
12244
+ "epoch": 0.3238250324172982,
12245
+ "grad_norm": 36.8125,
12246
+ "learning_rate": 9.949402353355988e-06,
12247
+ "loss": 22.0756,
12248
+ "step": 17450
12249
+ },
12250
+ {
12251
+ "epoch": 0.324010605501778,
12252
+ "grad_norm": 33.84375,
12253
+ "learning_rate": 9.949373357569945e-06,
12254
+ "loss": 21.7746,
12255
+ "step": 17460
12256
+ },
12257
+ {
12258
+ "epoch": 0.3241961785862578,
12259
+ "grad_norm": 36.78125,
12260
+ "learning_rate": 9.949344361783903e-06,
12261
+ "loss": 21.9471,
12262
+ "step": 17470
12263
+ },
12264
+ {
12265
+ "epoch": 0.3243817516707377,
12266
+ "grad_norm": 36.4375,
12267
+ "learning_rate": 9.94931536599786e-06,
12268
+ "loss": 21.7579,
12269
+ "step": 17480
12270
+ },
12271
+ {
12272
+ "epoch": 0.3245673247552175,
12273
+ "grad_norm": 34.96875,
12274
+ "learning_rate": 9.949286370211818e-06,
12275
+ "loss": 21.5875,
12276
+ "step": 17490
12277
+ },
12278
+ {
12279
+ "epoch": 0.32475289783969735,
12280
+ "grad_norm": 34.53125,
12281
+ "learning_rate": 9.949257374425777e-06,
12282
+ "loss": 21.6696,
12283
+ "step": 17500
12284
+ },
12285
+ {
12286
+ "epoch": 0.32493847092417716,
12287
+ "grad_norm": 34.6875,
12288
+ "learning_rate": 9.949228378639732e-06,
12289
+ "loss": 22.0292,
12290
+ "step": 17510
12291
+ },
12292
+ {
12293
+ "epoch": 0.32512404400865696,
12294
+ "grad_norm": 34.34375,
12295
+ "learning_rate": 9.94919938285369e-06,
12296
+ "loss": 21.6901,
12297
+ "step": 17520
12298
+ },
12299
+ {
12300
+ "epoch": 0.3253096170931368,
12301
+ "grad_norm": 34.71875,
12302
+ "learning_rate": 9.949170387067649e-06,
12303
+ "loss": 21.5895,
12304
+ "step": 17530
12305
+ },
12306
+ {
12307
+ "epoch": 0.32549519017761663,
12308
+ "grad_norm": 34.625,
12309
+ "learning_rate": 9.949141391281606e-06,
12310
+ "loss": 21.7416,
12311
+ "step": 17540
12312
+ },
12313
+ {
12314
+ "epoch": 0.32568076326209644,
12315
+ "grad_norm": 34.0,
12316
+ "learning_rate": 9.949112395495564e-06,
12317
+ "loss": 21.7771,
12318
+ "step": 17550
12319
+ },
12320
+ {
12321
+ "epoch": 0.3258663363465763,
12322
+ "grad_norm": 35.875,
12323
+ "learning_rate": 9.949083399709521e-06,
12324
+ "loss": 21.8724,
12325
+ "step": 17560
12326
+ },
12327
+ {
12328
+ "epoch": 0.3260519094310561,
12329
+ "grad_norm": 34.78125,
12330
+ "learning_rate": 9.949054403923479e-06,
12331
+ "loss": 21.6644,
12332
+ "step": 17570
12333
+ },
12334
+ {
12335
+ "epoch": 0.32623748251553597,
12336
+ "grad_norm": 35.0,
12337
+ "learning_rate": 9.949025408137436e-06,
12338
+ "loss": 22.1336,
12339
+ "step": 17580
12340
+ },
12341
+ {
12342
+ "epoch": 0.3264230556000158,
12343
+ "grad_norm": 34.46875,
12344
+ "learning_rate": 9.948996412351393e-06,
12345
+ "loss": 21.7064,
12346
+ "step": 17590
12347
+ },
12348
+ {
12349
+ "epoch": 0.3266086286844956,
12350
+ "grad_norm": 35.375,
12351
+ "learning_rate": 9.94896741656535e-06,
12352
+ "loss": 21.7408,
12353
+ "step": 17600
12354
+ },
12355
+ {
12356
+ "epoch": 0.32679420176897545,
12357
+ "grad_norm": 33.625,
12358
+ "learning_rate": 9.94893842077931e-06,
12359
+ "loss": 21.6225,
12360
+ "step": 17610
12361
+ },
12362
+ {
12363
+ "epoch": 0.32697977485345525,
12364
+ "grad_norm": 35.84375,
12365
+ "learning_rate": 9.948909424993266e-06,
12366
+ "loss": 21.4826,
12367
+ "step": 17620
12368
+ },
12369
+ {
12370
+ "epoch": 0.32716534793793506,
12371
+ "grad_norm": 33.90625,
12372
+ "learning_rate": 9.948880429207225e-06,
12373
+ "loss": 21.8825,
12374
+ "step": 17630
12375
+ },
12376
+ {
12377
+ "epoch": 0.3273509210224149,
12378
+ "grad_norm": 36.03125,
12379
+ "learning_rate": 9.948851433421182e-06,
12380
+ "loss": 21.4889,
12381
+ "step": 17640
12382
+ },
12383
+ {
12384
+ "epoch": 0.3275364941068947,
12385
+ "grad_norm": 32.84375,
12386
+ "learning_rate": 9.94882243763514e-06,
12387
+ "loss": 21.5165,
12388
+ "step": 17650
12389
+ },
12390
+ {
12391
+ "epoch": 0.3277220671913746,
12392
+ "grad_norm": 35.0625,
12393
+ "learning_rate": 9.948793441849097e-06,
12394
+ "loss": 21.6394,
12395
+ "step": 17660
12396
+ },
12397
+ {
12398
+ "epoch": 0.3279076402758544,
12399
+ "grad_norm": 35.0625,
12400
+ "learning_rate": 9.948764446063054e-06,
12401
+ "loss": 21.623,
12402
+ "step": 17670
12403
+ },
12404
+ {
12405
+ "epoch": 0.3280932133603342,
12406
+ "grad_norm": 34.125,
12407
+ "learning_rate": 9.948735450277012e-06,
12408
+ "loss": 22.1933,
12409
+ "step": 17680
12410
+ },
12411
+ {
12412
+ "epoch": 0.32827878644481406,
12413
+ "grad_norm": 36.65625,
12414
+ "learning_rate": 9.948706454490969e-06,
12415
+ "loss": 21.7992,
12416
+ "step": 17690
12417
+ },
12418
+ {
12419
+ "epoch": 0.32846435952929387,
12420
+ "grad_norm": 35.15625,
12421
+ "learning_rate": 9.948677458704927e-06,
12422
+ "loss": 21.9871,
12423
+ "step": 17700
12424
+ },
12425
+ {
12426
+ "epoch": 0.3286499326137737,
12427
+ "grad_norm": 38.0,
12428
+ "learning_rate": 9.948648462918886e-06,
12429
+ "loss": 22.3474,
12430
+ "step": 17710
12431
+ },
12432
+ {
12433
+ "epoch": 0.32883550569825354,
12434
+ "grad_norm": 36.0,
12435
+ "learning_rate": 9.948619467132843e-06,
12436
+ "loss": 22.0369,
12437
+ "step": 17720
12438
+ },
12439
+ {
12440
+ "epoch": 0.32902107878273334,
12441
+ "grad_norm": 35.3125,
12442
+ "learning_rate": 9.948590471346799e-06,
12443
+ "loss": 21.3599,
12444
+ "step": 17730
12445
+ },
12446
+ {
12447
+ "epoch": 0.3292066518672132,
12448
+ "grad_norm": 34.90625,
12449
+ "learning_rate": 9.948561475560758e-06,
12450
+ "loss": 21.8559,
12451
+ "step": 17740
12452
+ },
12453
+ {
12454
+ "epoch": 0.329392224951693,
12455
+ "grad_norm": 35.875,
12456
+ "learning_rate": 9.948532479774715e-06,
12457
+ "loss": 21.8818,
12458
+ "step": 17750
12459
+ },
12460
+ {
12461
+ "epoch": 0.3295777980361728,
12462
+ "grad_norm": 32.96875,
12463
+ "learning_rate": 9.948503483988673e-06,
12464
+ "loss": 21.4791,
12465
+ "step": 17760
12466
+ },
12467
+ {
12468
+ "epoch": 0.3297633711206527,
12469
+ "grad_norm": 33.21875,
12470
+ "learning_rate": 9.94847448820263e-06,
12471
+ "loss": 21.6707,
12472
+ "step": 17770
12473
+ },
12474
+ {
12475
+ "epoch": 0.3299489442051325,
12476
+ "grad_norm": 34.875,
12477
+ "learning_rate": 9.948445492416588e-06,
12478
+ "loss": 21.4599,
12479
+ "step": 17780
12480
+ },
12481
+ {
12482
+ "epoch": 0.3301345172896123,
12483
+ "grad_norm": 35.625,
12484
+ "learning_rate": 9.948416496630545e-06,
12485
+ "loss": 21.9671,
12486
+ "step": 17790
12487
+ },
12488
+ {
12489
+ "epoch": 0.33032009037409216,
12490
+ "grad_norm": 34.0625,
12491
+ "learning_rate": 9.948387500844502e-06,
12492
+ "loss": 21.7585,
12493
+ "step": 17800
12494
+ },
12495
+ {
12496
+ "epoch": 0.33050566345857196,
12497
+ "grad_norm": 36.21875,
12498
+ "learning_rate": 9.948358505058461e-06,
12499
+ "loss": 21.5565,
12500
+ "step": 17810
12501
+ },
12502
+ {
12503
+ "epoch": 0.33069123654305177,
12504
+ "grad_norm": 33.5625,
12505
+ "learning_rate": 9.948329509272419e-06,
12506
+ "loss": 21.3623,
12507
+ "step": 17820
12508
+ },
12509
+ {
12510
+ "epoch": 0.33087680962753163,
12511
+ "grad_norm": 35.21875,
12512
+ "learning_rate": 9.948300513486376e-06,
12513
+ "loss": 21.5242,
12514
+ "step": 17830
12515
+ },
12516
+ {
12517
+ "epoch": 0.33106238271201144,
12518
+ "grad_norm": 33.96875,
12519
+ "learning_rate": 9.948271517700334e-06,
12520
+ "loss": 22.0939,
12521
+ "step": 17840
12522
+ },
12523
+ {
12524
+ "epoch": 0.3312479557964913,
12525
+ "grad_norm": 33.96875,
12526
+ "learning_rate": 9.948242521914291e-06,
12527
+ "loss": 21.8529,
12528
+ "step": 17850
12529
+ },
12530
+ {
12531
+ "epoch": 0.3314335288809711,
12532
+ "grad_norm": 36.78125,
12533
+ "learning_rate": 9.948213526128248e-06,
12534
+ "loss": 21.7178,
12535
+ "step": 17860
12536
+ },
12537
+ {
12538
+ "epoch": 0.3316191019654509,
12539
+ "grad_norm": 36.84375,
12540
+ "learning_rate": 9.948184530342206e-06,
12541
+ "loss": 21.9362,
12542
+ "step": 17870
12543
+ },
12544
+ {
12545
+ "epoch": 0.3318046750499308,
12546
+ "grad_norm": 35.375,
12547
+ "learning_rate": 9.948155534556165e-06,
12548
+ "loss": 21.678,
12549
+ "step": 17880
12550
+ },
12551
+ {
12552
+ "epoch": 0.3319902481344106,
12553
+ "grad_norm": 35.78125,
12554
+ "learning_rate": 9.94812653877012e-06,
12555
+ "loss": 21.7038,
12556
+ "step": 17890
12557
+ },
12558
+ {
12559
+ "epoch": 0.3321758212188904,
12560
+ "grad_norm": 36.3125,
12561
+ "learning_rate": 9.948097542984078e-06,
12562
+ "loss": 21.7922,
12563
+ "step": 17900
12564
+ },
12565
+ {
12566
+ "epoch": 0.33236139430337025,
12567
+ "grad_norm": 35.65625,
12568
+ "learning_rate": 9.948068547198037e-06,
12569
+ "loss": 21.4636,
12570
+ "step": 17910
12571
+ },
12572
+ {
12573
+ "epoch": 0.33254696738785006,
12574
+ "grad_norm": 33.84375,
12575
+ "learning_rate": 9.948039551411995e-06,
12576
+ "loss": 21.4673,
12577
+ "step": 17920
12578
+ },
12579
+ {
12580
+ "epoch": 0.3327325404723299,
12581
+ "grad_norm": 34.625,
12582
+ "learning_rate": 9.948010555625952e-06,
12583
+ "loss": 21.6747,
12584
+ "step": 17930
12585
+ },
12586
+ {
12587
+ "epoch": 0.3329181135568097,
12588
+ "grad_norm": 34.46875,
12589
+ "learning_rate": 9.94798155983991e-06,
12590
+ "loss": 21.5015,
12591
+ "step": 17940
12592
+ },
12593
+ {
12594
+ "epoch": 0.33310368664128953,
12595
+ "grad_norm": 34.65625,
12596
+ "learning_rate": 9.947952564053867e-06,
12597
+ "loss": 22.1432,
12598
+ "step": 17950
12599
+ },
12600
+ {
12601
+ "epoch": 0.3332892597257694,
12602
+ "grad_norm": 33.96875,
12603
+ "learning_rate": 9.947923568267824e-06,
12604
+ "loss": 21.695,
12605
+ "step": 17960
12606
+ },
12607
+ {
12608
+ "epoch": 0.3334748328102492,
12609
+ "grad_norm": 37.53125,
12610
+ "learning_rate": 9.947894572481782e-06,
12611
+ "loss": 21.7699,
12612
+ "step": 17970
12613
+ },
12614
+ {
12615
+ "epoch": 0.333660405894729,
12616
+ "grad_norm": 33.53125,
12617
+ "learning_rate": 9.94786557669574e-06,
12618
+ "loss": 21.9693,
12619
+ "step": 17980
12620
+ },
12621
+ {
12622
+ "epoch": 0.33384597897920887,
12623
+ "grad_norm": 36.03125,
12624
+ "learning_rate": 9.947836580909698e-06,
12625
+ "loss": 21.9864,
12626
+ "step": 17990
12627
+ },
12628
+ {
12629
+ "epoch": 0.3340315520636887,
12630
+ "grad_norm": 35.40625,
12631
+ "learning_rate": 9.947807585123654e-06,
12632
+ "loss": 21.6196,
12633
+ "step": 18000
12634
+ },
12635
+ {
12636
+ "epoch": 0.33421712514816854,
12637
+ "grad_norm": 36.0625,
12638
+ "learning_rate": 9.947778589337613e-06,
12639
+ "loss": 21.6735,
12640
+ "step": 18010
12641
+ },
12642
+ {
12643
+ "epoch": 0.33440269823264834,
12644
+ "grad_norm": 33.78125,
12645
+ "learning_rate": 9.94774959355157e-06,
12646
+ "loss": 21.4556,
12647
+ "step": 18020
12648
+ },
12649
+ {
12650
+ "epoch": 0.33458827131712815,
12651
+ "grad_norm": 34.125,
12652
+ "learning_rate": 9.947720597765528e-06,
12653
+ "loss": 22.1459,
12654
+ "step": 18030
12655
+ },
12656
+ {
12657
+ "epoch": 0.334773844401608,
12658
+ "grad_norm": 34.65625,
12659
+ "learning_rate": 9.947691601979485e-06,
12660
+ "loss": 21.8858,
12661
+ "step": 18040
12662
+ },
12663
+ {
12664
+ "epoch": 0.3349594174860878,
12665
+ "grad_norm": 37.84375,
12666
+ "learning_rate": 9.947662606193443e-06,
12667
+ "loss": 21.5832,
12668
+ "step": 18050
12669
+ },
12670
+ {
12671
+ "epoch": 0.3351449905705676,
12672
+ "grad_norm": 34.6875,
12673
+ "learning_rate": 9.9476336104074e-06,
12674
+ "loss": 21.8271,
12675
+ "step": 18060
12676
+ },
12677
+ {
12678
+ "epoch": 0.3353305636550475,
12679
+ "grad_norm": 36.25,
12680
+ "learning_rate": 9.947604614621357e-06,
12681
+ "loss": 21.8723,
12682
+ "step": 18070
12683
+ },
12684
+ {
12685
+ "epoch": 0.3355161367395273,
12686
+ "grad_norm": 34.9375,
12687
+ "learning_rate": 9.947575618835315e-06,
12688
+ "loss": 21.7849,
12689
+ "step": 18080
12690
+ },
12691
+ {
12692
+ "epoch": 0.3357017098240071,
12693
+ "grad_norm": 35.8125,
12694
+ "learning_rate": 9.947546623049274e-06,
12695
+ "loss": 21.3291,
12696
+ "step": 18090
12697
+ },
12698
+ {
12699
+ "epoch": 0.33588728290848696,
12700
+ "grad_norm": 33.65625,
12701
+ "learning_rate": 9.94751762726323e-06,
12702
+ "loss": 21.2726,
12703
+ "step": 18100
12704
+ },
12705
+ {
12706
+ "epoch": 0.33607285599296677,
12707
+ "grad_norm": 34.09375,
12708
+ "learning_rate": 9.947488631477189e-06,
12709
+ "loss": 22.0293,
12710
+ "step": 18110
12711
+ },
12712
+ {
12713
+ "epoch": 0.33625842907744663,
12714
+ "grad_norm": 34.0,
12715
+ "learning_rate": 9.947459635691146e-06,
12716
+ "loss": 21.8386,
12717
+ "step": 18120
12718
+ },
12719
+ {
12720
+ "epoch": 0.33644400216192644,
12721
+ "grad_norm": 35.09375,
12722
+ "learning_rate": 9.947430639905104e-06,
12723
+ "loss": 21.636,
12724
+ "step": 18130
12725
+ },
12726
+ {
12727
+ "epoch": 0.33662957524640624,
12728
+ "grad_norm": 31.484375,
12729
+ "learning_rate": 9.947401644119061e-06,
12730
+ "loss": 21.6889,
12731
+ "step": 18140
12732
+ },
12733
+ {
12734
+ "epoch": 0.3368151483308861,
12735
+ "grad_norm": 36.78125,
12736
+ "learning_rate": 9.947372648333018e-06,
12737
+ "loss": 21.4289,
12738
+ "step": 18150
12739
+ },
12740
+ {
12741
+ "epoch": 0.3370007214153659,
12742
+ "grad_norm": 34.9375,
12743
+ "learning_rate": 9.947343652546976e-06,
12744
+ "loss": 21.5468,
12745
+ "step": 18160
12746
+ },
12747
+ {
12748
+ "epoch": 0.3371862944998457,
12749
+ "grad_norm": 34.25,
12750
+ "learning_rate": 9.947314656760933e-06,
12751
+ "loss": 21.5357,
12752
+ "step": 18170
12753
+ },
12754
+ {
12755
+ "epoch": 0.3373718675843256,
12756
+ "grad_norm": 33.6875,
12757
+ "learning_rate": 9.94728566097489e-06,
12758
+ "loss": 21.5039,
12759
+ "step": 18180
12760
+ },
12761
+ {
12762
+ "epoch": 0.3375574406688054,
12763
+ "grad_norm": 36.46875,
12764
+ "learning_rate": 9.94725666518885e-06,
12765
+ "loss": 21.9166,
12766
+ "step": 18190
12767
+ },
12768
+ {
12769
+ "epoch": 0.33774301375328525,
12770
+ "grad_norm": 35.09375,
12771
+ "learning_rate": 9.947227669402807e-06,
12772
+ "loss": 22.2752,
12773
+ "step": 18200
12774
+ },
12775
+ {
12776
+ "epoch": 0.33792858683776505,
12777
+ "grad_norm": 34.8125,
12778
+ "learning_rate": 9.947198673616763e-06,
12779
+ "loss": 21.7475,
12780
+ "step": 18210
12781
+ },
12782
+ {
12783
+ "epoch": 0.33811415992224486,
12784
+ "grad_norm": 36.3125,
12785
+ "learning_rate": 9.947169677830722e-06,
12786
+ "loss": 22.0181,
12787
+ "step": 18220
12788
+ },
12789
+ {
12790
+ "epoch": 0.3382997330067247,
12791
+ "grad_norm": 34.875,
12792
+ "learning_rate": 9.94714068204468e-06,
12793
+ "loss": 21.1761,
12794
+ "step": 18230
12795
+ },
12796
+ {
12797
+ "epoch": 0.33848530609120453,
12798
+ "grad_norm": 33.65625,
12799
+ "learning_rate": 9.947111686258637e-06,
12800
+ "loss": 21.6197,
12801
+ "step": 18240
12802
+ },
12803
+ {
12804
+ "epoch": 0.33867087917568434,
12805
+ "grad_norm": 34.1875,
12806
+ "learning_rate": 9.947082690472594e-06,
12807
+ "loss": 21.3926,
12808
+ "step": 18250
12809
+ },
12810
+ {
12811
+ "epoch": 0.3388564522601642,
12812
+ "grad_norm": 34.3125,
12813
+ "learning_rate": 9.947053694686552e-06,
12814
+ "loss": 21.5869,
12815
+ "step": 18260
12816
+ },
12817
+ {
12818
+ "epoch": 0.339042025344644,
12819
+ "grad_norm": 34.25,
12820
+ "learning_rate": 9.947024698900509e-06,
12821
+ "loss": 22.0923,
12822
+ "step": 18270
12823
+ },
12824
+ {
12825
+ "epoch": 0.33922759842912387,
12826
+ "grad_norm": 36.625,
12827
+ "learning_rate": 9.946995703114466e-06,
12828
+ "loss": 21.7863,
12829
+ "step": 18280
12830
+ },
12831
+ {
12832
+ "epoch": 0.3394131715136037,
12833
+ "grad_norm": 34.21875,
12834
+ "learning_rate": 9.946966707328425e-06,
12835
+ "loss": 21.7017,
12836
+ "step": 18290
12837
+ },
12838
+ {
12839
+ "epoch": 0.3395987445980835,
12840
+ "grad_norm": 35.03125,
12841
+ "learning_rate": 9.946937711542383e-06,
12842
+ "loss": 21.9386,
12843
+ "step": 18300
12844
+ },
12845
+ {
12846
+ "epoch": 0.33978431768256334,
12847
+ "grad_norm": 35.5,
12848
+ "learning_rate": 9.94690871575634e-06,
12849
+ "loss": 21.9028,
12850
+ "step": 18310
12851
+ },
12852
+ {
12853
+ "epoch": 0.33996989076704315,
12854
+ "grad_norm": 36.46875,
12855
+ "learning_rate": 9.946879719970298e-06,
12856
+ "loss": 21.3511,
12857
+ "step": 18320
12858
+ },
12859
+ {
12860
+ "epoch": 0.34015546385152295,
12861
+ "grad_norm": 35.1875,
12862
+ "learning_rate": 9.946850724184255e-06,
12863
+ "loss": 21.4002,
12864
+ "step": 18330
12865
+ },
12866
+ {
12867
+ "epoch": 0.3403410369360028,
12868
+ "grad_norm": 33.96875,
12869
+ "learning_rate": 9.946821728398212e-06,
12870
+ "loss": 21.6701,
12871
+ "step": 18340
12872
+ },
12873
+ {
12874
+ "epoch": 0.3405266100204826,
12875
+ "grad_norm": 34.4375,
12876
+ "learning_rate": 9.94679273261217e-06,
12877
+ "loss": 21.6811,
12878
+ "step": 18350
12879
+ },
12880
+ {
12881
+ "epoch": 0.34071218310496243,
12882
+ "grad_norm": 33.09375,
12883
+ "learning_rate": 9.946763736826129e-06,
12884
+ "loss": 21.7387,
12885
+ "step": 18360
12886
+ },
12887
+ {
12888
+ "epoch": 0.3408977561894423,
12889
+ "grad_norm": 35.0625,
12890
+ "learning_rate": 9.946734741040085e-06,
12891
+ "loss": 22.08,
12892
+ "step": 18370
12893
+ },
12894
+ {
12895
+ "epoch": 0.3410833292739221,
12896
+ "grad_norm": 35.90625,
12897
+ "learning_rate": 9.946705745254042e-06,
12898
+ "loss": 21.8435,
12899
+ "step": 18380
12900
+ },
12901
+ {
12902
+ "epoch": 0.34126890235840196,
12903
+ "grad_norm": 33.8125,
12904
+ "learning_rate": 9.946676749468001e-06,
12905
+ "loss": 21.7757,
12906
+ "step": 18390
12907
+ },
12908
+ {
12909
+ "epoch": 0.34145447544288177,
12910
+ "grad_norm": 35.625,
12911
+ "learning_rate": 9.946647753681959e-06,
12912
+ "loss": 21.6643,
12913
+ "step": 18400
12914
+ },
12915
+ {
12916
+ "epoch": 0.3416400485273616,
12917
+ "grad_norm": 36.875,
12918
+ "learning_rate": 9.946618757895916e-06,
12919
+ "loss": 21.495,
12920
+ "step": 18410
12921
+ },
12922
+ {
12923
+ "epoch": 0.34182562161184143,
12924
+ "grad_norm": 37.375,
12925
+ "learning_rate": 9.946589762109873e-06,
12926
+ "loss": 21.7553,
12927
+ "step": 18420
12928
+ },
12929
+ {
12930
+ "epoch": 0.34201119469632124,
12931
+ "grad_norm": 34.625,
12932
+ "learning_rate": 9.94656076632383e-06,
12933
+ "loss": 21.4652,
12934
+ "step": 18430
12935
+ },
12936
+ {
12937
+ "epoch": 0.34219676778080105,
12938
+ "grad_norm": 34.71875,
12939
+ "learning_rate": 9.946531770537788e-06,
12940
+ "loss": 21.7942,
12941
+ "step": 18440
12942
+ },
12943
+ {
12944
+ "epoch": 0.3423823408652809,
12945
+ "grad_norm": 35.21875,
12946
+ "learning_rate": 9.946502774751746e-06,
12947
+ "loss": 21.7887,
12948
+ "step": 18450
12949
+ },
12950
+ {
12951
+ "epoch": 0.3425679139497607,
12952
+ "grad_norm": 34.5,
12953
+ "learning_rate": 9.946473778965705e-06,
12954
+ "loss": 21.8405,
12955
+ "step": 18460
12956
+ },
12957
+ {
12958
+ "epoch": 0.3427534870342406,
12959
+ "grad_norm": 33.3125,
12960
+ "learning_rate": 9.946444783179662e-06,
12961
+ "loss": 21.3681,
12962
+ "step": 18470
12963
+ },
12964
+ {
12965
+ "epoch": 0.3429390601187204,
12966
+ "grad_norm": 34.90625,
12967
+ "learning_rate": 9.946415787393618e-06,
12968
+ "loss": 21.4823,
12969
+ "step": 18480
12970
+ },
12971
+ {
12972
+ "epoch": 0.3431246332032002,
12973
+ "grad_norm": 34.78125,
12974
+ "learning_rate": 9.946386791607577e-06,
12975
+ "loss": 21.7108,
12976
+ "step": 18490
12977
+ },
12978
+ {
12979
+ "epoch": 0.34331020628768005,
12980
+ "grad_norm": 33.875,
12981
+ "learning_rate": 9.946357795821534e-06,
12982
+ "loss": 21.6695,
12983
+ "step": 18500
12984
+ },
12985
+ {
12986
+ "epoch": 0.34349577937215986,
12987
+ "grad_norm": 33.8125,
12988
+ "learning_rate": 9.946328800035492e-06,
12989
+ "loss": 21.7427,
12990
+ "step": 18510
12991
+ },
12992
+ {
12993
+ "epoch": 0.34368135245663967,
12994
+ "grad_norm": 35.03125,
12995
+ "learning_rate": 9.94629980424945e-06,
12996
+ "loss": 21.5864,
12997
+ "step": 18520
12998
+ },
12999
+ {
13000
+ "epoch": 0.34386692554111953,
13001
+ "grad_norm": 33.1875,
13002
+ "learning_rate": 9.946270808463407e-06,
13003
+ "loss": 21.6827,
13004
+ "step": 18530
13005
+ },
13006
+ {
13007
+ "epoch": 0.34405249862559933,
13008
+ "grad_norm": 36.78125,
13009
+ "learning_rate": 9.946241812677364e-06,
13010
+ "loss": 22.1864,
13011
+ "step": 18540
13012
+ },
13013
+ {
13014
+ "epoch": 0.3442380717100792,
13015
+ "grad_norm": 34.53125,
13016
+ "learning_rate": 9.946212816891321e-06,
13017
+ "loss": 21.6761,
13018
+ "step": 18550
13019
+ },
13020
+ {
13021
+ "epoch": 0.344423644794559,
13022
+ "grad_norm": 35.75,
13023
+ "learning_rate": 9.94618382110528e-06,
13024
+ "loss": 21.5846,
13025
+ "step": 18560
13026
+ },
13027
+ {
13028
+ "epoch": 0.3446092178790388,
13029
+ "grad_norm": 34.0625,
13030
+ "learning_rate": 9.946154825319238e-06,
13031
+ "loss": 21.4387,
13032
+ "step": 18570
13033
+ },
13034
+ {
13035
+ "epoch": 0.34479479096351867,
13036
+ "grad_norm": 34.1875,
13037
+ "learning_rate": 9.946125829533195e-06,
13038
+ "loss": 21.8834,
13039
+ "step": 18580
13040
+ },
13041
+ {
13042
+ "epoch": 0.3449803640479985,
13043
+ "grad_norm": 32.5,
13044
+ "learning_rate": 9.946096833747153e-06,
13045
+ "loss": 21.6082,
13046
+ "step": 18590
13047
+ },
13048
+ {
13049
+ "epoch": 0.3451659371324783,
13050
+ "grad_norm": 34.71875,
13051
+ "learning_rate": 9.94606783796111e-06,
13052
+ "loss": 21.4526,
13053
+ "step": 18600
13054
+ },
13055
+ {
13056
+ "epoch": 0.34535151021695815,
13057
+ "grad_norm": 35.8125,
13058
+ "learning_rate": 9.946038842175068e-06,
13059
+ "loss": 21.8455,
13060
+ "step": 18610
13061
+ },
13062
+ {
13063
+ "epoch": 0.34553708330143795,
13064
+ "grad_norm": 34.78125,
13065
+ "learning_rate": 9.946009846389025e-06,
13066
+ "loss": 21.6449,
13067
+ "step": 18620
13068
+ },
13069
+ {
13070
+ "epoch": 0.34572265638591776,
13071
+ "grad_norm": 35.125,
13072
+ "learning_rate": 9.945980850602982e-06,
13073
+ "loss": 21.0863,
13074
+ "step": 18630
13075
+ },
13076
+ {
13077
+ "epoch": 0.3459082294703976,
13078
+ "grad_norm": 34.84375,
13079
+ "learning_rate": 9.94595185481694e-06,
13080
+ "loss": 21.0995,
13081
+ "step": 18640
13082
+ },
13083
+ {
13084
+ "epoch": 0.3460938025548774,
13085
+ "grad_norm": 37.25,
13086
+ "learning_rate": 9.945922859030897e-06,
13087
+ "loss": 21.4652,
13088
+ "step": 18650
13089
+ },
13090
+ {
13091
+ "epoch": 0.3462793756393573,
13092
+ "grad_norm": 33.875,
13093
+ "learning_rate": 9.945893863244855e-06,
13094
+ "loss": 21.5573,
13095
+ "step": 18660
13096
+ },
13097
+ {
13098
+ "epoch": 0.3464649487238371,
13099
+ "grad_norm": 32.21875,
13100
+ "learning_rate": 9.945864867458814e-06,
13101
+ "loss": 21.6797,
13102
+ "step": 18670
13103
+ },
13104
+ {
13105
+ "epoch": 0.3466505218083169,
13106
+ "grad_norm": 35.8125,
13107
+ "learning_rate": 9.945835871672771e-06,
13108
+ "loss": 21.5204,
13109
+ "step": 18680
13110
+ },
13111
+ {
13112
+ "epoch": 0.34683609489279676,
13113
+ "grad_norm": 34.625,
13114
+ "learning_rate": 9.945806875886728e-06,
13115
+ "loss": 21.4348,
13116
+ "step": 18690
13117
+ },
13118
+ {
13119
+ "epoch": 0.34702166797727657,
13120
+ "grad_norm": 35.25,
13121
+ "learning_rate": 9.945777880100686e-06,
13122
+ "loss": 21.6341,
13123
+ "step": 18700
13124
+ },
13125
+ {
13126
+ "epoch": 0.3472072410617564,
13127
+ "grad_norm": 36.0625,
13128
+ "learning_rate": 9.945748884314643e-06,
13129
+ "loss": 21.5154,
13130
+ "step": 18710
13131
+ },
13132
+ {
13133
+ "epoch": 0.34739281414623624,
13134
+ "grad_norm": 34.40625,
13135
+ "learning_rate": 9.9457198885286e-06,
13136
+ "loss": 21.8781,
13137
+ "step": 18720
13138
+ },
13139
+ {
13140
+ "epoch": 0.34757838723071605,
13141
+ "grad_norm": 36.40625,
13142
+ "learning_rate": 9.945690892742558e-06,
13143
+ "loss": 21.9311,
13144
+ "step": 18730
13145
+ },
13146
+ {
13147
+ "epoch": 0.3477639603151959,
13148
+ "grad_norm": 34.625,
13149
+ "learning_rate": 9.945661896956517e-06,
13150
+ "loss": 21.7879,
13151
+ "step": 18740
13152
+ },
13153
+ {
13154
+ "epoch": 0.3479495333996757,
13155
+ "grad_norm": 34.78125,
13156
+ "learning_rate": 9.945632901170473e-06,
13157
+ "loss": 21.7787,
13158
+ "step": 18750
13159
+ },
13160
+ {
13161
+ "epoch": 0.3481351064841555,
13162
+ "grad_norm": 36.125,
13163
+ "learning_rate": 9.94560390538443e-06,
13164
+ "loss": 21.6464,
13165
+ "step": 18760
13166
+ },
13167
+ {
13168
+ "epoch": 0.3483206795686354,
13169
+ "grad_norm": 34.5625,
13170
+ "learning_rate": 9.94557490959839e-06,
13171
+ "loss": 21.5736,
13172
+ "step": 18770
13173
+ },
13174
+ {
13175
+ "epoch": 0.3485062526531152,
13176
+ "grad_norm": 34.75,
13177
+ "learning_rate": 9.945545913812347e-06,
13178
+ "loss": 21.753,
13179
+ "step": 18780
13180
+ },
13181
+ {
13182
+ "epoch": 0.348691825737595,
13183
+ "grad_norm": 36.125,
13184
+ "learning_rate": 9.945516918026304e-06,
13185
+ "loss": 21.4789,
13186
+ "step": 18790
13187
+ },
13188
+ {
13189
+ "epoch": 0.34887739882207486,
13190
+ "grad_norm": 34.90625,
13191
+ "learning_rate": 9.945487922240262e-06,
13192
+ "loss": 21.8735,
13193
+ "step": 18800
13194
+ },
13195
+ {
13196
+ "epoch": 0.34906297190655466,
13197
+ "grad_norm": 35.21875,
13198
+ "learning_rate": 9.945458926454219e-06,
13199
+ "loss": 21.6979,
13200
+ "step": 18810
13201
+ },
13202
+ {
13203
+ "epoch": 0.3492485449910345,
13204
+ "grad_norm": 35.03125,
13205
+ "learning_rate": 9.945429930668176e-06,
13206
+ "loss": 21.2156,
13207
+ "step": 18820
13208
+ },
13209
+ {
13210
+ "epoch": 0.34943411807551433,
13211
+ "grad_norm": 35.125,
13212
+ "learning_rate": 9.945400934882134e-06,
13213
+ "loss": 22.11,
13214
+ "step": 18830
13215
+ },
13216
+ {
13217
+ "epoch": 0.34961969115999414,
13218
+ "grad_norm": 34.90625,
13219
+ "learning_rate": 9.945371939096093e-06,
13220
+ "loss": 21.4453,
13221
+ "step": 18840
13222
+ },
13223
+ {
13224
+ "epoch": 0.349805264244474,
13225
+ "grad_norm": 34.5,
13226
+ "learning_rate": 9.945342943310049e-06,
13227
+ "loss": 21.5445,
13228
+ "step": 18850
13229
+ },
13230
+ {
13231
+ "epoch": 0.3499908373289538,
13232
+ "grad_norm": 36.71875,
13233
+ "learning_rate": 9.945313947524006e-06,
13234
+ "loss": 21.6646,
13235
+ "step": 18860
13236
+ },
13237
+ {
13238
+ "epoch": 0.3501764104134336,
13239
+ "grad_norm": 33.53125,
13240
+ "learning_rate": 9.945284951737965e-06,
13241
+ "loss": 21.4822,
13242
+ "step": 18870
13243
+ },
13244
+ {
13245
+ "epoch": 0.3503619834979135,
13246
+ "grad_norm": 35.71875,
13247
+ "learning_rate": 9.945255955951923e-06,
13248
+ "loss": 21.2241,
13249
+ "step": 18880
13250
+ },
13251
+ {
13252
+ "epoch": 0.3505475565823933,
13253
+ "grad_norm": 33.78125,
13254
+ "learning_rate": 9.94522696016588e-06,
13255
+ "loss": 21.5542,
13256
+ "step": 18890
13257
+ },
13258
+ {
13259
+ "epoch": 0.3507331296668731,
13260
+ "grad_norm": 33.15625,
13261
+ "learning_rate": 9.945197964379837e-06,
13262
+ "loss": 21.23,
13263
+ "step": 18900
13264
+ },
13265
+ {
13266
+ "epoch": 0.35091870275135295,
13267
+ "grad_norm": 34.9375,
13268
+ "learning_rate": 9.945168968593795e-06,
13269
+ "loss": 21.4728,
13270
+ "step": 18910
13271
+ },
13272
+ {
13273
+ "epoch": 0.35110427583583276,
13274
+ "grad_norm": 35.84375,
13275
+ "learning_rate": 9.945139972807752e-06,
13276
+ "loss": 21.7163,
13277
+ "step": 18920
13278
+ },
13279
+ {
13280
+ "epoch": 0.3512898489203126,
13281
+ "grad_norm": 35.03125,
13282
+ "learning_rate": 9.94511097702171e-06,
13283
+ "loss": 21.5852,
13284
+ "step": 18930
13285
+ },
13286
+ {
13287
+ "epoch": 0.3514754220047924,
13288
+ "grad_norm": 36.84375,
13289
+ "learning_rate": 9.945081981235669e-06,
13290
+ "loss": 21.5413,
13291
+ "step": 18940
13292
+ },
13293
+ {
13294
+ "epoch": 0.35166099508927223,
13295
+ "grad_norm": 34.625,
13296
+ "learning_rate": 9.945052985449626e-06,
13297
+ "loss": 21.824,
13298
+ "step": 18950
13299
+ },
13300
+ {
13301
+ "epoch": 0.3518465681737521,
13302
+ "grad_norm": 36.28125,
13303
+ "learning_rate": 9.945023989663582e-06,
13304
+ "loss": 21.5073,
13305
+ "step": 18960
13306
+ },
13307
+ {
13308
+ "epoch": 0.3520321412582319,
13309
+ "grad_norm": 35.25,
13310
+ "learning_rate": 9.944994993877541e-06,
13311
+ "loss": 21.6476,
13312
+ "step": 18970
13313
+ },
13314
+ {
13315
+ "epoch": 0.3522177143427117,
13316
+ "grad_norm": 36.21875,
13317
+ "learning_rate": 9.944965998091498e-06,
13318
+ "loss": 21.9834,
13319
+ "step": 18980
13320
+ },
13321
+ {
13322
+ "epoch": 0.35240328742719157,
13323
+ "grad_norm": 35.75,
13324
+ "learning_rate": 9.944937002305456e-06,
13325
+ "loss": 21.3338,
13326
+ "step": 18990
13327
+ },
13328
+ {
13329
+ "epoch": 0.3525888605116714,
13330
+ "grad_norm": 32.65625,
13331
+ "learning_rate": 9.944908006519413e-06,
13332
+ "loss": 21.7214,
13333
+ "step": 19000
13334
+ },
13335
+ {
13336
+ "epoch": 0.35277443359615124,
13337
+ "grad_norm": 35.03125,
13338
+ "learning_rate": 9.944879010733372e-06,
13339
+ "loss": 21.3903,
13340
+ "step": 19010
13341
+ },
13342
+ {
13343
+ "epoch": 0.35296000668063104,
13344
+ "grad_norm": 36.28125,
13345
+ "learning_rate": 9.944850014947328e-06,
13346
+ "loss": 21.4519,
13347
+ "step": 19020
13348
+ },
13349
+ {
13350
+ "epoch": 0.35314557976511085,
13351
+ "grad_norm": 35.0625,
13352
+ "learning_rate": 9.944821019161285e-06,
13353
+ "loss": 21.318,
13354
+ "step": 19030
13355
+ },
13356
+ {
13357
+ "epoch": 0.3533311528495907,
13358
+ "grad_norm": 36.03125,
13359
+ "learning_rate": 9.944792023375245e-06,
13360
+ "loss": 21.5192,
13361
+ "step": 19040
13362
+ },
13363
+ {
13364
+ "epoch": 0.3535167259340705,
13365
+ "grad_norm": 36.65625,
13366
+ "learning_rate": 9.944763027589202e-06,
13367
+ "loss": 21.8759,
13368
+ "step": 19050
13369
+ },
13370
+ {
13371
+ "epoch": 0.3537022990185503,
13372
+ "grad_norm": 36.5625,
13373
+ "learning_rate": 9.94473403180316e-06,
13374
+ "loss": 21.4364,
13375
+ "step": 19060
13376
+ },
13377
+ {
13378
+ "epoch": 0.3538878721030302,
13379
+ "grad_norm": 33.71875,
13380
+ "learning_rate": 9.944705036017117e-06,
13381
+ "loss": 21.6865,
13382
+ "step": 19070
13383
+ },
13384
+ {
13385
+ "epoch": 0.35407344518751,
13386
+ "grad_norm": 33.5625,
13387
+ "learning_rate": 9.944676040231074e-06,
13388
+ "loss": 21.4553,
13389
+ "step": 19080
13390
+ },
13391
+ {
13392
+ "epoch": 0.35425901827198986,
13393
+ "grad_norm": 34.375,
13394
+ "learning_rate": 9.944647044445032e-06,
13395
+ "loss": 21.4714,
13396
+ "step": 19090
13397
+ },
13398
+ {
13399
+ "epoch": 0.35444459135646966,
13400
+ "grad_norm": 35.5,
13401
+ "learning_rate": 9.944618048658989e-06,
13402
+ "loss": 21.2766,
13403
+ "step": 19100
13404
+ },
13405
+ {
13406
+ "epoch": 0.35463016444094947,
13407
+ "grad_norm": 36.40625,
13408
+ "learning_rate": 9.944589052872946e-06,
13409
+ "loss": 21.7674,
13410
+ "step": 19110
13411
+ },
13412
+ {
13413
+ "epoch": 0.35481573752542933,
13414
+ "grad_norm": 33.59375,
13415
+ "learning_rate": 9.944560057086904e-06,
13416
+ "loss": 21.3237,
13417
+ "step": 19120
13418
+ },
13419
+ {
13420
+ "epoch": 0.35500131060990914,
13421
+ "grad_norm": 34.21875,
13422
+ "learning_rate": 9.944531061300861e-06,
13423
+ "loss": 21.6426,
13424
+ "step": 19130
13425
+ },
13426
+ {
13427
+ "epoch": 0.35518688369438894,
13428
+ "grad_norm": 38.28125,
13429
+ "learning_rate": 9.94450206551482e-06,
13430
+ "loss": 21.7993,
13431
+ "step": 19140
13432
+ },
13433
+ {
13434
+ "epoch": 0.3553724567788688,
13435
+ "grad_norm": 34.78125,
13436
+ "learning_rate": 9.944473069728778e-06,
13437
+ "loss": 21.6046,
13438
+ "step": 19150
13439
+ },
13440
+ {
13441
+ "epoch": 0.3555580298633486,
13442
+ "grad_norm": 34.625,
13443
+ "learning_rate": 9.944444073942735e-06,
13444
+ "loss": 21.9528,
13445
+ "step": 19160
13446
+ },
13447
+ {
13448
+ "epoch": 0.3557436029478285,
13449
+ "grad_norm": 33.4375,
13450
+ "learning_rate": 9.944415078156693e-06,
13451
+ "loss": 21.5006,
13452
+ "step": 19170
13453
+ },
13454
+ {
13455
+ "epoch": 0.3559291760323083,
13456
+ "grad_norm": 33.1875,
13457
+ "learning_rate": 9.94438608237065e-06,
13458
+ "loss": 21.2048,
13459
+ "step": 19180
13460
+ },
13461
+ {
13462
+ "epoch": 0.3561147491167881,
13463
+ "grad_norm": 34.15625,
13464
+ "learning_rate": 9.944357086584607e-06,
13465
+ "loss": 21.4316,
13466
+ "step": 19190
13467
+ },
13468
+ {
13469
+ "epoch": 0.35630032220126795,
13470
+ "grad_norm": 35.46875,
13471
+ "learning_rate": 9.944328090798565e-06,
13472
+ "loss": 21.9165,
13473
+ "step": 19200
13474
+ },
13475
+ {
13476
+ "epoch": 0.35648589528574776,
13477
+ "grad_norm": 36.6875,
13478
+ "learning_rate": 9.944299095012522e-06,
13479
+ "loss": 21.4528,
13480
+ "step": 19210
13481
+ },
13482
+ {
13483
+ "epoch": 0.35667146837022756,
13484
+ "grad_norm": 34.5625,
13485
+ "learning_rate": 9.944270099226481e-06,
13486
+ "loss": 21.9393,
13487
+ "step": 19220
13488
+ },
13489
+ {
13490
+ "epoch": 0.3568570414547074,
13491
+ "grad_norm": 36.8125,
13492
+ "learning_rate": 9.944241103440437e-06,
13493
+ "loss": 21.4406,
13494
+ "step": 19230
13495
+ },
13496
+ {
13497
+ "epoch": 0.35704261453918723,
13498
+ "grad_norm": 34.09375,
13499
+ "learning_rate": 9.944212107654394e-06,
13500
+ "loss": 21.279,
13501
+ "step": 19240
13502
+ },
13503
+ {
13504
+ "epoch": 0.35722818762366704,
13505
+ "grad_norm": 34.875,
13506
+ "learning_rate": 9.944183111868353e-06,
13507
+ "loss": 20.9569,
13508
+ "step": 19250
13509
+ },
13510
+ {
13511
+ "epoch": 0.3574137607081469,
13512
+ "grad_norm": 34.96875,
13513
+ "learning_rate": 9.944154116082311e-06,
13514
+ "loss": 21.32,
13515
+ "step": 19260
13516
+ },
13517
+ {
13518
+ "epoch": 0.3575993337926267,
13519
+ "grad_norm": 34.21875,
13520
+ "learning_rate": 9.944125120296268e-06,
13521
+ "loss": 21.2426,
13522
+ "step": 19270
13523
+ },
13524
+ {
13525
+ "epoch": 0.35778490687710657,
13526
+ "grad_norm": 34.53125,
13527
+ "learning_rate": 9.944096124510226e-06,
13528
+ "loss": 21.2983,
13529
+ "step": 19280
13530
+ },
13531
+ {
13532
+ "epoch": 0.3579704799615864,
13533
+ "grad_norm": 35.375,
13534
+ "learning_rate": 9.944067128724183e-06,
13535
+ "loss": 21.687,
13536
+ "step": 19290
13537
+ },
13538
+ {
13539
+ "epoch": 0.3581560530460662,
13540
+ "grad_norm": 35.5,
13541
+ "learning_rate": 9.94403813293814e-06,
13542
+ "loss": 21.6977,
13543
+ "step": 19300
13544
+ },
13545
+ {
13546
+ "epoch": 0.35834162613054604,
13547
+ "grad_norm": 35.3125,
13548
+ "learning_rate": 9.944009137152098e-06,
13549
+ "loss": 21.5511,
13550
+ "step": 19310
13551
+ },
13552
+ {
13553
+ "epoch": 0.35852719921502585,
13554
+ "grad_norm": 34.84375,
13555
+ "learning_rate": 9.943980141366057e-06,
13556
+ "loss": 21.4945,
13557
+ "step": 19320
13558
+ },
13559
+ {
13560
+ "epoch": 0.35871277229950566,
13561
+ "grad_norm": 33.125,
13562
+ "learning_rate": 9.943951145580014e-06,
13563
+ "loss": 21.4095,
13564
+ "step": 19330
13565
+ },
13566
+ {
13567
+ "epoch": 0.3588983453839855,
13568
+ "grad_norm": 35.4375,
13569
+ "learning_rate": 9.94392214979397e-06,
13570
+ "loss": 21.623,
13571
+ "step": 19340
13572
+ },
13573
+ {
13574
+ "epoch": 0.3590839184684653,
13575
+ "grad_norm": 35.96875,
13576
+ "learning_rate": 9.94389315400793e-06,
13577
+ "loss": 21.2826,
13578
+ "step": 19350
13579
+ },
13580
+ {
13581
+ "epoch": 0.3592694915529452,
13582
+ "grad_norm": 33.1875,
13583
+ "learning_rate": 9.943864158221887e-06,
13584
+ "loss": 21.2331,
13585
+ "step": 19360
13586
+ },
13587
+ {
13588
+ "epoch": 0.359455064637425,
13589
+ "grad_norm": 33.0,
13590
+ "learning_rate": 9.943835162435844e-06,
13591
+ "loss": 21.4414,
13592
+ "step": 19370
13593
+ },
13594
+ {
13595
+ "epoch": 0.3596406377219048,
13596
+ "grad_norm": 34.4375,
13597
+ "learning_rate": 9.943806166649801e-06,
13598
+ "loss": 21.6911,
13599
+ "step": 19380
13600
+ },
13601
+ {
13602
+ "epoch": 0.35982621080638466,
13603
+ "grad_norm": 36.46875,
13604
+ "learning_rate": 9.943777170863759e-06,
13605
+ "loss": 21.9565,
13606
+ "step": 19390
13607
+ },
13608
+ {
13609
+ "epoch": 0.36001178389086447,
13610
+ "grad_norm": 33.5625,
13611
+ "learning_rate": 9.943748175077716e-06,
13612
+ "loss": 21.1798,
13613
+ "step": 19400
13614
+ },
13615
+ {
13616
+ "epoch": 0.3601973569753443,
13617
+ "grad_norm": 35.0625,
13618
+ "learning_rate": 9.943719179291674e-06,
13619
+ "loss": 21.36,
13620
+ "step": 19410
13621
+ },
13622
+ {
13623
+ "epoch": 0.36038293005982414,
13624
+ "grad_norm": 35.15625,
13625
+ "learning_rate": 9.943690183505633e-06,
13626
+ "loss": 21.8302,
13627
+ "step": 19420
13628
+ },
13629
+ {
13630
+ "epoch": 0.36056850314430394,
13631
+ "grad_norm": 35.09375,
13632
+ "learning_rate": 9.94366118771959e-06,
13633
+ "loss": 21.5303,
13634
+ "step": 19430
13635
+ },
13636
+ {
13637
+ "epoch": 0.3607540762287838,
13638
+ "grad_norm": 34.625,
13639
+ "learning_rate": 9.943632191933546e-06,
13640
+ "loss": 21.3503,
13641
+ "step": 19440
13642
+ },
13643
+ {
13644
+ "epoch": 0.3609396493132636,
13645
+ "grad_norm": 35.5625,
13646
+ "learning_rate": 9.943603196147505e-06,
13647
+ "loss": 21.5557,
13648
+ "step": 19450
13649
+ },
13650
+ {
13651
+ "epoch": 0.3611252223977434,
13652
+ "grad_norm": 36.0,
13653
+ "learning_rate": 9.943574200361462e-06,
13654
+ "loss": 21.2134,
13655
+ "step": 19460
13656
+ },
13657
+ {
13658
+ "epoch": 0.3613107954822233,
13659
+ "grad_norm": 36.375,
13660
+ "learning_rate": 9.94354520457542e-06,
13661
+ "loss": 21.1214,
13662
+ "step": 19470
13663
+ },
13664
+ {
13665
+ "epoch": 0.3614963685667031,
13666
+ "grad_norm": 35.4375,
13667
+ "learning_rate": 9.943516208789377e-06,
13668
+ "loss": 21.4935,
13669
+ "step": 19480
13670
+ },
13671
+ {
13672
+ "epoch": 0.3616819416511829,
13673
+ "grad_norm": 34.65625,
13674
+ "learning_rate": 9.943487213003336e-06,
13675
+ "loss": 21.3517,
13676
+ "step": 19490
13677
+ },
13678
+ {
13679
+ "epoch": 0.36186751473566275,
13680
+ "grad_norm": 34.4375,
13681
+ "learning_rate": 9.943458217217292e-06,
13682
+ "loss": 21.4391,
13683
+ "step": 19500
13684
+ },
13685
+ {
13686
+ "epoch": 0.36205308782014256,
13687
+ "grad_norm": 37.65625,
13688
+ "learning_rate": 9.94342922143125e-06,
13689
+ "loss": 21.6687,
13690
+ "step": 19510
13691
+ },
13692
+ {
13693
+ "epoch": 0.36223866090462237,
13694
+ "grad_norm": 35.65625,
13695
+ "learning_rate": 9.943400225645209e-06,
13696
+ "loss": 20.8702,
13697
+ "step": 19520
13698
+ },
13699
+ {
13700
+ "epoch": 0.36242423398910223,
13701
+ "grad_norm": 36.90625,
13702
+ "learning_rate": 9.943371229859166e-06,
13703
+ "loss": 21.0082,
13704
+ "step": 19530
13705
+ },
13706
+ {
13707
+ "epoch": 0.36260980707358204,
13708
+ "grad_norm": 33.25,
13709
+ "learning_rate": 9.943342234073123e-06,
13710
+ "loss": 21.6859,
13711
+ "step": 19540
13712
+ },
13713
+ {
13714
+ "epoch": 0.3627953801580619,
13715
+ "grad_norm": 35.90625,
13716
+ "learning_rate": 9.94331323828708e-06,
13717
+ "loss": 21.2681,
13718
+ "step": 19550
13719
+ },
13720
+ {
13721
+ "epoch": 0.3629809532425417,
13722
+ "grad_norm": 35.75,
13723
+ "learning_rate": 9.943284242501038e-06,
13724
+ "loss": 21.4027,
13725
+ "step": 19560
13726
+ },
13727
+ {
13728
+ "epoch": 0.3631665263270215,
13729
+ "grad_norm": 35.0625,
13730
+ "learning_rate": 9.943255246714996e-06,
13731
+ "loss": 21.4005,
13732
+ "step": 19570
13733
+ },
13734
+ {
13735
+ "epoch": 0.3633520994115014,
13736
+ "grad_norm": 35.0,
13737
+ "learning_rate": 9.943226250928953e-06,
13738
+ "loss": 22.0243,
13739
+ "step": 19580
13740
+ },
13741
+ {
13742
+ "epoch": 0.3635376724959812,
13743
+ "grad_norm": 33.84375,
13744
+ "learning_rate": 9.943197255142912e-06,
13745
+ "loss": 21.1517,
13746
+ "step": 19590
13747
+ },
13748
+ {
13749
+ "epoch": 0.363723245580461,
13750
+ "grad_norm": 34.84375,
13751
+ "learning_rate": 9.94316825935687e-06,
13752
+ "loss": 21.5607,
13753
+ "step": 19600
13754
+ },
13755
+ {
13756
+ "epoch": 0.36390881866494085,
13757
+ "grad_norm": 34.40625,
13758
+ "learning_rate": 9.943139263570825e-06,
13759
+ "loss": 21.4868,
13760
+ "step": 19610
13761
+ },
13762
+ {
13763
+ "epoch": 0.36409439174942065,
13764
+ "grad_norm": 36.9375,
13765
+ "learning_rate": 9.943110267784784e-06,
13766
+ "loss": 21.6598,
13767
+ "step": 19620
13768
+ },
13769
+ {
13770
+ "epoch": 0.3642799648339005,
13771
+ "grad_norm": 34.0625,
13772
+ "learning_rate": 9.943081271998742e-06,
13773
+ "loss": 21.3738,
13774
+ "step": 19630
13775
+ },
13776
+ {
13777
+ "epoch": 0.3644655379183803,
13778
+ "grad_norm": 37.0,
13779
+ "learning_rate": 9.943052276212699e-06,
13780
+ "loss": 21.3922,
13781
+ "step": 19640
13782
+ },
13783
+ {
13784
+ "epoch": 0.36465111100286013,
13785
+ "grad_norm": 34.78125,
13786
+ "learning_rate": 9.943023280426657e-06,
13787
+ "loss": 21.692,
13788
+ "step": 19650
13789
+ },
13790
+ {
13791
+ "epoch": 0.36483668408734,
13792
+ "grad_norm": 34.9375,
13793
+ "learning_rate": 9.942994284640614e-06,
13794
+ "loss": 21.7272,
13795
+ "step": 19660
13796
+ },
13797
+ {
13798
+ "epoch": 0.3650222571718198,
13799
+ "grad_norm": 36.0,
13800
+ "learning_rate": 9.942965288854571e-06,
13801
+ "loss": 21.4188,
13802
+ "step": 19670
13803
+ },
13804
+ {
13805
+ "epoch": 0.3652078302562996,
13806
+ "grad_norm": 33.375,
13807
+ "learning_rate": 9.942936293068529e-06,
13808
+ "loss": 21.5631,
13809
+ "step": 19680
13810
+ },
13811
+ {
13812
+ "epoch": 0.36539340334077947,
13813
+ "grad_norm": 33.40625,
13814
+ "learning_rate": 9.942907297282486e-06,
13815
+ "loss": 21.3399,
13816
+ "step": 19690
13817
+ },
13818
+ {
13819
+ "epoch": 0.36557897642525927,
13820
+ "grad_norm": 32.84375,
13821
+ "learning_rate": 9.942878301496445e-06,
13822
+ "loss": 21.2938,
13823
+ "step": 19700
13824
+ },
13825
+ {
13826
+ "epoch": 0.36576454950973913,
13827
+ "grad_norm": 34.0625,
13828
+ "learning_rate": 9.942849305710401e-06,
13829
+ "loss": 21.2609,
13830
+ "step": 19710
13831
+ },
13832
+ {
13833
+ "epoch": 0.36595012259421894,
13834
+ "grad_norm": 35.21875,
13835
+ "learning_rate": 9.942820309924358e-06,
13836
+ "loss": 21.7159,
13837
+ "step": 19720
13838
+ },
13839
+ {
13840
+ "epoch": 0.36613569567869875,
13841
+ "grad_norm": 36.09375,
13842
+ "learning_rate": 9.942791314138317e-06,
13843
+ "loss": 21.7571,
13844
+ "step": 19730
13845
+ },
13846
+ {
13847
+ "epoch": 0.3663212687631786,
13848
+ "grad_norm": 37.9375,
13849
+ "learning_rate": 9.942762318352275e-06,
13850
+ "loss": 21.2937,
13851
+ "step": 19740
13852
+ },
13853
+ {
13854
+ "epoch": 0.3665068418476584,
13855
+ "grad_norm": 37.84375,
13856
+ "learning_rate": 9.942733322566232e-06,
13857
+ "loss": 21.4546,
13858
+ "step": 19750
13859
+ },
13860
+ {
13861
+ "epoch": 0.3666924149321382,
13862
+ "grad_norm": 35.71875,
13863
+ "learning_rate": 9.94270432678019e-06,
13864
+ "loss": 21.3717,
13865
+ "step": 19760
13866
+ },
13867
+ {
13868
+ "epoch": 0.3668779880166181,
13869
+ "grad_norm": 34.8125,
13870
+ "learning_rate": 9.942675330994147e-06,
13871
+ "loss": 22.14,
13872
+ "step": 19770
13873
+ },
13874
+ {
13875
+ "epoch": 0.3670635611010979,
13876
+ "grad_norm": 36.3125,
13877
+ "learning_rate": 9.942646335208105e-06,
13878
+ "loss": 21.1358,
13879
+ "step": 19780
13880
+ },
13881
+ {
13882
+ "epoch": 0.3672491341855777,
13883
+ "grad_norm": 34.84375,
13884
+ "learning_rate": 9.942617339422062e-06,
13885
+ "loss": 21.3856,
13886
+ "step": 19790
13887
+ },
13888
+ {
13889
+ "epoch": 0.36743470727005756,
13890
+ "grad_norm": 36.03125,
13891
+ "learning_rate": 9.942588343636021e-06,
13892
+ "loss": 21.5621,
13893
+ "step": 19800
13894
+ },
13895
+ {
13896
+ "epoch": 0.36762028035453737,
13897
+ "grad_norm": 34.15625,
13898
+ "learning_rate": 9.942559347849978e-06,
13899
+ "loss": 21.6981,
13900
+ "step": 19810
13901
+ },
13902
+ {
13903
+ "epoch": 0.3678058534390172,
13904
+ "grad_norm": 35.8125,
13905
+ "learning_rate": 9.942530352063934e-06,
13906
+ "loss": 21.2015,
13907
+ "step": 19820
13908
+ },
13909
+ {
13910
+ "epoch": 0.36799142652349703,
13911
+ "grad_norm": 37.4375,
13912
+ "learning_rate": 9.942501356277893e-06,
13913
+ "loss": 22.0159,
13914
+ "step": 19830
13915
+ },
13916
+ {
13917
+ "epoch": 0.36817699960797684,
13918
+ "grad_norm": 34.21875,
13919
+ "learning_rate": 9.94247236049185e-06,
13920
+ "loss": 21.571,
13921
+ "step": 19840
13922
+ },
13923
+ {
13924
+ "epoch": 0.3683625726924567,
13925
+ "grad_norm": 35.375,
13926
+ "learning_rate": 9.942443364705808e-06,
13927
+ "loss": 21.416,
13928
+ "step": 19850
13929
+ },
13930
+ {
13931
+ "epoch": 0.3685481457769365,
13932
+ "grad_norm": 34.625,
13933
+ "learning_rate": 9.942414368919765e-06,
13934
+ "loss": 20.9028,
13935
+ "step": 19860
13936
+ },
13937
+ {
13938
+ "epoch": 0.3687337188614163,
13939
+ "grad_norm": 35.4375,
13940
+ "learning_rate": 9.942385373133723e-06,
13941
+ "loss": 21.527,
13942
+ "step": 19870
13943
+ },
13944
+ {
13945
+ "epoch": 0.3689192919458962,
13946
+ "grad_norm": 34.375,
13947
+ "learning_rate": 9.94235637734768e-06,
13948
+ "loss": 21.1836,
13949
+ "step": 19880
13950
+ },
13951
+ {
13952
+ "epoch": 0.369104865030376,
13953
+ "grad_norm": 38.21875,
13954
+ "learning_rate": 9.942327381561638e-06,
13955
+ "loss": 21.2912,
13956
+ "step": 19890
13957
+ },
13958
+ {
13959
+ "epoch": 0.36929043811485585,
13960
+ "grad_norm": 35.875,
13961
+ "learning_rate": 9.942298385775597e-06,
13962
+ "loss": 21.7566,
13963
+ "step": 19900
13964
+ },
13965
+ {
13966
+ "epoch": 0.36947601119933565,
13967
+ "grad_norm": 37.375,
13968
+ "learning_rate": 9.942269389989554e-06,
13969
+ "loss": 21.5261,
13970
+ "step": 19910
13971
+ },
13972
+ {
13973
+ "epoch": 0.36966158428381546,
13974
+ "grad_norm": 35.96875,
13975
+ "learning_rate": 9.942240394203512e-06,
13976
+ "loss": 21.7249,
13977
+ "step": 19920
13978
+ },
13979
+ {
13980
+ "epoch": 0.3698471573682953,
13981
+ "grad_norm": 34.5625,
13982
+ "learning_rate": 9.942211398417469e-06,
13983
+ "loss": 21.9828,
13984
+ "step": 19930
13985
+ },
13986
+ {
13987
+ "epoch": 0.3700327304527751,
13988
+ "grad_norm": 35.96875,
13989
+ "learning_rate": 9.942182402631426e-06,
13990
+ "loss": 21.419,
13991
+ "step": 19940
13992
+ },
13993
+ {
13994
+ "epoch": 0.37021830353725493,
13995
+ "grad_norm": 35.59375,
13996
+ "learning_rate": 9.942153406845384e-06,
13997
+ "loss": 21.6967,
13998
+ "step": 19950
13999
+ },
14000
+ {
14001
+ "epoch": 0.3704038766217348,
14002
+ "grad_norm": 33.3125,
14003
+ "learning_rate": 9.942124411059341e-06,
14004
+ "loss": 21.5348,
14005
+ "step": 19960
14006
+ },
14007
+ {
14008
+ "epoch": 0.3705894497062146,
14009
+ "grad_norm": 35.34375,
14010
+ "learning_rate": 9.9420954152733e-06,
14011
+ "loss": 21.3894,
14012
+ "step": 19970
14013
+ },
14014
+ {
14015
+ "epoch": 0.37077502279069446,
14016
+ "grad_norm": 33.28125,
14017
+ "learning_rate": 9.942066419487256e-06,
14018
+ "loss": 21.2541,
14019
+ "step": 19980
14020
+ },
14021
+ {
14022
+ "epoch": 0.37096059587517427,
14023
+ "grad_norm": 36.15625,
14024
+ "learning_rate": 9.942037423701213e-06,
14025
+ "loss": 21.0304,
14026
+ "step": 19990
14027
+ },
14028
+ {
14029
+ "epoch": 0.3711461689596541,
14030
+ "grad_norm": 35.65625,
14031
+ "learning_rate": 9.942008427915173e-06,
14032
+ "loss": 21.6275,
14033
+ "step": 20000
14034
+ },
14035
+ {
14036
+ "epoch": 0.3711461689596541,
14037
+ "eval_loss": 2.6757473945617676,
14038
+ "eval_runtime": 453.3076,
14039
+ "eval_samples_per_second": 3203.381,
14040
+ "eval_steps_per_second": 50.054,
14041
+ "step": 20000
14042
  }
14043
  ],
14044
  "logging_steps": 10,
 
14058
  "attributes": {}
14059
  }
14060
  },
14061
+ "total_flos": 3.491045518082048e+18,
14062
  "train_batch_size": 8,
14063
  "trial_name": null,
14064
  "trial_params": null