CocoRoF commited on
Commit
1cc2ae9
·
verified ·
1 Parent(s): 07cc069

Training in progress, step 35000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:877ef9b1ef203d1c02f31d1f5d81565b3878eb39121c8153f356847a2fd8eef8
3
  size 306619286
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf11920f2c9ec0d2d19594110c2b51d301a3be0b7a5c64b90c553593388f3b96
3
  size 306619286
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78438408549281a57107eeb546cb6b697cb5ef9532b1f55bd2b74f24bbf771c3
3
  size 919972410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9fd7ea0617e8351507c27a226d306b44f57a13cfca8832dbb9a6a416ba79c7b
3
  size 919972410
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb18ac8d6db3307b1c242f7cb069fc8b8dab957434ddfcafcac997cfd6a43abf
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04cb5208648fd09a2e0403d51973f74ffbfd93cbd5da59e1e99c8df03769a86c
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bdab708057b5f34a402d9a2b4443f5f93a8e8ee2ddb66d955f0a15ad394ecc5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7034685b36b93a4dd3a50697b0b1c314b249b2189ec2cb96b757312b1514a579
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:599882a30c163a5a2a000c4e74b320ecc4a55aa1b079882fd66aa3d2559d19e7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e851fe1c1de0057f4eecefed6a131fa9021334eb43f6e7e65fdb270a25ac864
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:567c3b482c209c2778fc017e39a38642c488edda20673ef29f571ef7177ad81e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:978379030048e432baa510ec4fc9514faa08fe564ab964b3a4d05e8f60306495
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f9ffe9a916e778423aaed4ec842923c9ccfdd3d7a4fbad10dc6a3bfc278fb8e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdbc75d90af112615b53d15931e8157a80e37bcd110aac9a3089f5f6f5344171
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7ede8a81aa3c780fb9c3cb57537752a782c4aed1dcecb7aafd6ca5a7ea90252
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c8a310f6ca2ca89570eb2cc68544656b30224f00b2d6d96eeda6e0cb8be50ab
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b1c5c0c0afa907d332467e631e6cee80ba476689aa0caa77689ca273d83b3e4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c5b8110fcf6e044b6860c6305be969cfe03129549b92dc6fc2394448e9265d6
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73025ac422abb13303ee974109cf39f6f848de7f7013e828d04aa4e2ec0e6757
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f936acaf5a2d5fe8c38d945450417facbf1577584c216908a396d3cc20bec88
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1909da172bf01f799e2ef8934ccdab3f4895ac7509eafdfbd8adccaf29c2176
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ff31aac428f9992f606e05ff9d9b75bec2abb517b825e89760b21fb1796744f
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5567192534394811,
5
  "eval_steps": 5000,
6
- "global_step": 30000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -21055,6 +21055,3514 @@
21055
  "eval_samples_per_second": 3189.489,
21056
  "eval_steps_per_second": 49.837,
21057
  "step": 30000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21058
  }
21059
  ],
21060
  "logging_steps": 10,
@@ -21074,7 +24582,7 @@
21074
  "attributes": {}
21075
  }
21076
  },
21077
- "total_flos": 5.236568277123072e+18,
21078
  "train_batch_size": 8,
21079
  "trial_name": null,
21080
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6495057956793947,
5
  "eval_steps": 5000,
6
+ "global_step": 35000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
21055
  "eval_samples_per_second": 3189.489,
21056
  "eval_steps_per_second": 49.837,
21057
  "step": 30000
21058
+ },
21059
+ {
21060
+ "epoch": 0.556904826523961,
21061
+ "grad_norm": 36.46875,
21062
+ "learning_rate": 9.912983646086715e-06,
21063
+ "loss": 19.7191,
21064
+ "step": 30010
21065
+ },
21066
+ {
21067
+ "epoch": 0.5570903996084408,
21068
+ "grad_norm": 34.21875,
21069
+ "learning_rate": 9.912954650300672e-06,
21070
+ "loss": 20.2245,
21071
+ "step": 30020
21072
+ },
21073
+ {
21074
+ "epoch": 0.5572759726929206,
21075
+ "grad_norm": 35.1875,
21076
+ "learning_rate": 9.91292565451463e-06,
21077
+ "loss": 20.1206,
21078
+ "step": 30030
21079
+ },
21080
+ {
21081
+ "epoch": 0.5574615457774005,
21082
+ "grad_norm": 33.0,
21083
+ "learning_rate": 9.912896658728587e-06,
21084
+ "loss": 20.1384,
21085
+ "step": 30040
21086
+ },
21087
+ {
21088
+ "epoch": 0.5576471188618802,
21089
+ "grad_norm": 34.1875,
21090
+ "learning_rate": 9.912867662942546e-06,
21091
+ "loss": 20.4741,
21092
+ "step": 30050
21093
+ },
21094
+ {
21095
+ "epoch": 0.5578326919463601,
21096
+ "grad_norm": 33.9375,
21097
+ "learning_rate": 9.912838667156502e-06,
21098
+ "loss": 19.7416,
21099
+ "step": 30060
21100
+ },
21101
+ {
21102
+ "epoch": 0.55801826503084,
21103
+ "grad_norm": 35.4375,
21104
+ "learning_rate": 9.91280967137046e-06,
21105
+ "loss": 20.5265,
21106
+ "step": 30070
21107
+ },
21108
+ {
21109
+ "epoch": 0.5582038381153197,
21110
+ "grad_norm": 33.90625,
21111
+ "learning_rate": 9.912780675584419e-06,
21112
+ "loss": 20.1603,
21113
+ "step": 30080
21114
+ },
21115
+ {
21116
+ "epoch": 0.5583894111997996,
21117
+ "grad_norm": 37.40625,
21118
+ "learning_rate": 9.912751679798376e-06,
21119
+ "loss": 20.2072,
21120
+ "step": 30090
21121
+ },
21122
+ {
21123
+ "epoch": 0.5585749842842794,
21124
+ "grad_norm": 34.90625,
21125
+ "learning_rate": 9.912722684012333e-06,
21126
+ "loss": 20.7503,
21127
+ "step": 30100
21128
+ },
21129
+ {
21130
+ "epoch": 0.5587605573687592,
21131
+ "grad_norm": 34.71875,
21132
+ "learning_rate": 9.91269368822629e-06,
21133
+ "loss": 20.1174,
21134
+ "step": 30110
21135
+ },
21136
+ {
21137
+ "epoch": 0.558946130453239,
21138
+ "grad_norm": 33.6875,
21139
+ "learning_rate": 9.912664692440248e-06,
21140
+ "loss": 19.6637,
21141
+ "step": 30120
21142
+ },
21143
+ {
21144
+ "epoch": 0.5591317035377189,
21145
+ "grad_norm": 33.6875,
21146
+ "learning_rate": 9.912635696654206e-06,
21147
+ "loss": 20.3168,
21148
+ "step": 30130
21149
+ },
21150
+ {
21151
+ "epoch": 0.5593172766221988,
21152
+ "grad_norm": 35.15625,
21153
+ "learning_rate": 9.912606700868163e-06,
21154
+ "loss": 20.4504,
21155
+ "step": 30140
21156
+ },
21157
+ {
21158
+ "epoch": 0.5595028497066785,
21159
+ "grad_norm": 32.21875,
21160
+ "learning_rate": 9.912577705082122e-06,
21161
+ "loss": 19.8619,
21162
+ "step": 30150
21163
+ },
21164
+ {
21165
+ "epoch": 0.5596884227911584,
21166
+ "grad_norm": 34.46875,
21167
+ "learning_rate": 9.91254870929608e-06,
21168
+ "loss": 20.3352,
21169
+ "step": 30160
21170
+ },
21171
+ {
21172
+ "epoch": 0.5598739958756382,
21173
+ "grad_norm": 35.09375,
21174
+ "learning_rate": 9.912519713510035e-06,
21175
+ "loss": 20.1644,
21176
+ "step": 30170
21177
+ },
21178
+ {
21179
+ "epoch": 0.560059568960118,
21180
+ "grad_norm": 35.6875,
21181
+ "learning_rate": 9.912490717723994e-06,
21182
+ "loss": 20.4346,
21183
+ "step": 30180
21184
+ },
21185
+ {
21186
+ "epoch": 0.5602451420445979,
21187
+ "grad_norm": 34.90625,
21188
+ "learning_rate": 9.912461721937952e-06,
21189
+ "loss": 20.3004,
21190
+ "step": 30190
21191
+ },
21192
+ {
21193
+ "epoch": 0.5604307151290777,
21194
+ "grad_norm": 35.3125,
21195
+ "learning_rate": 9.912432726151909e-06,
21196
+ "loss": 19.9627,
21197
+ "step": 30200
21198
+ },
21199
+ {
21200
+ "epoch": 0.5606162882135575,
21201
+ "grad_norm": 36.46875,
21202
+ "learning_rate": 9.912403730365866e-06,
21203
+ "loss": 20.271,
21204
+ "step": 30210
21205
+ },
21206
+ {
21207
+ "epoch": 0.5608018612980373,
21208
+ "grad_norm": 34.9375,
21209
+ "learning_rate": 9.912374734579824e-06,
21210
+ "loss": 20.5842,
21211
+ "step": 30220
21212
+ },
21213
+ {
21214
+ "epoch": 0.5609874343825172,
21215
+ "grad_norm": 34.03125,
21216
+ "learning_rate": 9.912345738793781e-06,
21217
+ "loss": 20.1096,
21218
+ "step": 30230
21219
+ },
21220
+ {
21221
+ "epoch": 0.5611730074669969,
21222
+ "grad_norm": 35.6875,
21223
+ "learning_rate": 9.912316743007739e-06,
21224
+ "loss": 20.4759,
21225
+ "step": 30240
21226
+ },
21227
+ {
21228
+ "epoch": 0.5613585805514768,
21229
+ "grad_norm": 33.0,
21230
+ "learning_rate": 9.912287747221698e-06,
21231
+ "loss": 20.4337,
21232
+ "step": 30250
21233
+ },
21234
+ {
21235
+ "epoch": 0.5615441536359567,
21236
+ "grad_norm": 35.34375,
21237
+ "learning_rate": 9.912258751435655e-06,
21238
+ "loss": 20.2153,
21239
+ "step": 30260
21240
+ },
21241
+ {
21242
+ "epoch": 0.5617297267204364,
21243
+ "grad_norm": 37.25,
21244
+ "learning_rate": 9.912229755649613e-06,
21245
+ "loss": 19.9206,
21246
+ "step": 30270
21247
+ },
21248
+ {
21249
+ "epoch": 0.5619152998049163,
21250
+ "grad_norm": 35.5625,
21251
+ "learning_rate": 9.91220075986357e-06,
21252
+ "loss": 19.9902,
21253
+ "step": 30280
21254
+ },
21255
+ {
21256
+ "epoch": 0.5621008728893961,
21257
+ "grad_norm": 35.0625,
21258
+ "learning_rate": 9.912171764077527e-06,
21259
+ "loss": 19.9837,
21260
+ "step": 30290
21261
+ },
21262
+ {
21263
+ "epoch": 0.562286445973876,
21264
+ "grad_norm": 33.53125,
21265
+ "learning_rate": 9.912142768291485e-06,
21266
+ "loss": 20.1273,
21267
+ "step": 30300
21268
+ },
21269
+ {
21270
+ "epoch": 0.5624720190583558,
21271
+ "grad_norm": 36.96875,
21272
+ "learning_rate": 9.912113772505442e-06,
21273
+ "loss": 20.5882,
21274
+ "step": 30310
21275
+ },
21276
+ {
21277
+ "epoch": 0.5626575921428356,
21278
+ "grad_norm": 34.46875,
21279
+ "learning_rate": 9.912084776719401e-06,
21280
+ "loss": 20.1443,
21281
+ "step": 30320
21282
+ },
21283
+ {
21284
+ "epoch": 0.5628431652273155,
21285
+ "grad_norm": 33.875,
21286
+ "learning_rate": 9.912055780933357e-06,
21287
+ "loss": 20.4219,
21288
+ "step": 30330
21289
+ },
21290
+ {
21291
+ "epoch": 0.5630287383117952,
21292
+ "grad_norm": 34.375,
21293
+ "learning_rate": 9.912026785147314e-06,
21294
+ "loss": 20.6782,
21295
+ "step": 30340
21296
+ },
21297
+ {
21298
+ "epoch": 0.5632143113962751,
21299
+ "grad_norm": 36.65625,
21300
+ "learning_rate": 9.911997789361274e-06,
21301
+ "loss": 20.4529,
21302
+ "step": 30350
21303
+ },
21304
+ {
21305
+ "epoch": 0.563399884480755,
21306
+ "grad_norm": 32.75,
21307
+ "learning_rate": 9.911968793575231e-06,
21308
+ "loss": 20.2558,
21309
+ "step": 30360
21310
+ },
21311
+ {
21312
+ "epoch": 0.5635854575652347,
21313
+ "grad_norm": 34.46875,
21314
+ "learning_rate": 9.911939797789188e-06,
21315
+ "loss": 20.1141,
21316
+ "step": 30370
21317
+ },
21318
+ {
21319
+ "epoch": 0.5637710306497146,
21320
+ "grad_norm": 34.375,
21321
+ "learning_rate": 9.911910802003146e-06,
21322
+ "loss": 20.2123,
21323
+ "step": 30380
21324
+ },
21325
+ {
21326
+ "epoch": 0.5639566037341944,
21327
+ "grad_norm": 34.65625,
21328
+ "learning_rate": 9.911881806217103e-06,
21329
+ "loss": 20.2384,
21330
+ "step": 30390
21331
+ },
21332
+ {
21333
+ "epoch": 0.5641421768186742,
21334
+ "grad_norm": 36.96875,
21335
+ "learning_rate": 9.91185281043106e-06,
21336
+ "loss": 20.17,
21337
+ "step": 30400
21338
+ },
21339
+ {
21340
+ "epoch": 0.564327749903154,
21341
+ "grad_norm": 34.0625,
21342
+ "learning_rate": 9.911823814645018e-06,
21343
+ "loss": 20.2601,
21344
+ "step": 30410
21345
+ },
21346
+ {
21347
+ "epoch": 0.5645133229876339,
21348
+ "grad_norm": 33.875,
21349
+ "learning_rate": 9.911794818858977e-06,
21350
+ "loss": 20.4207,
21351
+ "step": 30420
21352
+ },
21353
+ {
21354
+ "epoch": 0.5646988960721137,
21355
+ "grad_norm": 34.90625,
21356
+ "learning_rate": 9.911765823072935e-06,
21357
+ "loss": 20.1737,
21358
+ "step": 30430
21359
+ },
21360
+ {
21361
+ "epoch": 0.5648844691565935,
21362
+ "grad_norm": 34.4375,
21363
+ "learning_rate": 9.91173682728689e-06,
21364
+ "loss": 20.2786,
21365
+ "step": 30440
21366
+ },
21367
+ {
21368
+ "epoch": 0.5650700422410734,
21369
+ "grad_norm": 34.59375,
21370
+ "learning_rate": 9.91170783150085e-06,
21371
+ "loss": 20.2238,
21372
+ "step": 30450
21373
+ },
21374
+ {
21375
+ "epoch": 0.5652556153255531,
21376
+ "grad_norm": 35.4375,
21377
+ "learning_rate": 9.911678835714807e-06,
21378
+ "loss": 20.7817,
21379
+ "step": 30460
21380
+ },
21381
+ {
21382
+ "epoch": 0.565441188410033,
21383
+ "grad_norm": 36.0625,
21384
+ "learning_rate": 9.911649839928764e-06,
21385
+ "loss": 20.3482,
21386
+ "step": 30470
21387
+ },
21388
+ {
21389
+ "epoch": 0.5656267614945129,
21390
+ "grad_norm": 34.1875,
21391
+ "learning_rate": 9.911620844142722e-06,
21392
+ "loss": 20.3726,
21393
+ "step": 30480
21394
+ },
21395
+ {
21396
+ "epoch": 0.5658123345789927,
21397
+ "grad_norm": 36.28125,
21398
+ "learning_rate": 9.911591848356679e-06,
21399
+ "loss": 20.351,
21400
+ "step": 30490
21401
+ },
21402
+ {
21403
+ "epoch": 0.5659979076634725,
21404
+ "grad_norm": 35.625,
21405
+ "learning_rate": 9.911562852570636e-06,
21406
+ "loss": 20.0794,
21407
+ "step": 30500
21408
+ },
21409
+ {
21410
+ "epoch": 0.5661834807479523,
21411
+ "grad_norm": 34.3125,
21412
+ "learning_rate": 9.911533856784594e-06,
21413
+ "loss": 20.5169,
21414
+ "step": 30510
21415
+ },
21416
+ {
21417
+ "epoch": 0.5663690538324322,
21418
+ "grad_norm": 37.6875,
21419
+ "learning_rate": 9.911504860998551e-06,
21420
+ "loss": 20.1937,
21421
+ "step": 30520
21422
+ },
21423
+ {
21424
+ "epoch": 0.5665546269169119,
21425
+ "grad_norm": 34.5,
21426
+ "learning_rate": 9.91147586521251e-06,
21427
+ "loss": 20.0421,
21428
+ "step": 30530
21429
+ },
21430
+ {
21431
+ "epoch": 0.5667402000013918,
21432
+ "grad_norm": 36.625,
21433
+ "learning_rate": 9.911446869426468e-06,
21434
+ "loss": 20.5098,
21435
+ "step": 30540
21436
+ },
21437
+ {
21438
+ "epoch": 0.5669257730858717,
21439
+ "grad_norm": 33.78125,
21440
+ "learning_rate": 9.911417873640425e-06,
21441
+ "loss": 20.1718,
21442
+ "step": 30550
21443
+ },
21444
+ {
21445
+ "epoch": 0.5671113461703514,
21446
+ "grad_norm": 35.375,
21447
+ "learning_rate": 9.911388877854383e-06,
21448
+ "loss": 20.703,
21449
+ "step": 30560
21450
+ },
21451
+ {
21452
+ "epoch": 0.5672969192548313,
21453
+ "grad_norm": 35.84375,
21454
+ "learning_rate": 9.91135988206834e-06,
21455
+ "loss": 20.0026,
21456
+ "step": 30570
21457
+ },
21458
+ {
21459
+ "epoch": 0.5674824923393111,
21460
+ "grad_norm": 37.21875,
21461
+ "learning_rate": 9.911330886282297e-06,
21462
+ "loss": 19.9628,
21463
+ "step": 30580
21464
+ },
21465
+ {
21466
+ "epoch": 0.5676680654237909,
21467
+ "grad_norm": 35.59375,
21468
+ "learning_rate": 9.911301890496255e-06,
21469
+ "loss": 20.4465,
21470
+ "step": 30590
21471
+ },
21472
+ {
21473
+ "epoch": 0.5678536385082708,
21474
+ "grad_norm": 35.4375,
21475
+ "learning_rate": 9.911272894710212e-06,
21476
+ "loss": 20.0498,
21477
+ "step": 30600
21478
+ },
21479
+ {
21480
+ "epoch": 0.5680392115927506,
21481
+ "grad_norm": 38.21875,
21482
+ "learning_rate": 9.91124389892417e-06,
21483
+ "loss": 20.5764,
21484
+ "step": 30610
21485
+ },
21486
+ {
21487
+ "epoch": 0.5682247846772304,
21488
+ "grad_norm": 34.34375,
21489
+ "learning_rate": 9.911214903138127e-06,
21490
+ "loss": 20.0934,
21491
+ "step": 30620
21492
+ },
21493
+ {
21494
+ "epoch": 0.5684103577617102,
21495
+ "grad_norm": 34.75,
21496
+ "learning_rate": 9.911185907352086e-06,
21497
+ "loss": 19.9147,
21498
+ "step": 30630
21499
+ },
21500
+ {
21501
+ "epoch": 0.5685959308461901,
21502
+ "grad_norm": 33.65625,
21503
+ "learning_rate": 9.911156911566043e-06,
21504
+ "loss": 20.4771,
21505
+ "step": 30640
21506
+ },
21507
+ {
21508
+ "epoch": 0.5687815039306698,
21509
+ "grad_norm": 34.96875,
21510
+ "learning_rate": 9.91112791578e-06,
21511
+ "loss": 19.7005,
21512
+ "step": 30650
21513
+ },
21514
+ {
21515
+ "epoch": 0.5689670770151497,
21516
+ "grad_norm": 34.21875,
21517
+ "learning_rate": 9.911098919993958e-06,
21518
+ "loss": 20.2171,
21519
+ "step": 30660
21520
+ },
21521
+ {
21522
+ "epoch": 0.5691526500996296,
21523
+ "grad_norm": 34.6875,
21524
+ "learning_rate": 9.911069924207916e-06,
21525
+ "loss": 20.4261,
21526
+ "step": 30670
21527
+ },
21528
+ {
21529
+ "epoch": 0.5693382231841094,
21530
+ "grad_norm": 35.5,
21531
+ "learning_rate": 9.911040928421873e-06,
21532
+ "loss": 19.8097,
21533
+ "step": 30680
21534
+ },
21535
+ {
21536
+ "epoch": 0.5695237962685892,
21537
+ "grad_norm": 37.15625,
21538
+ "learning_rate": 9.91101193263583e-06,
21539
+ "loss": 20.2062,
21540
+ "step": 30690
21541
+ },
21542
+ {
21543
+ "epoch": 0.569709369353069,
21544
+ "grad_norm": 37.3125,
21545
+ "learning_rate": 9.91098293684979e-06,
21546
+ "loss": 20.2614,
21547
+ "step": 30700
21548
+ },
21549
+ {
21550
+ "epoch": 0.5698949424375489,
21551
+ "grad_norm": 34.96875,
21552
+ "learning_rate": 9.910953941063745e-06,
21553
+ "loss": 20.3764,
21554
+ "step": 30710
21555
+ },
21556
+ {
21557
+ "epoch": 0.5700805155220287,
21558
+ "grad_norm": 35.1875,
21559
+ "learning_rate": 9.910924945277703e-06,
21560
+ "loss": 19.9522,
21561
+ "step": 30720
21562
+ },
21563
+ {
21564
+ "epoch": 0.5702660886065085,
21565
+ "grad_norm": 35.9375,
21566
+ "learning_rate": 9.910895949491662e-06,
21567
+ "loss": 19.9363,
21568
+ "step": 30730
21569
+ },
21570
+ {
21571
+ "epoch": 0.5704516616909884,
21572
+ "grad_norm": 36.71875,
21573
+ "learning_rate": 9.91086695370562e-06,
21574
+ "loss": 20.4394,
21575
+ "step": 30740
21576
+ },
21577
+ {
21578
+ "epoch": 0.5706372347754681,
21579
+ "grad_norm": 36.78125,
21580
+ "learning_rate": 9.910837957919577e-06,
21581
+ "loss": 20.6453,
21582
+ "step": 30750
21583
+ },
21584
+ {
21585
+ "epoch": 0.570822807859948,
21586
+ "grad_norm": 34.3125,
21587
+ "learning_rate": 9.910808962133534e-06,
21588
+ "loss": 20.342,
21589
+ "step": 30760
21590
+ },
21591
+ {
21592
+ "epoch": 0.5710083809444279,
21593
+ "grad_norm": 36.25,
21594
+ "learning_rate": 9.910779966347491e-06,
21595
+ "loss": 20.1425,
21596
+ "step": 30770
21597
+ },
21598
+ {
21599
+ "epoch": 0.5711939540289076,
21600
+ "grad_norm": 36.40625,
21601
+ "learning_rate": 9.910750970561449e-06,
21602
+ "loss": 19.9823,
21603
+ "step": 30780
21604
+ },
21605
+ {
21606
+ "epoch": 0.5713795271133875,
21607
+ "grad_norm": 32.53125,
21608
+ "learning_rate": 9.910721974775406e-06,
21609
+ "loss": 19.6331,
21610
+ "step": 30790
21611
+ },
21612
+ {
21613
+ "epoch": 0.5715651001978673,
21614
+ "grad_norm": 35.34375,
21615
+ "learning_rate": 9.910692978989365e-06,
21616
+ "loss": 20.1497,
21617
+ "step": 30800
21618
+ },
21619
+ {
21620
+ "epoch": 0.5717506732823471,
21621
+ "grad_norm": 33.40625,
21622
+ "learning_rate": 9.910663983203321e-06,
21623
+ "loss": 20.4599,
21624
+ "step": 30810
21625
+ },
21626
+ {
21627
+ "epoch": 0.5719362463668269,
21628
+ "grad_norm": 35.96875,
21629
+ "learning_rate": 9.910634987417278e-06,
21630
+ "loss": 20.056,
21631
+ "step": 30820
21632
+ },
21633
+ {
21634
+ "epoch": 0.5721218194513068,
21635
+ "grad_norm": 35.84375,
21636
+ "learning_rate": 9.910605991631238e-06,
21637
+ "loss": 19.8844,
21638
+ "step": 30830
21639
+ },
21640
+ {
21641
+ "epoch": 0.5723073925357867,
21642
+ "grad_norm": 34.34375,
21643
+ "learning_rate": 9.910576995845195e-06,
21644
+ "loss": 20.1571,
21645
+ "step": 30840
21646
+ },
21647
+ {
21648
+ "epoch": 0.5724929656202664,
21649
+ "grad_norm": 33.75,
21650
+ "learning_rate": 9.910548000059152e-06,
21651
+ "loss": 20.6345,
21652
+ "step": 30850
21653
+ },
21654
+ {
21655
+ "epoch": 0.5726785387047463,
21656
+ "grad_norm": 35.4375,
21657
+ "learning_rate": 9.91051900427311e-06,
21658
+ "loss": 20.0653,
21659
+ "step": 30860
21660
+ },
21661
+ {
21662
+ "epoch": 0.5728641117892261,
21663
+ "grad_norm": 35.03125,
21664
+ "learning_rate": 9.910490008487067e-06,
21665
+ "loss": 20.1354,
21666
+ "step": 30870
21667
+ },
21668
+ {
21669
+ "epoch": 0.5730496848737059,
21670
+ "grad_norm": 36.5625,
21671
+ "learning_rate": 9.910461012701025e-06,
21672
+ "loss": 19.7588,
21673
+ "step": 30880
21674
+ },
21675
+ {
21676
+ "epoch": 0.5732352579581858,
21677
+ "grad_norm": 36.34375,
21678
+ "learning_rate": 9.910432016914982e-06,
21679
+ "loss": 20.1258,
21680
+ "step": 30890
21681
+ },
21682
+ {
21683
+ "epoch": 0.5734208310426656,
21684
+ "grad_norm": 35.9375,
21685
+ "learning_rate": 9.910403021128941e-06,
21686
+ "loss": 19.8419,
21687
+ "step": 30900
21688
+ },
21689
+ {
21690
+ "epoch": 0.5736064041271454,
21691
+ "grad_norm": 36.09375,
21692
+ "learning_rate": 9.910374025342899e-06,
21693
+ "loss": 20.1061,
21694
+ "step": 30910
21695
+ },
21696
+ {
21697
+ "epoch": 0.5737919772116252,
21698
+ "grad_norm": 37.25,
21699
+ "learning_rate": 9.910345029556854e-06,
21700
+ "loss": 20.4488,
21701
+ "step": 30920
21702
+ },
21703
+ {
21704
+ "epoch": 0.5739775502961051,
21705
+ "grad_norm": 34.40625,
21706
+ "learning_rate": 9.910316033770813e-06,
21707
+ "loss": 20.1627,
21708
+ "step": 30930
21709
+ },
21710
+ {
21711
+ "epoch": 0.5741631233805848,
21712
+ "grad_norm": 35.34375,
21713
+ "learning_rate": 9.91028703798477e-06,
21714
+ "loss": 20.1895,
21715
+ "step": 30940
21716
+ },
21717
+ {
21718
+ "epoch": 0.5743486964650647,
21719
+ "grad_norm": 35.15625,
21720
+ "learning_rate": 9.910258042198728e-06,
21721
+ "loss": 20.327,
21722
+ "step": 30950
21723
+ },
21724
+ {
21725
+ "epoch": 0.5745342695495446,
21726
+ "grad_norm": 35.09375,
21727
+ "learning_rate": 9.910229046412686e-06,
21728
+ "loss": 19.9475,
21729
+ "step": 30960
21730
+ },
21731
+ {
21732
+ "epoch": 0.5747198426340243,
21733
+ "grad_norm": 34.96875,
21734
+ "learning_rate": 9.910200050626643e-06,
21735
+ "loss": 20.4849,
21736
+ "step": 30970
21737
+ },
21738
+ {
21739
+ "epoch": 0.5749054157185042,
21740
+ "grad_norm": 35.28125,
21741
+ "learning_rate": 9.9101710548406e-06,
21742
+ "loss": 20.0981,
21743
+ "step": 30980
21744
+ },
21745
+ {
21746
+ "epoch": 0.575090988802984,
21747
+ "grad_norm": 35.53125,
21748
+ "learning_rate": 9.910142059054558e-06,
21749
+ "loss": 20.0985,
21750
+ "step": 30990
21751
+ },
21752
+ {
21753
+ "epoch": 0.5752765618874638,
21754
+ "grad_norm": 36.1875,
21755
+ "learning_rate": 9.910113063268517e-06,
21756
+ "loss": 20.5353,
21757
+ "step": 31000
21758
+ },
21759
+ {
21760
+ "epoch": 0.5754621349719437,
21761
+ "grad_norm": 34.6875,
21762
+ "learning_rate": 9.910084067482474e-06,
21763
+ "loss": 19.9665,
21764
+ "step": 31010
21765
+ },
21766
+ {
21767
+ "epoch": 0.5756477080564235,
21768
+ "grad_norm": 34.40625,
21769
+ "learning_rate": 9.910055071696432e-06,
21770
+ "loss": 20.7101,
21771
+ "step": 31020
21772
+ },
21773
+ {
21774
+ "epoch": 0.5758332811409034,
21775
+ "grad_norm": 37.21875,
21776
+ "learning_rate": 9.910026075910389e-06,
21777
+ "loss": 20.1796,
21778
+ "step": 31030
21779
+ },
21780
+ {
21781
+ "epoch": 0.5760188542253831,
21782
+ "grad_norm": 34.53125,
21783
+ "learning_rate": 9.909997080124347e-06,
21784
+ "loss": 20.1421,
21785
+ "step": 31040
21786
+ },
21787
+ {
21788
+ "epoch": 0.576204427309863,
21789
+ "grad_norm": 32.40625,
21790
+ "learning_rate": 9.909968084338304e-06,
21791
+ "loss": 20.1085,
21792
+ "step": 31050
21793
+ },
21794
+ {
21795
+ "epoch": 0.5763900003943428,
21796
+ "grad_norm": 34.78125,
21797
+ "learning_rate": 9.909939088552261e-06,
21798
+ "loss": 20.3996,
21799
+ "step": 31060
21800
+ },
21801
+ {
21802
+ "epoch": 0.5765755734788226,
21803
+ "grad_norm": 34.875,
21804
+ "learning_rate": 9.909910092766219e-06,
21805
+ "loss": 20.6353,
21806
+ "step": 31070
21807
+ },
21808
+ {
21809
+ "epoch": 0.5767611465633025,
21810
+ "grad_norm": 35.6875,
21811
+ "learning_rate": 9.909881096980176e-06,
21812
+ "loss": 20.0609,
21813
+ "step": 31080
21814
+ },
21815
+ {
21816
+ "epoch": 0.5769467196477823,
21817
+ "grad_norm": 34.625,
21818
+ "learning_rate": 9.909852101194134e-06,
21819
+ "loss": 19.9845,
21820
+ "step": 31090
21821
+ },
21822
+ {
21823
+ "epoch": 0.5771322927322621,
21824
+ "grad_norm": 34.125,
21825
+ "learning_rate": 9.909823105408091e-06,
21826
+ "loss": 19.8181,
21827
+ "step": 31100
21828
+ },
21829
+ {
21830
+ "epoch": 0.5773178658167419,
21831
+ "grad_norm": 34.25,
21832
+ "learning_rate": 9.90979410962205e-06,
21833
+ "loss": 20.0183,
21834
+ "step": 31110
21835
+ },
21836
+ {
21837
+ "epoch": 0.5775034389012218,
21838
+ "grad_norm": 34.09375,
21839
+ "learning_rate": 9.909765113836007e-06,
21840
+ "loss": 19.9869,
21841
+ "step": 31120
21842
+ },
21843
+ {
21844
+ "epoch": 0.5776890119857016,
21845
+ "grad_norm": 34.21875,
21846
+ "learning_rate": 9.909736118049965e-06,
21847
+ "loss": 20.2748,
21848
+ "step": 31130
21849
+ },
21850
+ {
21851
+ "epoch": 0.5778745850701814,
21852
+ "grad_norm": 34.09375,
21853
+ "learning_rate": 9.909707122263922e-06,
21854
+ "loss": 19.7719,
21855
+ "step": 31140
21856
+ },
21857
+ {
21858
+ "epoch": 0.5780601581546613,
21859
+ "grad_norm": 36.15625,
21860
+ "learning_rate": 9.90967812647788e-06,
21861
+ "loss": 20.0686,
21862
+ "step": 31150
21863
+ },
21864
+ {
21865
+ "epoch": 0.578245731239141,
21866
+ "grad_norm": 36.4375,
21867
+ "learning_rate": 9.909649130691837e-06,
21868
+ "loss": 20.0449,
21869
+ "step": 31160
21870
+ },
21871
+ {
21872
+ "epoch": 0.5784313043236209,
21873
+ "grad_norm": 34.65625,
21874
+ "learning_rate": 9.909620134905795e-06,
21875
+ "loss": 20.3113,
21876
+ "step": 31170
21877
+ },
21878
+ {
21879
+ "epoch": 0.5786168774081007,
21880
+ "grad_norm": 35.5,
21881
+ "learning_rate": 9.909591139119754e-06,
21882
+ "loss": 20.0866,
21883
+ "step": 31180
21884
+ },
21885
+ {
21886
+ "epoch": 0.5788024504925806,
21887
+ "grad_norm": 34.5625,
21888
+ "learning_rate": 9.90956214333371e-06,
21889
+ "loss": 19.9436,
21890
+ "step": 31190
21891
+ },
21892
+ {
21893
+ "epoch": 0.5789880235770604,
21894
+ "grad_norm": 34.3125,
21895
+ "learning_rate": 9.909533147547667e-06,
21896
+ "loss": 20.16,
21897
+ "step": 31200
21898
+ },
21899
+ {
21900
+ "epoch": 0.5791735966615402,
21901
+ "grad_norm": 32.8125,
21902
+ "learning_rate": 9.909504151761626e-06,
21903
+ "loss": 20.0966,
21904
+ "step": 31210
21905
+ },
21906
+ {
21907
+ "epoch": 0.5793591697460201,
21908
+ "grad_norm": 36.03125,
21909
+ "learning_rate": 9.909475155975583e-06,
21910
+ "loss": 19.8559,
21911
+ "step": 31220
21912
+ },
21913
+ {
21914
+ "epoch": 0.5795447428304998,
21915
+ "grad_norm": 34.625,
21916
+ "learning_rate": 9.90944616018954e-06,
21917
+ "loss": 20.0601,
21918
+ "step": 31230
21919
+ },
21920
+ {
21921
+ "epoch": 0.5797303159149797,
21922
+ "grad_norm": 36.28125,
21923
+ "learning_rate": 9.909417164403498e-06,
21924
+ "loss": 19.8656,
21925
+ "step": 31240
21926
+ },
21927
+ {
21928
+ "epoch": 0.5799158889994596,
21929
+ "grad_norm": 34.125,
21930
+ "learning_rate": 9.909388168617455e-06,
21931
+ "loss": 20.3291,
21932
+ "step": 31250
21933
+ },
21934
+ {
21935
+ "epoch": 0.5801014620839393,
21936
+ "grad_norm": 33.84375,
21937
+ "learning_rate": 9.909359172831413e-06,
21938
+ "loss": 20.4572,
21939
+ "step": 31260
21940
+ },
21941
+ {
21942
+ "epoch": 0.5802870351684192,
21943
+ "grad_norm": 34.03125,
21944
+ "learning_rate": 9.90933017704537e-06,
21945
+ "loss": 19.946,
21946
+ "step": 31270
21947
+ },
21948
+ {
21949
+ "epoch": 0.580472608252899,
21950
+ "grad_norm": 33.9375,
21951
+ "learning_rate": 9.90930118125933e-06,
21952
+ "loss": 20.13,
21953
+ "step": 31280
21954
+ },
21955
+ {
21956
+ "epoch": 0.5806581813373788,
21957
+ "grad_norm": 33.90625,
21958
+ "learning_rate": 9.909272185473287e-06,
21959
+ "loss": 20.2398,
21960
+ "step": 31290
21961
+ },
21962
+ {
21963
+ "epoch": 0.5808437544218586,
21964
+ "grad_norm": 33.09375,
21965
+ "learning_rate": 9.909243189687243e-06,
21966
+ "loss": 20.1966,
21967
+ "step": 31300
21968
+ },
21969
+ {
21970
+ "epoch": 0.5810293275063385,
21971
+ "grad_norm": 34.8125,
21972
+ "learning_rate": 9.909214193901202e-06,
21973
+ "loss": 20.0873,
21974
+ "step": 31310
21975
+ },
21976
+ {
21977
+ "epoch": 0.5812149005908183,
21978
+ "grad_norm": 34.875,
21979
+ "learning_rate": 9.909185198115159e-06,
21980
+ "loss": 19.9998,
21981
+ "step": 31320
21982
+ },
21983
+ {
21984
+ "epoch": 0.5814004736752981,
21985
+ "grad_norm": 33.8125,
21986
+ "learning_rate": 9.909156202329116e-06,
21987
+ "loss": 20.1087,
21988
+ "step": 31330
21989
+ },
21990
+ {
21991
+ "epoch": 0.581586046759778,
21992
+ "grad_norm": 36.125,
21993
+ "learning_rate": 9.909127206543074e-06,
21994
+ "loss": 20.1545,
21995
+ "step": 31340
21996
+ },
21997
+ {
21998
+ "epoch": 0.5817716198442577,
21999
+ "grad_norm": 34.40625,
22000
+ "learning_rate": 9.909098210757031e-06,
22001
+ "loss": 20.2046,
22002
+ "step": 31350
22003
+ },
22004
+ {
22005
+ "epoch": 0.5819571929287376,
22006
+ "grad_norm": 37.1875,
22007
+ "learning_rate": 9.909069214970989e-06,
22008
+ "loss": 20.0161,
22009
+ "step": 31360
22010
+ },
22011
+ {
22012
+ "epoch": 0.5821427660132175,
22013
+ "grad_norm": 36.125,
22014
+ "learning_rate": 9.909040219184946e-06,
22015
+ "loss": 19.5715,
22016
+ "step": 31370
22017
+ },
22018
+ {
22019
+ "epoch": 0.5823283390976973,
22020
+ "grad_norm": 35.59375,
22021
+ "learning_rate": 9.909011223398905e-06,
22022
+ "loss": 19.9348,
22023
+ "step": 31380
22024
+ },
22025
+ {
22026
+ "epoch": 0.5825139121821771,
22027
+ "grad_norm": 35.28125,
22028
+ "learning_rate": 9.908982227612863e-06,
22029
+ "loss": 20.2421,
22030
+ "step": 31390
22031
+ },
22032
+ {
22033
+ "epoch": 0.5826994852666569,
22034
+ "grad_norm": 35.09375,
22035
+ "learning_rate": 9.908953231826818e-06,
22036
+ "loss": 19.6778,
22037
+ "step": 31400
22038
+ },
22039
+ {
22040
+ "epoch": 0.5828850583511368,
22041
+ "grad_norm": 33.65625,
22042
+ "learning_rate": 9.908924236040777e-06,
22043
+ "loss": 20.1597,
22044
+ "step": 31410
22045
+ },
22046
+ {
22047
+ "epoch": 0.5830706314356165,
22048
+ "grad_norm": 34.0625,
22049
+ "learning_rate": 9.908895240254735e-06,
22050
+ "loss": 19.7993,
22051
+ "step": 31420
22052
+ },
22053
+ {
22054
+ "epoch": 0.5832562045200964,
22055
+ "grad_norm": 32.1875,
22056
+ "learning_rate": 9.908866244468692e-06,
22057
+ "loss": 19.741,
22058
+ "step": 31430
22059
+ },
22060
+ {
22061
+ "epoch": 0.5834417776045763,
22062
+ "grad_norm": 35.96875,
22063
+ "learning_rate": 9.90883724868265e-06,
22064
+ "loss": 19.8878,
22065
+ "step": 31440
22066
+ },
22067
+ {
22068
+ "epoch": 0.583627350689056,
22069
+ "grad_norm": 35.09375,
22070
+ "learning_rate": 9.908808252896609e-06,
22071
+ "loss": 20.1307,
22072
+ "step": 31450
22073
+ },
22074
+ {
22075
+ "epoch": 0.5838129237735359,
22076
+ "grad_norm": 36.75,
22077
+ "learning_rate": 9.908779257110564e-06,
22078
+ "loss": 20.0339,
22079
+ "step": 31460
22080
+ },
22081
+ {
22082
+ "epoch": 0.5839984968580157,
22083
+ "grad_norm": 36.1875,
22084
+ "learning_rate": 9.908750261324522e-06,
22085
+ "loss": 20.0168,
22086
+ "step": 31470
22087
+ },
22088
+ {
22089
+ "epoch": 0.5841840699424955,
22090
+ "grad_norm": 33.65625,
22091
+ "learning_rate": 9.908721265538481e-06,
22092
+ "loss": 19.8763,
22093
+ "step": 31480
22094
+ },
22095
+ {
22096
+ "epoch": 0.5843696430269754,
22097
+ "grad_norm": 35.71875,
22098
+ "learning_rate": 9.908692269752438e-06,
22099
+ "loss": 19.7346,
22100
+ "step": 31490
22101
+ },
22102
+ {
22103
+ "epoch": 0.5845552161114552,
22104
+ "grad_norm": 34.90625,
22105
+ "learning_rate": 9.908663273966396e-06,
22106
+ "loss": 19.9969,
22107
+ "step": 31500
22108
+ },
22109
+ {
22110
+ "epoch": 0.584740789195935,
22111
+ "grad_norm": 35.8125,
22112
+ "learning_rate": 9.908634278180353e-06,
22113
+ "loss": 19.6326,
22114
+ "step": 31510
22115
+ },
22116
+ {
22117
+ "epoch": 0.5849263622804148,
22118
+ "grad_norm": 35.4375,
22119
+ "learning_rate": 9.90860528239431e-06,
22120
+ "loss": 20.0874,
22121
+ "step": 31520
22122
+ },
22123
+ {
22124
+ "epoch": 0.5851119353648947,
22125
+ "grad_norm": 36.0625,
22126
+ "learning_rate": 9.908576286608268e-06,
22127
+ "loss": 20.0785,
22128
+ "step": 31530
22129
+ },
22130
+ {
22131
+ "epoch": 0.5852975084493744,
22132
+ "grad_norm": 37.03125,
22133
+ "learning_rate": 9.908547290822225e-06,
22134
+ "loss": 20.1829,
22135
+ "step": 31540
22136
+ },
22137
+ {
22138
+ "epoch": 0.5854830815338543,
22139
+ "grad_norm": 36.34375,
22140
+ "learning_rate": 9.908518295036183e-06,
22141
+ "loss": 20.1522,
22142
+ "step": 31550
22143
+ },
22144
+ {
22145
+ "epoch": 0.5856686546183342,
22146
+ "grad_norm": 34.0625,
22147
+ "learning_rate": 9.90848929925014e-06,
22148
+ "loss": 20.0708,
22149
+ "step": 31560
22150
+ },
22151
+ {
22152
+ "epoch": 0.585854227702814,
22153
+ "grad_norm": 35.84375,
22154
+ "learning_rate": 9.908460303464098e-06,
22155
+ "loss": 19.979,
22156
+ "step": 31570
22157
+ },
22158
+ {
22159
+ "epoch": 0.5860398007872938,
22160
+ "grad_norm": 34.3125,
22161
+ "learning_rate": 9.908431307678055e-06,
22162
+ "loss": 20.0508,
22163
+ "step": 31580
22164
+ },
22165
+ {
22166
+ "epoch": 0.5862253738717736,
22167
+ "grad_norm": 35.3125,
22168
+ "learning_rate": 9.908402311892014e-06,
22169
+ "loss": 20.0383,
22170
+ "step": 31590
22171
+ },
22172
+ {
22173
+ "epoch": 0.5864109469562535,
22174
+ "grad_norm": 36.5,
22175
+ "learning_rate": 9.908373316105971e-06,
22176
+ "loss": 19.7061,
22177
+ "step": 31600
22178
+ },
22179
+ {
22180
+ "epoch": 0.5865965200407333,
22181
+ "grad_norm": 34.21875,
22182
+ "learning_rate": 9.908344320319929e-06,
22183
+ "loss": 20.0088,
22184
+ "step": 31610
22185
+ },
22186
+ {
22187
+ "epoch": 0.5867820931252131,
22188
+ "grad_norm": 36.875,
22189
+ "learning_rate": 9.908315324533886e-06,
22190
+ "loss": 20.0155,
22191
+ "step": 31620
22192
+ },
22193
+ {
22194
+ "epoch": 0.586967666209693,
22195
+ "grad_norm": 34.28125,
22196
+ "learning_rate": 9.908286328747844e-06,
22197
+ "loss": 19.8223,
22198
+ "step": 31630
22199
+ },
22200
+ {
22201
+ "epoch": 0.5871532392941727,
22202
+ "grad_norm": 36.21875,
22203
+ "learning_rate": 9.908257332961801e-06,
22204
+ "loss": 19.9218,
22205
+ "step": 31640
22206
+ },
22207
+ {
22208
+ "epoch": 0.5873388123786526,
22209
+ "grad_norm": 35.125,
22210
+ "learning_rate": 9.908228337175759e-06,
22211
+ "loss": 20.3176,
22212
+ "step": 31650
22213
+ },
22214
+ {
22215
+ "epoch": 0.5875243854631325,
22216
+ "grad_norm": 34.5,
22217
+ "learning_rate": 9.908199341389718e-06,
22218
+ "loss": 19.8512,
22219
+ "step": 31660
22220
+ },
22221
+ {
22222
+ "epoch": 0.5877099585476122,
22223
+ "grad_norm": 33.65625,
22224
+ "learning_rate": 9.908170345603673e-06,
22225
+ "loss": 19.7684,
22226
+ "step": 31670
22227
+ },
22228
+ {
22229
+ "epoch": 0.5878955316320921,
22230
+ "grad_norm": 35.0625,
22231
+ "learning_rate": 9.90814134981763e-06,
22232
+ "loss": 19.7739,
22233
+ "step": 31680
22234
+ },
22235
+ {
22236
+ "epoch": 0.5880811047165719,
22237
+ "grad_norm": 33.90625,
22238
+ "learning_rate": 9.90811235403159e-06,
22239
+ "loss": 19.9795,
22240
+ "step": 31690
22241
+ },
22242
+ {
22243
+ "epoch": 0.5882666778010517,
22244
+ "grad_norm": 33.34375,
22245
+ "learning_rate": 9.908083358245547e-06,
22246
+ "loss": 19.7892,
22247
+ "step": 31700
22248
+ },
22249
+ {
22250
+ "epoch": 0.5884522508855315,
22251
+ "grad_norm": 33.3125,
22252
+ "learning_rate": 9.908054362459505e-06,
22253
+ "loss": 20.0117,
22254
+ "step": 31710
22255
+ },
22256
+ {
22257
+ "epoch": 0.5886378239700114,
22258
+ "grad_norm": 34.65625,
22259
+ "learning_rate": 9.908025366673462e-06,
22260
+ "loss": 20.1514,
22261
+ "step": 31720
22262
+ },
22263
+ {
22264
+ "epoch": 0.5888233970544913,
22265
+ "grad_norm": 34.15625,
22266
+ "learning_rate": 9.90799637088742e-06,
22267
+ "loss": 20.2976,
22268
+ "step": 31730
22269
+ },
22270
+ {
22271
+ "epoch": 0.589008970138971,
22272
+ "grad_norm": 34.9375,
22273
+ "learning_rate": 9.907967375101377e-06,
22274
+ "loss": 20.0669,
22275
+ "step": 31740
22276
+ },
22277
+ {
22278
+ "epoch": 0.5891945432234509,
22279
+ "grad_norm": 33.125,
22280
+ "learning_rate": 9.907938379315334e-06,
22281
+ "loss": 19.8704,
22282
+ "step": 31750
22283
+ },
22284
+ {
22285
+ "epoch": 0.5893801163079307,
22286
+ "grad_norm": 34.34375,
22287
+ "learning_rate": 9.907909383529293e-06,
22288
+ "loss": 19.7355,
22289
+ "step": 31760
22290
+ },
22291
+ {
22292
+ "epoch": 0.5895656893924105,
22293
+ "grad_norm": 34.8125,
22294
+ "learning_rate": 9.90788038774325e-06,
22295
+ "loss": 19.8751,
22296
+ "step": 31770
22297
+ },
22298
+ {
22299
+ "epoch": 0.5897512624768904,
22300
+ "grad_norm": 33.5625,
22301
+ "learning_rate": 9.907851391957207e-06,
22302
+ "loss": 19.3739,
22303
+ "step": 31780
22304
+ },
22305
+ {
22306
+ "epoch": 0.5899368355613702,
22307
+ "grad_norm": 33.9375,
22308
+ "learning_rate": 9.907822396171166e-06,
22309
+ "loss": 20.0175,
22310
+ "step": 31790
22311
+ },
22312
+ {
22313
+ "epoch": 0.59012240864585,
22314
+ "grad_norm": 35.9375,
22315
+ "learning_rate": 9.907793400385123e-06,
22316
+ "loss": 19.7817,
22317
+ "step": 31800
22318
+ },
22319
+ {
22320
+ "epoch": 0.5903079817303298,
22321
+ "grad_norm": 35.15625,
22322
+ "learning_rate": 9.90776440459908e-06,
22323
+ "loss": 19.909,
22324
+ "step": 31810
22325
+ },
22326
+ {
22327
+ "epoch": 0.5904935548148097,
22328
+ "grad_norm": 34.8125,
22329
+ "learning_rate": 9.907735408813038e-06,
22330
+ "loss": 20.1357,
22331
+ "step": 31820
22332
+ },
22333
+ {
22334
+ "epoch": 0.5906791278992894,
22335
+ "grad_norm": 34.25,
22336
+ "learning_rate": 9.907706413026995e-06,
22337
+ "loss": 19.3806,
22338
+ "step": 31830
22339
+ },
22340
+ {
22341
+ "epoch": 0.5908647009837693,
22342
+ "grad_norm": 34.84375,
22343
+ "learning_rate": 9.907677417240953e-06,
22344
+ "loss": 19.9921,
22345
+ "step": 31840
22346
+ },
22347
+ {
22348
+ "epoch": 0.5910502740682492,
22349
+ "grad_norm": 33.21875,
22350
+ "learning_rate": 9.90764842145491e-06,
22351
+ "loss": 19.6932,
22352
+ "step": 31850
22353
+ },
22354
+ {
22355
+ "epoch": 0.5912358471527289,
22356
+ "grad_norm": 34.875,
22357
+ "learning_rate": 9.90761942566887e-06,
22358
+ "loss": 19.9271,
22359
+ "step": 31860
22360
+ },
22361
+ {
22362
+ "epoch": 0.5914214202372088,
22363
+ "grad_norm": 35.1875,
22364
+ "learning_rate": 9.907590429882827e-06,
22365
+ "loss": 20.209,
22366
+ "step": 31870
22367
+ },
22368
+ {
22369
+ "epoch": 0.5916069933216886,
22370
+ "grad_norm": 34.625,
22371
+ "learning_rate": 9.907561434096784e-06,
22372
+ "loss": 19.9407,
22373
+ "step": 31880
22374
+ },
22375
+ {
22376
+ "epoch": 0.5917925664061684,
22377
+ "grad_norm": 35.0625,
22378
+ "learning_rate": 9.907532438310741e-06,
22379
+ "loss": 20.1724,
22380
+ "step": 31890
22381
+ },
22382
+ {
22383
+ "epoch": 0.5919781394906483,
22384
+ "grad_norm": 33.96875,
22385
+ "learning_rate": 9.907503442524699e-06,
22386
+ "loss": 19.872,
22387
+ "step": 31900
22388
+ },
22389
+ {
22390
+ "epoch": 0.5921637125751281,
22391
+ "grad_norm": 34.28125,
22392
+ "learning_rate": 9.907474446738656e-06,
22393
+ "loss": 20.0196,
22394
+ "step": 31910
22395
+ },
22396
+ {
22397
+ "epoch": 0.592349285659608,
22398
+ "grad_norm": 33.34375,
22399
+ "learning_rate": 9.907445450952614e-06,
22400
+ "loss": 20.2614,
22401
+ "step": 31920
22402
+ },
22403
+ {
22404
+ "epoch": 0.5925348587440877,
22405
+ "grad_norm": 35.1875,
22406
+ "learning_rate": 9.907416455166573e-06,
22407
+ "loss": 19.9631,
22408
+ "step": 31930
22409
+ },
22410
+ {
22411
+ "epoch": 0.5927204318285676,
22412
+ "grad_norm": 33.15625,
22413
+ "learning_rate": 9.907387459380528e-06,
22414
+ "loss": 19.8016,
22415
+ "step": 31940
22416
+ },
22417
+ {
22418
+ "epoch": 0.5929060049130475,
22419
+ "grad_norm": 35.09375,
22420
+ "learning_rate": 9.907358463594486e-06,
22421
+ "loss": 19.6743,
22422
+ "step": 31950
22423
+ },
22424
+ {
22425
+ "epoch": 0.5930915779975272,
22426
+ "grad_norm": 35.09375,
22427
+ "learning_rate": 9.907329467808445e-06,
22428
+ "loss": 20.0119,
22429
+ "step": 31960
22430
+ },
22431
+ {
22432
+ "epoch": 0.5932771510820071,
22433
+ "grad_norm": 36.375,
22434
+ "learning_rate": 9.907300472022402e-06,
22435
+ "loss": 20.3043,
22436
+ "step": 31970
22437
+ },
22438
+ {
22439
+ "epoch": 0.5934627241664869,
22440
+ "grad_norm": 34.84375,
22441
+ "learning_rate": 9.90727147623636e-06,
22442
+ "loss": 20.0462,
22443
+ "step": 31980
22444
+ },
22445
+ {
22446
+ "epoch": 0.5936482972509667,
22447
+ "grad_norm": 35.75,
22448
+ "learning_rate": 9.907242480450317e-06,
22449
+ "loss": 19.8187,
22450
+ "step": 31990
22451
+ },
22452
+ {
22453
+ "epoch": 0.5938338703354465,
22454
+ "grad_norm": 34.875,
22455
+ "learning_rate": 9.907213484664275e-06,
22456
+ "loss": 20.3801,
22457
+ "step": 32000
22458
+ },
22459
+ {
22460
+ "epoch": 0.5940194434199264,
22461
+ "grad_norm": 35.15625,
22462
+ "learning_rate": 9.907184488878232e-06,
22463
+ "loss": 20.3156,
22464
+ "step": 32010
22465
+ },
22466
+ {
22467
+ "epoch": 0.5942050165044062,
22468
+ "grad_norm": 36.3125,
22469
+ "learning_rate": 9.90715549309219e-06,
22470
+ "loss": 20.2171,
22471
+ "step": 32020
22472
+ },
22473
+ {
22474
+ "epoch": 0.594390589588886,
22475
+ "grad_norm": 35.84375,
22476
+ "learning_rate": 9.907126497306147e-06,
22477
+ "loss": 20.237,
22478
+ "step": 32030
22479
+ },
22480
+ {
22481
+ "epoch": 0.5945761626733659,
22482
+ "grad_norm": 35.5625,
22483
+ "learning_rate": 9.907097501520106e-06,
22484
+ "loss": 20.6729,
22485
+ "step": 32040
22486
+ },
22487
+ {
22488
+ "epoch": 0.5947617357578456,
22489
+ "grad_norm": 35.375,
22490
+ "learning_rate": 9.907068505734062e-06,
22491
+ "loss": 20.135,
22492
+ "step": 32050
22493
+ },
22494
+ {
22495
+ "epoch": 0.5949473088423255,
22496
+ "grad_norm": 36.75,
22497
+ "learning_rate": 9.90703950994802e-06,
22498
+ "loss": 20.1497,
22499
+ "step": 32060
22500
+ },
22501
+ {
22502
+ "epoch": 0.5951328819268054,
22503
+ "grad_norm": 34.8125,
22504
+ "learning_rate": 9.907010514161978e-06,
22505
+ "loss": 19.918,
22506
+ "step": 32070
22507
+ },
22508
+ {
22509
+ "epoch": 0.5953184550112851,
22510
+ "grad_norm": 33.53125,
22511
+ "learning_rate": 9.906981518375936e-06,
22512
+ "loss": 20.1092,
22513
+ "step": 32080
22514
+ },
22515
+ {
22516
+ "epoch": 0.595504028095765,
22517
+ "grad_norm": 34.8125,
22518
+ "learning_rate": 9.906952522589893e-06,
22519
+ "loss": 20.1522,
22520
+ "step": 32090
22521
+ },
22522
+ {
22523
+ "epoch": 0.5956896011802448,
22524
+ "grad_norm": 35.5,
22525
+ "learning_rate": 9.90692352680385e-06,
22526
+ "loss": 20.0647,
22527
+ "step": 32100
22528
+ },
22529
+ {
22530
+ "epoch": 0.5958751742647247,
22531
+ "grad_norm": 35.375,
22532
+ "learning_rate": 9.906894531017808e-06,
22533
+ "loss": 20.1672,
22534
+ "step": 32110
22535
+ },
22536
+ {
22537
+ "epoch": 0.5960607473492044,
22538
+ "grad_norm": 32.96875,
22539
+ "learning_rate": 9.906865535231765e-06,
22540
+ "loss": 19.8313,
22541
+ "step": 32120
22542
+ },
22543
+ {
22544
+ "epoch": 0.5962463204336843,
22545
+ "grad_norm": 35.34375,
22546
+ "learning_rate": 9.906836539445723e-06,
22547
+ "loss": 20.6778,
22548
+ "step": 32130
22549
+ },
22550
+ {
22551
+ "epoch": 0.5964318935181642,
22552
+ "grad_norm": 33.6875,
22553
+ "learning_rate": 9.906807543659682e-06,
22554
+ "loss": 20.0226,
22555
+ "step": 32140
22556
+ },
22557
+ {
22558
+ "epoch": 0.5966174666026439,
22559
+ "grad_norm": 36.03125,
22560
+ "learning_rate": 9.906778547873637e-06,
22561
+ "loss": 19.692,
22562
+ "step": 32150
22563
+ },
22564
+ {
22565
+ "epoch": 0.5968030396871238,
22566
+ "grad_norm": 34.375,
22567
+ "learning_rate": 9.906749552087595e-06,
22568
+ "loss": 20.0812,
22569
+ "step": 32160
22570
+ },
22571
+ {
22572
+ "epoch": 0.5969886127716036,
22573
+ "grad_norm": 34.5625,
22574
+ "learning_rate": 9.906720556301554e-06,
22575
+ "loss": 20.4617,
22576
+ "step": 32170
22577
+ },
22578
+ {
22579
+ "epoch": 0.5971741858560834,
22580
+ "grad_norm": 34.625,
22581
+ "learning_rate": 9.906691560515511e-06,
22582
+ "loss": 20.2078,
22583
+ "step": 32180
22584
+ },
22585
+ {
22586
+ "epoch": 0.5973597589405633,
22587
+ "grad_norm": 35.40625,
22588
+ "learning_rate": 9.906662564729469e-06,
22589
+ "loss": 19.5445,
22590
+ "step": 32190
22591
+ },
22592
+ {
22593
+ "epoch": 0.5975453320250431,
22594
+ "grad_norm": 35.3125,
22595
+ "learning_rate": 9.906633568943426e-06,
22596
+ "loss": 20.0572,
22597
+ "step": 32200
22598
+ },
22599
+ {
22600
+ "epoch": 0.5977309051095229,
22601
+ "grad_norm": 35.46875,
22602
+ "learning_rate": 9.906604573157383e-06,
22603
+ "loss": 20.1442,
22604
+ "step": 32210
22605
+ },
22606
+ {
22607
+ "epoch": 0.5979164781940027,
22608
+ "grad_norm": 35.75,
22609
+ "learning_rate": 9.906575577371341e-06,
22610
+ "loss": 20.0047,
22611
+ "step": 32220
22612
+ },
22613
+ {
22614
+ "epoch": 0.5981020512784826,
22615
+ "grad_norm": 36.40625,
22616
+ "learning_rate": 9.906546581585298e-06,
22617
+ "loss": 20.1083,
22618
+ "step": 32230
22619
+ },
22620
+ {
22621
+ "epoch": 0.5982876243629623,
22622
+ "grad_norm": 34.9375,
22623
+ "learning_rate": 9.906517585799257e-06,
22624
+ "loss": 20.0461,
22625
+ "step": 32240
22626
+ },
22627
+ {
22628
+ "epoch": 0.5984731974474422,
22629
+ "grad_norm": 36.0,
22630
+ "learning_rate": 9.906488590013215e-06,
22631
+ "loss": 20.0892,
22632
+ "step": 32250
22633
+ },
22634
+ {
22635
+ "epoch": 0.5986587705319221,
22636
+ "grad_norm": 34.875,
22637
+ "learning_rate": 9.90645959422717e-06,
22638
+ "loss": 19.8061,
22639
+ "step": 32260
22640
+ },
22641
+ {
22642
+ "epoch": 0.5988443436164019,
22643
+ "grad_norm": 36.0,
22644
+ "learning_rate": 9.90643059844113e-06,
22645
+ "loss": 20.1581,
22646
+ "step": 32270
22647
+ },
22648
+ {
22649
+ "epoch": 0.5990299167008817,
22650
+ "grad_norm": 34.90625,
22651
+ "learning_rate": 9.906401602655087e-06,
22652
+ "loss": 19.8351,
22653
+ "step": 32280
22654
+ },
22655
+ {
22656
+ "epoch": 0.5992154897853615,
22657
+ "grad_norm": 37.03125,
22658
+ "learning_rate": 9.906372606869044e-06,
22659
+ "loss": 20.1744,
22660
+ "step": 32290
22661
+ },
22662
+ {
22663
+ "epoch": 0.5994010628698414,
22664
+ "grad_norm": 34.75,
22665
+ "learning_rate": 9.906343611083002e-06,
22666
+ "loss": 19.8549,
22667
+ "step": 32300
22668
+ },
22669
+ {
22670
+ "epoch": 0.5995866359543212,
22671
+ "grad_norm": 35.53125,
22672
+ "learning_rate": 9.906314615296961e-06,
22673
+ "loss": 19.901,
22674
+ "step": 32310
22675
+ },
22676
+ {
22677
+ "epoch": 0.599772209038801,
22678
+ "grad_norm": 35.59375,
22679
+ "learning_rate": 9.906285619510917e-06,
22680
+ "loss": 20.4425,
22681
+ "step": 32320
22682
+ },
22683
+ {
22684
+ "epoch": 0.5999577821232809,
22685
+ "grad_norm": 37.6875,
22686
+ "learning_rate": 9.906256623724874e-06,
22687
+ "loss": 20.1879,
22688
+ "step": 32330
22689
+ },
22690
+ {
22691
+ "epoch": 0.6001433552077606,
22692
+ "grad_norm": 35.25,
22693
+ "learning_rate": 9.906227627938833e-06,
22694
+ "loss": 19.9083,
22695
+ "step": 32340
22696
+ },
22697
+ {
22698
+ "epoch": 0.6003289282922405,
22699
+ "grad_norm": 35.5625,
22700
+ "learning_rate": 9.90619863215279e-06,
22701
+ "loss": 19.769,
22702
+ "step": 32350
22703
+ },
22704
+ {
22705
+ "epoch": 0.6005145013767204,
22706
+ "grad_norm": 36.09375,
22707
+ "learning_rate": 9.906169636366748e-06,
22708
+ "loss": 19.8395,
22709
+ "step": 32360
22710
+ },
22711
+ {
22712
+ "epoch": 0.6007000744612001,
22713
+ "grad_norm": 36.34375,
22714
+ "learning_rate": 9.906140640580705e-06,
22715
+ "loss": 20.1598,
22716
+ "step": 32370
22717
+ },
22718
+ {
22719
+ "epoch": 0.60088564754568,
22720
+ "grad_norm": 36.8125,
22721
+ "learning_rate": 9.906111644794663e-06,
22722
+ "loss": 19.9506,
22723
+ "step": 32380
22724
+ },
22725
+ {
22726
+ "epoch": 0.6010712206301598,
22727
+ "grad_norm": 35.1875,
22728
+ "learning_rate": 9.90608264900862e-06,
22729
+ "loss": 20.0782,
22730
+ "step": 32390
22731
+ },
22732
+ {
22733
+ "epoch": 0.6012567937146396,
22734
+ "grad_norm": 35.09375,
22735
+ "learning_rate": 9.906053653222578e-06,
22736
+ "loss": 19.96,
22737
+ "step": 32400
22738
+ },
22739
+ {
22740
+ "epoch": 0.6014423667991194,
22741
+ "grad_norm": 35.03125,
22742
+ "learning_rate": 9.906024657436537e-06,
22743
+ "loss": 20.2103,
22744
+ "step": 32410
22745
+ },
22746
+ {
22747
+ "epoch": 0.6016279398835993,
22748
+ "grad_norm": 33.46875,
22749
+ "learning_rate": 9.905995661650492e-06,
22750
+ "loss": 20.0204,
22751
+ "step": 32420
22752
+ },
22753
+ {
22754
+ "epoch": 0.6018135129680791,
22755
+ "grad_norm": 35.6875,
22756
+ "learning_rate": 9.90596666586445e-06,
22757
+ "loss": 19.9806,
22758
+ "step": 32430
22759
+ },
22760
+ {
22761
+ "epoch": 0.6019990860525589,
22762
+ "grad_norm": 35.25,
22763
+ "learning_rate": 9.905937670078409e-06,
22764
+ "loss": 19.869,
22765
+ "step": 32440
22766
+ },
22767
+ {
22768
+ "epoch": 0.6021846591370388,
22769
+ "grad_norm": 36.84375,
22770
+ "learning_rate": 9.905908674292366e-06,
22771
+ "loss": 20.2251,
22772
+ "step": 32450
22773
+ },
22774
+ {
22775
+ "epoch": 0.6023702322215186,
22776
+ "grad_norm": 34.25,
22777
+ "learning_rate": 9.905879678506324e-06,
22778
+ "loss": 20.3163,
22779
+ "step": 32460
22780
+ },
22781
+ {
22782
+ "epoch": 0.6025558053059984,
22783
+ "grad_norm": 34.25,
22784
+ "learning_rate": 9.905850682720281e-06,
22785
+ "loss": 19.7672,
22786
+ "step": 32470
22787
+ },
22788
+ {
22789
+ "epoch": 0.6027413783904783,
22790
+ "grad_norm": 34.8125,
22791
+ "learning_rate": 9.905821686934239e-06,
22792
+ "loss": 20.1227,
22793
+ "step": 32480
22794
+ },
22795
+ {
22796
+ "epoch": 0.6029269514749581,
22797
+ "grad_norm": 35.90625,
22798
+ "learning_rate": 9.905792691148196e-06,
22799
+ "loss": 20.2727,
22800
+ "step": 32490
22801
+ },
22802
+ {
22803
+ "epoch": 0.6031125245594379,
22804
+ "grad_norm": 35.5625,
22805
+ "learning_rate": 9.905763695362153e-06,
22806
+ "loss": 19.9203,
22807
+ "step": 32500
22808
+ },
22809
+ {
22810
+ "epoch": 0.6032980976439177,
22811
+ "grad_norm": 32.9375,
22812
+ "learning_rate": 9.905734699576112e-06,
22813
+ "loss": 19.4214,
22814
+ "step": 32510
22815
+ },
22816
+ {
22817
+ "epoch": 0.6034836707283976,
22818
+ "grad_norm": 35.5625,
22819
+ "learning_rate": 9.90570570379007e-06,
22820
+ "loss": 19.8526,
22821
+ "step": 32520
22822
+ },
22823
+ {
22824
+ "epoch": 0.6036692438128773,
22825
+ "grad_norm": 35.28125,
22826
+ "learning_rate": 9.905676708004026e-06,
22827
+ "loss": 19.5245,
22828
+ "step": 32530
22829
+ },
22830
+ {
22831
+ "epoch": 0.6038548168973572,
22832
+ "grad_norm": 38.78125,
22833
+ "learning_rate": 9.905647712217985e-06,
22834
+ "loss": 20.1409,
22835
+ "step": 32540
22836
+ },
22837
+ {
22838
+ "epoch": 0.6040403899818371,
22839
+ "grad_norm": 34.6875,
22840
+ "learning_rate": 9.905618716431942e-06,
22841
+ "loss": 19.7652,
22842
+ "step": 32550
22843
+ },
22844
+ {
22845
+ "epoch": 0.6042259630663168,
22846
+ "grad_norm": 34.46875,
22847
+ "learning_rate": 9.9055897206459e-06,
22848
+ "loss": 20.3414,
22849
+ "step": 32560
22850
+ },
22851
+ {
22852
+ "epoch": 0.6044115361507967,
22853
+ "grad_norm": 36.09375,
22854
+ "learning_rate": 9.905560724859857e-06,
22855
+ "loss": 20.0828,
22856
+ "step": 32570
22857
+ },
22858
+ {
22859
+ "epoch": 0.6045971092352765,
22860
+ "grad_norm": 35.0,
22861
+ "learning_rate": 9.905531729073814e-06,
22862
+ "loss": 20.2113,
22863
+ "step": 32580
22864
+ },
22865
+ {
22866
+ "epoch": 0.6047826823197563,
22867
+ "grad_norm": 35.03125,
22868
+ "learning_rate": 9.905502733287772e-06,
22869
+ "loss": 19.9929,
22870
+ "step": 32590
22871
+ },
22872
+ {
22873
+ "epoch": 0.6049682554042362,
22874
+ "grad_norm": 33.90625,
22875
+ "learning_rate": 9.90547373750173e-06,
22876
+ "loss": 19.9798,
22877
+ "step": 32600
22878
+ },
22879
+ {
22880
+ "epoch": 0.605153828488716,
22881
+ "grad_norm": 35.0625,
22882
+ "learning_rate": 9.905444741715687e-06,
22883
+ "loss": 20.134,
22884
+ "step": 32610
22885
+ },
22886
+ {
22887
+ "epoch": 0.6053394015731959,
22888
+ "grad_norm": 35.21875,
22889
+ "learning_rate": 9.905415745929646e-06,
22890
+ "loss": 20.5119,
22891
+ "step": 32620
22892
+ },
22893
+ {
22894
+ "epoch": 0.6055249746576756,
22895
+ "grad_norm": 36.15625,
22896
+ "learning_rate": 9.905386750143603e-06,
22897
+ "loss": 20.1409,
22898
+ "step": 32630
22899
+ },
22900
+ {
22901
+ "epoch": 0.6057105477421555,
22902
+ "grad_norm": 34.75,
22903
+ "learning_rate": 9.90535775435756e-06,
22904
+ "loss": 19.9287,
22905
+ "step": 32640
22906
+ },
22907
+ {
22908
+ "epoch": 0.6058961208266354,
22909
+ "grad_norm": 34.625,
22910
+ "learning_rate": 9.905328758571518e-06,
22911
+ "loss": 20.036,
22912
+ "step": 32650
22913
+ },
22914
+ {
22915
+ "epoch": 0.6060816939111151,
22916
+ "grad_norm": 34.53125,
22917
+ "learning_rate": 9.905299762785475e-06,
22918
+ "loss": 19.8514,
22919
+ "step": 32660
22920
+ },
22921
+ {
22922
+ "epoch": 0.606267266995595,
22923
+ "grad_norm": 34.78125,
22924
+ "learning_rate": 9.905270766999433e-06,
22925
+ "loss": 19.7428,
22926
+ "step": 32670
22927
+ },
22928
+ {
22929
+ "epoch": 0.6064528400800748,
22930
+ "grad_norm": 32.53125,
22931
+ "learning_rate": 9.90524177121339e-06,
22932
+ "loss": 19.5254,
22933
+ "step": 32680
22934
+ },
22935
+ {
22936
+ "epoch": 0.6066384131645546,
22937
+ "grad_norm": 35.9375,
22938
+ "learning_rate": 9.905212775427348e-06,
22939
+ "loss": 19.9658,
22940
+ "step": 32690
22941
+ },
22942
+ {
22943
+ "epoch": 0.6068239862490344,
22944
+ "grad_norm": 36.375,
22945
+ "learning_rate": 9.905183779641305e-06,
22946
+ "loss": 19.4217,
22947
+ "step": 32700
22948
+ },
22949
+ {
22950
+ "epoch": 0.6070095593335143,
22951
+ "grad_norm": 35.3125,
22952
+ "learning_rate": 9.905154783855262e-06,
22953
+ "loss": 19.6262,
22954
+ "step": 32710
22955
+ },
22956
+ {
22957
+ "epoch": 0.607195132417994,
22958
+ "grad_norm": 35.5625,
22959
+ "learning_rate": 9.905125788069221e-06,
22960
+ "loss": 19.8789,
22961
+ "step": 32720
22962
+ },
22963
+ {
22964
+ "epoch": 0.6073807055024739,
22965
+ "grad_norm": 36.3125,
22966
+ "learning_rate": 9.905096792283179e-06,
22967
+ "loss": 19.6746,
22968
+ "step": 32730
22969
+ },
22970
+ {
22971
+ "epoch": 0.6075662785869538,
22972
+ "grad_norm": 37.15625,
22973
+ "learning_rate": 9.905067796497135e-06,
22974
+ "loss": 19.791,
22975
+ "step": 32740
22976
+ },
22977
+ {
22978
+ "epoch": 0.6077518516714335,
22979
+ "grad_norm": 33.1875,
22980
+ "learning_rate": 9.905038800711094e-06,
22981
+ "loss": 19.8378,
22982
+ "step": 32750
22983
+ },
22984
+ {
22985
+ "epoch": 0.6079374247559134,
22986
+ "grad_norm": 36.8125,
22987
+ "learning_rate": 9.905009804925051e-06,
22988
+ "loss": 20.3618,
22989
+ "step": 32760
22990
+ },
22991
+ {
22992
+ "epoch": 0.6081229978403933,
22993
+ "grad_norm": 36.65625,
22994
+ "learning_rate": 9.904980809139008e-06,
22995
+ "loss": 19.9257,
22996
+ "step": 32770
22997
+ },
22998
+ {
22999
+ "epoch": 0.608308570924873,
23000
+ "grad_norm": 35.03125,
23001
+ "learning_rate": 9.904951813352966e-06,
23002
+ "loss": 20.2374,
23003
+ "step": 32780
23004
+ },
23005
+ {
23006
+ "epoch": 0.6084941440093529,
23007
+ "grad_norm": 34.0,
23008
+ "learning_rate": 9.904922817566925e-06,
23009
+ "loss": 19.7495,
23010
+ "step": 32790
23011
+ },
23012
+ {
23013
+ "epoch": 0.6086797170938327,
23014
+ "grad_norm": 34.75,
23015
+ "learning_rate": 9.90489382178088e-06,
23016
+ "loss": 19.7513,
23017
+ "step": 32800
23018
+ },
23019
+ {
23020
+ "epoch": 0.6088652901783126,
23021
+ "grad_norm": 35.5,
23022
+ "learning_rate": 9.904864825994838e-06,
23023
+ "loss": 20.0577,
23024
+ "step": 32810
23025
+ },
23026
+ {
23027
+ "epoch": 0.6090508632627923,
23028
+ "grad_norm": 33.53125,
23029
+ "learning_rate": 9.904835830208797e-06,
23030
+ "loss": 19.5465,
23031
+ "step": 32820
23032
+ },
23033
+ {
23034
+ "epoch": 0.6092364363472722,
23035
+ "grad_norm": 34.78125,
23036
+ "learning_rate": 9.904806834422755e-06,
23037
+ "loss": 20.4513,
23038
+ "step": 32830
23039
+ },
23040
+ {
23041
+ "epoch": 0.6094220094317521,
23042
+ "grad_norm": 35.5,
23043
+ "learning_rate": 9.904777838636712e-06,
23044
+ "loss": 20.2129,
23045
+ "step": 32840
23046
+ },
23047
+ {
23048
+ "epoch": 0.6096075825162318,
23049
+ "grad_norm": 35.03125,
23050
+ "learning_rate": 9.90474884285067e-06,
23051
+ "loss": 19.8204,
23052
+ "step": 32850
23053
+ },
23054
+ {
23055
+ "epoch": 0.6097931556007117,
23056
+ "grad_norm": 34.75,
23057
+ "learning_rate": 9.904719847064627e-06,
23058
+ "loss": 19.8698,
23059
+ "step": 32860
23060
+ },
23061
+ {
23062
+ "epoch": 0.6099787286851915,
23063
+ "grad_norm": 36.0,
23064
+ "learning_rate": 9.904690851278584e-06,
23065
+ "loss": 20.123,
23066
+ "step": 32870
23067
+ },
23068
+ {
23069
+ "epoch": 0.6101643017696713,
23070
+ "grad_norm": 35.375,
23071
+ "learning_rate": 9.904661855492542e-06,
23072
+ "loss": 20.2864,
23073
+ "step": 32880
23074
+ },
23075
+ {
23076
+ "epoch": 0.6103498748541512,
23077
+ "grad_norm": 36.6875,
23078
+ "learning_rate": 9.9046328597065e-06,
23079
+ "loss": 20.3273,
23080
+ "step": 32890
23081
+ },
23082
+ {
23083
+ "epoch": 0.610535447938631,
23084
+ "grad_norm": 34.78125,
23085
+ "learning_rate": 9.904603863920456e-06,
23086
+ "loss": 20.335,
23087
+ "step": 32900
23088
+ },
23089
+ {
23090
+ "epoch": 0.6107210210231108,
23091
+ "grad_norm": 33.125,
23092
+ "learning_rate": 9.904574868134414e-06,
23093
+ "loss": 19.5401,
23094
+ "step": 32910
23095
+ },
23096
+ {
23097
+ "epoch": 0.6109065941075906,
23098
+ "grad_norm": 35.25,
23099
+ "learning_rate": 9.904545872348373e-06,
23100
+ "loss": 20.044,
23101
+ "step": 32920
23102
+ },
23103
+ {
23104
+ "epoch": 0.6110921671920705,
23105
+ "grad_norm": 35.875,
23106
+ "learning_rate": 9.90451687656233e-06,
23107
+ "loss": 19.6458,
23108
+ "step": 32930
23109
+ },
23110
+ {
23111
+ "epoch": 0.6112777402765502,
23112
+ "grad_norm": 34.6875,
23113
+ "learning_rate": 9.904487880776288e-06,
23114
+ "loss": 19.9171,
23115
+ "step": 32940
23116
+ },
23117
+ {
23118
+ "epoch": 0.6114633133610301,
23119
+ "grad_norm": 35.4375,
23120
+ "learning_rate": 9.904458884990245e-06,
23121
+ "loss": 19.9779,
23122
+ "step": 32950
23123
+ },
23124
+ {
23125
+ "epoch": 0.61164888644551,
23126
+ "grad_norm": 33.09375,
23127
+ "learning_rate": 9.904429889204203e-06,
23128
+ "loss": 19.7718,
23129
+ "step": 32960
23130
+ },
23131
+ {
23132
+ "epoch": 0.6118344595299897,
23133
+ "grad_norm": 36.40625,
23134
+ "learning_rate": 9.90440089341816e-06,
23135
+ "loss": 19.9427,
23136
+ "step": 32970
23137
+ },
23138
+ {
23139
+ "epoch": 0.6120200326144696,
23140
+ "grad_norm": 33.1875,
23141
+ "learning_rate": 9.904371897632117e-06,
23142
+ "loss": 19.7202,
23143
+ "step": 32980
23144
+ },
23145
+ {
23146
+ "epoch": 0.6122056056989494,
23147
+ "grad_norm": 36.0625,
23148
+ "learning_rate": 9.904342901846077e-06,
23149
+ "loss": 19.4229,
23150
+ "step": 32990
23151
+ },
23152
+ {
23153
+ "epoch": 0.6123911787834293,
23154
+ "grad_norm": 34.375,
23155
+ "learning_rate": 9.904313906060034e-06,
23156
+ "loss": 20.1885,
23157
+ "step": 33000
23158
+ },
23159
+ {
23160
+ "epoch": 0.612576751867909,
23161
+ "grad_norm": 38.125,
23162
+ "learning_rate": 9.90428491027399e-06,
23163
+ "loss": 20.1329,
23164
+ "step": 33010
23165
+ },
23166
+ {
23167
+ "epoch": 0.6127623249523889,
23168
+ "grad_norm": 35.0625,
23169
+ "learning_rate": 9.904255914487949e-06,
23170
+ "loss": 19.4893,
23171
+ "step": 33020
23172
+ },
23173
+ {
23174
+ "epoch": 0.6129478980368688,
23175
+ "grad_norm": 35.375,
23176
+ "learning_rate": 9.904226918701906e-06,
23177
+ "loss": 19.8205,
23178
+ "step": 33030
23179
+ },
23180
+ {
23181
+ "epoch": 0.6131334711213485,
23182
+ "grad_norm": 33.34375,
23183
+ "learning_rate": 9.904197922915864e-06,
23184
+ "loss": 20.1173,
23185
+ "step": 33040
23186
+ },
23187
+ {
23188
+ "epoch": 0.6133190442058284,
23189
+ "grad_norm": 34.8125,
23190
+ "learning_rate": 9.904168927129821e-06,
23191
+ "loss": 20.4497,
23192
+ "step": 33050
23193
+ },
23194
+ {
23195
+ "epoch": 0.6135046172903083,
23196
+ "grad_norm": 34.65625,
23197
+ "learning_rate": 9.904139931343778e-06,
23198
+ "loss": 20.0398,
23199
+ "step": 33060
23200
+ },
23201
+ {
23202
+ "epoch": 0.613690190374788,
23203
+ "grad_norm": 34.46875,
23204
+ "learning_rate": 9.904110935557736e-06,
23205
+ "loss": 20.0827,
23206
+ "step": 33070
23207
+ },
23208
+ {
23209
+ "epoch": 0.6138757634592679,
23210
+ "grad_norm": 33.0,
23211
+ "learning_rate": 9.904081939771693e-06,
23212
+ "loss": 19.8621,
23213
+ "step": 33080
23214
+ },
23215
+ {
23216
+ "epoch": 0.6140613365437477,
23217
+ "grad_norm": 35.34375,
23218
+ "learning_rate": 9.904052943985652e-06,
23219
+ "loss": 20.097,
23220
+ "step": 33090
23221
+ },
23222
+ {
23223
+ "epoch": 0.6142469096282275,
23224
+ "grad_norm": 34.96875,
23225
+ "learning_rate": 9.90402394819961e-06,
23226
+ "loss": 19.9632,
23227
+ "step": 33100
23228
+ },
23229
+ {
23230
+ "epoch": 0.6144324827127073,
23231
+ "grad_norm": 36.40625,
23232
+ "learning_rate": 9.903994952413567e-06,
23233
+ "loss": 19.9311,
23234
+ "step": 33110
23235
+ },
23236
+ {
23237
+ "epoch": 0.6146180557971872,
23238
+ "grad_norm": 35.40625,
23239
+ "learning_rate": 9.903965956627524e-06,
23240
+ "loss": 19.8179,
23241
+ "step": 33120
23242
+ },
23243
+ {
23244
+ "epoch": 0.614803628881667,
23245
+ "grad_norm": 35.15625,
23246
+ "learning_rate": 9.903936960841482e-06,
23247
+ "loss": 19.5918,
23248
+ "step": 33130
23249
+ },
23250
+ {
23251
+ "epoch": 0.6149892019661468,
23252
+ "grad_norm": 32.78125,
23253
+ "learning_rate": 9.90390796505544e-06,
23254
+ "loss": 19.6736,
23255
+ "step": 33140
23256
+ },
23257
+ {
23258
+ "epoch": 0.6151747750506267,
23259
+ "grad_norm": 34.0,
23260
+ "learning_rate": 9.903878969269397e-06,
23261
+ "loss": 19.9292,
23262
+ "step": 33150
23263
+ },
23264
+ {
23265
+ "epoch": 0.6153603481351065,
23266
+ "grad_norm": 33.84375,
23267
+ "learning_rate": 9.903849973483354e-06,
23268
+ "loss": 20.5943,
23269
+ "step": 33160
23270
+ },
23271
+ {
23272
+ "epoch": 0.6155459212195863,
23273
+ "grad_norm": 35.96875,
23274
+ "learning_rate": 9.903820977697312e-06,
23275
+ "loss": 20.0126,
23276
+ "step": 33170
23277
+ },
23278
+ {
23279
+ "epoch": 0.6157314943040662,
23280
+ "grad_norm": 35.09375,
23281
+ "learning_rate": 9.903791981911269e-06,
23282
+ "loss": 20.3576,
23283
+ "step": 33180
23284
+ },
23285
+ {
23286
+ "epoch": 0.615917067388546,
23287
+ "grad_norm": 34.9375,
23288
+ "learning_rate": 9.903762986125226e-06,
23289
+ "loss": 19.9106,
23290
+ "step": 33190
23291
+ },
23292
+ {
23293
+ "epoch": 0.6161026404730258,
23294
+ "grad_norm": 33.25,
23295
+ "learning_rate": 9.903733990339185e-06,
23296
+ "loss": 19.8425,
23297
+ "step": 33200
23298
+ },
23299
+ {
23300
+ "epoch": 0.6162882135575056,
23301
+ "grad_norm": 35.78125,
23302
+ "learning_rate": 9.903704994553143e-06,
23303
+ "loss": 19.8126,
23304
+ "step": 33210
23305
+ },
23306
+ {
23307
+ "epoch": 0.6164737866419855,
23308
+ "grad_norm": 34.0,
23309
+ "learning_rate": 9.9036759987671e-06,
23310
+ "loss": 19.5984,
23311
+ "step": 33220
23312
+ },
23313
+ {
23314
+ "epoch": 0.6166593597264652,
23315
+ "grad_norm": 34.5625,
23316
+ "learning_rate": 9.903647002981058e-06,
23317
+ "loss": 20.4081,
23318
+ "step": 33230
23319
+ },
23320
+ {
23321
+ "epoch": 0.6168449328109451,
23322
+ "grad_norm": 34.21875,
23323
+ "learning_rate": 9.903618007195015e-06,
23324
+ "loss": 20.1874,
23325
+ "step": 33240
23326
+ },
23327
+ {
23328
+ "epoch": 0.617030505895425,
23329
+ "grad_norm": 34.59375,
23330
+ "learning_rate": 9.903589011408972e-06,
23331
+ "loss": 19.9073,
23332
+ "step": 33250
23333
+ },
23334
+ {
23335
+ "epoch": 0.6172160789799047,
23336
+ "grad_norm": 37.0625,
23337
+ "learning_rate": 9.90356001562293e-06,
23338
+ "loss": 19.5716,
23339
+ "step": 33260
23340
+ },
23341
+ {
23342
+ "epoch": 0.6174016520643846,
23343
+ "grad_norm": 33.8125,
23344
+ "learning_rate": 9.903531019836889e-06,
23345
+ "loss": 19.334,
23346
+ "step": 33270
23347
+ },
23348
+ {
23349
+ "epoch": 0.6175872251488644,
23350
+ "grad_norm": 33.125,
23351
+ "learning_rate": 9.903502024050845e-06,
23352
+ "loss": 20.0157,
23353
+ "step": 33280
23354
+ },
23355
+ {
23356
+ "epoch": 0.6177727982333442,
23357
+ "grad_norm": 33.28125,
23358
+ "learning_rate": 9.903473028264802e-06,
23359
+ "loss": 19.8102,
23360
+ "step": 33290
23361
+ },
23362
+ {
23363
+ "epoch": 0.617958371317824,
23364
+ "grad_norm": 33.65625,
23365
+ "learning_rate": 9.903444032478761e-06,
23366
+ "loss": 19.8039,
23367
+ "step": 33300
23368
+ },
23369
+ {
23370
+ "epoch": 0.6181439444023039,
23371
+ "grad_norm": 34.78125,
23372
+ "learning_rate": 9.903415036692719e-06,
23373
+ "loss": 19.945,
23374
+ "step": 33310
23375
+ },
23376
+ {
23377
+ "epoch": 0.6183295174867837,
23378
+ "grad_norm": 35.21875,
23379
+ "learning_rate": 9.903386040906676e-06,
23380
+ "loss": 20.1298,
23381
+ "step": 33320
23382
+ },
23383
+ {
23384
+ "epoch": 0.6185150905712635,
23385
+ "grad_norm": 35.65625,
23386
+ "learning_rate": 9.903357045120633e-06,
23387
+ "loss": 20.0132,
23388
+ "step": 33330
23389
+ },
23390
+ {
23391
+ "epoch": 0.6187006636557434,
23392
+ "grad_norm": 36.4375,
23393
+ "learning_rate": 9.90332804933459e-06,
23394
+ "loss": 19.7294,
23395
+ "step": 33340
23396
+ },
23397
+ {
23398
+ "epoch": 0.6188862367402233,
23399
+ "grad_norm": 34.59375,
23400
+ "learning_rate": 9.903299053548548e-06,
23401
+ "loss": 20.4257,
23402
+ "step": 33350
23403
+ },
23404
+ {
23405
+ "epoch": 0.619071809824703,
23406
+ "grad_norm": 36.46875,
23407
+ "learning_rate": 9.903270057762506e-06,
23408
+ "loss": 19.9613,
23409
+ "step": 33360
23410
+ },
23411
+ {
23412
+ "epoch": 0.6192573829091829,
23413
+ "grad_norm": 33.65625,
23414
+ "learning_rate": 9.903241061976465e-06,
23415
+ "loss": 19.5663,
23416
+ "step": 33370
23417
+ },
23418
+ {
23419
+ "epoch": 0.6194429559936627,
23420
+ "grad_norm": 36.78125,
23421
+ "learning_rate": 9.903212066190422e-06,
23422
+ "loss": 19.7608,
23423
+ "step": 33380
23424
+ },
23425
+ {
23426
+ "epoch": 0.6196285290781425,
23427
+ "grad_norm": 35.375,
23428
+ "learning_rate": 9.903183070404378e-06,
23429
+ "loss": 19.912,
23430
+ "step": 33390
23431
+ },
23432
+ {
23433
+ "epoch": 0.6198141021626223,
23434
+ "grad_norm": 34.34375,
23435
+ "learning_rate": 9.903154074618337e-06,
23436
+ "loss": 19.6677,
23437
+ "step": 33400
23438
+ },
23439
+ {
23440
+ "epoch": 0.6199996752471022,
23441
+ "grad_norm": 35.46875,
23442
+ "learning_rate": 9.903125078832294e-06,
23443
+ "loss": 19.9394,
23444
+ "step": 33410
23445
+ },
23446
+ {
23447
+ "epoch": 0.620185248331582,
23448
+ "grad_norm": 36.4375,
23449
+ "learning_rate": 9.903096083046252e-06,
23450
+ "loss": 19.5774,
23451
+ "step": 33420
23452
+ },
23453
+ {
23454
+ "epoch": 0.6203708214160618,
23455
+ "grad_norm": 36.28125,
23456
+ "learning_rate": 9.90306708726021e-06,
23457
+ "loss": 19.8794,
23458
+ "step": 33430
23459
+ },
23460
+ {
23461
+ "epoch": 0.6205563945005417,
23462
+ "grad_norm": 37.65625,
23463
+ "learning_rate": 9.903038091474167e-06,
23464
+ "loss": 20.2317,
23465
+ "step": 33440
23466
+ },
23467
+ {
23468
+ "epoch": 0.6207419675850214,
23469
+ "grad_norm": 35.75,
23470
+ "learning_rate": 9.903009095688124e-06,
23471
+ "loss": 20.0775,
23472
+ "step": 33450
23473
+ },
23474
+ {
23475
+ "epoch": 0.6209275406695013,
23476
+ "grad_norm": 35.0,
23477
+ "learning_rate": 9.902980099902081e-06,
23478
+ "loss": 20.0078,
23479
+ "step": 33460
23480
+ },
23481
+ {
23482
+ "epoch": 0.6211131137539811,
23483
+ "grad_norm": 35.71875,
23484
+ "learning_rate": 9.90295110411604e-06,
23485
+ "loss": 19.8658,
23486
+ "step": 33470
23487
+ },
23488
+ {
23489
+ "epoch": 0.6212986868384609,
23490
+ "grad_norm": 34.71875,
23491
+ "learning_rate": 9.902922108329998e-06,
23492
+ "loss": 19.6372,
23493
+ "step": 33480
23494
+ },
23495
+ {
23496
+ "epoch": 0.6214842599229408,
23497
+ "grad_norm": 36.0625,
23498
+ "learning_rate": 9.902893112543954e-06,
23499
+ "loss": 19.5675,
23500
+ "step": 33490
23501
+ },
23502
+ {
23503
+ "epoch": 0.6216698330074206,
23504
+ "grad_norm": 35.25,
23505
+ "learning_rate": 9.902864116757913e-06,
23506
+ "loss": 19.9335,
23507
+ "step": 33500
23508
+ },
23509
+ {
23510
+ "epoch": 0.6218554060919004,
23511
+ "grad_norm": 34.28125,
23512
+ "learning_rate": 9.90283512097187e-06,
23513
+ "loss": 19.6987,
23514
+ "step": 33510
23515
+ },
23516
+ {
23517
+ "epoch": 0.6220409791763802,
23518
+ "grad_norm": 32.0625,
23519
+ "learning_rate": 9.902806125185828e-06,
23520
+ "loss": 19.9386,
23521
+ "step": 33520
23522
+ },
23523
+ {
23524
+ "epoch": 0.6222265522608601,
23525
+ "grad_norm": 36.5625,
23526
+ "learning_rate": 9.902777129399785e-06,
23527
+ "loss": 19.802,
23528
+ "step": 33530
23529
+ },
23530
+ {
23531
+ "epoch": 0.62241212534534,
23532
+ "grad_norm": 34.71875,
23533
+ "learning_rate": 9.902748133613742e-06,
23534
+ "loss": 19.8391,
23535
+ "step": 33540
23536
+ },
23537
+ {
23538
+ "epoch": 0.6225976984298197,
23539
+ "grad_norm": 35.875,
23540
+ "learning_rate": 9.9027191378277e-06,
23541
+ "loss": 19.5903,
23542
+ "step": 33550
23543
+ },
23544
+ {
23545
+ "epoch": 0.6227832715142996,
23546
+ "grad_norm": 34.375,
23547
+ "learning_rate": 9.902690142041657e-06,
23548
+ "loss": 19.9132,
23549
+ "step": 33560
23550
+ },
23551
+ {
23552
+ "epoch": 0.6229688445987794,
23553
+ "grad_norm": 36.96875,
23554
+ "learning_rate": 9.902661146255616e-06,
23555
+ "loss": 19.7901,
23556
+ "step": 33570
23557
+ },
23558
+ {
23559
+ "epoch": 0.6231544176832592,
23560
+ "grad_norm": 33.25,
23561
+ "learning_rate": 9.902632150469574e-06,
23562
+ "loss": 19.8296,
23563
+ "step": 33580
23564
+ },
23565
+ {
23566
+ "epoch": 0.623339990767739,
23567
+ "grad_norm": 36.0625,
23568
+ "learning_rate": 9.902603154683531e-06,
23569
+ "loss": 19.664,
23570
+ "step": 33590
23571
+ },
23572
+ {
23573
+ "epoch": 0.6235255638522189,
23574
+ "grad_norm": 35.84375,
23575
+ "learning_rate": 9.902574158897489e-06,
23576
+ "loss": 19.6831,
23577
+ "step": 33600
23578
+ },
23579
+ {
23580
+ "epoch": 0.6237111369366987,
23581
+ "grad_norm": 35.3125,
23582
+ "learning_rate": 9.902545163111446e-06,
23583
+ "loss": 19.5117,
23584
+ "step": 33610
23585
+ },
23586
+ {
23587
+ "epoch": 0.6238967100211785,
23588
+ "grad_norm": 36.40625,
23589
+ "learning_rate": 9.902516167325403e-06,
23590
+ "loss": 19.7426,
23591
+ "step": 33620
23592
+ },
23593
+ {
23594
+ "epoch": 0.6240822831056584,
23595
+ "grad_norm": 34.96875,
23596
+ "learning_rate": 9.90248717153936e-06,
23597
+ "loss": 19.4962,
23598
+ "step": 33630
23599
+ },
23600
+ {
23601
+ "epoch": 0.6242678561901381,
23602
+ "grad_norm": 35.28125,
23603
+ "learning_rate": 9.902458175753318e-06,
23604
+ "loss": 19.9088,
23605
+ "step": 33640
23606
+ },
23607
+ {
23608
+ "epoch": 0.624453429274618,
23609
+ "grad_norm": 34.15625,
23610
+ "learning_rate": 9.902429179967277e-06,
23611
+ "loss": 19.5959,
23612
+ "step": 33650
23613
+ },
23614
+ {
23615
+ "epoch": 0.6246390023590979,
23616
+ "grad_norm": 35.40625,
23617
+ "learning_rate": 9.902400184181233e-06,
23618
+ "loss": 19.8292,
23619
+ "step": 33660
23620
+ },
23621
+ {
23622
+ "epoch": 0.6248245754435776,
23623
+ "grad_norm": 35.46875,
23624
+ "learning_rate": 9.90237118839519e-06,
23625
+ "loss": 19.9731,
23626
+ "step": 33670
23627
+ },
23628
+ {
23629
+ "epoch": 0.6250101485280575,
23630
+ "grad_norm": 33.78125,
23631
+ "learning_rate": 9.90234219260915e-06,
23632
+ "loss": 19.4511,
23633
+ "step": 33680
23634
+ },
23635
+ {
23636
+ "epoch": 0.6251957216125373,
23637
+ "grad_norm": 35.1875,
23638
+ "learning_rate": 9.902313196823107e-06,
23639
+ "loss": 19.9273,
23640
+ "step": 33690
23641
+ },
23642
+ {
23643
+ "epoch": 0.6253812946970172,
23644
+ "grad_norm": 34.34375,
23645
+ "learning_rate": 9.902284201037064e-06,
23646
+ "loss": 20.1118,
23647
+ "step": 33700
23648
+ },
23649
+ {
23650
+ "epoch": 0.625566867781497,
23651
+ "grad_norm": 33.1875,
23652
+ "learning_rate": 9.902255205251022e-06,
23653
+ "loss": 19.8202,
23654
+ "step": 33710
23655
+ },
23656
+ {
23657
+ "epoch": 0.6257524408659768,
23658
+ "grad_norm": 37.34375,
23659
+ "learning_rate": 9.902226209464979e-06,
23660
+ "loss": 20.2005,
23661
+ "step": 33720
23662
+ },
23663
+ {
23664
+ "epoch": 0.6259380139504567,
23665
+ "grad_norm": 34.4375,
23666
+ "learning_rate": 9.902197213678936e-06,
23667
+ "loss": 20.3347,
23668
+ "step": 33730
23669
+ },
23670
+ {
23671
+ "epoch": 0.6261235870349364,
23672
+ "grad_norm": 35.78125,
23673
+ "learning_rate": 9.902168217892894e-06,
23674
+ "loss": 20.3401,
23675
+ "step": 33740
23676
+ },
23677
+ {
23678
+ "epoch": 0.6263091601194163,
23679
+ "grad_norm": 36.0625,
23680
+ "learning_rate": 9.902139222106853e-06,
23681
+ "loss": 19.5948,
23682
+ "step": 33750
23683
+ },
23684
+ {
23685
+ "epoch": 0.6264947332038961,
23686
+ "grad_norm": 34.75,
23687
+ "learning_rate": 9.902110226320809e-06,
23688
+ "loss": 20.1637,
23689
+ "step": 33760
23690
+ },
23691
+ {
23692
+ "epoch": 0.6266803062883759,
23693
+ "grad_norm": 34.0,
23694
+ "learning_rate": 9.902081230534766e-06,
23695
+ "loss": 19.9732,
23696
+ "step": 33770
23697
+ },
23698
+ {
23699
+ "epoch": 0.6268658793728558,
23700
+ "grad_norm": 33.65625,
23701
+ "learning_rate": 9.902052234748725e-06,
23702
+ "loss": 19.7471,
23703
+ "step": 33780
23704
+ },
23705
+ {
23706
+ "epoch": 0.6270514524573356,
23707
+ "grad_norm": 33.875,
23708
+ "learning_rate": 9.902023238962683e-06,
23709
+ "loss": 19.665,
23710
+ "step": 33790
23711
+ },
23712
+ {
23713
+ "epoch": 0.6272370255418154,
23714
+ "grad_norm": 34.5,
23715
+ "learning_rate": 9.90199424317664e-06,
23716
+ "loss": 19.9919,
23717
+ "step": 33800
23718
+ },
23719
+ {
23720
+ "epoch": 0.6274225986262952,
23721
+ "grad_norm": 34.34375,
23722
+ "learning_rate": 9.901965247390597e-06,
23723
+ "loss": 19.9491,
23724
+ "step": 33810
23725
+ },
23726
+ {
23727
+ "epoch": 0.6276081717107751,
23728
+ "grad_norm": 35.0,
23729
+ "learning_rate": 9.901936251604555e-06,
23730
+ "loss": 19.8758,
23731
+ "step": 33820
23732
+ },
23733
+ {
23734
+ "epoch": 0.6277937447952548,
23735
+ "grad_norm": 37.84375,
23736
+ "learning_rate": 9.901907255818512e-06,
23737
+ "loss": 19.9295,
23738
+ "step": 33830
23739
+ },
23740
+ {
23741
+ "epoch": 0.6279793178797347,
23742
+ "grad_norm": 35.28125,
23743
+ "learning_rate": 9.90187826003247e-06,
23744
+ "loss": 19.8433,
23745
+ "step": 33840
23746
+ },
23747
+ {
23748
+ "epoch": 0.6281648909642146,
23749
+ "grad_norm": 36.28125,
23750
+ "learning_rate": 9.901849264246429e-06,
23751
+ "loss": 19.9194,
23752
+ "step": 33850
23753
+ },
23754
+ {
23755
+ "epoch": 0.6283504640486943,
23756
+ "grad_norm": 35.78125,
23757
+ "learning_rate": 9.901820268460386e-06,
23758
+ "loss": 19.8389,
23759
+ "step": 33860
23760
+ },
23761
+ {
23762
+ "epoch": 0.6285360371331742,
23763
+ "grad_norm": 35.3125,
23764
+ "learning_rate": 9.901791272674342e-06,
23765
+ "loss": 19.8355,
23766
+ "step": 33870
23767
+ },
23768
+ {
23769
+ "epoch": 0.628721610217654,
23770
+ "grad_norm": 36.125,
23771
+ "learning_rate": 9.901762276888301e-06,
23772
+ "loss": 19.8938,
23773
+ "step": 33880
23774
+ },
23775
+ {
23776
+ "epoch": 0.6289071833021339,
23777
+ "grad_norm": 36.65625,
23778
+ "learning_rate": 9.901733281102258e-06,
23779
+ "loss": 20.1254,
23780
+ "step": 33890
23781
+ },
23782
+ {
23783
+ "epoch": 0.6290927563866137,
23784
+ "grad_norm": 35.0625,
23785
+ "learning_rate": 9.901704285316216e-06,
23786
+ "loss": 19.8177,
23787
+ "step": 33900
23788
+ },
23789
+ {
23790
+ "epoch": 0.6292783294710935,
23791
+ "grad_norm": 35.84375,
23792
+ "learning_rate": 9.901675289530173e-06,
23793
+ "loss": 19.5455,
23794
+ "step": 33910
23795
+ },
23796
+ {
23797
+ "epoch": 0.6294639025555734,
23798
+ "grad_norm": 35.65625,
23799
+ "learning_rate": 9.90164629374413e-06,
23800
+ "loss": 19.5187,
23801
+ "step": 33920
23802
+ },
23803
+ {
23804
+ "epoch": 0.6296494756400531,
23805
+ "grad_norm": 36.65625,
23806
+ "learning_rate": 9.901617297958088e-06,
23807
+ "loss": 19.8171,
23808
+ "step": 33930
23809
+ },
23810
+ {
23811
+ "epoch": 0.629835048724533,
23812
+ "grad_norm": 35.875,
23813
+ "learning_rate": 9.901588302172045e-06,
23814
+ "loss": 20.0522,
23815
+ "step": 33940
23816
+ },
23817
+ {
23818
+ "epoch": 0.6300206218090129,
23819
+ "grad_norm": 34.375,
23820
+ "learning_rate": 9.901559306386005e-06,
23821
+ "loss": 19.8402,
23822
+ "step": 33950
23823
+ },
23824
+ {
23825
+ "epoch": 0.6302061948934926,
23826
+ "grad_norm": 34.84375,
23827
+ "learning_rate": 9.901530310599962e-06,
23828
+ "loss": 19.9172,
23829
+ "step": 33960
23830
+ },
23831
+ {
23832
+ "epoch": 0.6303917679779725,
23833
+ "grad_norm": 36.8125,
23834
+ "learning_rate": 9.90150131481392e-06,
23835
+ "loss": 20.0115,
23836
+ "step": 33970
23837
+ },
23838
+ {
23839
+ "epoch": 0.6305773410624523,
23840
+ "grad_norm": 34.9375,
23841
+ "learning_rate": 9.901472319027877e-06,
23842
+ "loss": 19.6994,
23843
+ "step": 33980
23844
+ },
23845
+ {
23846
+ "epoch": 0.6307629141469321,
23847
+ "grad_norm": 34.90625,
23848
+ "learning_rate": 9.901443323241834e-06,
23849
+ "loss": 19.816,
23850
+ "step": 33990
23851
+ },
23852
+ {
23853
+ "epoch": 0.630948487231412,
23854
+ "grad_norm": 34.8125,
23855
+ "learning_rate": 9.901414327455792e-06,
23856
+ "loss": 19.6213,
23857
+ "step": 34000
23858
+ },
23859
+ {
23860
+ "epoch": 0.6311340603158918,
23861
+ "grad_norm": 34.9375,
23862
+ "learning_rate": 9.901385331669749e-06,
23863
+ "loss": 20.15,
23864
+ "step": 34010
23865
+ },
23866
+ {
23867
+ "epoch": 0.6313196334003716,
23868
+ "grad_norm": 35.03125,
23869
+ "learning_rate": 9.901356335883708e-06,
23870
+ "loss": 19.3955,
23871
+ "step": 34020
23872
+ },
23873
+ {
23874
+ "epoch": 0.6315052064848514,
23875
+ "grad_norm": 36.3125,
23876
+ "learning_rate": 9.901327340097664e-06,
23877
+ "loss": 19.6105,
23878
+ "step": 34030
23879
+ },
23880
+ {
23881
+ "epoch": 0.6316907795693313,
23882
+ "grad_norm": 34.875,
23883
+ "learning_rate": 9.901298344311621e-06,
23884
+ "loss": 19.9898,
23885
+ "step": 34040
23886
+ },
23887
+ {
23888
+ "epoch": 0.6318763526538111,
23889
+ "grad_norm": 35.75,
23890
+ "learning_rate": 9.90126934852558e-06,
23891
+ "loss": 19.8148,
23892
+ "step": 34050
23893
+ },
23894
+ {
23895
+ "epoch": 0.6320619257382909,
23896
+ "grad_norm": 36.5,
23897
+ "learning_rate": 9.901240352739538e-06,
23898
+ "loss": 19.6133,
23899
+ "step": 34060
23900
+ },
23901
+ {
23902
+ "epoch": 0.6322474988227708,
23903
+ "grad_norm": 31.59375,
23904
+ "learning_rate": 9.901211356953495e-06,
23905
+ "loss": 20.1236,
23906
+ "step": 34070
23907
+ },
23908
+ {
23909
+ "epoch": 0.6324330719072506,
23910
+ "grad_norm": 35.15625,
23911
+ "learning_rate": 9.901182361167453e-06,
23912
+ "loss": 20.4988,
23913
+ "step": 34080
23914
+ },
23915
+ {
23916
+ "epoch": 0.6326186449917304,
23917
+ "grad_norm": 35.625,
23918
+ "learning_rate": 9.90115336538141e-06,
23919
+ "loss": 19.7418,
23920
+ "step": 34090
23921
+ },
23922
+ {
23923
+ "epoch": 0.6328042180762102,
23924
+ "grad_norm": 34.8125,
23925
+ "learning_rate": 9.901124369595367e-06,
23926
+ "loss": 19.2006,
23927
+ "step": 34100
23928
+ },
23929
+ {
23930
+ "epoch": 0.6329897911606901,
23931
+ "grad_norm": 35.40625,
23932
+ "learning_rate": 9.901095373809325e-06,
23933
+ "loss": 19.9027,
23934
+ "step": 34110
23935
+ },
23936
+ {
23937
+ "epoch": 0.6331753642451698,
23938
+ "grad_norm": 34.71875,
23939
+ "learning_rate": 9.901066378023282e-06,
23940
+ "loss": 19.8453,
23941
+ "step": 34120
23942
+ },
23943
+ {
23944
+ "epoch": 0.6333609373296497,
23945
+ "grad_norm": 36.40625,
23946
+ "learning_rate": 9.901037382237241e-06,
23947
+ "loss": 19.7594,
23948
+ "step": 34130
23949
+ },
23950
+ {
23951
+ "epoch": 0.6335465104141296,
23952
+ "grad_norm": 35.15625,
23953
+ "learning_rate": 9.901008386451197e-06,
23954
+ "loss": 19.8507,
23955
+ "step": 34140
23956
+ },
23957
+ {
23958
+ "epoch": 0.6337320834986093,
23959
+ "grad_norm": 33.40625,
23960
+ "learning_rate": 9.900979390665156e-06,
23961
+ "loss": 19.9106,
23962
+ "step": 34150
23963
+ },
23964
+ {
23965
+ "epoch": 0.6339176565830892,
23966
+ "grad_norm": 35.6875,
23967
+ "learning_rate": 9.900950394879113e-06,
23968
+ "loss": 19.5232,
23969
+ "step": 34160
23970
+ },
23971
+ {
23972
+ "epoch": 0.634103229667569,
23973
+ "grad_norm": 35.9375,
23974
+ "learning_rate": 9.900921399093071e-06,
23975
+ "loss": 20.0958,
23976
+ "step": 34170
23977
+ },
23978
+ {
23979
+ "epoch": 0.6342888027520488,
23980
+ "grad_norm": 35.53125,
23981
+ "learning_rate": 9.900892403307028e-06,
23982
+ "loss": 19.7891,
23983
+ "step": 34180
23984
+ },
23985
+ {
23986
+ "epoch": 0.6344743758365287,
23987
+ "grad_norm": 35.5,
23988
+ "learning_rate": 9.900863407520986e-06,
23989
+ "loss": 19.612,
23990
+ "step": 34190
23991
+ },
23992
+ {
23993
+ "epoch": 0.6346599489210085,
23994
+ "grad_norm": 36.3125,
23995
+ "learning_rate": 9.900834411734943e-06,
23996
+ "loss": 19.3468,
23997
+ "step": 34200
23998
+ },
23999
+ {
24000
+ "epoch": 0.6348455220054883,
24001
+ "grad_norm": 34.71875,
24002
+ "learning_rate": 9.9008054159489e-06,
24003
+ "loss": 19.8482,
24004
+ "step": 34210
24005
+ },
24006
+ {
24007
+ "epoch": 0.6350310950899681,
24008
+ "grad_norm": 36.78125,
24009
+ "learning_rate": 9.900776420162858e-06,
24010
+ "loss": 19.6807,
24011
+ "step": 34220
24012
+ },
24013
+ {
24014
+ "epoch": 0.635216668174448,
24015
+ "grad_norm": 37.4375,
24016
+ "learning_rate": 9.900747424376817e-06,
24017
+ "loss": 19.9276,
24018
+ "step": 34230
24019
+ },
24020
+ {
24021
+ "epoch": 0.6354022412589279,
24022
+ "grad_norm": 34.21875,
24023
+ "learning_rate": 9.900718428590774e-06,
24024
+ "loss": 19.8411,
24025
+ "step": 34240
24026
+ },
24027
+ {
24028
+ "epoch": 0.6355878143434076,
24029
+ "grad_norm": 36.1875,
24030
+ "learning_rate": 9.90068943280473e-06,
24031
+ "loss": 19.7345,
24032
+ "step": 34250
24033
+ },
24034
+ {
24035
+ "epoch": 0.6357733874278875,
24036
+ "grad_norm": 32.90625,
24037
+ "learning_rate": 9.90066043701869e-06,
24038
+ "loss": 19.9992,
24039
+ "step": 34260
24040
+ },
24041
+ {
24042
+ "epoch": 0.6359589605123673,
24043
+ "grad_norm": 35.25,
24044
+ "learning_rate": 9.900631441232647e-06,
24045
+ "loss": 19.8676,
24046
+ "step": 34270
24047
+ },
24048
+ {
24049
+ "epoch": 0.6361445335968471,
24050
+ "grad_norm": 37.65625,
24051
+ "learning_rate": 9.900602445446604e-06,
24052
+ "loss": 20.3433,
24053
+ "step": 34280
24054
+ },
24055
+ {
24056
+ "epoch": 0.636330106681327,
24057
+ "grad_norm": 34.875,
24058
+ "learning_rate": 9.900573449660561e-06,
24059
+ "loss": 19.8522,
24060
+ "step": 34290
24061
+ },
24062
+ {
24063
+ "epoch": 0.6365156797658068,
24064
+ "grad_norm": 35.46875,
24065
+ "learning_rate": 9.900544453874519e-06,
24066
+ "loss": 20.1288,
24067
+ "step": 34300
24068
+ },
24069
+ {
24070
+ "epoch": 0.6367012528502866,
24071
+ "grad_norm": 34.5625,
24072
+ "learning_rate": 9.900515458088476e-06,
24073
+ "loss": 19.6961,
24074
+ "step": 34310
24075
+ },
24076
+ {
24077
+ "epoch": 0.6368868259347664,
24078
+ "grad_norm": 34.6875,
24079
+ "learning_rate": 9.900486462302434e-06,
24080
+ "loss": 19.2526,
24081
+ "step": 34320
24082
+ },
24083
+ {
24084
+ "epoch": 0.6370723990192463,
24085
+ "grad_norm": 35.0,
24086
+ "learning_rate": 9.900457466516393e-06,
24087
+ "loss": 19.4355,
24088
+ "step": 34330
24089
+ },
24090
+ {
24091
+ "epoch": 0.637257972103726,
24092
+ "grad_norm": 34.75,
24093
+ "learning_rate": 9.90042847073035e-06,
24094
+ "loss": 19.768,
24095
+ "step": 34340
24096
+ },
24097
+ {
24098
+ "epoch": 0.6374435451882059,
24099
+ "grad_norm": 36.21875,
24100
+ "learning_rate": 9.900399474944306e-06,
24101
+ "loss": 19.6199,
24102
+ "step": 34350
24103
+ },
24104
+ {
24105
+ "epoch": 0.6376291182726858,
24106
+ "grad_norm": 33.34375,
24107
+ "learning_rate": 9.900370479158265e-06,
24108
+ "loss": 19.721,
24109
+ "step": 34360
24110
+ },
24111
+ {
24112
+ "epoch": 0.6378146913571655,
24113
+ "grad_norm": 35.0625,
24114
+ "learning_rate": 9.900341483372222e-06,
24115
+ "loss": 19.5648,
24116
+ "step": 34370
24117
+ },
24118
+ {
24119
+ "epoch": 0.6380002644416454,
24120
+ "grad_norm": 36.5,
24121
+ "learning_rate": 9.90031248758618e-06,
24122
+ "loss": 19.8864,
24123
+ "step": 34380
24124
+ },
24125
+ {
24126
+ "epoch": 0.6381858375261252,
24127
+ "grad_norm": 36.53125,
24128
+ "learning_rate": 9.900283491800137e-06,
24129
+ "loss": 19.8828,
24130
+ "step": 34390
24131
+ },
24132
+ {
24133
+ "epoch": 0.638371410610605,
24134
+ "grad_norm": 33.9375,
24135
+ "learning_rate": 9.900254496014096e-06,
24136
+ "loss": 19.6419,
24137
+ "step": 34400
24138
+ },
24139
+ {
24140
+ "epoch": 0.6385569836950848,
24141
+ "grad_norm": 36.625,
24142
+ "learning_rate": 9.900225500228052e-06,
24143
+ "loss": 19.6846,
24144
+ "step": 34410
24145
+ },
24146
+ {
24147
+ "epoch": 0.6387425567795647,
24148
+ "grad_norm": 34.78125,
24149
+ "learning_rate": 9.90019650444201e-06,
24150
+ "loss": 20.1099,
24151
+ "step": 34420
24152
+ },
24153
+ {
24154
+ "epoch": 0.6389281298640446,
24155
+ "grad_norm": 37.34375,
24156
+ "learning_rate": 9.900167508655969e-06,
24157
+ "loss": 19.9376,
24158
+ "step": 34430
24159
+ },
24160
+ {
24161
+ "epoch": 0.6391137029485243,
24162
+ "grad_norm": 36.65625,
24163
+ "learning_rate": 9.900138512869926e-06,
24164
+ "loss": 19.7929,
24165
+ "step": 34440
24166
+ },
24167
+ {
24168
+ "epoch": 0.6392992760330042,
24169
+ "grad_norm": 35.5,
24170
+ "learning_rate": 9.900109517083883e-06,
24171
+ "loss": 20.0175,
24172
+ "step": 34450
24173
+ },
24174
+ {
24175
+ "epoch": 0.639484849117484,
24176
+ "grad_norm": 34.65625,
24177
+ "learning_rate": 9.90008052129784e-06,
24178
+ "loss": 19.6058,
24179
+ "step": 34460
24180
+ },
24181
+ {
24182
+ "epoch": 0.6396704222019638,
24183
+ "grad_norm": 35.375,
24184
+ "learning_rate": 9.900051525511798e-06,
24185
+ "loss": 19.6707,
24186
+ "step": 34470
24187
+ },
24188
+ {
24189
+ "epoch": 0.6398559952864437,
24190
+ "grad_norm": 35.5625,
24191
+ "learning_rate": 9.900022529725756e-06,
24192
+ "loss": 20.0595,
24193
+ "step": 34480
24194
+ },
24195
+ {
24196
+ "epoch": 0.6400415683709235,
24197
+ "grad_norm": 34.875,
24198
+ "learning_rate": 9.899993533939713e-06,
24199
+ "loss": 20.3709,
24200
+ "step": 34490
24201
+ },
24202
+ {
24203
+ "epoch": 0.6402271414554033,
24204
+ "grad_norm": 34.53125,
24205
+ "learning_rate": 9.899964538153672e-06,
24206
+ "loss": 20.1387,
24207
+ "step": 34500
24208
+ },
24209
+ {
24210
+ "epoch": 0.6404127145398831,
24211
+ "grad_norm": 34.9375,
24212
+ "learning_rate": 9.899935542367628e-06,
24213
+ "loss": 19.8495,
24214
+ "step": 34510
24215
+ },
24216
+ {
24217
+ "epoch": 0.640598287624363,
24218
+ "grad_norm": 33.9375,
24219
+ "learning_rate": 9.899906546581585e-06,
24220
+ "loss": 19.439,
24221
+ "step": 34520
24222
+ },
24223
+ {
24224
+ "epoch": 0.6407838607088427,
24225
+ "grad_norm": 36.15625,
24226
+ "learning_rate": 9.899877550795544e-06,
24227
+ "loss": 19.9088,
24228
+ "step": 34530
24229
+ },
24230
+ {
24231
+ "epoch": 0.6409694337933226,
24232
+ "grad_norm": 36.03125,
24233
+ "learning_rate": 9.899848555009502e-06,
24234
+ "loss": 20.363,
24235
+ "step": 34540
24236
+ },
24237
+ {
24238
+ "epoch": 0.6411550068778025,
24239
+ "grad_norm": 35.84375,
24240
+ "learning_rate": 9.899819559223459e-06,
24241
+ "loss": 19.5979,
24242
+ "step": 34550
24243
+ },
24244
+ {
24245
+ "epoch": 0.6413405799622822,
24246
+ "grad_norm": 35.5,
24247
+ "learning_rate": 9.899790563437417e-06,
24248
+ "loss": 19.6288,
24249
+ "step": 34560
24250
+ },
24251
+ {
24252
+ "epoch": 0.6415261530467621,
24253
+ "grad_norm": 32.9375,
24254
+ "learning_rate": 9.899761567651374e-06,
24255
+ "loss": 19.7712,
24256
+ "step": 34570
24257
+ },
24258
+ {
24259
+ "epoch": 0.6417117261312419,
24260
+ "grad_norm": 34.78125,
24261
+ "learning_rate": 9.899732571865331e-06,
24262
+ "loss": 19.8826,
24263
+ "step": 34580
24264
+ },
24265
+ {
24266
+ "epoch": 0.6418972992157218,
24267
+ "grad_norm": 35.53125,
24268
+ "learning_rate": 9.899703576079289e-06,
24269
+ "loss": 19.6118,
24270
+ "step": 34590
24271
+ },
24272
+ {
24273
+ "epoch": 0.6420828723002016,
24274
+ "grad_norm": 35.28125,
24275
+ "learning_rate": 9.899674580293248e-06,
24276
+ "loss": 20.0948,
24277
+ "step": 34600
24278
+ },
24279
+ {
24280
+ "epoch": 0.6422684453846814,
24281
+ "grad_norm": 34.90625,
24282
+ "learning_rate": 9.899645584507205e-06,
24283
+ "loss": 19.9192,
24284
+ "step": 34610
24285
+ },
24286
+ {
24287
+ "epoch": 0.6424540184691613,
24288
+ "grad_norm": 35.21875,
24289
+ "learning_rate": 9.899616588721161e-06,
24290
+ "loss": 20.5888,
24291
+ "step": 34620
24292
+ },
24293
+ {
24294
+ "epoch": 0.642639591553641,
24295
+ "grad_norm": 34.59375,
24296
+ "learning_rate": 9.89958759293512e-06,
24297
+ "loss": 19.441,
24298
+ "step": 34630
24299
+ },
24300
+ {
24301
+ "epoch": 0.6428251646381209,
24302
+ "grad_norm": 35.21875,
24303
+ "learning_rate": 9.899558597149077e-06,
24304
+ "loss": 19.7351,
24305
+ "step": 34640
24306
+ },
24307
+ {
24308
+ "epoch": 0.6430107377226008,
24309
+ "grad_norm": 35.625,
24310
+ "learning_rate": 9.899529601363035e-06,
24311
+ "loss": 19.9079,
24312
+ "step": 34650
24313
+ },
24314
+ {
24315
+ "epoch": 0.6431963108070805,
24316
+ "grad_norm": 36.4375,
24317
+ "learning_rate": 9.899500605576992e-06,
24318
+ "loss": 19.9245,
24319
+ "step": 34660
24320
+ },
24321
+ {
24322
+ "epoch": 0.6433818838915604,
24323
+ "grad_norm": 35.75,
24324
+ "learning_rate": 9.89947160979095e-06,
24325
+ "loss": 19.9472,
24326
+ "step": 34670
24327
+ },
24328
+ {
24329
+ "epoch": 0.6435674569760402,
24330
+ "grad_norm": 35.09375,
24331
+ "learning_rate": 9.899442614004907e-06,
24332
+ "loss": 19.8009,
24333
+ "step": 34680
24334
+ },
24335
+ {
24336
+ "epoch": 0.64375303006052,
24337
+ "grad_norm": 35.15625,
24338
+ "learning_rate": 9.899413618218865e-06,
24339
+ "loss": 20.0998,
24340
+ "step": 34690
24341
+ },
24342
+ {
24343
+ "epoch": 0.6439386031449998,
24344
+ "grad_norm": 35.96875,
24345
+ "learning_rate": 9.899384622432822e-06,
24346
+ "loss": 19.661,
24347
+ "step": 34700
24348
+ },
24349
+ {
24350
+ "epoch": 0.6441241762294797,
24351
+ "grad_norm": 35.28125,
24352
+ "learning_rate": 9.899355626646781e-06,
24353
+ "loss": 20.1212,
24354
+ "step": 34710
24355
+ },
24356
+ {
24357
+ "epoch": 0.6443097493139595,
24358
+ "grad_norm": 36.4375,
24359
+ "learning_rate": 9.899326630860738e-06,
24360
+ "loss": 19.703,
24361
+ "step": 34720
24362
+ },
24363
+ {
24364
+ "epoch": 0.6444953223984393,
24365
+ "grad_norm": 36.09375,
24366
+ "learning_rate": 9.899297635074694e-06,
24367
+ "loss": 19.6669,
24368
+ "step": 34730
24369
+ },
24370
+ {
24371
+ "epoch": 0.6446808954829192,
24372
+ "grad_norm": 36.34375,
24373
+ "learning_rate": 9.899268639288653e-06,
24374
+ "loss": 20.0713,
24375
+ "step": 34740
24376
+ },
24377
+ {
24378
+ "epoch": 0.6448664685673989,
24379
+ "grad_norm": 34.8125,
24380
+ "learning_rate": 9.89923964350261e-06,
24381
+ "loss": 19.7988,
24382
+ "step": 34750
24383
+ },
24384
+ {
24385
+ "epoch": 0.6450520416518788,
24386
+ "grad_norm": 34.875,
24387
+ "learning_rate": 9.899210647716568e-06,
24388
+ "loss": 19.8849,
24389
+ "step": 34760
24390
+ },
24391
+ {
24392
+ "epoch": 0.6452376147363587,
24393
+ "grad_norm": 34.34375,
24394
+ "learning_rate": 9.899181651930525e-06,
24395
+ "loss": 19.7736,
24396
+ "step": 34770
24397
+ },
24398
+ {
24399
+ "epoch": 0.6454231878208385,
24400
+ "grad_norm": 36.3125,
24401
+ "learning_rate": 9.899152656144483e-06,
24402
+ "loss": 20.3017,
24403
+ "step": 34780
24404
+ },
24405
+ {
24406
+ "epoch": 0.6456087609053183,
24407
+ "grad_norm": 33.65625,
24408
+ "learning_rate": 9.89912366035844e-06,
24409
+ "loss": 20.0971,
24410
+ "step": 34790
24411
+ },
24412
+ {
24413
+ "epoch": 0.6457943339897981,
24414
+ "grad_norm": 36.03125,
24415
+ "learning_rate": 9.899094664572398e-06,
24416
+ "loss": 19.5588,
24417
+ "step": 34800
24418
+ },
24419
+ {
24420
+ "epoch": 0.645979907074278,
24421
+ "grad_norm": 33.84375,
24422
+ "learning_rate": 9.899065668786357e-06,
24423
+ "loss": 19.7038,
24424
+ "step": 34810
24425
+ },
24426
+ {
24427
+ "epoch": 0.6461654801587577,
24428
+ "grad_norm": 35.78125,
24429
+ "learning_rate": 9.899036673000314e-06,
24430
+ "loss": 20.0109,
24431
+ "step": 34820
24432
+ },
24433
+ {
24434
+ "epoch": 0.6463510532432376,
24435
+ "grad_norm": 35.03125,
24436
+ "learning_rate": 9.899007677214272e-06,
24437
+ "loss": 19.5806,
24438
+ "step": 34830
24439
+ },
24440
+ {
24441
+ "epoch": 0.6465366263277175,
24442
+ "grad_norm": 34.125,
24443
+ "learning_rate": 9.898978681428229e-06,
24444
+ "loss": 19.8743,
24445
+ "step": 34840
24446
+ },
24447
+ {
24448
+ "epoch": 0.6467221994121972,
24449
+ "grad_norm": 36.375,
24450
+ "learning_rate": 9.898949685642186e-06,
24451
+ "loss": 19.4995,
24452
+ "step": 34850
24453
+ },
24454
+ {
24455
+ "epoch": 0.6469077724966771,
24456
+ "grad_norm": 35.46875,
24457
+ "learning_rate": 9.898920689856144e-06,
24458
+ "loss": 19.5762,
24459
+ "step": 34860
24460
+ },
24461
+ {
24462
+ "epoch": 0.6470933455811569,
24463
+ "grad_norm": 35.375,
24464
+ "learning_rate": 9.898891694070101e-06,
24465
+ "loss": 19.7716,
24466
+ "step": 34870
24467
+ },
24468
+ {
24469
+ "epoch": 0.6472789186656367,
24470
+ "grad_norm": 34.5,
24471
+ "learning_rate": 9.89886269828406e-06,
24472
+ "loss": 19.8697,
24473
+ "step": 34880
24474
+ },
24475
+ {
24476
+ "epoch": 0.6474644917501166,
24477
+ "grad_norm": 36.03125,
24478
+ "learning_rate": 9.898833702498016e-06,
24479
+ "loss": 19.7747,
24480
+ "step": 34890
24481
+ },
24482
+ {
24483
+ "epoch": 0.6476500648345964,
24484
+ "grad_norm": 35.15625,
24485
+ "learning_rate": 9.898804706711973e-06,
24486
+ "loss": 19.6159,
24487
+ "step": 34900
24488
+ },
24489
+ {
24490
+ "epoch": 0.6478356379190762,
24491
+ "grad_norm": 35.53125,
24492
+ "learning_rate": 9.898775710925933e-06,
24493
+ "loss": 20.2549,
24494
+ "step": 34910
24495
+ },
24496
+ {
24497
+ "epoch": 0.648021211003556,
24498
+ "grad_norm": 35.09375,
24499
+ "learning_rate": 9.89874671513989e-06,
24500
+ "loss": 20.0461,
24501
+ "step": 34920
24502
+ },
24503
+ {
24504
+ "epoch": 0.6482067840880359,
24505
+ "grad_norm": 34.125,
24506
+ "learning_rate": 9.898717719353847e-06,
24507
+ "loss": 19.3693,
24508
+ "step": 34930
24509
+ },
24510
+ {
24511
+ "epoch": 0.6483923571725156,
24512
+ "grad_norm": 34.625,
24513
+ "learning_rate": 9.898688723567805e-06,
24514
+ "loss": 19.9014,
24515
+ "step": 34940
24516
+ },
24517
+ {
24518
+ "epoch": 0.6485779302569955,
24519
+ "grad_norm": 37.65625,
24520
+ "learning_rate": 9.898659727781762e-06,
24521
+ "loss": 20.2016,
24522
+ "step": 34950
24523
+ },
24524
+ {
24525
+ "epoch": 0.6487635033414754,
24526
+ "grad_norm": 35.53125,
24527
+ "learning_rate": 9.89863073199572e-06,
24528
+ "loss": 20.0671,
24529
+ "step": 34960
24530
+ },
24531
+ {
24532
+ "epoch": 0.6489490764259552,
24533
+ "grad_norm": 34.5,
24534
+ "learning_rate": 9.898601736209677e-06,
24535
+ "loss": 19.811,
24536
+ "step": 34970
24537
+ },
24538
+ {
24539
+ "epoch": 0.649134649510435,
24540
+ "grad_norm": 35.40625,
24541
+ "learning_rate": 9.898572740423636e-06,
24542
+ "loss": 19.5194,
24543
+ "step": 34980
24544
+ },
24545
+ {
24546
+ "epoch": 0.6493202225949148,
24547
+ "grad_norm": 38.28125,
24548
+ "learning_rate": 9.898543744637594e-06,
24549
+ "loss": 19.9472,
24550
+ "step": 34990
24551
+ },
24552
+ {
24553
+ "epoch": 0.6495057956793947,
24554
+ "grad_norm": 33.96875,
24555
+ "learning_rate": 9.89851474885155e-06,
24556
+ "loss": 19.7037,
24557
+ "step": 35000
24558
+ },
24559
+ {
24560
+ "epoch": 0.6495057956793947,
24561
+ "eval_loss": 2.4709317684173584,
24562
+ "eval_runtime": 455.6259,
24563
+ "eval_samples_per_second": 3187.082,
24564
+ "eval_steps_per_second": 49.8,
24565
+ "step": 35000
24566
  }
24567
  ],
24568
  "logging_steps": 10,
 
24582
  "attributes": {}
24583
  }
24584
  },
24585
+ "total_flos": 6.109329656643584e+18,
24586
  "train_batch_size": 8,
24587
  "trial_name": null,
24588
  "trial_params": null