CocoRoF commited on
Commit
a223e0d
·
verified ·
1 Parent(s): da7bebd

Training in progress, step 40000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf11920f2c9ec0d2d19594110c2b51d301a3be0b7a5c64b90c553593388f3b96
3
  size 306619286
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecd57c7eace6adeec4710fec99cac2074403be145022a172c36c987b89390434
3
  size 306619286
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9fd7ea0617e8351507c27a226d306b44f57a13cfca8832dbb9a6a416ba79c7b
3
  size 919972410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51104d0f8c0c31f4a4bccea6f88abc44b2a966c89edf808f6831a68d92b864e7
3
  size 919972410
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04cb5208648fd09a2e0403d51973f74ffbfd93cbd5da59e1e99c8df03769a86c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e735ed11597ed40a2b6854e0229902e1a21fedc0a0dbc608ca905fae57d5b06b
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7034685b36b93a4dd3a50697b0b1c314b249b2189ec2cb96b757312b1514a579
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ba3815fc0953b1b7f08cea092dfc0a62c4bbc2a2c68780d3f4dd0b5e22582a7
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e851fe1c1de0057f4eecefed6a131fa9021334eb43f6e7e65fdb270a25ac864
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:647ac15563fcad903adbb616e9b2c36b237a3ed5939d088620212da969930f6c
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:978379030048e432baa510ec4fc9514faa08fe564ab964b3a4d05e8f60306495
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93e3733c5b180986b7efbec17b663bf5231343d187374d184768fcd913797167
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbc75d90af112615b53d15931e8157a80e37bcd110aac9a3089f5f6f5344171
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9820ea4fec1b01f3da091290c3e8b5ddb86a3a3fa17285c248b64910c2d0b4f0
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c8a310f6ca2ca89570eb2cc68544656b30224f00b2d6d96eeda6e0cb8be50ab
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7413035def085e41776a629afc94fc24fe5a955f1ad83b32f9b370ab60f9a18d
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c5b8110fcf6e044b6860c6305be969cfe03129549b92dc6fc2394448e9265d6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91e3953bcbf4089415abffbd914fbbe4580121f6c843eabbf70624c5ed144814
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f936acaf5a2d5fe8c38d945450417facbf1577584c216908a396d3cc20bec88
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:369fde7bff4dfc0d6b9cf773cf9b0352696083f84763999e05a631ee6d52c5e3
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ff31aac428f9992f606e05ff9d9b75bec2abb517b825e89760b21fb1796744f
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5802fc71a5ce36cca3a7f664b0fbad4d08efb9895a1c5eaa5692a421831e9c11
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6495057956793947,
5
  "eval_steps": 5000,
6
- "global_step": 35000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -24563,6 +24563,3514 @@
24563
  "eval_samples_per_second": 3187.082,
24564
  "eval_steps_per_second": 49.8,
24565
  "step": 35000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24566
  }
24567
  ],
24568
  "logging_steps": 10,
@@ -24582,7 +28090,7 @@
24582
  "attributes": {}
24583
  }
24584
  },
24585
- "total_flos": 6.109329656643584e+18,
24586
  "train_batch_size": 8,
24587
  "trial_name": null,
24588
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7422923379193082,
5
  "eval_steps": 5000,
6
+ "global_step": 40000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
24563
  "eval_samples_per_second": 3187.082,
24564
  "eval_steps_per_second": 49.8,
24565
  "step": 35000
24566
+ },
24567
+ {
24568
+ "epoch": 0.6496913687638745,
24569
+ "grad_norm": 35.21875,
24570
+ "learning_rate": 9.898485753065508e-06,
24571
+ "loss": 19.7997,
24572
+ "step": 35010
24573
+ },
24574
+ {
24575
+ "epoch": 0.6498769418483543,
24576
+ "grad_norm": 37.65625,
24577
+ "learning_rate": 9.898456757279466e-06,
24578
+ "loss": 20.4342,
24579
+ "step": 35020
24580
+ },
24581
+ {
24582
+ "epoch": 0.6500625149328342,
24583
+ "grad_norm": 34.59375,
24584
+ "learning_rate": 9.898427761493423e-06,
24585
+ "loss": 19.5847,
24586
+ "step": 35030
24587
+ },
24588
+ {
24589
+ "epoch": 0.6502480880173139,
24590
+ "grad_norm": 35.5,
24591
+ "learning_rate": 9.89839876570738e-06,
24592
+ "loss": 19.8744,
24593
+ "step": 35040
24594
+ },
24595
+ {
24596
+ "epoch": 0.6504336611017938,
24597
+ "grad_norm": 36.15625,
24598
+ "learning_rate": 9.898369769921338e-06,
24599
+ "loss": 19.74,
24600
+ "step": 35050
24601
+ },
24602
+ {
24603
+ "epoch": 0.6506192341862737,
24604
+ "grad_norm": 36.96875,
24605
+ "learning_rate": 9.898340774135295e-06,
24606
+ "loss": 19.1062,
24607
+ "step": 35060
24608
+ },
24609
+ {
24610
+ "epoch": 0.6508048072707534,
24611
+ "grad_norm": 36.21875,
24612
+ "learning_rate": 9.898311778349253e-06,
24613
+ "loss": 19.9495,
24614
+ "step": 35070
24615
+ },
24616
+ {
24617
+ "epoch": 0.6509903803552333,
24618
+ "grad_norm": 33.96875,
24619
+ "learning_rate": 9.898282782563212e-06,
24620
+ "loss": 19.6127,
24621
+ "step": 35080
24622
+ },
24623
+ {
24624
+ "epoch": 0.6511759534397131,
24625
+ "grad_norm": 34.03125,
24626
+ "learning_rate": 9.89825378677717e-06,
24627
+ "loss": 19.839,
24628
+ "step": 35090
24629
+ },
24630
+ {
24631
+ "epoch": 0.6513615265241929,
24632
+ "grad_norm": 35.8125,
24633
+ "learning_rate": 9.898224790991125e-06,
24634
+ "loss": 19.3891,
24635
+ "step": 35100
24636
+ },
24637
+ {
24638
+ "epoch": 0.6515470996086727,
24639
+ "grad_norm": 37.0,
24640
+ "learning_rate": 9.898195795205084e-06,
24641
+ "loss": 19.8909,
24642
+ "step": 35110
24643
+ },
24644
+ {
24645
+ "epoch": 0.6517326726931526,
24646
+ "grad_norm": 33.90625,
24647
+ "learning_rate": 9.898166799419041e-06,
24648
+ "loss": 19.5183,
24649
+ "step": 35120
24650
+ },
24651
+ {
24652
+ "epoch": 0.6519182457776325,
24653
+ "grad_norm": 36.625,
24654
+ "learning_rate": 9.898137803632999e-06,
24655
+ "loss": 20.0471,
24656
+ "step": 35130
24657
+ },
24658
+ {
24659
+ "epoch": 0.6521038188621122,
24660
+ "grad_norm": 35.0,
24661
+ "learning_rate": 9.898108807846956e-06,
24662
+ "loss": 19.6311,
24663
+ "step": 35140
24664
+ },
24665
+ {
24666
+ "epoch": 0.6522893919465921,
24667
+ "grad_norm": 35.90625,
24668
+ "learning_rate": 9.898079812060914e-06,
24669
+ "loss": 19.8913,
24670
+ "step": 35150
24671
+ },
24672
+ {
24673
+ "epoch": 0.6524749650310719,
24674
+ "grad_norm": 35.5,
24675
+ "learning_rate": 9.898050816274871e-06,
24676
+ "loss": 19.7133,
24677
+ "step": 35160
24678
+ },
24679
+ {
24680
+ "epoch": 0.6526605381155517,
24681
+ "grad_norm": 36.65625,
24682
+ "learning_rate": 9.898021820488829e-06,
24683
+ "loss": 19.5409,
24684
+ "step": 35170
24685
+ },
24686
+ {
24687
+ "epoch": 0.6528461112000316,
24688
+ "grad_norm": 36.78125,
24689
+ "learning_rate": 9.897992824702786e-06,
24690
+ "loss": 19.4254,
24691
+ "step": 35180
24692
+ },
24693
+ {
24694
+ "epoch": 0.6530316842845114,
24695
+ "grad_norm": 36.09375,
24696
+ "learning_rate": 9.897963828916745e-06,
24697
+ "loss": 20.0565,
24698
+ "step": 35190
24699
+ },
24700
+ {
24701
+ "epoch": 0.6532172573689912,
24702
+ "grad_norm": 34.0,
24703
+ "learning_rate": 9.897934833130702e-06,
24704
+ "loss": 19.5167,
24705
+ "step": 35200
24706
+ },
24707
+ {
24708
+ "epoch": 0.653402830453471,
24709
+ "grad_norm": 36.03125,
24710
+ "learning_rate": 9.89790583734466e-06,
24711
+ "loss": 19.5704,
24712
+ "step": 35210
24713
+ },
24714
+ {
24715
+ "epoch": 0.6535884035379509,
24716
+ "grad_norm": 35.15625,
24717
+ "learning_rate": 9.897876841558617e-06,
24718
+ "loss": 19.4049,
24719
+ "step": 35220
24720
+ },
24721
+ {
24722
+ "epoch": 0.6537739766224306,
24723
+ "grad_norm": 35.78125,
24724
+ "learning_rate": 9.897847845772575e-06,
24725
+ "loss": 19.5957,
24726
+ "step": 35230
24727
+ },
24728
+ {
24729
+ "epoch": 0.6539595497069105,
24730
+ "grad_norm": 34.46875,
24731
+ "learning_rate": 9.897818849986532e-06,
24732
+ "loss": 19.1128,
24733
+ "step": 35240
24734
+ },
24735
+ {
24736
+ "epoch": 0.6541451227913904,
24737
+ "grad_norm": 36.09375,
24738
+ "learning_rate": 9.89778985420049e-06,
24739
+ "loss": 19.9392,
24740
+ "step": 35250
24741
+ },
24742
+ {
24743
+ "epoch": 0.6543306958758701,
24744
+ "grad_norm": 34.28125,
24745
+ "learning_rate": 9.897760858414447e-06,
24746
+ "loss": 19.5701,
24747
+ "step": 35260
24748
+ },
24749
+ {
24750
+ "epoch": 0.65451626896035,
24751
+ "grad_norm": 37.03125,
24752
+ "learning_rate": 9.897731862628404e-06,
24753
+ "loss": 19.6126,
24754
+ "step": 35270
24755
+ },
24756
+ {
24757
+ "epoch": 0.6547018420448298,
24758
+ "grad_norm": 35.25,
24759
+ "learning_rate": 9.897702866842362e-06,
24760
+ "loss": 19.5181,
24761
+ "step": 35280
24762
+ },
24763
+ {
24764
+ "epoch": 0.6548874151293096,
24765
+ "grad_norm": 33.96875,
24766
+ "learning_rate": 9.89767387105632e-06,
24767
+ "loss": 20.0623,
24768
+ "step": 35290
24769
+ },
24770
+ {
24771
+ "epoch": 0.6550729882137895,
24772
+ "grad_norm": 34.96875,
24773
+ "learning_rate": 9.897644875270278e-06,
24774
+ "loss": 19.9774,
24775
+ "step": 35300
24776
+ },
24777
+ {
24778
+ "epoch": 0.6552585612982693,
24779
+ "grad_norm": 35.0,
24780
+ "learning_rate": 9.897615879484236e-06,
24781
+ "loss": 20.0012,
24782
+ "step": 35310
24783
+ },
24784
+ {
24785
+ "epoch": 0.6554441343827492,
24786
+ "grad_norm": 33.4375,
24787
+ "learning_rate": 9.897586883698193e-06,
24788
+ "loss": 19.374,
24789
+ "step": 35320
24790
+ },
24791
+ {
24792
+ "epoch": 0.6556297074672289,
24793
+ "grad_norm": 36.15625,
24794
+ "learning_rate": 9.89755788791215e-06,
24795
+ "loss": 19.6139,
24796
+ "step": 35330
24797
+ },
24798
+ {
24799
+ "epoch": 0.6558152805517088,
24800
+ "grad_norm": 34.71875,
24801
+ "learning_rate": 9.897528892126108e-06,
24802
+ "loss": 20.0223,
24803
+ "step": 35340
24804
+ },
24805
+ {
24806
+ "epoch": 0.6560008536361887,
24807
+ "grad_norm": 36.3125,
24808
+ "learning_rate": 9.897499896340065e-06,
24809
+ "loss": 19.6657,
24810
+ "step": 35350
24811
+ },
24812
+ {
24813
+ "epoch": 0.6561864267206684,
24814
+ "grad_norm": 35.90625,
24815
+ "learning_rate": 9.897470900554024e-06,
24816
+ "loss": 19.4794,
24817
+ "step": 35360
24818
+ },
24819
+ {
24820
+ "epoch": 0.6563719998051483,
24821
+ "grad_norm": 35.1875,
24822
+ "learning_rate": 9.89744190476798e-06,
24823
+ "loss": 19.885,
24824
+ "step": 35370
24825
+ },
24826
+ {
24827
+ "epoch": 0.6565575728896281,
24828
+ "grad_norm": 34.875,
24829
+ "learning_rate": 9.897412908981937e-06,
24830
+ "loss": 19.5217,
24831
+ "step": 35380
24832
+ },
24833
+ {
24834
+ "epoch": 0.6567431459741079,
24835
+ "grad_norm": 38.28125,
24836
+ "learning_rate": 9.897383913195897e-06,
24837
+ "loss": 19.5472,
24838
+ "step": 35390
24839
+ },
24840
+ {
24841
+ "epoch": 0.6569287190585877,
24842
+ "grad_norm": 35.5,
24843
+ "learning_rate": 9.897354917409854e-06,
24844
+ "loss": 20.072,
24845
+ "step": 35400
24846
+ },
24847
+ {
24848
+ "epoch": 0.6571142921430676,
24849
+ "grad_norm": 36.125,
24850
+ "learning_rate": 9.897325921623811e-06,
24851
+ "loss": 19.4725,
24852
+ "step": 35410
24853
+ },
24854
+ {
24855
+ "epoch": 0.6572998652275474,
24856
+ "grad_norm": 34.21875,
24857
+ "learning_rate": 9.897296925837769e-06,
24858
+ "loss": 19.6822,
24859
+ "step": 35420
24860
+ },
24861
+ {
24862
+ "epoch": 0.6574854383120272,
24863
+ "grad_norm": 34.96875,
24864
+ "learning_rate": 9.897267930051726e-06,
24865
+ "loss": 19.8072,
24866
+ "step": 35430
24867
+ },
24868
+ {
24869
+ "epoch": 0.6576710113965071,
24870
+ "grad_norm": 35.90625,
24871
+ "learning_rate": 9.897238934265684e-06,
24872
+ "loss": 19.9793,
24873
+ "step": 35440
24874
+ },
24875
+ {
24876
+ "epoch": 0.6578565844809868,
24877
+ "grad_norm": 35.625,
24878
+ "learning_rate": 9.897209938479641e-06,
24879
+ "loss": 19.8958,
24880
+ "step": 35450
24881
+ },
24882
+ {
24883
+ "epoch": 0.6580421575654667,
24884
+ "grad_norm": 36.75,
24885
+ "learning_rate": 9.8971809426936e-06,
24886
+ "loss": 19.4194,
24887
+ "step": 35460
24888
+ },
24889
+ {
24890
+ "epoch": 0.6582277306499466,
24891
+ "grad_norm": 37.4375,
24892
+ "learning_rate": 9.897151946907558e-06,
24893
+ "loss": 20.0411,
24894
+ "step": 35470
24895
+ },
24896
+ {
24897
+ "epoch": 0.6584133037344264,
24898
+ "grad_norm": 35.5625,
24899
+ "learning_rate": 9.897122951121513e-06,
24900
+ "loss": 19.4788,
24901
+ "step": 35480
24902
+ },
24903
+ {
24904
+ "epoch": 0.6585988768189062,
24905
+ "grad_norm": 35.75,
24906
+ "learning_rate": 9.897093955335472e-06,
24907
+ "loss": 19.7224,
24908
+ "step": 35490
24909
+ },
24910
+ {
24911
+ "epoch": 0.658784449903386,
24912
+ "grad_norm": 33.25,
24913
+ "learning_rate": 9.89706495954943e-06,
24914
+ "loss": 19.5881,
24915
+ "step": 35500
24916
+ },
24917
+ {
24918
+ "epoch": 0.6589700229878659,
24919
+ "grad_norm": 35.1875,
24920
+ "learning_rate": 9.897035963763387e-06,
24921
+ "loss": 19.8756,
24922
+ "step": 35510
24923
+ },
24924
+ {
24925
+ "epoch": 0.6591555960723456,
24926
+ "grad_norm": 35.40625,
24927
+ "learning_rate": 9.897006967977345e-06,
24928
+ "loss": 19.8553,
24929
+ "step": 35520
24930
+ },
24931
+ {
24932
+ "epoch": 0.6593411691568255,
24933
+ "grad_norm": 35.9375,
24934
+ "learning_rate": 9.896977972191302e-06,
24935
+ "loss": 19.6849,
24936
+ "step": 35530
24937
+ },
24938
+ {
24939
+ "epoch": 0.6595267422413054,
24940
+ "grad_norm": 36.0,
24941
+ "learning_rate": 9.89694897640526e-06,
24942
+ "loss": 19.5625,
24943
+ "step": 35540
24944
+ },
24945
+ {
24946
+ "epoch": 0.6597123153257851,
24947
+ "grad_norm": 34.21875,
24948
+ "learning_rate": 9.896919980619217e-06,
24949
+ "loss": 19.38,
24950
+ "step": 35550
24951
+ },
24952
+ {
24953
+ "epoch": 0.659897888410265,
24954
+ "grad_norm": 35.125,
24955
+ "learning_rate": 9.896890984833176e-06,
24956
+ "loss": 20.0188,
24957
+ "step": 35560
24958
+ },
24959
+ {
24960
+ "epoch": 0.6600834614947448,
24961
+ "grad_norm": 36.90625,
24962
+ "learning_rate": 9.896861989047133e-06,
24963
+ "loss": 19.509,
24964
+ "step": 35570
24965
+ },
24966
+ {
24967
+ "epoch": 0.6602690345792246,
24968
+ "grad_norm": 36.0625,
24969
+ "learning_rate": 9.89683299326109e-06,
24970
+ "loss": 19.6172,
24971
+ "step": 35580
24972
+ },
24973
+ {
24974
+ "epoch": 0.6604546076637045,
24975
+ "grad_norm": 36.75,
24976
+ "learning_rate": 9.896803997475048e-06,
24977
+ "loss": 20.0115,
24978
+ "step": 35590
24979
+ },
24980
+ {
24981
+ "epoch": 0.6606401807481843,
24982
+ "grad_norm": 35.9375,
24983
+ "learning_rate": 9.896775001689006e-06,
24984
+ "loss": 19.4408,
24985
+ "step": 35600
24986
+ },
24987
+ {
24988
+ "epoch": 0.6608257538326641,
24989
+ "grad_norm": 36.8125,
24990
+ "learning_rate": 9.896746005902963e-06,
24991
+ "loss": 19.8966,
24992
+ "step": 35610
24993
+ },
24994
+ {
24995
+ "epoch": 0.6610113269171439,
24996
+ "grad_norm": 35.4375,
24997
+ "learning_rate": 9.89671701011692e-06,
24998
+ "loss": 19.3682,
24999
+ "step": 35620
25000
+ },
25001
+ {
25002
+ "epoch": 0.6611969000016238,
25003
+ "grad_norm": 36.6875,
25004
+ "learning_rate": 9.896688014330878e-06,
25005
+ "loss": 19.6727,
25006
+ "step": 35630
25007
+ },
25008
+ {
25009
+ "epoch": 0.6613824730861035,
25010
+ "grad_norm": 36.0625,
25011
+ "learning_rate": 9.896659018544835e-06,
25012
+ "loss": 19.741,
25013
+ "step": 35640
25014
+ },
25015
+ {
25016
+ "epoch": 0.6615680461705834,
25017
+ "grad_norm": 36.15625,
25018
+ "learning_rate": 9.896630022758793e-06,
25019
+ "loss": 20.0741,
25020
+ "step": 35650
25021
+ },
25022
+ {
25023
+ "epoch": 0.6617536192550633,
25024
+ "grad_norm": 35.09375,
25025
+ "learning_rate": 9.896601026972752e-06,
25026
+ "loss": 19.2563,
25027
+ "step": 35660
25028
+ },
25029
+ {
25030
+ "epoch": 0.6619391923395431,
25031
+ "grad_norm": 36.25,
25032
+ "learning_rate": 9.896572031186709e-06,
25033
+ "loss": 19.8901,
25034
+ "step": 35670
25035
+ },
25036
+ {
25037
+ "epoch": 0.6621247654240229,
25038
+ "grad_norm": 34.71875,
25039
+ "learning_rate": 9.896543035400666e-06,
25040
+ "loss": 19.9751,
25041
+ "step": 35680
25042
+ },
25043
+ {
25044
+ "epoch": 0.6623103385085027,
25045
+ "grad_norm": 33.90625,
25046
+ "learning_rate": 9.896514039614624e-06,
25047
+ "loss": 19.0984,
25048
+ "step": 35690
25049
+ },
25050
+ {
25051
+ "epoch": 0.6624959115929826,
25052
+ "grad_norm": 35.09375,
25053
+ "learning_rate": 9.896485043828581e-06,
25054
+ "loss": 19.549,
25055
+ "step": 35700
25056
+ },
25057
+ {
25058
+ "epoch": 0.6626814846774624,
25059
+ "grad_norm": 34.40625,
25060
+ "learning_rate": 9.896456048042539e-06,
25061
+ "loss": 19.4845,
25062
+ "step": 35710
25063
+ },
25064
+ {
25065
+ "epoch": 0.6628670577619422,
25066
+ "grad_norm": 35.8125,
25067
+ "learning_rate": 9.896427052256496e-06,
25068
+ "loss": 19.5595,
25069
+ "step": 35720
25070
+ },
25071
+ {
25072
+ "epoch": 0.6630526308464221,
25073
+ "grad_norm": 36.21875,
25074
+ "learning_rate": 9.896398056470453e-06,
25075
+ "loss": 20.1986,
25076
+ "step": 35730
25077
+ },
25078
+ {
25079
+ "epoch": 0.6632382039309018,
25080
+ "grad_norm": 35.09375,
25081
+ "learning_rate": 9.896369060684413e-06,
25082
+ "loss": 19.7129,
25083
+ "step": 35740
25084
+ },
25085
+ {
25086
+ "epoch": 0.6634237770153817,
25087
+ "grad_norm": 37.5,
25088
+ "learning_rate": 9.896340064898368e-06,
25089
+ "loss": 19.6799,
25090
+ "step": 35750
25091
+ },
25092
+ {
25093
+ "epoch": 0.6636093500998616,
25094
+ "grad_norm": 34.65625,
25095
+ "learning_rate": 9.896311069112326e-06,
25096
+ "loss": 19.6695,
25097
+ "step": 35760
25098
+ },
25099
+ {
25100
+ "epoch": 0.6637949231843413,
25101
+ "grad_norm": 36.28125,
25102
+ "learning_rate": 9.896282073326285e-06,
25103
+ "loss": 19.8379,
25104
+ "step": 35770
25105
+ },
25106
+ {
25107
+ "epoch": 0.6639804962688212,
25108
+ "grad_norm": 36.0625,
25109
+ "learning_rate": 9.896253077540242e-06,
25110
+ "loss": 19.4283,
25111
+ "step": 35780
25112
+ },
25113
+ {
25114
+ "epoch": 0.664166069353301,
25115
+ "grad_norm": 36.75,
25116
+ "learning_rate": 9.8962240817542e-06,
25117
+ "loss": 19.5346,
25118
+ "step": 35790
25119
+ },
25120
+ {
25121
+ "epoch": 0.6643516424377808,
25122
+ "grad_norm": 36.625,
25123
+ "learning_rate": 9.896195085968157e-06,
25124
+ "loss": 19.9317,
25125
+ "step": 35800
25126
+ },
25127
+ {
25128
+ "epoch": 0.6645372155222606,
25129
+ "grad_norm": 36.65625,
25130
+ "learning_rate": 9.896166090182114e-06,
25131
+ "loss": 20.057,
25132
+ "step": 35810
25133
+ },
25134
+ {
25135
+ "epoch": 0.6647227886067405,
25136
+ "grad_norm": 37.375,
25137
+ "learning_rate": 9.896137094396072e-06,
25138
+ "loss": 19.5665,
25139
+ "step": 35820
25140
+ },
25141
+ {
25142
+ "epoch": 0.6649083616912203,
25143
+ "grad_norm": 34.875,
25144
+ "learning_rate": 9.89610809861003e-06,
25145
+ "loss": 19.5112,
25146
+ "step": 35830
25147
+ },
25148
+ {
25149
+ "epoch": 0.6650939347757001,
25150
+ "grad_norm": 35.03125,
25151
+ "learning_rate": 9.896079102823988e-06,
25152
+ "loss": 19.7005,
25153
+ "step": 35840
25154
+ },
25155
+ {
25156
+ "epoch": 0.66527950786018,
25157
+ "grad_norm": 36.46875,
25158
+ "learning_rate": 9.896050107037944e-06,
25159
+ "loss": 19.6946,
25160
+ "step": 35850
25161
+ },
25162
+ {
25163
+ "epoch": 0.6654650809446598,
25164
+ "grad_norm": 35.96875,
25165
+ "learning_rate": 9.896021111251901e-06,
25166
+ "loss": 19.9033,
25167
+ "step": 35860
25168
+ },
25169
+ {
25170
+ "epoch": 0.6656506540291396,
25171
+ "grad_norm": 35.15625,
25172
+ "learning_rate": 9.89599211546586e-06,
25173
+ "loss": 19.7513,
25174
+ "step": 35870
25175
+ },
25176
+ {
25177
+ "epoch": 0.6658362271136194,
25178
+ "grad_norm": 34.28125,
25179
+ "learning_rate": 9.895963119679818e-06,
25180
+ "loss": 19.5514,
25181
+ "step": 35880
25182
+ },
25183
+ {
25184
+ "epoch": 0.6660218001980993,
25185
+ "grad_norm": 34.75,
25186
+ "learning_rate": 9.895934123893775e-06,
25187
+ "loss": 19.5261,
25188
+ "step": 35890
25189
+ },
25190
+ {
25191
+ "epoch": 0.6662073732825791,
25192
+ "grad_norm": 34.25,
25193
+ "learning_rate": 9.895905128107733e-06,
25194
+ "loss": 20.0393,
25195
+ "step": 35900
25196
+ },
25197
+ {
25198
+ "epoch": 0.6663929463670589,
25199
+ "grad_norm": 34.40625,
25200
+ "learning_rate": 9.89587613232169e-06,
25201
+ "loss": 19.6479,
25202
+ "step": 35910
25203
+ },
25204
+ {
25205
+ "epoch": 0.6665785194515388,
25206
+ "grad_norm": 37.53125,
25207
+ "learning_rate": 9.895847136535648e-06,
25208
+ "loss": 19.9184,
25209
+ "step": 35920
25210
+ },
25211
+ {
25212
+ "epoch": 0.6667640925360185,
25213
+ "grad_norm": 36.28125,
25214
+ "learning_rate": 9.895818140749605e-06,
25215
+ "loss": 19.9199,
25216
+ "step": 35930
25217
+ },
25218
+ {
25219
+ "epoch": 0.6669496656204984,
25220
+ "grad_norm": 38.84375,
25221
+ "learning_rate": 9.895789144963564e-06,
25222
+ "loss": 20.2039,
25223
+ "step": 35940
25224
+ },
25225
+ {
25226
+ "epoch": 0.6671352387049783,
25227
+ "grad_norm": 33.3125,
25228
+ "learning_rate": 9.895760149177522e-06,
25229
+ "loss": 19.3214,
25230
+ "step": 35950
25231
+ },
25232
+ {
25233
+ "epoch": 0.667320811789458,
25234
+ "grad_norm": 36.34375,
25235
+ "learning_rate": 9.895731153391477e-06,
25236
+ "loss": 19.4983,
25237
+ "step": 35960
25238
+ },
25239
+ {
25240
+ "epoch": 0.6675063848739379,
25241
+ "grad_norm": 32.9375,
25242
+ "learning_rate": 9.895702157605436e-06,
25243
+ "loss": 19.6202,
25244
+ "step": 35970
25245
+ },
25246
+ {
25247
+ "epoch": 0.6676919579584177,
25248
+ "grad_norm": 37.0625,
25249
+ "learning_rate": 9.895673161819394e-06,
25250
+ "loss": 19.7313,
25251
+ "step": 35980
25252
+ },
25253
+ {
25254
+ "epoch": 0.6678775310428975,
25255
+ "grad_norm": 36.90625,
25256
+ "learning_rate": 9.895644166033351e-06,
25257
+ "loss": 19.6996,
25258
+ "step": 35990
25259
+ },
25260
+ {
25261
+ "epoch": 0.6680631041273773,
25262
+ "grad_norm": 34.625,
25263
+ "learning_rate": 9.895615170247309e-06,
25264
+ "loss": 19.2428,
25265
+ "step": 36000
25266
+ },
25267
+ {
25268
+ "epoch": 0.6682486772118572,
25269
+ "grad_norm": 35.0,
25270
+ "learning_rate": 9.895586174461268e-06,
25271
+ "loss": 20.2196,
25272
+ "step": 36010
25273
+ },
25274
+ {
25275
+ "epoch": 0.6684342502963371,
25276
+ "grad_norm": 35.0625,
25277
+ "learning_rate": 9.895557178675223e-06,
25278
+ "loss": 19.687,
25279
+ "step": 36020
25280
+ },
25281
+ {
25282
+ "epoch": 0.6686198233808168,
25283
+ "grad_norm": 35.53125,
25284
+ "learning_rate": 9.89552818288918e-06,
25285
+ "loss": 19.5315,
25286
+ "step": 36030
25287
+ },
25288
+ {
25289
+ "epoch": 0.6688053964652967,
25290
+ "grad_norm": 34.5625,
25291
+ "learning_rate": 9.89549918710314e-06,
25292
+ "loss": 19.8379,
25293
+ "step": 36040
25294
+ },
25295
+ {
25296
+ "epoch": 0.6689909695497765,
25297
+ "grad_norm": 33.53125,
25298
+ "learning_rate": 9.895470191317097e-06,
25299
+ "loss": 19.9126,
25300
+ "step": 36050
25301
+ },
25302
+ {
25303
+ "epoch": 0.6691765426342563,
25304
+ "grad_norm": 36.5,
25305
+ "learning_rate": 9.895441195531055e-06,
25306
+ "loss": 19.7775,
25307
+ "step": 36060
25308
+ },
25309
+ {
25310
+ "epoch": 0.6693621157187362,
25311
+ "grad_norm": 35.375,
25312
+ "learning_rate": 9.895412199745012e-06,
25313
+ "loss": 19.745,
25314
+ "step": 36070
25315
+ },
25316
+ {
25317
+ "epoch": 0.669547688803216,
25318
+ "grad_norm": 35.375,
25319
+ "learning_rate": 9.89538320395897e-06,
25320
+ "loss": 19.7045,
25321
+ "step": 36080
25322
+ },
25323
+ {
25324
+ "epoch": 0.6697332618876958,
25325
+ "grad_norm": 34.6875,
25326
+ "learning_rate": 9.895354208172927e-06,
25327
+ "loss": 18.9317,
25328
+ "step": 36090
25329
+ },
25330
+ {
25331
+ "epoch": 0.6699188349721756,
25332
+ "grad_norm": 35.59375,
25333
+ "learning_rate": 9.895325212386884e-06,
25334
+ "loss": 19.8415,
25335
+ "step": 36100
25336
+ },
25337
+ {
25338
+ "epoch": 0.6701044080566555,
25339
+ "grad_norm": 34.71875,
25340
+ "learning_rate": 9.895296216600843e-06,
25341
+ "loss": 19.7913,
25342
+ "step": 36110
25343
+ },
25344
+ {
25345
+ "epoch": 0.6702899811411352,
25346
+ "grad_norm": 35.59375,
25347
+ "learning_rate": 9.895267220814799e-06,
25348
+ "loss": 19.5304,
25349
+ "step": 36120
25350
+ },
25351
+ {
25352
+ "epoch": 0.6704755542256151,
25353
+ "grad_norm": 34.96875,
25354
+ "learning_rate": 9.895238225028757e-06,
25355
+ "loss": 19.4498,
25356
+ "step": 36130
25357
+ },
25358
+ {
25359
+ "epoch": 0.670661127310095,
25360
+ "grad_norm": 35.25,
25361
+ "learning_rate": 9.895209229242716e-06,
25362
+ "loss": 19.6703,
25363
+ "step": 36140
25364
+ },
25365
+ {
25366
+ "epoch": 0.6708467003945747,
25367
+ "grad_norm": 34.21875,
25368
+ "learning_rate": 9.895180233456673e-06,
25369
+ "loss": 19.4621,
25370
+ "step": 36150
25371
+ },
25372
+ {
25373
+ "epoch": 0.6710322734790546,
25374
+ "grad_norm": 34.625,
25375
+ "learning_rate": 9.89515123767063e-06,
25376
+ "loss": 19.6551,
25377
+ "step": 36160
25378
+ },
25379
+ {
25380
+ "epoch": 0.6712178465635344,
25381
+ "grad_norm": 33.625,
25382
+ "learning_rate": 9.895122241884588e-06,
25383
+ "loss": 19.5649,
25384
+ "step": 36170
25385
+ },
25386
+ {
25387
+ "epoch": 0.6714034196480142,
25388
+ "grad_norm": 37.3125,
25389
+ "learning_rate": 9.895093246098545e-06,
25390
+ "loss": 19.501,
25391
+ "step": 36180
25392
+ },
25393
+ {
25394
+ "epoch": 0.6715889927324941,
25395
+ "grad_norm": 33.90625,
25396
+ "learning_rate": 9.895064250312503e-06,
25397
+ "loss": 19.7173,
25398
+ "step": 36190
25399
+ },
25400
+ {
25401
+ "epoch": 0.6717745658169739,
25402
+ "grad_norm": 37.9375,
25403
+ "learning_rate": 9.89503525452646e-06,
25404
+ "loss": 19.8307,
25405
+ "step": 36200
25406
+ },
25407
+ {
25408
+ "epoch": 0.6719601389014538,
25409
+ "grad_norm": 37.375,
25410
+ "learning_rate": 9.895006258740418e-06,
25411
+ "loss": 19.3379,
25412
+ "step": 36210
25413
+ },
25414
+ {
25415
+ "epoch": 0.6721457119859335,
25416
+ "grad_norm": 35.4375,
25417
+ "learning_rate": 9.894977262954377e-06,
25418
+ "loss": 19.5971,
25419
+ "step": 36220
25420
+ },
25421
+ {
25422
+ "epoch": 0.6723312850704134,
25423
+ "grad_norm": 35.78125,
25424
+ "learning_rate": 9.894948267168332e-06,
25425
+ "loss": 19.6289,
25426
+ "step": 36230
25427
+ },
25428
+ {
25429
+ "epoch": 0.6725168581548933,
25430
+ "grad_norm": 34.59375,
25431
+ "learning_rate": 9.894919271382291e-06,
25432
+ "loss": 19.5154,
25433
+ "step": 36240
25434
+ },
25435
+ {
25436
+ "epoch": 0.672702431239373,
25437
+ "grad_norm": 37.40625,
25438
+ "learning_rate": 9.894890275596249e-06,
25439
+ "loss": 19.7842,
25440
+ "step": 36250
25441
+ },
25442
+ {
25443
+ "epoch": 0.6728880043238529,
25444
+ "grad_norm": 35.71875,
25445
+ "learning_rate": 9.894861279810206e-06,
25446
+ "loss": 19.624,
25447
+ "step": 36260
25448
+ },
25449
+ {
25450
+ "epoch": 0.6730735774083327,
25451
+ "grad_norm": 36.0625,
25452
+ "learning_rate": 9.894832284024164e-06,
25453
+ "loss": 19.7039,
25454
+ "step": 36270
25455
+ },
25456
+ {
25457
+ "epoch": 0.6732591504928125,
25458
+ "grad_norm": 35.125,
25459
+ "learning_rate": 9.894803288238121e-06,
25460
+ "loss": 19.9208,
25461
+ "step": 36280
25462
+ },
25463
+ {
25464
+ "epoch": 0.6734447235772923,
25465
+ "grad_norm": 35.34375,
25466
+ "learning_rate": 9.894774292452078e-06,
25467
+ "loss": 19.3393,
25468
+ "step": 36290
25469
+ },
25470
+ {
25471
+ "epoch": 0.6736302966617722,
25472
+ "grad_norm": 35.53125,
25473
+ "learning_rate": 9.894745296666036e-06,
25474
+ "loss": 19.7506,
25475
+ "step": 36300
25476
+ },
25477
+ {
25478
+ "epoch": 0.673815869746252,
25479
+ "grad_norm": 37.28125,
25480
+ "learning_rate": 9.894716300879993e-06,
25481
+ "loss": 19.8507,
25482
+ "step": 36310
25483
+ },
25484
+ {
25485
+ "epoch": 0.6740014428307318,
25486
+ "grad_norm": 35.78125,
25487
+ "learning_rate": 9.894687305093952e-06,
25488
+ "loss": 19.6936,
25489
+ "step": 36320
25490
+ },
25491
+ {
25492
+ "epoch": 0.6741870159152117,
25493
+ "grad_norm": 37.65625,
25494
+ "learning_rate": 9.89465830930791e-06,
25495
+ "loss": 19.689,
25496
+ "step": 36330
25497
+ },
25498
+ {
25499
+ "epoch": 0.6743725889996914,
25500
+ "grad_norm": 37.0625,
25501
+ "learning_rate": 9.894629313521865e-06,
25502
+ "loss": 19.8617,
25503
+ "step": 36340
25504
+ },
25505
+ {
25506
+ "epoch": 0.6745581620841713,
25507
+ "grad_norm": 38.6875,
25508
+ "learning_rate": 9.894600317735825e-06,
25509
+ "loss": 19.7849,
25510
+ "step": 36350
25511
+ },
25512
+ {
25513
+ "epoch": 0.6747437351686512,
25514
+ "grad_norm": 36.71875,
25515
+ "learning_rate": 9.894571321949782e-06,
25516
+ "loss": 19.5811,
25517
+ "step": 36360
25518
+ },
25519
+ {
25520
+ "epoch": 0.6749293082531309,
25521
+ "grad_norm": 35.625,
25522
+ "learning_rate": 9.89454232616374e-06,
25523
+ "loss": 19.6559,
25524
+ "step": 36370
25525
+ },
25526
+ {
25527
+ "epoch": 0.6751148813376108,
25528
+ "grad_norm": 36.90625,
25529
+ "learning_rate": 9.894513330377697e-06,
25530
+ "loss": 19.5922,
25531
+ "step": 36380
25532
+ },
25533
+ {
25534
+ "epoch": 0.6753004544220906,
25535
+ "grad_norm": 37.0,
25536
+ "learning_rate": 9.894484334591654e-06,
25537
+ "loss": 19.6276,
25538
+ "step": 36390
25539
+ },
25540
+ {
25541
+ "epoch": 0.6754860275065705,
25542
+ "grad_norm": 35.84375,
25543
+ "learning_rate": 9.894455338805612e-06,
25544
+ "loss": 19.9161,
25545
+ "step": 36400
25546
+ },
25547
+ {
25548
+ "epoch": 0.6756716005910502,
25549
+ "grad_norm": 36.6875,
25550
+ "learning_rate": 9.894426343019569e-06,
25551
+ "loss": 19.9491,
25552
+ "step": 36410
25553
+ },
25554
+ {
25555
+ "epoch": 0.6758571736755301,
25556
+ "grad_norm": 35.03125,
25557
+ "learning_rate": 9.894397347233528e-06,
25558
+ "loss": 19.4294,
25559
+ "step": 36420
25560
+ },
25561
+ {
25562
+ "epoch": 0.67604274676001,
25563
+ "grad_norm": 35.25,
25564
+ "learning_rate": 9.894368351447486e-06,
25565
+ "loss": 19.8872,
25566
+ "step": 36430
25567
+ },
25568
+ {
25569
+ "epoch": 0.6762283198444897,
25570
+ "grad_norm": 35.84375,
25571
+ "learning_rate": 9.894339355661441e-06,
25572
+ "loss": 19.9173,
25573
+ "step": 36440
25574
+ },
25575
+ {
25576
+ "epoch": 0.6764138929289696,
25577
+ "grad_norm": 36.59375,
25578
+ "learning_rate": 9.8943103598754e-06,
25579
+ "loss": 19.9363,
25580
+ "step": 36450
25581
+ },
25582
+ {
25583
+ "epoch": 0.6765994660134494,
25584
+ "grad_norm": 36.03125,
25585
+ "learning_rate": 9.894281364089358e-06,
25586
+ "loss": 20.222,
25587
+ "step": 36460
25588
+ },
25589
+ {
25590
+ "epoch": 0.6767850390979292,
25591
+ "grad_norm": 36.09375,
25592
+ "learning_rate": 9.894252368303315e-06,
25593
+ "loss": 19.7718,
25594
+ "step": 36470
25595
+ },
25596
+ {
25597
+ "epoch": 0.6769706121824091,
25598
+ "grad_norm": 36.5,
25599
+ "learning_rate": 9.894223372517273e-06,
25600
+ "loss": 19.5082,
25601
+ "step": 36480
25602
+ },
25603
+ {
25604
+ "epoch": 0.6771561852668889,
25605
+ "grad_norm": 33.59375,
25606
+ "learning_rate": 9.894194376731232e-06,
25607
+ "loss": 19.2585,
25608
+ "step": 36490
25609
+ },
25610
+ {
25611
+ "epoch": 0.6773417583513687,
25612
+ "grad_norm": 34.875,
25613
+ "learning_rate": 9.894165380945187e-06,
25614
+ "loss": 19.7396,
25615
+ "step": 36500
25616
+ },
25617
+ {
25618
+ "epoch": 0.6775273314358485,
25619
+ "grad_norm": 36.6875,
25620
+ "learning_rate": 9.894136385159145e-06,
25621
+ "loss": 20.1396,
25622
+ "step": 36510
25623
+ },
25624
+ {
25625
+ "epoch": 0.6777129045203284,
25626
+ "grad_norm": 34.9375,
25627
+ "learning_rate": 9.894107389373104e-06,
25628
+ "loss": 19.3631,
25629
+ "step": 36520
25630
+ },
25631
+ {
25632
+ "epoch": 0.6778984776048081,
25633
+ "grad_norm": 36.0625,
25634
+ "learning_rate": 9.894078393587061e-06,
25635
+ "loss": 19.5291,
25636
+ "step": 36530
25637
+ },
25638
+ {
25639
+ "epoch": 0.678084050689288,
25640
+ "grad_norm": 35.625,
25641
+ "learning_rate": 9.894049397801019e-06,
25642
+ "loss": 19.3359,
25643
+ "step": 36540
25644
+ },
25645
+ {
25646
+ "epoch": 0.6782696237737679,
25647
+ "grad_norm": 34.03125,
25648
+ "learning_rate": 9.894020402014976e-06,
25649
+ "loss": 19.4528,
25650
+ "step": 36550
25651
+ },
25652
+ {
25653
+ "epoch": 0.6784551968582477,
25654
+ "grad_norm": 35.0,
25655
+ "learning_rate": 9.893991406228934e-06,
25656
+ "loss": 19.4136,
25657
+ "step": 36560
25658
+ },
25659
+ {
25660
+ "epoch": 0.6786407699427275,
25661
+ "grad_norm": 35.125,
25662
+ "learning_rate": 9.893962410442891e-06,
25663
+ "loss": 19.4879,
25664
+ "step": 36570
25665
+ },
25666
+ {
25667
+ "epoch": 0.6788263430272073,
25668
+ "grad_norm": 35.4375,
25669
+ "learning_rate": 9.893933414656848e-06,
25670
+ "loss": 19.7995,
25671
+ "step": 36580
25672
+ },
25673
+ {
25674
+ "epoch": 0.6790119161116872,
25675
+ "grad_norm": 33.875,
25676
+ "learning_rate": 9.893904418870807e-06,
25677
+ "loss": 19.754,
25678
+ "step": 36590
25679
+ },
25680
+ {
25681
+ "epoch": 0.679197489196167,
25682
+ "grad_norm": 37.03125,
25683
+ "learning_rate": 9.893875423084765e-06,
25684
+ "loss": 20.028,
25685
+ "step": 36600
25686
+ },
25687
+ {
25688
+ "epoch": 0.6793830622806468,
25689
+ "grad_norm": 35.46875,
25690
+ "learning_rate": 9.89384642729872e-06,
25691
+ "loss": 19.4872,
25692
+ "step": 36610
25693
+ },
25694
+ {
25695
+ "epoch": 0.6795686353651267,
25696
+ "grad_norm": 36.8125,
25697
+ "learning_rate": 9.89381743151268e-06,
25698
+ "loss": 19.7629,
25699
+ "step": 36620
25700
+ },
25701
+ {
25702
+ "epoch": 0.6797542084496064,
25703
+ "grad_norm": 32.21875,
25704
+ "learning_rate": 9.893788435726637e-06,
25705
+ "loss": 19.2777,
25706
+ "step": 36630
25707
+ },
25708
+ {
25709
+ "epoch": 0.6799397815340863,
25710
+ "grad_norm": 35.375,
25711
+ "learning_rate": 9.893759439940594e-06,
25712
+ "loss": 19.8635,
25713
+ "step": 36640
25714
+ },
25715
+ {
25716
+ "epoch": 0.6801253546185662,
25717
+ "grad_norm": 37.40625,
25718
+ "learning_rate": 9.893730444154552e-06,
25719
+ "loss": 19.7788,
25720
+ "step": 36650
25721
+ },
25722
+ {
25723
+ "epoch": 0.6803109277030459,
25724
+ "grad_norm": 35.375,
25725
+ "learning_rate": 9.89370144836851e-06,
25726
+ "loss": 19.6674,
25727
+ "step": 36660
25728
+ },
25729
+ {
25730
+ "epoch": 0.6804965007875258,
25731
+ "grad_norm": 35.6875,
25732
+ "learning_rate": 9.893672452582467e-06,
25733
+ "loss": 19.4339,
25734
+ "step": 36670
25735
+ },
25736
+ {
25737
+ "epoch": 0.6806820738720056,
25738
+ "grad_norm": 34.09375,
25739
+ "learning_rate": 9.893643456796424e-06,
25740
+ "loss": 19.9585,
25741
+ "step": 36680
25742
+ },
25743
+ {
25744
+ "epoch": 0.6808676469564854,
25745
+ "grad_norm": 37.40625,
25746
+ "learning_rate": 9.893614461010382e-06,
25747
+ "loss": 19.705,
25748
+ "step": 36690
25749
+ },
25750
+ {
25751
+ "epoch": 0.6810532200409652,
25752
+ "grad_norm": 34.71875,
25753
+ "learning_rate": 9.89358546522434e-06,
25754
+ "loss": 19.4342,
25755
+ "step": 36700
25756
+ },
25757
+ {
25758
+ "epoch": 0.6812387931254451,
25759
+ "grad_norm": 34.65625,
25760
+ "learning_rate": 9.893556469438296e-06,
25761
+ "loss": 19.4025,
25762
+ "step": 36710
25763
+ },
25764
+ {
25765
+ "epoch": 0.6814243662099249,
25766
+ "grad_norm": 34.09375,
25767
+ "learning_rate": 9.893527473652255e-06,
25768
+ "loss": 19.8077,
25769
+ "step": 36720
25770
+ },
25771
+ {
25772
+ "epoch": 0.6816099392944047,
25773
+ "grad_norm": 34.46875,
25774
+ "learning_rate": 9.893498477866213e-06,
25775
+ "loss": 19.4602,
25776
+ "step": 36730
25777
+ },
25778
+ {
25779
+ "epoch": 0.6817955123788846,
25780
+ "grad_norm": 35.78125,
25781
+ "learning_rate": 9.89346948208017e-06,
25782
+ "loss": 19.6626,
25783
+ "step": 36740
25784
+ },
25785
+ {
25786
+ "epoch": 0.6819810854633644,
25787
+ "grad_norm": 34.3125,
25788
+ "learning_rate": 9.893440486294128e-06,
25789
+ "loss": 19.751,
25790
+ "step": 36750
25791
+ },
25792
+ {
25793
+ "epoch": 0.6821666585478442,
25794
+ "grad_norm": 33.3125,
25795
+ "learning_rate": 9.893411490508085e-06,
25796
+ "loss": 19.3236,
25797
+ "step": 36760
25798
+ },
25799
+ {
25800
+ "epoch": 0.6823522316323241,
25801
+ "grad_norm": 35.15625,
25802
+ "learning_rate": 9.893382494722042e-06,
25803
+ "loss": 19.6032,
25804
+ "step": 36770
25805
+ },
25806
+ {
25807
+ "epoch": 0.6825378047168039,
25808
+ "grad_norm": 36.8125,
25809
+ "learning_rate": 9.893353498936e-06,
25810
+ "loss": 19.9787,
25811
+ "step": 36780
25812
+ },
25813
+ {
25814
+ "epoch": 0.6827233778012837,
25815
+ "grad_norm": 37.0,
25816
+ "learning_rate": 9.893324503149957e-06,
25817
+ "loss": 20.0506,
25818
+ "step": 36790
25819
+ },
25820
+ {
25821
+ "epoch": 0.6829089508857635,
25822
+ "grad_norm": 35.6875,
25823
+ "learning_rate": 9.893295507363916e-06,
25824
+ "loss": 19.2396,
25825
+ "step": 36800
25826
+ },
25827
+ {
25828
+ "epoch": 0.6830945239702434,
25829
+ "grad_norm": 33.75,
25830
+ "learning_rate": 9.893266511577874e-06,
25831
+ "loss": 19.7276,
25832
+ "step": 36810
25833
+ },
25834
+ {
25835
+ "epoch": 0.6832800970547231,
25836
+ "grad_norm": 35.5,
25837
+ "learning_rate": 9.89323751579183e-06,
25838
+ "loss": 19.6616,
25839
+ "step": 36820
25840
+ },
25841
+ {
25842
+ "epoch": 0.683465670139203,
25843
+ "grad_norm": 35.625,
25844
+ "learning_rate": 9.893208520005789e-06,
25845
+ "loss": 19.53,
25846
+ "step": 36830
25847
+ },
25848
+ {
25849
+ "epoch": 0.6836512432236829,
25850
+ "grad_norm": 34.8125,
25851
+ "learning_rate": 9.893179524219746e-06,
25852
+ "loss": 19.5154,
25853
+ "step": 36840
25854
+ },
25855
+ {
25856
+ "epoch": 0.6838368163081626,
25857
+ "grad_norm": 34.53125,
25858
+ "learning_rate": 9.893150528433703e-06,
25859
+ "loss": 19.7291,
25860
+ "step": 36850
25861
+ },
25862
+ {
25863
+ "epoch": 0.6840223893926425,
25864
+ "grad_norm": 35.96875,
25865
+ "learning_rate": 9.89312153264766e-06,
25866
+ "loss": 19.5387,
25867
+ "step": 36860
25868
+ },
25869
+ {
25870
+ "epoch": 0.6842079624771223,
25871
+ "grad_norm": 35.53125,
25872
+ "learning_rate": 9.893092536861618e-06,
25873
+ "loss": 19.7129,
25874
+ "step": 36870
25875
+ },
25876
+ {
25877
+ "epoch": 0.6843935355616021,
25878
+ "grad_norm": 34.71875,
25879
+ "learning_rate": 9.893063541075576e-06,
25880
+ "loss": 19.8325,
25881
+ "step": 36880
25882
+ },
25883
+ {
25884
+ "epoch": 0.684579108646082,
25885
+ "grad_norm": 34.625,
25886
+ "learning_rate": 9.893034545289533e-06,
25887
+ "loss": 19.6929,
25888
+ "step": 36890
25889
+ },
25890
+ {
25891
+ "epoch": 0.6847646817305618,
25892
+ "grad_norm": 37.28125,
25893
+ "learning_rate": 9.893005549503492e-06,
25894
+ "loss": 19.5159,
25895
+ "step": 36900
25896
+ },
25897
+ {
25898
+ "epoch": 0.6849502548150417,
25899
+ "grad_norm": 34.28125,
25900
+ "learning_rate": 9.89297655371745e-06,
25901
+ "loss": 19.5611,
25902
+ "step": 36910
25903
+ },
25904
+ {
25905
+ "epoch": 0.6851358278995214,
25906
+ "grad_norm": 34.59375,
25907
+ "learning_rate": 9.892947557931407e-06,
25908
+ "loss": 19.5881,
25909
+ "step": 36920
25910
+ },
25911
+ {
25912
+ "epoch": 0.6853214009840013,
25913
+ "grad_norm": 35.0625,
25914
+ "learning_rate": 9.892918562145364e-06,
25915
+ "loss": 19.5037,
25916
+ "step": 36930
25917
+ },
25918
+ {
25919
+ "epoch": 0.6855069740684812,
25920
+ "grad_norm": 34.53125,
25921
+ "learning_rate": 9.892889566359322e-06,
25922
+ "loss": 19.7405,
25923
+ "step": 36940
25924
+ },
25925
+ {
25926
+ "epoch": 0.6856925471529609,
25927
+ "grad_norm": 34.96875,
25928
+ "learning_rate": 9.89286057057328e-06,
25929
+ "loss": 19.6976,
25930
+ "step": 36950
25931
+ },
25932
+ {
25933
+ "epoch": 0.6858781202374408,
25934
+ "grad_norm": 33.96875,
25935
+ "learning_rate": 9.892831574787237e-06,
25936
+ "loss": 19.5494,
25937
+ "step": 36960
25938
+ },
25939
+ {
25940
+ "epoch": 0.6860636933219206,
25941
+ "grad_norm": 36.40625,
25942
+ "learning_rate": 9.892802579001196e-06,
25943
+ "loss": 19.4275,
25944
+ "step": 36970
25945
+ },
25946
+ {
25947
+ "epoch": 0.6862492664064004,
25948
+ "grad_norm": 36.03125,
25949
+ "learning_rate": 9.892773583215151e-06,
25950
+ "loss": 19.6989,
25951
+ "step": 36980
25952
+ },
25953
+ {
25954
+ "epoch": 0.6864348394908802,
25955
+ "grad_norm": 37.59375,
25956
+ "learning_rate": 9.892744587429109e-06,
25957
+ "loss": 19.6063,
25958
+ "step": 36990
25959
+ },
25960
+ {
25961
+ "epoch": 0.6866204125753601,
25962
+ "grad_norm": 38.40625,
25963
+ "learning_rate": 9.892715591643068e-06,
25964
+ "loss": 19.5551,
25965
+ "step": 37000
25966
+ },
25967
+ {
25968
+ "epoch": 0.6868059856598399,
25969
+ "grad_norm": 36.21875,
25970
+ "learning_rate": 9.892686595857025e-06,
25971
+ "loss": 19.2527,
25972
+ "step": 37010
25973
+ },
25974
+ {
25975
+ "epoch": 0.6869915587443197,
25976
+ "grad_norm": 36.15625,
25977
+ "learning_rate": 9.892657600070983e-06,
25978
+ "loss": 19.5331,
25979
+ "step": 37020
25980
+ },
25981
+ {
25982
+ "epoch": 0.6871771318287996,
25983
+ "grad_norm": 35.5,
25984
+ "learning_rate": 9.89262860428494e-06,
25985
+ "loss": 19.0556,
25986
+ "step": 37030
25987
+ },
25988
+ {
25989
+ "epoch": 0.6873627049132793,
25990
+ "grad_norm": 34.125,
25991
+ "learning_rate": 9.892599608498898e-06,
25992
+ "loss": 19.6261,
25993
+ "step": 37040
25994
+ },
25995
+ {
25996
+ "epoch": 0.6875482779977592,
25997
+ "grad_norm": 37.3125,
25998
+ "learning_rate": 9.892570612712855e-06,
25999
+ "loss": 19.2994,
26000
+ "step": 37050
26001
+ },
26002
+ {
26003
+ "epoch": 0.6877338510822391,
26004
+ "grad_norm": 37.5625,
26005
+ "learning_rate": 9.892541616926812e-06,
26006
+ "loss": 19.6494,
26007
+ "step": 37060
26008
+ },
26009
+ {
26010
+ "epoch": 0.6879194241667188,
26011
+ "grad_norm": 34.96875,
26012
+ "learning_rate": 9.892512621140771e-06,
26013
+ "loss": 19.6874,
26014
+ "step": 37070
26015
+ },
26016
+ {
26017
+ "epoch": 0.6881049972511987,
26018
+ "grad_norm": 35.125,
26019
+ "learning_rate": 9.892483625354729e-06,
26020
+ "loss": 19.6228,
26021
+ "step": 37080
26022
+ },
26023
+ {
26024
+ "epoch": 0.6882905703356785,
26025
+ "grad_norm": 36.875,
26026
+ "learning_rate": 9.892454629568685e-06,
26027
+ "loss": 19.251,
26028
+ "step": 37090
26029
+ },
26030
+ {
26031
+ "epoch": 0.6884761434201584,
26032
+ "grad_norm": 34.90625,
26033
+ "learning_rate": 9.892425633782644e-06,
26034
+ "loss": 19.8272,
26035
+ "step": 37100
26036
+ },
26037
+ {
26038
+ "epoch": 0.6886617165046381,
26039
+ "grad_norm": 37.8125,
26040
+ "learning_rate": 9.892396637996601e-06,
26041
+ "loss": 19.4844,
26042
+ "step": 37110
26043
+ },
26044
+ {
26045
+ "epoch": 0.688847289589118,
26046
+ "grad_norm": 33.09375,
26047
+ "learning_rate": 9.892367642210558e-06,
26048
+ "loss": 19.3807,
26049
+ "step": 37120
26050
+ },
26051
+ {
26052
+ "epoch": 0.6890328626735979,
26053
+ "grad_norm": 32.59375,
26054
+ "learning_rate": 9.892338646424516e-06,
26055
+ "loss": 19.1829,
26056
+ "step": 37130
26057
+ },
26058
+ {
26059
+ "epoch": 0.6892184357580776,
26060
+ "grad_norm": 34.65625,
26061
+ "learning_rate": 9.892309650638473e-06,
26062
+ "loss": 19.678,
26063
+ "step": 37140
26064
+ },
26065
+ {
26066
+ "epoch": 0.6894040088425575,
26067
+ "grad_norm": 34.34375,
26068
+ "learning_rate": 9.89228065485243e-06,
26069
+ "loss": 20.2141,
26070
+ "step": 37150
26071
+ },
26072
+ {
26073
+ "epoch": 0.6895895819270373,
26074
+ "grad_norm": 35.125,
26075
+ "learning_rate": 9.892251659066388e-06,
26076
+ "loss": 19.9404,
26077
+ "step": 37160
26078
+ },
26079
+ {
26080
+ "epoch": 0.6897751550115171,
26081
+ "grad_norm": 34.71875,
26082
+ "learning_rate": 9.892222663280347e-06,
26083
+ "loss": 19.518,
26084
+ "step": 37170
26085
+ },
26086
+ {
26087
+ "epoch": 0.689960728095997,
26088
+ "grad_norm": 35.0625,
26089
+ "learning_rate": 9.892193667494305e-06,
26090
+ "loss": 19.57,
26091
+ "step": 37180
26092
+ },
26093
+ {
26094
+ "epoch": 0.6901463011804768,
26095
+ "grad_norm": 35.46875,
26096
+ "learning_rate": 9.892164671708262e-06,
26097
+ "loss": 19.5693,
26098
+ "step": 37190
26099
+ },
26100
+ {
26101
+ "epoch": 0.6903318742649566,
26102
+ "grad_norm": 35.59375,
26103
+ "learning_rate": 9.89213567592222e-06,
26104
+ "loss": 19.3073,
26105
+ "step": 37200
26106
+ },
26107
+ {
26108
+ "epoch": 0.6905174473494364,
26109
+ "grad_norm": 35.8125,
26110
+ "learning_rate": 9.892106680136177e-06,
26111
+ "loss": 19.3111,
26112
+ "step": 37210
26113
+ },
26114
+ {
26115
+ "epoch": 0.6907030204339163,
26116
+ "grad_norm": 34.46875,
26117
+ "learning_rate": 9.892077684350134e-06,
26118
+ "loss": 19.6284,
26119
+ "step": 37220
26120
+ },
26121
+ {
26122
+ "epoch": 0.690888593518396,
26123
+ "grad_norm": 35.1875,
26124
+ "learning_rate": 9.892048688564092e-06,
26125
+ "loss": 19.6832,
26126
+ "step": 37230
26127
+ },
26128
+ {
26129
+ "epoch": 0.6910741666028759,
26130
+ "grad_norm": 37.375,
26131
+ "learning_rate": 9.892019692778049e-06,
26132
+ "loss": 19.5748,
26133
+ "step": 37240
26134
+ },
26135
+ {
26136
+ "epoch": 0.6912597396873558,
26137
+ "grad_norm": 35.28125,
26138
+ "learning_rate": 9.891990696992006e-06,
26139
+ "loss": 19.7095,
26140
+ "step": 37250
26141
+ },
26142
+ {
26143
+ "epoch": 0.6914453127718355,
26144
+ "grad_norm": 35.15625,
26145
+ "learning_rate": 9.891961701205964e-06,
26146
+ "loss": 19.378,
26147
+ "step": 37260
26148
+ },
26149
+ {
26150
+ "epoch": 0.6916308858563154,
26151
+ "grad_norm": 34.625,
26152
+ "learning_rate": 9.891932705419921e-06,
26153
+ "loss": 19.7614,
26154
+ "step": 37270
26155
+ },
26156
+ {
26157
+ "epoch": 0.6918164589407952,
26158
+ "grad_norm": 37.125,
26159
+ "learning_rate": 9.89190370963388e-06,
26160
+ "loss": 19.5859,
26161
+ "step": 37280
26162
+ },
26163
+ {
26164
+ "epoch": 0.6920020320252751,
26165
+ "grad_norm": 35.71875,
26166
+ "learning_rate": 9.891874713847838e-06,
26167
+ "loss": 19.326,
26168
+ "step": 37290
26169
+ },
26170
+ {
26171
+ "epoch": 0.6921876051097549,
26172
+ "grad_norm": 35.03125,
26173
+ "learning_rate": 9.891845718061795e-06,
26174
+ "loss": 19.6066,
26175
+ "step": 37300
26176
+ },
26177
+ {
26178
+ "epoch": 0.6923731781942347,
26179
+ "grad_norm": 38.125,
26180
+ "learning_rate": 9.891816722275753e-06,
26181
+ "loss": 19.2031,
26182
+ "step": 37310
26183
+ },
26184
+ {
26185
+ "epoch": 0.6925587512787146,
26186
+ "grad_norm": 34.5,
26187
+ "learning_rate": 9.89178772648971e-06,
26188
+ "loss": 19.5362,
26189
+ "step": 37320
26190
+ },
26191
+ {
26192
+ "epoch": 0.6927443243631943,
26193
+ "grad_norm": 37.3125,
26194
+ "learning_rate": 9.891758730703667e-06,
26195
+ "loss": 19.9289,
26196
+ "step": 37330
26197
+ },
26198
+ {
26199
+ "epoch": 0.6929298974476742,
26200
+ "grad_norm": 35.65625,
26201
+ "learning_rate": 9.891729734917625e-06,
26202
+ "loss": 19.7964,
26203
+ "step": 37340
26204
+ },
26205
+ {
26206
+ "epoch": 0.693115470532154,
26207
+ "grad_norm": 36.75,
26208
+ "learning_rate": 9.891700739131584e-06,
26209
+ "loss": 19.2571,
26210
+ "step": 37350
26211
+ },
26212
+ {
26213
+ "epoch": 0.6933010436166338,
26214
+ "grad_norm": 35.1875,
26215
+ "learning_rate": 9.89167174334554e-06,
26216
+ "loss": 19.4191,
26217
+ "step": 37360
26218
+ },
26219
+ {
26220
+ "epoch": 0.6934866167011137,
26221
+ "grad_norm": 34.5625,
26222
+ "learning_rate": 9.891642747559497e-06,
26223
+ "loss": 19.1347,
26224
+ "step": 37370
26225
+ },
26226
+ {
26227
+ "epoch": 0.6936721897855935,
26228
+ "grad_norm": 38.59375,
26229
+ "learning_rate": 9.891613751773456e-06,
26230
+ "loss": 19.4612,
26231
+ "step": 37380
26232
+ },
26233
+ {
26234
+ "epoch": 0.6938577628700733,
26235
+ "grad_norm": 35.25,
26236
+ "learning_rate": 9.891584755987414e-06,
26237
+ "loss": 19.9225,
26238
+ "step": 37390
26239
+ },
26240
+ {
26241
+ "epoch": 0.6940433359545531,
26242
+ "grad_norm": 34.5625,
26243
+ "learning_rate": 9.891555760201371e-06,
26244
+ "loss": 19.8018,
26245
+ "step": 37400
26246
+ },
26247
+ {
26248
+ "epoch": 0.694228909039033,
26249
+ "grad_norm": 35.375,
26250
+ "learning_rate": 9.891526764415328e-06,
26251
+ "loss": 19.4325,
26252
+ "step": 37410
26253
+ },
26254
+ {
26255
+ "epoch": 0.6944144821235128,
26256
+ "grad_norm": 33.3125,
26257
+ "learning_rate": 9.891497768629286e-06,
26258
+ "loss": 19.7925,
26259
+ "step": 37420
26260
+ },
26261
+ {
26262
+ "epoch": 0.6946000552079926,
26263
+ "grad_norm": 37.28125,
26264
+ "learning_rate": 9.891468772843243e-06,
26265
+ "loss": 19.9303,
26266
+ "step": 37430
26267
+ },
26268
+ {
26269
+ "epoch": 0.6947856282924725,
26270
+ "grad_norm": 35.46875,
26271
+ "learning_rate": 9.8914397770572e-06,
26272
+ "loss": 19.8091,
26273
+ "step": 37440
26274
+ },
26275
+ {
26276
+ "epoch": 0.6949712013769523,
26277
+ "grad_norm": 37.6875,
26278
+ "learning_rate": 9.89141078127116e-06,
26279
+ "loss": 19.6351,
26280
+ "step": 37450
26281
+ },
26282
+ {
26283
+ "epoch": 0.6951567744614321,
26284
+ "grad_norm": 35.90625,
26285
+ "learning_rate": 9.891381785485115e-06,
26286
+ "loss": 19.8791,
26287
+ "step": 37460
26288
+ },
26289
+ {
26290
+ "epoch": 0.695342347545912,
26291
+ "grad_norm": 36.34375,
26292
+ "learning_rate": 9.891352789699073e-06,
26293
+ "loss": 19.6898,
26294
+ "step": 37470
26295
+ },
26296
+ {
26297
+ "epoch": 0.6955279206303918,
26298
+ "grad_norm": 35.6875,
26299
+ "learning_rate": 9.891323793913032e-06,
26300
+ "loss": 19.9378,
26301
+ "step": 37480
26302
+ },
26303
+ {
26304
+ "epoch": 0.6957134937148716,
26305
+ "grad_norm": 36.59375,
26306
+ "learning_rate": 9.89129479812699e-06,
26307
+ "loss": 19.562,
26308
+ "step": 37490
26309
+ },
26310
+ {
26311
+ "epoch": 0.6958990667993514,
26312
+ "grad_norm": 36.21875,
26313
+ "learning_rate": 9.891265802340947e-06,
26314
+ "loss": 19.947,
26315
+ "step": 37500
26316
+ },
26317
+ {
26318
+ "epoch": 0.6960846398838313,
26319
+ "grad_norm": 35.0625,
26320
+ "learning_rate": 9.891236806554904e-06,
26321
+ "loss": 19.8907,
26322
+ "step": 37510
26323
+ },
26324
+ {
26325
+ "epoch": 0.696270212968311,
26326
+ "grad_norm": 36.71875,
26327
+ "learning_rate": 9.891207810768862e-06,
26328
+ "loss": 20.0342,
26329
+ "step": 37520
26330
+ },
26331
+ {
26332
+ "epoch": 0.6964557860527909,
26333
+ "grad_norm": 35.09375,
26334
+ "learning_rate": 9.891178814982819e-06,
26335
+ "loss": 19.3272,
26336
+ "step": 37530
26337
+ },
26338
+ {
26339
+ "epoch": 0.6966413591372708,
26340
+ "grad_norm": 36.34375,
26341
+ "learning_rate": 9.891149819196776e-06,
26342
+ "loss": 19.3683,
26343
+ "step": 37540
26344
+ },
26345
+ {
26346
+ "epoch": 0.6968269322217505,
26347
+ "grad_norm": 36.25,
26348
+ "learning_rate": 9.891120823410735e-06,
26349
+ "loss": 19.7305,
26350
+ "step": 37550
26351
+ },
26352
+ {
26353
+ "epoch": 0.6970125053062304,
26354
+ "grad_norm": 35.25,
26355
+ "learning_rate": 9.891091827624693e-06,
26356
+ "loss": 19.2455,
26357
+ "step": 37560
26358
+ },
26359
+ {
26360
+ "epoch": 0.6971980783907102,
26361
+ "grad_norm": 37.25,
26362
+ "learning_rate": 9.891062831838649e-06,
26363
+ "loss": 19.4296,
26364
+ "step": 37570
26365
+ },
26366
+ {
26367
+ "epoch": 0.69738365147519,
26368
+ "grad_norm": 35.84375,
26369
+ "learning_rate": 9.891033836052608e-06,
26370
+ "loss": 20.02,
26371
+ "step": 37580
26372
+ },
26373
+ {
26374
+ "epoch": 0.6975692245596699,
26375
+ "grad_norm": 34.3125,
26376
+ "learning_rate": 9.891004840266565e-06,
26377
+ "loss": 19.2752,
26378
+ "step": 37590
26379
+ },
26380
+ {
26381
+ "epoch": 0.6977547976441497,
26382
+ "grad_norm": 36.21875,
26383
+ "learning_rate": 9.890975844480523e-06,
26384
+ "loss": 19.6226,
26385
+ "step": 37600
26386
+ },
26387
+ {
26388
+ "epoch": 0.6979403707286295,
26389
+ "grad_norm": 36.1875,
26390
+ "learning_rate": 9.89094684869448e-06,
26391
+ "loss": 19.3974,
26392
+ "step": 37610
26393
+ },
26394
+ {
26395
+ "epoch": 0.6981259438131093,
26396
+ "grad_norm": 34.3125,
26397
+ "learning_rate": 9.890917852908437e-06,
26398
+ "loss": 19.5617,
26399
+ "step": 37620
26400
+ },
26401
+ {
26402
+ "epoch": 0.6983115168975892,
26403
+ "grad_norm": 34.46875,
26404
+ "learning_rate": 9.890888857122395e-06,
26405
+ "loss": 19.4508,
26406
+ "step": 37630
26407
+ },
26408
+ {
26409
+ "epoch": 0.698497089982069,
26410
+ "grad_norm": 35.4375,
26411
+ "learning_rate": 9.890859861336352e-06,
26412
+ "loss": 19.2594,
26413
+ "step": 37640
26414
+ },
26415
+ {
26416
+ "epoch": 0.6986826630665488,
26417
+ "grad_norm": 36.59375,
26418
+ "learning_rate": 9.890830865550311e-06,
26419
+ "loss": 19.4359,
26420
+ "step": 37650
26421
+ },
26422
+ {
26423
+ "epoch": 0.6988682361510287,
26424
+ "grad_norm": 34.53125,
26425
+ "learning_rate": 9.890801869764269e-06,
26426
+ "loss": 19.662,
26427
+ "step": 37660
26428
+ },
26429
+ {
26430
+ "epoch": 0.6990538092355085,
26431
+ "grad_norm": 34.6875,
26432
+ "learning_rate": 9.890772873978226e-06,
26433
+ "loss": 19.5305,
26434
+ "step": 37670
26435
+ },
26436
+ {
26437
+ "epoch": 0.6992393823199883,
26438
+ "grad_norm": 34.09375,
26439
+ "learning_rate": 9.890743878192183e-06,
26440
+ "loss": 19.688,
26441
+ "step": 37680
26442
+ },
26443
+ {
26444
+ "epoch": 0.6994249554044681,
26445
+ "grad_norm": 35.15625,
26446
+ "learning_rate": 9.890714882406141e-06,
26447
+ "loss": 19.4327,
26448
+ "step": 37690
26449
+ },
26450
+ {
26451
+ "epoch": 0.699610528488948,
26452
+ "grad_norm": 36.375,
26453
+ "learning_rate": 9.890685886620098e-06,
26454
+ "loss": 19.6052,
26455
+ "step": 37700
26456
+ },
26457
+ {
26458
+ "epoch": 0.6997961015734278,
26459
+ "grad_norm": 34.125,
26460
+ "learning_rate": 9.890656890834056e-06,
26461
+ "loss": 19.9092,
26462
+ "step": 37710
26463
+ },
26464
+ {
26465
+ "epoch": 0.6999816746579076,
26466
+ "grad_norm": 35.78125,
26467
+ "learning_rate": 9.890627895048013e-06,
26468
+ "loss": 20.0716,
26469
+ "step": 37720
26470
+ },
26471
+ {
26472
+ "epoch": 0.7001672477423875,
26473
+ "grad_norm": 35.90625,
26474
+ "learning_rate": 9.89059889926197e-06,
26475
+ "loss": 19.438,
26476
+ "step": 37730
26477
+ },
26478
+ {
26479
+ "epoch": 0.7003528208268672,
26480
+ "grad_norm": 35.8125,
26481
+ "learning_rate": 9.890569903475928e-06,
26482
+ "loss": 19.2955,
26483
+ "step": 37740
26484
+ },
26485
+ {
26486
+ "epoch": 0.7005383939113471,
26487
+ "grad_norm": 37.78125,
26488
+ "learning_rate": 9.890540907689887e-06,
26489
+ "loss": 19.7556,
26490
+ "step": 37750
26491
+ },
26492
+ {
26493
+ "epoch": 0.700723966995827,
26494
+ "grad_norm": 34.5625,
26495
+ "learning_rate": 9.890511911903844e-06,
26496
+ "loss": 19.8376,
26497
+ "step": 37760
26498
+ },
26499
+ {
26500
+ "epoch": 0.7009095400803067,
26501
+ "grad_norm": 34.65625,
26502
+ "learning_rate": 9.890482916117802e-06,
26503
+ "loss": 19.174,
26504
+ "step": 37770
26505
+ },
26506
+ {
26507
+ "epoch": 0.7010951131647866,
26508
+ "grad_norm": 36.3125,
26509
+ "learning_rate": 9.89045392033176e-06,
26510
+ "loss": 19.5626,
26511
+ "step": 37780
26512
+ },
26513
+ {
26514
+ "epoch": 0.7012806862492664,
26515
+ "grad_norm": 35.0625,
26516
+ "learning_rate": 9.890424924545717e-06,
26517
+ "loss": 19.6777,
26518
+ "step": 37790
26519
+ },
26520
+ {
26521
+ "epoch": 0.7014662593337462,
26522
+ "grad_norm": 34.28125,
26523
+ "learning_rate": 9.890395928759674e-06,
26524
+ "loss": 19.4296,
26525
+ "step": 37800
26526
+ },
26527
+ {
26528
+ "epoch": 0.701651832418226,
26529
+ "grad_norm": 35.125,
26530
+ "learning_rate": 9.890366932973631e-06,
26531
+ "loss": 19.8895,
26532
+ "step": 37810
26533
+ },
26534
+ {
26535
+ "epoch": 0.7018374055027059,
26536
+ "grad_norm": 35.40625,
26537
+ "learning_rate": 9.890337937187589e-06,
26538
+ "loss": 19.6531,
26539
+ "step": 37820
26540
+ },
26541
+ {
26542
+ "epoch": 0.7020229785871858,
26543
+ "grad_norm": 34.6875,
26544
+ "learning_rate": 9.890308941401548e-06,
26545
+ "loss": 20.0812,
26546
+ "step": 37830
26547
+ },
26548
+ {
26549
+ "epoch": 0.7022085516716655,
26550
+ "grad_norm": 37.1875,
26551
+ "learning_rate": 9.890279945615504e-06,
26552
+ "loss": 19.4826,
26553
+ "step": 37840
26554
+ },
26555
+ {
26556
+ "epoch": 0.7023941247561454,
26557
+ "grad_norm": 35.1875,
26558
+ "learning_rate": 9.890250949829461e-06,
26559
+ "loss": 19.8685,
26560
+ "step": 37850
26561
+ },
26562
+ {
26563
+ "epoch": 0.7025796978406252,
26564
+ "grad_norm": 36.9375,
26565
+ "learning_rate": 9.89022195404342e-06,
26566
+ "loss": 20.043,
26567
+ "step": 37860
26568
+ },
26569
+ {
26570
+ "epoch": 0.702765270925105,
26571
+ "grad_norm": 37.09375,
26572
+ "learning_rate": 9.890192958257378e-06,
26573
+ "loss": 19.6171,
26574
+ "step": 37870
26575
+ },
26576
+ {
26577
+ "epoch": 0.7029508440095849,
26578
+ "grad_norm": 36.03125,
26579
+ "learning_rate": 9.890163962471335e-06,
26580
+ "loss": 19.0865,
26581
+ "step": 37880
26582
+ },
26583
+ {
26584
+ "epoch": 0.7031364170940647,
26585
+ "grad_norm": 35.09375,
26586
+ "learning_rate": 9.890134966685292e-06,
26587
+ "loss": 19.6744,
26588
+ "step": 37890
26589
+ },
26590
+ {
26591
+ "epoch": 0.7033219901785445,
26592
+ "grad_norm": 35.96875,
26593
+ "learning_rate": 9.89010597089925e-06,
26594
+ "loss": 19.748,
26595
+ "step": 37900
26596
+ },
26597
+ {
26598
+ "epoch": 0.7035075632630243,
26599
+ "grad_norm": 36.1875,
26600
+ "learning_rate": 9.890076975113207e-06,
26601
+ "loss": 19.6734,
26602
+ "step": 37910
26603
+ },
26604
+ {
26605
+ "epoch": 0.7036931363475042,
26606
+ "grad_norm": 33.34375,
26607
+ "learning_rate": 9.890047979327165e-06,
26608
+ "loss": 19.5977,
26609
+ "step": 37920
26610
+ },
26611
+ {
26612
+ "epoch": 0.7038787094319839,
26613
+ "grad_norm": 35.65625,
26614
+ "learning_rate": 9.890018983541124e-06,
26615
+ "loss": 19.6002,
26616
+ "step": 37930
26617
+ },
26618
+ {
26619
+ "epoch": 0.7040642825164638,
26620
+ "grad_norm": 36.375,
26621
+ "learning_rate": 9.889989987755081e-06,
26622
+ "loss": 19.5954,
26623
+ "step": 37940
26624
+ },
26625
+ {
26626
+ "epoch": 0.7042498556009437,
26627
+ "grad_norm": 34.375,
26628
+ "learning_rate": 9.889960991969037e-06,
26629
+ "loss": 19.6573,
26630
+ "step": 37950
26631
+ },
26632
+ {
26633
+ "epoch": 0.7044354286854234,
26634
+ "grad_norm": 36.5625,
26635
+ "learning_rate": 9.889931996182996e-06,
26636
+ "loss": 19.284,
26637
+ "step": 37960
26638
+ },
26639
+ {
26640
+ "epoch": 0.7046210017699033,
26641
+ "grad_norm": 34.125,
26642
+ "learning_rate": 9.889903000396953e-06,
26643
+ "loss": 19.5239,
26644
+ "step": 37970
26645
+ },
26646
+ {
26647
+ "epoch": 0.7048065748543831,
26648
+ "grad_norm": 34.09375,
26649
+ "learning_rate": 9.88987400461091e-06,
26650
+ "loss": 19.323,
26651
+ "step": 37980
26652
+ },
26653
+ {
26654
+ "epoch": 0.704992147938863,
26655
+ "grad_norm": 35.65625,
26656
+ "learning_rate": 9.889845008824868e-06,
26657
+ "loss": 18.7075,
26658
+ "step": 37990
26659
+ },
26660
+ {
26661
+ "epoch": 0.7051777210233428,
26662
+ "grad_norm": 35.59375,
26663
+ "learning_rate": 9.889816013038826e-06,
26664
+ "loss": 19.7255,
26665
+ "step": 38000
26666
+ },
26667
+ {
26668
+ "epoch": 0.7053632941078226,
26669
+ "grad_norm": 36.0625,
26670
+ "learning_rate": 9.889787017252783e-06,
26671
+ "loss": 19.4898,
26672
+ "step": 38010
26673
+ },
26674
+ {
26675
+ "epoch": 0.7055488671923025,
26676
+ "grad_norm": 36.0625,
26677
+ "learning_rate": 9.88975802146674e-06,
26678
+ "loss": 19.3605,
26679
+ "step": 38020
26680
+ },
26681
+ {
26682
+ "epoch": 0.7057344402767822,
26683
+ "grad_norm": 33.9375,
26684
+ "learning_rate": 9.8897290256807e-06,
26685
+ "loss": 19.6154,
26686
+ "step": 38030
26687
+ },
26688
+ {
26689
+ "epoch": 0.7059200133612621,
26690
+ "grad_norm": 35.65625,
26691
+ "learning_rate": 9.889700029894657e-06,
26692
+ "loss": 19.6784,
26693
+ "step": 38040
26694
+ },
26695
+ {
26696
+ "epoch": 0.706105586445742,
26697
+ "grad_norm": 35.15625,
26698
+ "learning_rate": 9.889671034108613e-06,
26699
+ "loss": 19.6619,
26700
+ "step": 38050
26701
+ },
26702
+ {
26703
+ "epoch": 0.7062911595302217,
26704
+ "grad_norm": 35.84375,
26705
+ "learning_rate": 9.889642038322572e-06,
26706
+ "loss": 18.8527,
26707
+ "step": 38060
26708
+ },
26709
+ {
26710
+ "epoch": 0.7064767326147016,
26711
+ "grad_norm": 34.5,
26712
+ "learning_rate": 9.889613042536529e-06,
26713
+ "loss": 19.0215,
26714
+ "step": 38070
26715
+ },
26716
+ {
26717
+ "epoch": 0.7066623056991814,
26718
+ "grad_norm": 34.90625,
26719
+ "learning_rate": 9.889584046750487e-06,
26720
+ "loss": 19.3586,
26721
+ "step": 38080
26722
+ },
26723
+ {
26724
+ "epoch": 0.7068478787836612,
26725
+ "grad_norm": 37.1875,
26726
+ "learning_rate": 9.889555050964444e-06,
26727
+ "loss": 19.8383,
26728
+ "step": 38090
26729
+ },
26730
+ {
26731
+ "epoch": 0.707033451868141,
26732
+ "grad_norm": 35.8125,
26733
+ "learning_rate": 9.889526055178403e-06,
26734
+ "loss": 19.9191,
26735
+ "step": 38100
26736
+ },
26737
+ {
26738
+ "epoch": 0.7072190249526209,
26739
+ "grad_norm": 35.875,
26740
+ "learning_rate": 9.889497059392359e-06,
26741
+ "loss": 19.1323,
26742
+ "step": 38110
26743
+ },
26744
+ {
26745
+ "epoch": 0.7074045980371007,
26746
+ "grad_norm": 36.0,
26747
+ "learning_rate": 9.889468063606316e-06,
26748
+ "loss": 20.3555,
26749
+ "step": 38120
26750
+ },
26751
+ {
26752
+ "epoch": 0.7075901711215805,
26753
+ "grad_norm": 36.1875,
26754
+ "learning_rate": 9.889439067820275e-06,
26755
+ "loss": 19.5806,
26756
+ "step": 38130
26757
+ },
26758
+ {
26759
+ "epoch": 0.7077757442060604,
26760
+ "grad_norm": 36.25,
26761
+ "learning_rate": 9.889410072034233e-06,
26762
+ "loss": 19.4524,
26763
+ "step": 38140
26764
+ },
26765
+ {
26766
+ "epoch": 0.7079613172905401,
26767
+ "grad_norm": 34.3125,
26768
+ "learning_rate": 9.88938107624819e-06,
26769
+ "loss": 18.9645,
26770
+ "step": 38150
26771
+ },
26772
+ {
26773
+ "epoch": 0.70814689037502,
26774
+ "grad_norm": 33.875,
26775
+ "learning_rate": 9.889352080462147e-06,
26776
+ "loss": 19.4386,
26777
+ "step": 38160
26778
+ },
26779
+ {
26780
+ "epoch": 0.7083324634594999,
26781
+ "grad_norm": 33.625,
26782
+ "learning_rate": 9.889323084676105e-06,
26783
+ "loss": 19.4483,
26784
+ "step": 38170
26785
+ },
26786
+ {
26787
+ "epoch": 0.7085180365439797,
26788
+ "grad_norm": 37.375,
26789
+ "learning_rate": 9.889294088890062e-06,
26790
+ "loss": 19.0382,
26791
+ "step": 38180
26792
+ },
26793
+ {
26794
+ "epoch": 0.7087036096284595,
26795
+ "grad_norm": 35.90625,
26796
+ "learning_rate": 9.88926509310402e-06,
26797
+ "loss": 19.0689,
26798
+ "step": 38190
26799
+ },
26800
+ {
26801
+ "epoch": 0.7088891827129393,
26802
+ "grad_norm": 37.3125,
26803
+ "learning_rate": 9.889236097317979e-06,
26804
+ "loss": 19.3899,
26805
+ "step": 38200
26806
+ },
26807
+ {
26808
+ "epoch": 0.7090747557974192,
26809
+ "grad_norm": 34.09375,
26810
+ "learning_rate": 9.889207101531935e-06,
26811
+ "loss": 19.3655,
26812
+ "step": 38210
26813
+ },
26814
+ {
26815
+ "epoch": 0.7092603288818989,
26816
+ "grad_norm": 35.875,
26817
+ "learning_rate": 9.889178105745892e-06,
26818
+ "loss": 19.9081,
26819
+ "step": 38220
26820
+ },
26821
+ {
26822
+ "epoch": 0.7094459019663788,
26823
+ "grad_norm": 36.625,
26824
+ "learning_rate": 9.889149109959851e-06,
26825
+ "loss": 19.4929,
26826
+ "step": 38230
26827
+ },
26828
+ {
26829
+ "epoch": 0.7096314750508587,
26830
+ "grad_norm": 34.0,
26831
+ "learning_rate": 9.889120114173808e-06,
26832
+ "loss": 19.5349,
26833
+ "step": 38240
26834
+ },
26835
+ {
26836
+ "epoch": 0.7098170481353384,
26837
+ "grad_norm": 35.75,
26838
+ "learning_rate": 9.889091118387766e-06,
26839
+ "loss": 19.5176,
26840
+ "step": 38250
26841
+ },
26842
+ {
26843
+ "epoch": 0.7100026212198183,
26844
+ "grad_norm": 34.96875,
26845
+ "learning_rate": 9.889062122601723e-06,
26846
+ "loss": 19.3043,
26847
+ "step": 38260
26848
+ },
26849
+ {
26850
+ "epoch": 0.7101881943042981,
26851
+ "grad_norm": 35.34375,
26852
+ "learning_rate": 9.88903312681568e-06,
26853
+ "loss": 20.086,
26854
+ "step": 38270
26855
+ },
26856
+ {
26857
+ "epoch": 0.7103737673887779,
26858
+ "grad_norm": 36.03125,
26859
+ "learning_rate": 9.889004131029638e-06,
26860
+ "loss": 19.7873,
26861
+ "step": 38280
26862
+ },
26863
+ {
26864
+ "epoch": 0.7105593404732578,
26865
+ "grad_norm": 36.09375,
26866
+ "learning_rate": 9.888975135243595e-06,
26867
+ "loss": 19.4605,
26868
+ "step": 38290
26869
+ },
26870
+ {
26871
+ "epoch": 0.7107449135577376,
26872
+ "grad_norm": 35.53125,
26873
+ "learning_rate": 9.888946139457553e-06,
26874
+ "loss": 19.2167,
26875
+ "step": 38300
26876
+ },
26877
+ {
26878
+ "epoch": 0.7109304866422174,
26879
+ "grad_norm": 38.0625,
26880
+ "learning_rate": 9.888917143671512e-06,
26881
+ "loss": 19.7455,
26882
+ "step": 38310
26883
+ },
26884
+ {
26885
+ "epoch": 0.7111160597266972,
26886
+ "grad_norm": 36.90625,
26887
+ "learning_rate": 9.888888147885468e-06,
26888
+ "loss": 19.2411,
26889
+ "step": 38320
26890
+ },
26891
+ {
26892
+ "epoch": 0.7113016328111771,
26893
+ "grad_norm": 35.65625,
26894
+ "learning_rate": 9.888859152099425e-06,
26895
+ "loss": 19.6434,
26896
+ "step": 38330
26897
+ },
26898
+ {
26899
+ "epoch": 0.711487205895657,
26900
+ "grad_norm": 35.5,
26901
+ "learning_rate": 9.888830156313384e-06,
26902
+ "loss": 19.2703,
26903
+ "step": 38340
26904
+ },
26905
+ {
26906
+ "epoch": 0.7116727789801367,
26907
+ "grad_norm": 36.0,
26908
+ "learning_rate": 9.888801160527342e-06,
26909
+ "loss": 19.8718,
26910
+ "step": 38350
26911
+ },
26912
+ {
26913
+ "epoch": 0.7118583520646166,
26914
+ "grad_norm": 35.875,
26915
+ "learning_rate": 9.888772164741299e-06,
26916
+ "loss": 19.3253,
26917
+ "step": 38360
26918
+ },
26919
+ {
26920
+ "epoch": 0.7120439251490964,
26921
+ "grad_norm": 36.5,
26922
+ "learning_rate": 9.888743168955256e-06,
26923
+ "loss": 19.5194,
26924
+ "step": 38370
26925
+ },
26926
+ {
26927
+ "epoch": 0.7122294982335762,
26928
+ "grad_norm": 35.28125,
26929
+ "learning_rate": 9.888714173169214e-06,
26930
+ "loss": 19.5886,
26931
+ "step": 38380
26932
+ },
26933
+ {
26934
+ "epoch": 0.712415071318056,
26935
+ "grad_norm": 35.53125,
26936
+ "learning_rate": 9.888685177383171e-06,
26937
+ "loss": 19.4309,
26938
+ "step": 38390
26939
+ },
26940
+ {
26941
+ "epoch": 0.7126006444025359,
26942
+ "grad_norm": 36.0,
26943
+ "learning_rate": 9.888656181597129e-06,
26944
+ "loss": 19.3463,
26945
+ "step": 38400
26946
+ },
26947
+ {
26948
+ "epoch": 0.7127862174870156,
26949
+ "grad_norm": 36.53125,
26950
+ "learning_rate": 9.888627185811088e-06,
26951
+ "loss": 19.6,
26952
+ "step": 38410
26953
+ },
26954
+ {
26955
+ "epoch": 0.7129717905714955,
26956
+ "grad_norm": 35.5,
26957
+ "learning_rate": 9.888598190025045e-06,
26958
+ "loss": 19.4615,
26959
+ "step": 38420
26960
+ },
26961
+ {
26962
+ "epoch": 0.7131573636559754,
26963
+ "grad_norm": 35.0,
26964
+ "learning_rate": 9.888569194239001e-06,
26965
+ "loss": 19.4513,
26966
+ "step": 38430
26967
+ },
26968
+ {
26969
+ "epoch": 0.7133429367404551,
26970
+ "grad_norm": 37.0625,
26971
+ "learning_rate": 9.88854019845296e-06,
26972
+ "loss": 19.8098,
26973
+ "step": 38440
26974
+ },
26975
+ {
26976
+ "epoch": 0.713528509824935,
26977
+ "grad_norm": 37.25,
26978
+ "learning_rate": 9.888511202666917e-06,
26979
+ "loss": 19.3014,
26980
+ "step": 38450
26981
+ },
26982
+ {
26983
+ "epoch": 0.7137140829094148,
26984
+ "grad_norm": 33.9375,
26985
+ "learning_rate": 9.888482206880875e-06,
26986
+ "loss": 19.7513,
26987
+ "step": 38460
26988
+ },
26989
+ {
26990
+ "epoch": 0.7138996559938946,
26991
+ "grad_norm": 34.625,
26992
+ "learning_rate": 9.888453211094832e-06,
26993
+ "loss": 19.3369,
26994
+ "step": 38470
26995
+ },
26996
+ {
26997
+ "epoch": 0.7140852290783745,
26998
+ "grad_norm": 38.0625,
26999
+ "learning_rate": 9.88842421530879e-06,
27000
+ "loss": 19.3678,
27001
+ "step": 38480
27002
+ },
27003
+ {
27004
+ "epoch": 0.7142708021628543,
27005
+ "grad_norm": 36.375,
27006
+ "learning_rate": 9.888395219522747e-06,
27007
+ "loss": 19.3446,
27008
+ "step": 38490
27009
+ },
27010
+ {
27011
+ "epoch": 0.7144563752473341,
27012
+ "grad_norm": 35.15625,
27013
+ "learning_rate": 9.888366223736704e-06,
27014
+ "loss": 19.1207,
27015
+ "step": 38500
27016
+ },
27017
+ {
27018
+ "epoch": 0.7146419483318139,
27019
+ "grad_norm": 35.28125,
27020
+ "learning_rate": 9.888337227950663e-06,
27021
+ "loss": 19.5949,
27022
+ "step": 38510
27023
+ },
27024
+ {
27025
+ "epoch": 0.7148275214162938,
27026
+ "grad_norm": 35.1875,
27027
+ "learning_rate": 9.888308232164621e-06,
27028
+ "loss": 19.5172,
27029
+ "step": 38520
27030
+ },
27031
+ {
27032
+ "epoch": 0.7150130945007737,
27033
+ "grad_norm": 36.71875,
27034
+ "learning_rate": 9.888279236378578e-06,
27035
+ "loss": 19.6522,
27036
+ "step": 38530
27037
+ },
27038
+ {
27039
+ "epoch": 0.7151986675852534,
27040
+ "grad_norm": 34.03125,
27041
+ "learning_rate": 9.888250240592536e-06,
27042
+ "loss": 19.8201,
27043
+ "step": 38540
27044
+ },
27045
+ {
27046
+ "epoch": 0.7153842406697333,
27047
+ "grad_norm": 37.0625,
27048
+ "learning_rate": 9.888221244806493e-06,
27049
+ "loss": 19.2714,
27050
+ "step": 38550
27051
+ },
27052
+ {
27053
+ "epoch": 0.7155698137542131,
27054
+ "grad_norm": 36.78125,
27055
+ "learning_rate": 9.88819224902045e-06,
27056
+ "loss": 19.2433,
27057
+ "step": 38560
27058
+ },
27059
+ {
27060
+ "epoch": 0.7157553868386929,
27061
+ "grad_norm": 35.78125,
27062
+ "learning_rate": 9.888163253234408e-06,
27063
+ "loss": 19.69,
27064
+ "step": 38570
27065
+ },
27066
+ {
27067
+ "epoch": 0.7159409599231727,
27068
+ "grad_norm": 34.5625,
27069
+ "learning_rate": 9.888134257448367e-06,
27070
+ "loss": 19.7708,
27071
+ "step": 38580
27072
+ },
27073
+ {
27074
+ "epoch": 0.7161265330076526,
27075
+ "grad_norm": 34.71875,
27076
+ "learning_rate": 9.888105261662323e-06,
27077
+ "loss": 19.347,
27078
+ "step": 38590
27079
+ },
27080
+ {
27081
+ "epoch": 0.7163121060921324,
27082
+ "grad_norm": 33.90625,
27083
+ "learning_rate": 9.88807626587628e-06,
27084
+ "loss": 19.3059,
27085
+ "step": 38600
27086
+ },
27087
+ {
27088
+ "epoch": 0.7164976791766122,
27089
+ "grad_norm": 37.15625,
27090
+ "learning_rate": 9.88804727009024e-06,
27091
+ "loss": 19.8677,
27092
+ "step": 38610
27093
+ },
27094
+ {
27095
+ "epoch": 0.7166832522610921,
27096
+ "grad_norm": 37.9375,
27097
+ "learning_rate": 9.888018274304197e-06,
27098
+ "loss": 19.4334,
27099
+ "step": 38620
27100
+ },
27101
+ {
27102
+ "epoch": 0.7168688253455718,
27103
+ "grad_norm": 35.25,
27104
+ "learning_rate": 9.887989278518154e-06,
27105
+ "loss": 19.2413,
27106
+ "step": 38630
27107
+ },
27108
+ {
27109
+ "epoch": 0.7170543984300517,
27110
+ "grad_norm": 37.40625,
27111
+ "learning_rate": 9.887960282732111e-06,
27112
+ "loss": 19.6072,
27113
+ "step": 38640
27114
+ },
27115
+ {
27116
+ "epoch": 0.7172399715145316,
27117
+ "grad_norm": 35.625,
27118
+ "learning_rate": 9.887931286946069e-06,
27119
+ "loss": 19.5617,
27120
+ "step": 38650
27121
+ },
27122
+ {
27123
+ "epoch": 0.7174255445990113,
27124
+ "grad_norm": 35.8125,
27125
+ "learning_rate": 9.887902291160026e-06,
27126
+ "loss": 19.2944,
27127
+ "step": 38660
27128
+ },
27129
+ {
27130
+ "epoch": 0.7176111176834912,
27131
+ "grad_norm": 36.78125,
27132
+ "learning_rate": 9.887873295373984e-06,
27133
+ "loss": 19.6959,
27134
+ "step": 38670
27135
+ },
27136
+ {
27137
+ "epoch": 0.717796690767971,
27138
+ "grad_norm": 37.4375,
27139
+ "learning_rate": 9.887844299587943e-06,
27140
+ "loss": 19.6264,
27141
+ "step": 38680
27142
+ },
27143
+ {
27144
+ "epoch": 0.7179822638524508,
27145
+ "grad_norm": 34.40625,
27146
+ "learning_rate": 9.8878153038019e-06,
27147
+ "loss": 19.4896,
27148
+ "step": 38690
27149
+ },
27150
+ {
27151
+ "epoch": 0.7181678369369306,
27152
+ "grad_norm": 34.53125,
27153
+ "learning_rate": 9.887786308015856e-06,
27154
+ "loss": 19.5352,
27155
+ "step": 38700
27156
+ },
27157
+ {
27158
+ "epoch": 0.7183534100214105,
27159
+ "grad_norm": 37.84375,
27160
+ "learning_rate": 9.887757312229815e-06,
27161
+ "loss": 19.5476,
27162
+ "step": 38710
27163
+ },
27164
+ {
27165
+ "epoch": 0.7185389831058904,
27166
+ "grad_norm": 37.0625,
27167
+ "learning_rate": 9.887728316443772e-06,
27168
+ "loss": 19.3343,
27169
+ "step": 38720
27170
+ },
27171
+ {
27172
+ "epoch": 0.7187245561903701,
27173
+ "grad_norm": 33.96875,
27174
+ "learning_rate": 9.88769932065773e-06,
27175
+ "loss": 19.3166,
27176
+ "step": 38730
27177
+ },
27178
+ {
27179
+ "epoch": 0.71891012927485,
27180
+ "grad_norm": 36.34375,
27181
+ "learning_rate": 9.887670324871687e-06,
27182
+ "loss": 19.3764,
27183
+ "step": 38740
27184
+ },
27185
+ {
27186
+ "epoch": 0.7190957023593298,
27187
+ "grad_norm": 36.65625,
27188
+ "learning_rate": 9.887641329085645e-06,
27189
+ "loss": 19.6944,
27190
+ "step": 38750
27191
+ },
27192
+ {
27193
+ "epoch": 0.7192812754438096,
27194
+ "grad_norm": 35.46875,
27195
+ "learning_rate": 9.887612333299602e-06,
27196
+ "loss": 19.4655,
27197
+ "step": 38760
27198
+ },
27199
+ {
27200
+ "epoch": 0.7194668485282895,
27201
+ "grad_norm": 36.375,
27202
+ "learning_rate": 9.88758333751356e-06,
27203
+ "loss": 19.8738,
27204
+ "step": 38770
27205
+ },
27206
+ {
27207
+ "epoch": 0.7196524216127693,
27208
+ "grad_norm": 35.0,
27209
+ "learning_rate": 9.887554341727517e-06,
27210
+ "loss": 19.594,
27211
+ "step": 38780
27212
+ },
27213
+ {
27214
+ "epoch": 0.7198379946972491,
27215
+ "grad_norm": 37.875,
27216
+ "learning_rate": 9.887525345941476e-06,
27217
+ "loss": 19.2177,
27218
+ "step": 38790
27219
+ },
27220
+ {
27221
+ "epoch": 0.7200235677817289,
27222
+ "grad_norm": 34.21875,
27223
+ "learning_rate": 9.887496350155432e-06,
27224
+ "loss": 19.5523,
27225
+ "step": 38800
27226
+ },
27227
+ {
27228
+ "epoch": 0.7202091408662088,
27229
+ "grad_norm": 37.1875,
27230
+ "learning_rate": 9.88746735436939e-06,
27231
+ "loss": 19.6112,
27232
+ "step": 38810
27233
+ },
27234
+ {
27235
+ "epoch": 0.7203947139506885,
27236
+ "grad_norm": 33.9375,
27237
+ "learning_rate": 9.887438358583348e-06,
27238
+ "loss": 19.4369,
27239
+ "step": 38820
27240
+ },
27241
+ {
27242
+ "epoch": 0.7205802870351684,
27243
+ "grad_norm": 38.28125,
27244
+ "learning_rate": 9.887409362797306e-06,
27245
+ "loss": 19.5935,
27246
+ "step": 38830
27247
+ },
27248
+ {
27249
+ "epoch": 0.7207658601196483,
27250
+ "grad_norm": 35.0,
27251
+ "learning_rate": 9.887380367011263e-06,
27252
+ "loss": 19.5737,
27253
+ "step": 38840
27254
+ },
27255
+ {
27256
+ "epoch": 0.720951433204128,
27257
+ "grad_norm": 35.96875,
27258
+ "learning_rate": 9.88735137122522e-06,
27259
+ "loss": 19.4008,
27260
+ "step": 38850
27261
+ },
27262
+ {
27263
+ "epoch": 0.7211370062886079,
27264
+ "grad_norm": 36.125,
27265
+ "learning_rate": 9.887322375439178e-06,
27266
+ "loss": 19.1818,
27267
+ "step": 38860
27268
+ },
27269
+ {
27270
+ "epoch": 0.7213225793730877,
27271
+ "grad_norm": 34.03125,
27272
+ "learning_rate": 9.887293379653135e-06,
27273
+ "loss": 19.4143,
27274
+ "step": 38870
27275
+ },
27276
+ {
27277
+ "epoch": 0.7215081524575676,
27278
+ "grad_norm": 34.46875,
27279
+ "learning_rate": 9.887264383867093e-06,
27280
+ "loss": 19.5025,
27281
+ "step": 38880
27282
+ },
27283
+ {
27284
+ "epoch": 0.7216937255420474,
27285
+ "grad_norm": 37.75,
27286
+ "learning_rate": 9.887235388081052e-06,
27287
+ "loss": 19.3903,
27288
+ "step": 38890
27289
+ },
27290
+ {
27291
+ "epoch": 0.7218792986265272,
27292
+ "grad_norm": 36.15625,
27293
+ "learning_rate": 9.88720639229501e-06,
27294
+ "loss": 19.2237,
27295
+ "step": 38900
27296
+ },
27297
+ {
27298
+ "epoch": 0.7220648717110071,
27299
+ "grad_norm": 35.21875,
27300
+ "learning_rate": 9.887177396508965e-06,
27301
+ "loss": 19.0859,
27302
+ "step": 38910
27303
+ },
27304
+ {
27305
+ "epoch": 0.7222504447954868,
27306
+ "grad_norm": 36.5,
27307
+ "learning_rate": 9.887148400722924e-06,
27308
+ "loss": 19.3678,
27309
+ "step": 38920
27310
+ },
27311
+ {
27312
+ "epoch": 0.7224360178799667,
27313
+ "grad_norm": 36.9375,
27314
+ "learning_rate": 9.887119404936881e-06,
27315
+ "loss": 19.5165,
27316
+ "step": 38930
27317
+ },
27318
+ {
27319
+ "epoch": 0.7226215909644466,
27320
+ "grad_norm": 35.40625,
27321
+ "learning_rate": 9.887090409150839e-06,
27322
+ "loss": 19.8087,
27323
+ "step": 38940
27324
+ },
27325
+ {
27326
+ "epoch": 0.7228071640489263,
27327
+ "grad_norm": 36.6875,
27328
+ "learning_rate": 9.887061413364796e-06,
27329
+ "loss": 19.7642,
27330
+ "step": 38950
27331
+ },
27332
+ {
27333
+ "epoch": 0.7229927371334062,
27334
+ "grad_norm": 35.09375,
27335
+ "learning_rate": 9.887032417578754e-06,
27336
+ "loss": 19.627,
27337
+ "step": 38960
27338
+ },
27339
+ {
27340
+ "epoch": 0.723178310217886,
27341
+ "grad_norm": 35.84375,
27342
+ "learning_rate": 9.887003421792711e-06,
27343
+ "loss": 19.519,
27344
+ "step": 38970
27345
+ },
27346
+ {
27347
+ "epoch": 0.7233638833023658,
27348
+ "grad_norm": 35.625,
27349
+ "learning_rate": 9.886974426006668e-06,
27350
+ "loss": 19.3384,
27351
+ "step": 38980
27352
+ },
27353
+ {
27354
+ "epoch": 0.7235494563868456,
27355
+ "grad_norm": 35.15625,
27356
+ "learning_rate": 9.886945430220628e-06,
27357
+ "loss": 20.0005,
27358
+ "step": 38990
27359
+ },
27360
+ {
27361
+ "epoch": 0.7237350294713255,
27362
+ "grad_norm": 38.75,
27363
+ "learning_rate": 9.886916434434585e-06,
27364
+ "loss": 19.7862,
27365
+ "step": 39000
27366
+ },
27367
+ {
27368
+ "epoch": 0.7239206025558053,
27369
+ "grad_norm": 35.1875,
27370
+ "learning_rate": 9.886887438648542e-06,
27371
+ "loss": 19.2984,
27372
+ "step": 39010
27373
+ },
27374
+ {
27375
+ "epoch": 0.7241061756402851,
27376
+ "grad_norm": 37.0625,
27377
+ "learning_rate": 9.8868584428625e-06,
27378
+ "loss": 19.7105,
27379
+ "step": 39020
27380
+ },
27381
+ {
27382
+ "epoch": 0.724291748724765,
27383
+ "grad_norm": 35.71875,
27384
+ "learning_rate": 9.886829447076457e-06,
27385
+ "loss": 19.3705,
27386
+ "step": 39030
27387
+ },
27388
+ {
27389
+ "epoch": 0.7244773218092447,
27390
+ "grad_norm": 35.0,
27391
+ "learning_rate": 9.886800451290415e-06,
27392
+ "loss": 19.2526,
27393
+ "step": 39040
27394
+ },
27395
+ {
27396
+ "epoch": 0.7246628948937246,
27397
+ "grad_norm": 34.84375,
27398
+ "learning_rate": 9.886771455504372e-06,
27399
+ "loss": 19.6478,
27400
+ "step": 39050
27401
+ },
27402
+ {
27403
+ "epoch": 0.7248484679782045,
27404
+ "grad_norm": 36.53125,
27405
+ "learning_rate": 9.886742459718331e-06,
27406
+ "loss": 19.5643,
27407
+ "step": 39060
27408
+ },
27409
+ {
27410
+ "epoch": 0.7250340410626843,
27411
+ "grad_norm": 39.0625,
27412
+ "learning_rate": 9.886713463932287e-06,
27413
+ "loss": 19.0415,
27414
+ "step": 39070
27415
+ },
27416
+ {
27417
+ "epoch": 0.7252196141471641,
27418
+ "grad_norm": 33.71875,
27419
+ "learning_rate": 9.886684468146244e-06,
27420
+ "loss": 19.0971,
27421
+ "step": 39080
27422
+ },
27423
+ {
27424
+ "epoch": 0.7254051872316439,
27425
+ "grad_norm": 34.90625,
27426
+ "learning_rate": 9.886655472360203e-06,
27427
+ "loss": 19.5161,
27428
+ "step": 39090
27429
+ },
27430
+ {
27431
+ "epoch": 0.7255907603161238,
27432
+ "grad_norm": 36.0625,
27433
+ "learning_rate": 9.88662647657416e-06,
27434
+ "loss": 19.7788,
27435
+ "step": 39100
27436
+ },
27437
+ {
27438
+ "epoch": 0.7257763334006035,
27439
+ "grad_norm": 35.90625,
27440
+ "learning_rate": 9.886597480788118e-06,
27441
+ "loss": 19.4104,
27442
+ "step": 39110
27443
+ },
27444
+ {
27445
+ "epoch": 0.7259619064850834,
27446
+ "grad_norm": 35.5,
27447
+ "learning_rate": 9.886568485002075e-06,
27448
+ "loss": 19.7646,
27449
+ "step": 39120
27450
+ },
27451
+ {
27452
+ "epoch": 0.7261474795695633,
27453
+ "grad_norm": 35.625,
27454
+ "learning_rate": 9.886539489216033e-06,
27455
+ "loss": 19.6163,
27456
+ "step": 39130
27457
+ },
27458
+ {
27459
+ "epoch": 0.726333052654043,
27460
+ "grad_norm": 36.34375,
27461
+ "learning_rate": 9.88651049342999e-06,
27462
+ "loss": 19.5463,
27463
+ "step": 39140
27464
+ },
27465
+ {
27466
+ "epoch": 0.7265186257385229,
27467
+ "grad_norm": 35.5625,
27468
+ "learning_rate": 9.886481497643948e-06,
27469
+ "loss": 19.2152,
27470
+ "step": 39150
27471
+ },
27472
+ {
27473
+ "epoch": 0.7267041988230027,
27474
+ "grad_norm": 34.84375,
27475
+ "learning_rate": 9.886452501857907e-06,
27476
+ "loss": 19.5463,
27477
+ "step": 39160
27478
+ },
27479
+ {
27480
+ "epoch": 0.7268897719074825,
27481
+ "grad_norm": 34.65625,
27482
+ "learning_rate": 9.886423506071864e-06,
27483
+ "loss": 19.6092,
27484
+ "step": 39170
27485
+ },
27486
+ {
27487
+ "epoch": 0.7270753449919624,
27488
+ "grad_norm": 34.59375,
27489
+ "learning_rate": 9.88639451028582e-06,
27490
+ "loss": 19.4284,
27491
+ "step": 39180
27492
+ },
27493
+ {
27494
+ "epoch": 0.7272609180764422,
27495
+ "grad_norm": 34.8125,
27496
+ "learning_rate": 9.886365514499779e-06,
27497
+ "loss": 19.3534,
27498
+ "step": 39190
27499
+ },
27500
+ {
27501
+ "epoch": 0.727446491160922,
27502
+ "grad_norm": 36.0,
27503
+ "learning_rate": 9.886336518713736e-06,
27504
+ "loss": 19.8356,
27505
+ "step": 39200
27506
+ },
27507
+ {
27508
+ "epoch": 0.7276320642454018,
27509
+ "grad_norm": 35.28125,
27510
+ "learning_rate": 9.886307522927694e-06,
27511
+ "loss": 19.2134,
27512
+ "step": 39210
27513
+ },
27514
+ {
27515
+ "epoch": 0.7278176373298817,
27516
+ "grad_norm": 37.53125,
27517
+ "learning_rate": 9.886278527141651e-06,
27518
+ "loss": 19.133,
27519
+ "step": 39220
27520
+ },
27521
+ {
27522
+ "epoch": 0.7280032104143614,
27523
+ "grad_norm": 35.375,
27524
+ "learning_rate": 9.886249531355609e-06,
27525
+ "loss": 19.0139,
27526
+ "step": 39230
27527
+ },
27528
+ {
27529
+ "epoch": 0.7281887834988413,
27530
+ "grad_norm": 37.25,
27531
+ "learning_rate": 9.886220535569566e-06,
27532
+ "loss": 19.1991,
27533
+ "step": 39240
27534
+ },
27535
+ {
27536
+ "epoch": 0.7283743565833212,
27537
+ "grad_norm": 34.84375,
27538
+ "learning_rate": 9.886191539783523e-06,
27539
+ "loss": 19.4178,
27540
+ "step": 39250
27541
+ },
27542
+ {
27543
+ "epoch": 0.728559929667801,
27544
+ "grad_norm": 35.28125,
27545
+ "learning_rate": 9.886162543997483e-06,
27546
+ "loss": 19.7294,
27547
+ "step": 39260
27548
+ },
27549
+ {
27550
+ "epoch": 0.7287455027522808,
27551
+ "grad_norm": 36.90625,
27552
+ "learning_rate": 9.88613354821144e-06,
27553
+ "loss": 19.5717,
27554
+ "step": 39270
27555
+ },
27556
+ {
27557
+ "epoch": 0.7289310758367606,
27558
+ "grad_norm": 36.5,
27559
+ "learning_rate": 9.886104552425397e-06,
27560
+ "loss": 19.4598,
27561
+ "step": 39280
27562
+ },
27563
+ {
27564
+ "epoch": 0.7291166489212405,
27565
+ "grad_norm": 36.96875,
27566
+ "learning_rate": 9.886075556639355e-06,
27567
+ "loss": 19.3378,
27568
+ "step": 39290
27569
+ },
27570
+ {
27571
+ "epoch": 0.7293022220057203,
27572
+ "grad_norm": 37.21875,
27573
+ "learning_rate": 9.886046560853312e-06,
27574
+ "loss": 19.6847,
27575
+ "step": 39300
27576
+ },
27577
+ {
27578
+ "epoch": 0.7294877950902001,
27579
+ "grad_norm": 35.3125,
27580
+ "learning_rate": 9.88601756506727e-06,
27581
+ "loss": 19.398,
27582
+ "step": 39310
27583
+ },
27584
+ {
27585
+ "epoch": 0.72967336817468,
27586
+ "grad_norm": 35.375,
27587
+ "learning_rate": 9.885988569281227e-06,
27588
+ "loss": 19.577,
27589
+ "step": 39320
27590
+ },
27591
+ {
27592
+ "epoch": 0.7298589412591597,
27593
+ "grad_norm": 34.8125,
27594
+ "learning_rate": 9.885959573495184e-06,
27595
+ "loss": 19.5882,
27596
+ "step": 39330
27597
+ },
27598
+ {
27599
+ "epoch": 0.7300445143436396,
27600
+ "grad_norm": 34.71875,
27601
+ "learning_rate": 9.885930577709142e-06,
27602
+ "loss": 19.9676,
27603
+ "step": 39340
27604
+ },
27605
+ {
27606
+ "epoch": 0.7302300874281195,
27607
+ "grad_norm": 34.1875,
27608
+ "learning_rate": 9.8859015819231e-06,
27609
+ "loss": 18.9682,
27610
+ "step": 39350
27611
+ },
27612
+ {
27613
+ "epoch": 0.7304156605125992,
27614
+ "grad_norm": 35.0625,
27615
+ "learning_rate": 9.885872586137057e-06,
27616
+ "loss": 18.9754,
27617
+ "step": 39360
27618
+ },
27619
+ {
27620
+ "epoch": 0.7306012335970791,
27621
+ "grad_norm": 36.59375,
27622
+ "learning_rate": 9.885843590351016e-06,
27623
+ "loss": 19.5236,
27624
+ "step": 39370
27625
+ },
27626
+ {
27627
+ "epoch": 0.7307868066815589,
27628
+ "grad_norm": 35.34375,
27629
+ "learning_rate": 9.885814594564973e-06,
27630
+ "loss": 19.5679,
27631
+ "step": 39380
27632
+ },
27633
+ {
27634
+ "epoch": 0.7309723797660387,
27635
+ "grad_norm": 34.25,
27636
+ "learning_rate": 9.88578559877893e-06,
27637
+ "loss": 19.5254,
27638
+ "step": 39390
27639
+ },
27640
+ {
27641
+ "epoch": 0.7311579528505185,
27642
+ "grad_norm": 35.0,
27643
+ "learning_rate": 9.885756602992888e-06,
27644
+ "loss": 19.4094,
27645
+ "step": 39400
27646
+ },
27647
+ {
27648
+ "epoch": 0.7313435259349984,
27649
+ "grad_norm": 35.40625,
27650
+ "learning_rate": 9.885727607206845e-06,
27651
+ "loss": 19.3836,
27652
+ "step": 39410
27653
+ },
27654
+ {
27655
+ "epoch": 0.7315290990194783,
27656
+ "grad_norm": 36.3125,
27657
+ "learning_rate": 9.885698611420803e-06,
27658
+ "loss": 19.7532,
27659
+ "step": 39420
27660
+ },
27661
+ {
27662
+ "epoch": 0.731714672103958,
27663
+ "grad_norm": 36.78125,
27664
+ "learning_rate": 9.88566961563476e-06,
27665
+ "loss": 19.3568,
27666
+ "step": 39430
27667
+ },
27668
+ {
27669
+ "epoch": 0.7319002451884379,
27670
+ "grad_norm": 34.90625,
27671
+ "learning_rate": 9.88564061984872e-06,
27672
+ "loss": 19.3629,
27673
+ "step": 39440
27674
+ },
27675
+ {
27676
+ "epoch": 0.7320858182729177,
27677
+ "grad_norm": 35.875,
27678
+ "learning_rate": 9.885611624062675e-06,
27679
+ "loss": 19.2368,
27680
+ "step": 39450
27681
+ },
27682
+ {
27683
+ "epoch": 0.7322713913573975,
27684
+ "grad_norm": 33.84375,
27685
+ "learning_rate": 9.885582628276632e-06,
27686
+ "loss": 19.4356,
27687
+ "step": 39460
27688
+ },
27689
+ {
27690
+ "epoch": 0.7324569644418774,
27691
+ "grad_norm": 34.8125,
27692
+ "learning_rate": 9.885553632490592e-06,
27693
+ "loss": 19.1461,
27694
+ "step": 39470
27695
+ },
27696
+ {
27697
+ "epoch": 0.7326425375263572,
27698
+ "grad_norm": 36.34375,
27699
+ "learning_rate": 9.885524636704549e-06,
27700
+ "loss": 19.5205,
27701
+ "step": 39480
27702
+ },
27703
+ {
27704
+ "epoch": 0.732828110610837,
27705
+ "grad_norm": 36.5,
27706
+ "learning_rate": 9.885495640918506e-06,
27707
+ "loss": 19.2334,
27708
+ "step": 39490
27709
+ },
27710
+ {
27711
+ "epoch": 0.7330136836953168,
27712
+ "grad_norm": 34.40625,
27713
+ "learning_rate": 9.885466645132464e-06,
27714
+ "loss": 19.2247,
27715
+ "step": 39500
27716
+ },
27717
+ {
27718
+ "epoch": 0.7331992567797967,
27719
+ "grad_norm": 36.90625,
27720
+ "learning_rate": 9.885437649346421e-06,
27721
+ "loss": 19.5144,
27722
+ "step": 39510
27723
+ },
27724
+ {
27725
+ "epoch": 0.7333848298642764,
27726
+ "grad_norm": 36.25,
27727
+ "learning_rate": 9.885408653560379e-06,
27728
+ "loss": 18.9981,
27729
+ "step": 39520
27730
+ },
27731
+ {
27732
+ "epoch": 0.7335704029487563,
27733
+ "grad_norm": 35.4375,
27734
+ "learning_rate": 9.885379657774336e-06,
27735
+ "loss": 19.2192,
27736
+ "step": 39530
27737
+ },
27738
+ {
27739
+ "epoch": 0.7337559760332362,
27740
+ "grad_norm": 36.8125,
27741
+ "learning_rate": 9.885350661988295e-06,
27742
+ "loss": 19.2174,
27743
+ "step": 39540
27744
+ },
27745
+ {
27746
+ "epoch": 0.7339415491177159,
27747
+ "grad_norm": 33.6875,
27748
+ "learning_rate": 9.88532166620225e-06,
27749
+ "loss": 19.6817,
27750
+ "step": 39550
27751
+ },
27752
+ {
27753
+ "epoch": 0.7341271222021958,
27754
+ "grad_norm": 36.875,
27755
+ "learning_rate": 9.885292670416208e-06,
27756
+ "loss": 18.8573,
27757
+ "step": 39560
27758
+ },
27759
+ {
27760
+ "epoch": 0.7343126952866756,
27761
+ "grad_norm": 37.375,
27762
+ "learning_rate": 9.885263674630167e-06,
27763
+ "loss": 19.7325,
27764
+ "step": 39570
27765
+ },
27766
+ {
27767
+ "epoch": 0.7344982683711554,
27768
+ "grad_norm": 37.125,
27769
+ "learning_rate": 9.885234678844125e-06,
27770
+ "loss": 19.4219,
27771
+ "step": 39580
27772
+ },
27773
+ {
27774
+ "epoch": 0.7346838414556353,
27775
+ "grad_norm": 35.46875,
27776
+ "learning_rate": 9.885205683058082e-06,
27777
+ "loss": 19.1926,
27778
+ "step": 39590
27779
+ },
27780
+ {
27781
+ "epoch": 0.7348694145401151,
27782
+ "grad_norm": 35.71875,
27783
+ "learning_rate": 9.88517668727204e-06,
27784
+ "loss": 19.064,
27785
+ "step": 39600
27786
+ },
27787
+ {
27788
+ "epoch": 0.735054987624595,
27789
+ "grad_norm": 36.34375,
27790
+ "learning_rate": 9.885147691485997e-06,
27791
+ "loss": 19.0486,
27792
+ "step": 39610
27793
+ },
27794
+ {
27795
+ "epoch": 0.7352405607090747,
27796
+ "grad_norm": 36.03125,
27797
+ "learning_rate": 9.885118695699954e-06,
27798
+ "loss": 19.7487,
27799
+ "step": 39620
27800
+ },
27801
+ {
27802
+ "epoch": 0.7354261337935546,
27803
+ "grad_norm": 36.25,
27804
+ "learning_rate": 9.885089699913912e-06,
27805
+ "loss": 19.5051,
27806
+ "step": 39630
27807
+ },
27808
+ {
27809
+ "epoch": 0.7356117068780345,
27810
+ "grad_norm": 36.375,
27811
+ "learning_rate": 9.88506070412787e-06,
27812
+ "loss": 19.2603,
27813
+ "step": 39640
27814
+ },
27815
+ {
27816
+ "epoch": 0.7357972799625142,
27817
+ "grad_norm": 35.5625,
27818
+ "learning_rate": 9.885031708341828e-06,
27819
+ "loss": 19.957,
27820
+ "step": 39650
27821
+ },
27822
+ {
27823
+ "epoch": 0.7359828530469941,
27824
+ "grad_norm": 36.34375,
27825
+ "learning_rate": 9.885002712555784e-06,
27826
+ "loss": 19.3855,
27827
+ "step": 39660
27828
+ },
27829
+ {
27830
+ "epoch": 0.7361684261314739,
27831
+ "grad_norm": 34.1875,
27832
+ "learning_rate": 9.884973716769743e-06,
27833
+ "loss": 19.6183,
27834
+ "step": 39670
27835
+ },
27836
+ {
27837
+ "epoch": 0.7363539992159537,
27838
+ "grad_norm": 37.125,
27839
+ "learning_rate": 9.8849447209837e-06,
27840
+ "loss": 19.1751,
27841
+ "step": 39680
27842
+ },
27843
+ {
27844
+ "epoch": 0.7365395723004335,
27845
+ "grad_norm": 35.15625,
27846
+ "learning_rate": 9.884915725197658e-06,
27847
+ "loss": 19.4365,
27848
+ "step": 39690
27849
+ },
27850
+ {
27851
+ "epoch": 0.7367251453849134,
27852
+ "grad_norm": 37.28125,
27853
+ "learning_rate": 9.884886729411615e-06,
27854
+ "loss": 19.867,
27855
+ "step": 39700
27856
+ },
27857
+ {
27858
+ "epoch": 0.7369107184693932,
27859
+ "grad_norm": 34.4375,
27860
+ "learning_rate": 9.884857733625574e-06,
27861
+ "loss": 19.4728,
27862
+ "step": 39710
27863
+ },
27864
+ {
27865
+ "epoch": 0.737096291553873,
27866
+ "grad_norm": 36.9375,
27867
+ "learning_rate": 9.88482873783953e-06,
27868
+ "loss": 19.0685,
27869
+ "step": 39720
27870
+ },
27871
+ {
27872
+ "epoch": 0.7372818646383529,
27873
+ "grad_norm": 35.34375,
27874
+ "learning_rate": 9.884799742053487e-06,
27875
+ "loss": 19.7118,
27876
+ "step": 39730
27877
+ },
27878
+ {
27879
+ "epoch": 0.7374674377228326,
27880
+ "grad_norm": 36.5,
27881
+ "learning_rate": 9.884770746267447e-06,
27882
+ "loss": 19.6547,
27883
+ "step": 39740
27884
+ },
27885
+ {
27886
+ "epoch": 0.7376530108073125,
27887
+ "grad_norm": 34.84375,
27888
+ "learning_rate": 9.884741750481404e-06,
27889
+ "loss": 19.3336,
27890
+ "step": 39750
27891
+ },
27892
+ {
27893
+ "epoch": 0.7378385838917924,
27894
+ "grad_norm": 33.96875,
27895
+ "learning_rate": 9.884712754695361e-06,
27896
+ "loss": 19.3172,
27897
+ "step": 39760
27898
+ },
27899
+ {
27900
+ "epoch": 0.7380241569762722,
27901
+ "grad_norm": 35.15625,
27902
+ "learning_rate": 9.884683758909319e-06,
27903
+ "loss": 19.659,
27904
+ "step": 39770
27905
+ },
27906
+ {
27907
+ "epoch": 0.738209730060752,
27908
+ "grad_norm": 34.75,
27909
+ "learning_rate": 9.884654763123276e-06,
27910
+ "loss": 19.564,
27911
+ "step": 39780
27912
+ },
27913
+ {
27914
+ "epoch": 0.7383953031452318,
27915
+ "grad_norm": 36.9375,
27916
+ "learning_rate": 9.884625767337234e-06,
27917
+ "loss": 19.4628,
27918
+ "step": 39790
27919
+ },
27920
+ {
27921
+ "epoch": 0.7385808762297117,
27922
+ "grad_norm": 35.28125,
27923
+ "learning_rate": 9.884596771551191e-06,
27924
+ "loss": 19.3677,
27925
+ "step": 39800
27926
+ },
27927
+ {
27928
+ "epoch": 0.7387664493141914,
27929
+ "grad_norm": 36.03125,
27930
+ "learning_rate": 9.884567775765148e-06,
27931
+ "loss": 19.5696,
27932
+ "step": 39810
27933
+ },
27934
+ {
27935
+ "epoch": 0.7389520223986713,
27936
+ "grad_norm": 34.5,
27937
+ "learning_rate": 9.884538779979106e-06,
27938
+ "loss": 19.2716,
27939
+ "step": 39820
27940
+ },
27941
+ {
27942
+ "epoch": 0.7391375954831512,
27943
+ "grad_norm": 35.53125,
27944
+ "learning_rate": 9.884509784193063e-06,
27945
+ "loss": 19.2286,
27946
+ "step": 39830
27947
+ },
27948
+ {
27949
+ "epoch": 0.7393231685676309,
27950
+ "grad_norm": 35.46875,
27951
+ "learning_rate": 9.884480788407022e-06,
27952
+ "loss": 19.1631,
27953
+ "step": 39840
27954
+ },
27955
+ {
27956
+ "epoch": 0.7395087416521108,
27957
+ "grad_norm": 35.96875,
27958
+ "learning_rate": 9.88445179262098e-06,
27959
+ "loss": 19.4365,
27960
+ "step": 39850
27961
+ },
27962
+ {
27963
+ "epoch": 0.7396943147365906,
27964
+ "grad_norm": 35.96875,
27965
+ "learning_rate": 9.884422796834937e-06,
27966
+ "loss": 19.5979,
27967
+ "step": 39860
27968
+ },
27969
+ {
27970
+ "epoch": 0.7398798878210704,
27971
+ "grad_norm": 36.9375,
27972
+ "learning_rate": 9.884393801048895e-06,
27973
+ "loss": 19.6595,
27974
+ "step": 39870
27975
+ },
27976
+ {
27977
+ "epoch": 0.7400654609055503,
27978
+ "grad_norm": 37.28125,
27979
+ "learning_rate": 9.884364805262852e-06,
27980
+ "loss": 19.4817,
27981
+ "step": 39880
27982
+ },
27983
+ {
27984
+ "epoch": 0.7402510339900301,
27985
+ "grad_norm": 36.03125,
27986
+ "learning_rate": 9.88433580947681e-06,
27987
+ "loss": 19.3088,
27988
+ "step": 39890
27989
+ },
27990
+ {
27991
+ "epoch": 0.7404366070745099,
27992
+ "grad_norm": 34.75,
27993
+ "learning_rate": 9.884306813690767e-06,
27994
+ "loss": 19.4918,
27995
+ "step": 39900
27996
+ },
27997
+ {
27998
+ "epoch": 0.7406221801589897,
27999
+ "grad_norm": 35.5,
28000
+ "learning_rate": 9.884277817904724e-06,
28001
+ "loss": 19.435,
28002
+ "step": 39910
28003
+ },
28004
+ {
28005
+ "epoch": 0.7408077532434696,
28006
+ "grad_norm": 33.78125,
28007
+ "learning_rate": 9.884248822118683e-06,
28008
+ "loss": 19.268,
28009
+ "step": 39920
28010
+ },
28011
+ {
28012
+ "epoch": 0.7409933263279493,
28013
+ "grad_norm": 37.5,
28014
+ "learning_rate": 9.884219826332639e-06,
28015
+ "loss": 19.885,
28016
+ "step": 39930
28017
+ },
28018
+ {
28019
+ "epoch": 0.7411788994124292,
28020
+ "grad_norm": 34.9375,
28021
+ "learning_rate": 9.884190830546596e-06,
28022
+ "loss": 19.6043,
28023
+ "step": 39940
28024
+ },
28025
+ {
28026
+ "epoch": 0.7413644724969091,
28027
+ "grad_norm": 36.65625,
28028
+ "learning_rate": 9.884161834760556e-06,
28029
+ "loss": 18.9762,
28030
+ "step": 39950
28031
+ },
28032
+ {
28033
+ "epoch": 0.7415500455813889,
28034
+ "grad_norm": 35.5,
28035
+ "learning_rate": 9.884132838974513e-06,
28036
+ "loss": 19.6159,
28037
+ "step": 39960
28038
+ },
28039
+ {
28040
+ "epoch": 0.7417356186658687,
28041
+ "grad_norm": 35.53125,
28042
+ "learning_rate": 9.88410384318847e-06,
28043
+ "loss": 19.5883,
28044
+ "step": 39970
28045
+ },
28046
+ {
28047
+ "epoch": 0.7419211917503485,
28048
+ "grad_norm": 35.21875,
28049
+ "learning_rate": 9.884074847402428e-06,
28050
+ "loss": 19.4468,
28051
+ "step": 39980
28052
+ },
28053
+ {
28054
+ "epoch": 0.7421067648348284,
28055
+ "grad_norm": 35.34375,
28056
+ "learning_rate": 9.884045851616385e-06,
28057
+ "loss": 19.7061,
28058
+ "step": 39990
28059
+ },
28060
+ {
28061
+ "epoch": 0.7422923379193082,
28062
+ "grad_norm": 37.5625,
28063
+ "learning_rate": 9.884016855830343e-06,
28064
+ "loss": 19.2119,
28065
+ "step": 40000
28066
+ },
28067
+ {
28068
+ "epoch": 0.7422923379193082,
28069
+ "eval_loss": 2.4196152687072754,
28070
+ "eval_runtime": 454.1953,
28071
+ "eval_samples_per_second": 3197.12,
28072
+ "eval_steps_per_second": 49.956,
28073
+ "step": 40000
28074
  }
28075
  ],
28076
  "logging_steps": 10,
 
28090
  "attributes": {}
28091
  }
28092
  },
28093
+ "total_flos": 6.982091036164096e+18,
28094
  "train_batch_size": 8,
28095
  "trial_name": null,
28096
  "trial_params": null