CocoRoF commited on
Commit
e45f711
·
verified ·
1 Parent(s): 382a385

Training in progress, step 45000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecd57c7eace6adeec4710fec99cac2074403be145022a172c36c987b89390434
3
  size 306619286
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8898863e03111dc0f9067fbf61d7c9148ac8efc7f3547cbf592b621a60724e4b
3
  size 306619286
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51104d0f8c0c31f4a4bccea6f88abc44b2a966c89edf808f6831a68d92b864e7
3
  size 919972410
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b81fcbe3c87675fdccd5b2d56c7d720479008ef5d12e3659d6f5288de1f4369
3
  size 919972410
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e735ed11597ed40a2b6854e0229902e1a21fedc0a0dbc608ca905fae57d5b06b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ca51d4b33edcedf9568d5202767b896d828b5aeca18f2cdd82617688464b784
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ba3815fc0953b1b7f08cea092dfc0a62c4bbc2a2c68780d3f4dd0b5e22582a7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10e6ea705ea5a1704cd5773090c827a2013c8caab967a116ff24a5f57ce3ce90
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:647ac15563fcad903adbb616e9b2c36b237a3ed5939d088620212da969930f6c
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ff0b4da12dd0cdcb6e90b04160e41685d9ccc1fa1cc74bb7949edf700200d4
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93e3733c5b180986b7efbec17b663bf5231343d187374d184768fcd913797167
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:322470b09ac4f5d9443d55c37c8b8e7d0e8a1702208c81e52e3a58a8de515b5b
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9820ea4fec1b01f3da091290c3e8b5ddb86a3a3fa17285c248b64910c2d0b4f0
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ababe8505205ca2bcb959a2abbe2fbc8b6ad677bd43b1f2ee9055b3cb400b061
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7413035def085e41776a629afc94fc24fe5a955f1ad83b32f9b370ab60f9a18d
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed1c3ba656fdb40a72824e366a08e148a30e1089a6ecd019eaf28fa4a17859fa
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91e3953bcbf4089415abffbd914fbbe4580121f6c843eabbf70624c5ed144814
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:042c62210b9e1c9ed394e0a3362b1c773c07591d94f2716a8e928676134742b7
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:369fde7bff4dfc0d6b9cf773cf9b0352696083f84763999e05a631ee6d52c5e3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb54f0278c663494261026658652f845bae43245e75ccc213c6897de179f542a
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5802fc71a5ce36cca3a7f664b0fbad4d08efb9895a1c5eaa5692a421831e9c11
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6e8fc146b9f79d0e4ed1e4480536314f0ec3a00f8460a905aee0c66bb636dfe
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7422923379193082,
5
  "eval_steps": 5000,
6
- "global_step": 40000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -28071,6 +28071,3514 @@
28071
  "eval_samples_per_second": 3197.12,
28072
  "eval_steps_per_second": 49.956,
28073
  "step": 40000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28074
  }
28075
  ],
28076
  "logging_steps": 10,
@@ -28090,7 +31598,7 @@
28090
  "attributes": {}
28091
  }
28092
  },
28093
- "total_flos": 6.982091036164096e+18,
28094
  "train_batch_size": 8,
28095
  "trial_name": null,
28096
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.8350788801592217,
5
  "eval_steps": 5000,
6
+ "global_step": 45000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
28071
  "eval_samples_per_second": 3197.12,
28072
  "eval_steps_per_second": 49.956,
28073
  "step": 40000
28074
+ },
28075
+ {
28076
+ "epoch": 0.742477911003788,
28077
+ "grad_norm": 35.1875,
28078
+ "learning_rate": 9.8839878600443e-06,
28079
+ "loss": 19.1801,
28080
+ "step": 40010
28081
+ },
28082
+ {
28083
+ "epoch": 0.7426634840882679,
28084
+ "grad_norm": 35.59375,
28085
+ "learning_rate": 9.883958864258259e-06,
28086
+ "loss": 19.2543,
28087
+ "step": 40020
28088
+ },
28089
+ {
28090
+ "epoch": 0.7428490571727476,
28091
+ "grad_norm": 35.34375,
28092
+ "learning_rate": 9.883929868472216e-06,
28093
+ "loss": 19.4882,
28094
+ "step": 40030
28095
+ },
28096
+ {
28097
+ "epoch": 0.7430346302572275,
28098
+ "grad_norm": 37.21875,
28099
+ "learning_rate": 9.883900872686172e-06,
28100
+ "loss": 19.6511,
28101
+ "step": 40040
28102
+ },
28103
+ {
28104
+ "epoch": 0.7432202033417074,
28105
+ "grad_norm": 37.5,
28106
+ "learning_rate": 9.883871876900131e-06,
28107
+ "loss": 19.5237,
28108
+ "step": 40050
28109
+ },
28110
+ {
28111
+ "epoch": 0.7434057764261871,
28112
+ "grad_norm": 37.0,
28113
+ "learning_rate": 9.883842881114089e-06,
28114
+ "loss": 19.3757,
28115
+ "step": 40060
28116
+ },
28117
+ {
28118
+ "epoch": 0.743591349510667,
28119
+ "grad_norm": 36.03125,
28120
+ "learning_rate": 9.883813885328046e-06,
28121
+ "loss": 19.4206,
28122
+ "step": 40070
28123
+ },
28124
+ {
28125
+ "epoch": 0.7437769225951468,
28126
+ "grad_norm": 34.375,
28127
+ "learning_rate": 9.883784889542004e-06,
28128
+ "loss": 19.3413,
28129
+ "step": 40080
28130
+ },
28131
+ {
28132
+ "epoch": 0.7439624956796266,
28133
+ "grad_norm": 35.25,
28134
+ "learning_rate": 9.883755893755961e-06,
28135
+ "loss": 19.2141,
28136
+ "step": 40090
28137
+ },
28138
+ {
28139
+ "epoch": 0.7441480687641064,
28140
+ "grad_norm": 36.21875,
28141
+ "learning_rate": 9.883726897969918e-06,
28142
+ "loss": 19.7938,
28143
+ "step": 40100
28144
+ },
28145
+ {
28146
+ "epoch": 0.7443336418485863,
28147
+ "grad_norm": 35.875,
28148
+ "learning_rate": 9.883697902183876e-06,
28149
+ "loss": 19.4773,
28150
+ "step": 40110
28151
+ },
28152
+ {
28153
+ "epoch": 0.744519214933066,
28154
+ "grad_norm": 34.75,
28155
+ "learning_rate": 9.883668906397835e-06,
28156
+ "loss": 19.4294,
28157
+ "step": 40120
28158
+ },
28159
+ {
28160
+ "epoch": 0.7447047880175459,
28161
+ "grad_norm": 36.375,
28162
+ "learning_rate": 9.883639910611792e-06,
28163
+ "loss": 19.7319,
28164
+ "step": 40130
28165
+ },
28166
+ {
28167
+ "epoch": 0.7448903611020258,
28168
+ "grad_norm": 37.96875,
28169
+ "learning_rate": 9.883610914825748e-06,
28170
+ "loss": 19.207,
28171
+ "step": 40140
28172
+ },
28173
+ {
28174
+ "epoch": 0.7450759341865056,
28175
+ "grad_norm": 36.65625,
28176
+ "learning_rate": 9.883581919039707e-06,
28177
+ "loss": 19.2734,
28178
+ "step": 40150
28179
+ },
28180
+ {
28181
+ "epoch": 0.7452615072709854,
28182
+ "grad_norm": 36.125,
28183
+ "learning_rate": 9.883552923253664e-06,
28184
+ "loss": 19.6617,
28185
+ "step": 40160
28186
+ },
28187
+ {
28188
+ "epoch": 0.7454470803554653,
28189
+ "grad_norm": 37.25,
28190
+ "learning_rate": 9.883523927467622e-06,
28191
+ "loss": 19.5435,
28192
+ "step": 40170
28193
+ },
28194
+ {
28195
+ "epoch": 0.7456326534399451,
28196
+ "grad_norm": 34.3125,
28197
+ "learning_rate": 9.88349493168158e-06,
28198
+ "loss": 19.359,
28199
+ "step": 40180
28200
+ },
28201
+ {
28202
+ "epoch": 0.7458182265244249,
28203
+ "grad_norm": 37.34375,
28204
+ "learning_rate": 9.883465935895538e-06,
28205
+ "loss": 19.4984,
28206
+ "step": 40190
28207
+ },
28208
+ {
28209
+ "epoch": 0.7460037996089047,
28210
+ "grad_norm": 35.3125,
28211
+ "learning_rate": 9.883436940109494e-06,
28212
+ "loss": 19.529,
28213
+ "step": 40200
28214
+ },
28215
+ {
28216
+ "epoch": 0.7461893726933846,
28217
+ "grad_norm": 37.34375,
28218
+ "learning_rate": 9.883407944323452e-06,
28219
+ "loss": 19.4569,
28220
+ "step": 40210
28221
+ },
28222
+ {
28223
+ "epoch": 0.7463749457778643,
28224
+ "grad_norm": 35.28125,
28225
+ "learning_rate": 9.88337894853741e-06,
28226
+ "loss": 19.7786,
28227
+ "step": 40220
28228
+ },
28229
+ {
28230
+ "epoch": 0.7465605188623442,
28231
+ "grad_norm": 37.03125,
28232
+ "learning_rate": 9.883349952751368e-06,
28233
+ "loss": 19.3416,
28234
+ "step": 40230
28235
+ },
28236
+ {
28237
+ "epoch": 0.7467460919468241,
28238
+ "grad_norm": 34.96875,
28239
+ "learning_rate": 9.883320956965325e-06,
28240
+ "loss": 19.2966,
28241
+ "step": 40240
28242
+ },
28243
+ {
28244
+ "epoch": 0.7469316650313038,
28245
+ "grad_norm": 34.59375,
28246
+ "learning_rate": 9.883291961179283e-06,
28247
+ "loss": 19.4214,
28248
+ "step": 40250
28249
+ },
28250
+ {
28251
+ "epoch": 0.7471172381157837,
28252
+ "grad_norm": 36.59375,
28253
+ "learning_rate": 9.88326296539324e-06,
28254
+ "loss": 19.2123,
28255
+ "step": 40260
28256
+ },
28257
+ {
28258
+ "epoch": 0.7473028112002635,
28259
+ "grad_norm": 35.375,
28260
+ "learning_rate": 9.883233969607198e-06,
28261
+ "loss": 19.0819,
28262
+ "step": 40270
28263
+ },
28264
+ {
28265
+ "epoch": 0.7474883842847433,
28266
+ "grad_norm": 35.125,
28267
+ "learning_rate": 9.883204973821155e-06,
28268
+ "loss": 19.7437,
28269
+ "step": 40280
28270
+ },
28271
+ {
28272
+ "epoch": 0.7476739573692232,
28273
+ "grad_norm": 36.6875,
28274
+ "learning_rate": 9.883175978035112e-06,
28275
+ "loss": 19.0334,
28276
+ "step": 40290
28277
+ },
28278
+ {
28279
+ "epoch": 0.747859530453703,
28280
+ "grad_norm": 34.3125,
28281
+ "learning_rate": 9.883146982249072e-06,
28282
+ "loss": 19.398,
28283
+ "step": 40300
28284
+ },
28285
+ {
28286
+ "epoch": 0.7480451035381829,
28287
+ "grad_norm": 36.6875,
28288
+ "learning_rate": 9.883117986463027e-06,
28289
+ "loss": 19.3823,
28290
+ "step": 40310
28291
+ },
28292
+ {
28293
+ "epoch": 0.7482306766226626,
28294
+ "grad_norm": 36.53125,
28295
+ "learning_rate": 9.883088990676986e-06,
28296
+ "loss": 19.3756,
28297
+ "step": 40320
28298
+ },
28299
+ {
28300
+ "epoch": 0.7484162497071425,
28301
+ "grad_norm": 38.0,
28302
+ "learning_rate": 9.883059994890944e-06,
28303
+ "loss": 18.9045,
28304
+ "step": 40330
28305
+ },
28306
+ {
28307
+ "epoch": 0.7486018227916224,
28308
+ "grad_norm": 36.96875,
28309
+ "learning_rate": 9.883030999104901e-06,
28310
+ "loss": 19.2252,
28311
+ "step": 40340
28312
+ },
28313
+ {
28314
+ "epoch": 0.7487873958761021,
28315
+ "grad_norm": 37.1875,
28316
+ "learning_rate": 9.883002003318859e-06,
28317
+ "loss": 19.2014,
28318
+ "step": 40350
28319
+ },
28320
+ {
28321
+ "epoch": 0.748972968960582,
28322
+ "grad_norm": 33.34375,
28323
+ "learning_rate": 9.882973007532816e-06,
28324
+ "loss": 19.202,
28325
+ "step": 40360
28326
+ },
28327
+ {
28328
+ "epoch": 0.7491585420450618,
28329
+ "grad_norm": 33.0625,
28330
+ "learning_rate": 9.882944011746773e-06,
28331
+ "loss": 19.8227,
28332
+ "step": 40370
28333
+ },
28334
+ {
28335
+ "epoch": 0.7493441151295416,
28336
+ "grad_norm": 33.875,
28337
+ "learning_rate": 9.88291501596073e-06,
28338
+ "loss": 19.4171,
28339
+ "step": 40380
28340
+ },
28341
+ {
28342
+ "epoch": 0.7495296882140214,
28343
+ "grad_norm": 36.21875,
28344
+ "learning_rate": 9.882886020174688e-06,
28345
+ "loss": 19.1841,
28346
+ "step": 40390
28347
+ },
28348
+ {
28349
+ "epoch": 0.7497152612985013,
28350
+ "grad_norm": 38.03125,
28351
+ "learning_rate": 9.882857024388647e-06,
28352
+ "loss": 19.3367,
28353
+ "step": 40400
28354
+ },
28355
+ {
28356
+ "epoch": 0.749900834382981,
28357
+ "grad_norm": 36.15625,
28358
+ "learning_rate": 9.882828028602603e-06,
28359
+ "loss": 19.5167,
28360
+ "step": 40410
28361
+ },
28362
+ {
28363
+ "epoch": 0.7500864074674609,
28364
+ "grad_norm": 36.0625,
28365
+ "learning_rate": 9.88279903281656e-06,
28366
+ "loss": 19.3782,
28367
+ "step": 40420
28368
+ },
28369
+ {
28370
+ "epoch": 0.7502719805519408,
28371
+ "grad_norm": 37.125,
28372
+ "learning_rate": 9.88277003703052e-06,
28373
+ "loss": 19.5295,
28374
+ "step": 40430
28375
+ },
28376
+ {
28377
+ "epoch": 0.7504575536364205,
28378
+ "grad_norm": 34.46875,
28379
+ "learning_rate": 9.882741041244477e-06,
28380
+ "loss": 19.3648,
28381
+ "step": 40440
28382
+ },
28383
+ {
28384
+ "epoch": 0.7506431267209004,
28385
+ "grad_norm": 35.40625,
28386
+ "learning_rate": 9.882712045458434e-06,
28387
+ "loss": 19.4462,
28388
+ "step": 40450
28389
+ },
28390
+ {
28391
+ "epoch": 0.7508286998053803,
28392
+ "grad_norm": 37.78125,
28393
+ "learning_rate": 9.882683049672392e-06,
28394
+ "loss": 19.5866,
28395
+ "step": 40460
28396
+ },
28397
+ {
28398
+ "epoch": 0.75101427288986,
28399
+ "grad_norm": 36.46875,
28400
+ "learning_rate": 9.88265405388635e-06,
28401
+ "loss": 19.1955,
28402
+ "step": 40470
28403
+ },
28404
+ {
28405
+ "epoch": 0.7511998459743399,
28406
+ "grad_norm": 36.59375,
28407
+ "learning_rate": 9.882625058100307e-06,
28408
+ "loss": 19.3081,
28409
+ "step": 40480
28410
+ },
28411
+ {
28412
+ "epoch": 0.7513854190588197,
28413
+ "grad_norm": 35.5625,
28414
+ "learning_rate": 9.882596062314264e-06,
28415
+ "loss": 19.5015,
28416
+ "step": 40490
28417
+ },
28418
+ {
28419
+ "epoch": 0.7515709921432996,
28420
+ "grad_norm": 36.90625,
28421
+ "learning_rate": 9.882567066528223e-06,
28422
+ "loss": 19.6425,
28423
+ "step": 40500
28424
+ },
28425
+ {
28426
+ "epoch": 0.7517565652277793,
28427
+ "grad_norm": 37.1875,
28428
+ "learning_rate": 9.88253807074218e-06,
28429
+ "loss": 19.2909,
28430
+ "step": 40510
28431
+ },
28432
+ {
28433
+ "epoch": 0.7519421383122592,
28434
+ "grad_norm": 33.59375,
28435
+ "learning_rate": 9.882509074956136e-06,
28436
+ "loss": 19.621,
28437
+ "step": 40520
28438
+ },
28439
+ {
28440
+ "epoch": 0.7521277113967391,
28441
+ "grad_norm": 36.34375,
28442
+ "learning_rate": 9.882480079170095e-06,
28443
+ "loss": 19.4051,
28444
+ "step": 40530
28445
+ },
28446
+ {
28447
+ "epoch": 0.7523132844812188,
28448
+ "grad_norm": 36.125,
28449
+ "learning_rate": 9.882451083384053e-06,
28450
+ "loss": 19.5807,
28451
+ "step": 40540
28452
+ },
28453
+ {
28454
+ "epoch": 0.7524988575656987,
28455
+ "grad_norm": 36.25,
28456
+ "learning_rate": 9.88242208759801e-06,
28457
+ "loss": 19.2389,
28458
+ "step": 40550
28459
+ },
28460
+ {
28461
+ "epoch": 0.7526844306501785,
28462
+ "grad_norm": 33.71875,
28463
+ "learning_rate": 9.882393091811968e-06,
28464
+ "loss": 19.5497,
28465
+ "step": 40560
28466
+ },
28467
+ {
28468
+ "epoch": 0.7528700037346583,
28469
+ "grad_norm": 36.6875,
28470
+ "learning_rate": 9.882364096025925e-06,
28471
+ "loss": 19.7689,
28472
+ "step": 40570
28473
+ },
28474
+ {
28475
+ "epoch": 0.7530555768191382,
28476
+ "grad_norm": 37.34375,
28477
+ "learning_rate": 9.882335100239882e-06,
28478
+ "loss": 19.1836,
28479
+ "step": 40580
28480
+ },
28481
+ {
28482
+ "epoch": 0.753241149903618,
28483
+ "grad_norm": 34.21875,
28484
+ "learning_rate": 9.88230610445384e-06,
28485
+ "loss": 18.9019,
28486
+ "step": 40590
28487
+ },
28488
+ {
28489
+ "epoch": 0.7534267229880978,
28490
+ "grad_norm": 35.53125,
28491
+ "learning_rate": 9.882277108667799e-06,
28492
+ "loss": 19.0727,
28493
+ "step": 40600
28494
+ },
28495
+ {
28496
+ "epoch": 0.7536122960725776,
28497
+ "grad_norm": 34.8125,
28498
+ "learning_rate": 9.882248112881756e-06,
28499
+ "loss": 19.1506,
28500
+ "step": 40610
28501
+ },
28502
+ {
28503
+ "epoch": 0.7537978691570575,
28504
+ "grad_norm": 35.625,
28505
+ "learning_rate": 9.882219117095714e-06,
28506
+ "loss": 19.1912,
28507
+ "step": 40620
28508
+ },
28509
+ {
28510
+ "epoch": 0.7539834422415372,
28511
+ "grad_norm": 35.84375,
28512
+ "learning_rate": 9.882190121309671e-06,
28513
+ "loss": 19.2547,
28514
+ "step": 40630
28515
+ },
28516
+ {
28517
+ "epoch": 0.7541690153260171,
28518
+ "grad_norm": 36.375,
28519
+ "learning_rate": 9.882161125523628e-06,
28520
+ "loss": 19.6462,
28521
+ "step": 40640
28522
+ },
28523
+ {
28524
+ "epoch": 0.754354588410497,
28525
+ "grad_norm": 37.21875,
28526
+ "learning_rate": 9.882132129737586e-06,
28527
+ "loss": 19.0219,
28528
+ "step": 40650
28529
+ },
28530
+ {
28531
+ "epoch": 0.7545401614949767,
28532
+ "grad_norm": 35.25,
28533
+ "learning_rate": 9.882103133951543e-06,
28534
+ "loss": 19.2065,
28535
+ "step": 40660
28536
+ },
28537
+ {
28538
+ "epoch": 0.7547257345794566,
28539
+ "grad_norm": 35.03125,
28540
+ "learning_rate": 9.882074138165502e-06,
28541
+ "loss": 19.058,
28542
+ "step": 40670
28543
+ },
28544
+ {
28545
+ "epoch": 0.7549113076639364,
28546
+ "grad_norm": 36.21875,
28547
+ "learning_rate": 9.882045142379458e-06,
28548
+ "loss": 19.1856,
28549
+ "step": 40680
28550
+ },
28551
+ {
28552
+ "epoch": 0.7550968807484163,
28553
+ "grad_norm": 35.5625,
28554
+ "learning_rate": 9.882016146593416e-06,
28555
+ "loss": 19.3971,
28556
+ "step": 40690
28557
+ },
28558
+ {
28559
+ "epoch": 0.755282453832896,
28560
+ "grad_norm": 34.59375,
28561
+ "learning_rate": 9.881987150807375e-06,
28562
+ "loss": 19.5486,
28563
+ "step": 40700
28564
+ },
28565
+ {
28566
+ "epoch": 0.7554680269173759,
28567
+ "grad_norm": 35.65625,
28568
+ "learning_rate": 9.881958155021332e-06,
28569
+ "loss": 19.3793,
28570
+ "step": 40710
28571
+ },
28572
+ {
28573
+ "epoch": 0.7556536000018558,
28574
+ "grad_norm": 37.78125,
28575
+ "learning_rate": 9.88192915923529e-06,
28576
+ "loss": 19.3927,
28577
+ "step": 40720
28578
+ },
28579
+ {
28580
+ "epoch": 0.7558391730863355,
28581
+ "grad_norm": 34.34375,
28582
+ "learning_rate": 9.881900163449247e-06,
28583
+ "loss": 19.2427,
28584
+ "step": 40730
28585
+ },
28586
+ {
28587
+ "epoch": 0.7560247461708154,
28588
+ "grad_norm": 35.46875,
28589
+ "learning_rate": 9.881871167663204e-06,
28590
+ "loss": 19.363,
28591
+ "step": 40740
28592
+ },
28593
+ {
28594
+ "epoch": 0.7562103192552952,
28595
+ "grad_norm": 34.28125,
28596
+ "learning_rate": 9.881842171877162e-06,
28597
+ "loss": 19.2003,
28598
+ "step": 40750
28599
+ },
28600
+ {
28601
+ "epoch": 0.756395892339775,
28602
+ "grad_norm": 36.21875,
28603
+ "learning_rate": 9.881813176091119e-06,
28604
+ "loss": 19.225,
28605
+ "step": 40760
28606
+ },
28607
+ {
28608
+ "epoch": 0.7565814654242549,
28609
+ "grad_norm": 35.375,
28610
+ "learning_rate": 9.881784180305078e-06,
28611
+ "loss": 19.8474,
28612
+ "step": 40770
28613
+ },
28614
+ {
28615
+ "epoch": 0.7567670385087347,
28616
+ "grad_norm": 35.46875,
28617
+ "learning_rate": 9.881755184519036e-06,
28618
+ "loss": 19.3724,
28619
+ "step": 40780
28620
+ },
28621
+ {
28622
+ "epoch": 0.7569526115932145,
28623
+ "grad_norm": 36.125,
28624
+ "learning_rate": 9.881726188732991e-06,
28625
+ "loss": 19.03,
28626
+ "step": 40790
28627
+ },
28628
+ {
28629
+ "epoch": 0.7571381846776943,
28630
+ "grad_norm": 36.75,
28631
+ "learning_rate": 9.88169719294695e-06,
28632
+ "loss": 19.4802,
28633
+ "step": 40800
28634
+ },
28635
+ {
28636
+ "epoch": 0.7573237577621742,
28637
+ "grad_norm": 34.03125,
28638
+ "learning_rate": 9.881668197160908e-06,
28639
+ "loss": 19.0697,
28640
+ "step": 40810
28641
+ },
28642
+ {
28643
+ "epoch": 0.757509330846654,
28644
+ "grad_norm": 35.4375,
28645
+ "learning_rate": 9.881639201374865e-06,
28646
+ "loss": 19.6207,
28647
+ "step": 40820
28648
+ },
28649
+ {
28650
+ "epoch": 0.7576949039311338,
28651
+ "grad_norm": 35.09375,
28652
+ "learning_rate": 9.881610205588823e-06,
28653
+ "loss": 19.4589,
28654
+ "step": 40830
28655
+ },
28656
+ {
28657
+ "epoch": 0.7578804770156137,
28658
+ "grad_norm": 34.4375,
28659
+ "learning_rate": 9.88158120980278e-06,
28660
+ "loss": 19.5961,
28661
+ "step": 40840
28662
+ },
28663
+ {
28664
+ "epoch": 0.7580660501000935,
28665
+ "grad_norm": 34.625,
28666
+ "learning_rate": 9.881552214016737e-06,
28667
+ "loss": 19.2363,
28668
+ "step": 40850
28669
+ },
28670
+ {
28671
+ "epoch": 0.7582516231845733,
28672
+ "grad_norm": 37.0,
28673
+ "learning_rate": 9.881523218230695e-06,
28674
+ "loss": 18.9855,
28675
+ "step": 40860
28676
+ },
28677
+ {
28678
+ "epoch": 0.7584371962690531,
28679
+ "grad_norm": 36.3125,
28680
+ "learning_rate": 9.881494222444652e-06,
28681
+ "loss": 19.5753,
28682
+ "step": 40870
28683
+ },
28684
+ {
28685
+ "epoch": 0.758622769353533,
28686
+ "grad_norm": 37.625,
28687
+ "learning_rate": 9.881465226658611e-06,
28688
+ "loss": 19.2938,
28689
+ "step": 40880
28690
+ },
28691
+ {
28692
+ "epoch": 0.7588083424380128,
28693
+ "grad_norm": 34.75,
28694
+ "learning_rate": 9.881436230872569e-06,
28695
+ "loss": 19.4143,
28696
+ "step": 40890
28697
+ },
28698
+ {
28699
+ "epoch": 0.7589939155224926,
28700
+ "grad_norm": 36.03125,
28701
+ "learning_rate": 9.881407235086526e-06,
28702
+ "loss": 19.3378,
28703
+ "step": 40900
28704
+ },
28705
+ {
28706
+ "epoch": 0.7591794886069725,
28707
+ "grad_norm": 34.65625,
28708
+ "learning_rate": 9.881378239300484e-06,
28709
+ "loss": 19.4041,
28710
+ "step": 40910
28711
+ },
28712
+ {
28713
+ "epoch": 0.7593650616914522,
28714
+ "grad_norm": 36.09375,
28715
+ "learning_rate": 9.881349243514441e-06,
28716
+ "loss": 19.7739,
28717
+ "step": 40920
28718
+ },
28719
+ {
28720
+ "epoch": 0.7595506347759321,
28721
+ "grad_norm": 36.59375,
28722
+ "learning_rate": 9.881320247728398e-06,
28723
+ "loss": 19.1821,
28724
+ "step": 40930
28725
+ },
28726
+ {
28727
+ "epoch": 0.759736207860412,
28728
+ "grad_norm": 34.6875,
28729
+ "learning_rate": 9.881291251942356e-06,
28730
+ "loss": 18.7575,
28731
+ "step": 40940
28732
+ },
28733
+ {
28734
+ "epoch": 0.7599217809448917,
28735
+ "grad_norm": 37.6875,
28736
+ "learning_rate": 9.881262256156313e-06,
28737
+ "loss": 19.4397,
28738
+ "step": 40950
28739
+ },
28740
+ {
28741
+ "epoch": 0.7601073540293716,
28742
+ "grad_norm": 36.28125,
28743
+ "learning_rate": 9.88123326037027e-06,
28744
+ "loss": 19.3813,
28745
+ "step": 40960
28746
+ },
28747
+ {
28748
+ "epoch": 0.7602929271138514,
28749
+ "grad_norm": 34.84375,
28750
+ "learning_rate": 9.881204264584228e-06,
28751
+ "loss": 19.342,
28752
+ "step": 40970
28753
+ },
28754
+ {
28755
+ "epoch": 0.7604785001983312,
28756
+ "grad_norm": 35.9375,
28757
+ "learning_rate": 9.881175268798187e-06,
28758
+ "loss": 18.8295,
28759
+ "step": 40980
28760
+ },
28761
+ {
28762
+ "epoch": 0.760664073282811,
28763
+ "grad_norm": 35.0,
28764
+ "learning_rate": 9.881146273012145e-06,
28765
+ "loss": 19.1556,
28766
+ "step": 40990
28767
+ },
28768
+ {
28769
+ "epoch": 0.7608496463672909,
28770
+ "grad_norm": 35.34375,
28771
+ "learning_rate": 9.8811172772261e-06,
28772
+ "loss": 19.3345,
28773
+ "step": 41000
28774
+ },
28775
+ {
28776
+ "epoch": 0.7610352194517707,
28777
+ "grad_norm": 37.96875,
28778
+ "learning_rate": 9.88108828144006e-06,
28779
+ "loss": 19.3571,
28780
+ "step": 41010
28781
+ },
28782
+ {
28783
+ "epoch": 0.7612207925362505,
28784
+ "grad_norm": 35.4375,
28785
+ "learning_rate": 9.881059285654017e-06,
28786
+ "loss": 19.0342,
28787
+ "step": 41020
28788
+ },
28789
+ {
28790
+ "epoch": 0.7614063656207304,
28791
+ "grad_norm": 35.59375,
28792
+ "learning_rate": 9.881030289867974e-06,
28793
+ "loss": 19.2773,
28794
+ "step": 41030
28795
+ },
28796
+ {
28797
+ "epoch": 0.7615919387052102,
28798
+ "grad_norm": 36.96875,
28799
+ "learning_rate": 9.881001294081932e-06,
28800
+ "loss": 19.5343,
28801
+ "step": 41040
28802
+ },
28803
+ {
28804
+ "epoch": 0.76177751178969,
28805
+ "grad_norm": 34.40625,
28806
+ "learning_rate": 9.88097229829589e-06,
28807
+ "loss": 18.9572,
28808
+ "step": 41050
28809
+ },
28810
+ {
28811
+ "epoch": 0.7619630848741699,
28812
+ "grad_norm": 35.65625,
28813
+ "learning_rate": 9.880943302509846e-06,
28814
+ "loss": 19.2055,
28815
+ "step": 41060
28816
+ },
28817
+ {
28818
+ "epoch": 0.7621486579586497,
28819
+ "grad_norm": 34.0,
28820
+ "learning_rate": 9.880914306723804e-06,
28821
+ "loss": 19.3073,
28822
+ "step": 41070
28823
+ },
28824
+ {
28825
+ "epoch": 0.7623342310431295,
28826
+ "grad_norm": 35.40625,
28827
+ "learning_rate": 9.880885310937763e-06,
28828
+ "loss": 19.5048,
28829
+ "step": 41080
28830
+ },
28831
+ {
28832
+ "epoch": 0.7625198041276093,
28833
+ "grad_norm": 36.1875,
28834
+ "learning_rate": 9.88085631515172e-06,
28835
+ "loss": 19.8103,
28836
+ "step": 41090
28837
+ },
28838
+ {
28839
+ "epoch": 0.7627053772120892,
28840
+ "grad_norm": 36.65625,
28841
+ "learning_rate": 9.880827319365678e-06,
28842
+ "loss": 19.5557,
28843
+ "step": 41100
28844
+ },
28845
+ {
28846
+ "epoch": 0.762890950296569,
28847
+ "grad_norm": 34.53125,
28848
+ "learning_rate": 9.880798323579635e-06,
28849
+ "loss": 19.5534,
28850
+ "step": 41110
28851
+ },
28852
+ {
28853
+ "epoch": 0.7630765233810488,
28854
+ "grad_norm": 35.6875,
28855
+ "learning_rate": 9.880769327793593e-06,
28856
+ "loss": 19.303,
28857
+ "step": 41120
28858
+ },
28859
+ {
28860
+ "epoch": 0.7632620964655287,
28861
+ "grad_norm": 34.71875,
28862
+ "learning_rate": 9.88074033200755e-06,
28863
+ "loss": 19.164,
28864
+ "step": 41130
28865
+ },
28866
+ {
28867
+ "epoch": 0.7634476695500084,
28868
+ "grad_norm": 37.0625,
28869
+ "learning_rate": 9.880711336221507e-06,
28870
+ "loss": 19.1969,
28871
+ "step": 41140
28872
+ },
28873
+ {
28874
+ "epoch": 0.7636332426344883,
28875
+ "grad_norm": 34.28125,
28876
+ "learning_rate": 9.880682340435466e-06,
28877
+ "loss": 19.5003,
28878
+ "step": 41150
28879
+ },
28880
+ {
28881
+ "epoch": 0.7638188157189681,
28882
+ "grad_norm": 36.78125,
28883
+ "learning_rate": 9.880653344649422e-06,
28884
+ "loss": 19.3433,
28885
+ "step": 41160
28886
+ },
28887
+ {
28888
+ "epoch": 0.7640043888034479,
28889
+ "grad_norm": 37.75,
28890
+ "learning_rate": 9.88062434886338e-06,
28891
+ "loss": 18.9825,
28892
+ "step": 41170
28893
+ },
28894
+ {
28895
+ "epoch": 0.7641899618879278,
28896
+ "grad_norm": 36.34375,
28897
+ "learning_rate": 9.880595353077339e-06,
28898
+ "loss": 19.0039,
28899
+ "step": 41180
28900
+ },
28901
+ {
28902
+ "epoch": 0.7643755349724076,
28903
+ "grad_norm": 36.8125,
28904
+ "learning_rate": 9.880566357291296e-06,
28905
+ "loss": 19.4506,
28906
+ "step": 41190
28907
+ },
28908
+ {
28909
+ "epoch": 0.7645611080568875,
28910
+ "grad_norm": 33.78125,
28911
+ "learning_rate": 9.880537361505253e-06,
28912
+ "loss": 19.3604,
28913
+ "step": 41200
28914
+ },
28915
+ {
28916
+ "epoch": 0.7647466811413672,
28917
+ "grad_norm": 37.875,
28918
+ "learning_rate": 9.880508365719211e-06,
28919
+ "loss": 19.2318,
28920
+ "step": 41210
28921
+ },
28922
+ {
28923
+ "epoch": 0.7649322542258471,
28924
+ "grad_norm": 35.5625,
28925
+ "learning_rate": 9.880479369933168e-06,
28926
+ "loss": 19.3256,
28927
+ "step": 41220
28928
+ },
28929
+ {
28930
+ "epoch": 0.765117827310327,
28931
+ "grad_norm": 37.40625,
28932
+ "learning_rate": 9.880450374147126e-06,
28933
+ "loss": 19.5014,
28934
+ "step": 41230
28935
+ },
28936
+ {
28937
+ "epoch": 0.7653034003948067,
28938
+ "grad_norm": 36.9375,
28939
+ "learning_rate": 9.880421378361083e-06,
28940
+ "loss": 19.2619,
28941
+ "step": 41240
28942
+ },
28943
+ {
28944
+ "epoch": 0.7654889734792866,
28945
+ "grad_norm": 37.53125,
28946
+ "learning_rate": 9.880392382575042e-06,
28947
+ "loss": 19.1359,
28948
+ "step": 41250
28949
+ },
28950
+ {
28951
+ "epoch": 0.7656745465637664,
28952
+ "grad_norm": 35.375,
28953
+ "learning_rate": 9.880363386789e-06,
28954
+ "loss": 19.3461,
28955
+ "step": 41260
28956
+ },
28957
+ {
28958
+ "epoch": 0.7658601196482462,
28959
+ "grad_norm": 36.71875,
28960
+ "learning_rate": 9.880334391002955e-06,
28961
+ "loss": 18.8452,
28962
+ "step": 41270
28963
+ },
28964
+ {
28965
+ "epoch": 0.766045692732726,
28966
+ "grad_norm": 36.03125,
28967
+ "learning_rate": 9.880305395216914e-06,
28968
+ "loss": 19.2407,
28969
+ "step": 41280
28970
+ },
28971
+ {
28972
+ "epoch": 0.7662312658172059,
28973
+ "grad_norm": 36.78125,
28974
+ "learning_rate": 9.880276399430872e-06,
28975
+ "loss": 19.4143,
28976
+ "step": 41290
28977
+ },
28978
+ {
28979
+ "epoch": 0.7664168389016857,
28980
+ "grad_norm": 35.8125,
28981
+ "learning_rate": 9.88024740364483e-06,
28982
+ "loss": 19.3873,
28983
+ "step": 41300
28984
+ },
28985
+ {
28986
+ "epoch": 0.7666024119861655,
28987
+ "grad_norm": 35.125,
28988
+ "learning_rate": 9.880218407858787e-06,
28989
+ "loss": 19.6352,
28990
+ "step": 41310
28991
+ },
28992
+ {
28993
+ "epoch": 0.7667879850706454,
28994
+ "grad_norm": 36.0625,
28995
+ "learning_rate": 9.880189412072744e-06,
28996
+ "loss": 19.2883,
28997
+ "step": 41320
28998
+ },
28999
+ {
29000
+ "epoch": 0.7669735581551251,
29001
+ "grad_norm": 36.40625,
29002
+ "learning_rate": 9.880160416286701e-06,
29003
+ "loss": 18.7959,
29004
+ "step": 41330
29005
+ },
29006
+ {
29007
+ "epoch": 0.767159131239605,
29008
+ "grad_norm": 34.78125,
29009
+ "learning_rate": 9.880131420500659e-06,
29010
+ "loss": 18.8009,
29011
+ "step": 41340
29012
+ },
29013
+ {
29014
+ "epoch": 0.7673447043240849,
29015
+ "grad_norm": 36.5625,
29016
+ "learning_rate": 9.880102424714618e-06,
29017
+ "loss": 19.1237,
29018
+ "step": 41350
29019
+ },
29020
+ {
29021
+ "epoch": 0.7675302774085646,
29022
+ "grad_norm": 38.78125,
29023
+ "learning_rate": 9.880073428928575e-06,
29024
+ "loss": 18.9787,
29025
+ "step": 41360
29026
+ },
29027
+ {
29028
+ "epoch": 0.7677158504930445,
29029
+ "grad_norm": 32.71875,
29030
+ "learning_rate": 9.880044433142533e-06,
29031
+ "loss": 19.2766,
29032
+ "step": 41370
29033
+ },
29034
+ {
29035
+ "epoch": 0.7679014235775243,
29036
+ "grad_norm": 35.1875,
29037
+ "learning_rate": 9.88001543735649e-06,
29038
+ "loss": 18.937,
29039
+ "step": 41380
29040
+ },
29041
+ {
29042
+ "epoch": 0.7680869966620042,
29043
+ "grad_norm": 35.40625,
29044
+ "learning_rate": 9.879986441570448e-06,
29045
+ "loss": 19.729,
29046
+ "step": 41390
29047
+ },
29048
+ {
29049
+ "epoch": 0.768272569746484,
29050
+ "grad_norm": 34.71875,
29051
+ "learning_rate": 9.879957445784405e-06,
29052
+ "loss": 19.1228,
29053
+ "step": 41400
29054
+ },
29055
+ {
29056
+ "epoch": 0.7684581428309638,
29057
+ "grad_norm": 36.5,
29058
+ "learning_rate": 9.879928449998362e-06,
29059
+ "loss": 19.3379,
29060
+ "step": 41410
29061
+ },
29062
+ {
29063
+ "epoch": 0.7686437159154437,
29064
+ "grad_norm": 36.75,
29065
+ "learning_rate": 9.87989945421232e-06,
29066
+ "loss": 19.4041,
29067
+ "step": 41420
29068
+ },
29069
+ {
29070
+ "epoch": 0.7688292889999234,
29071
+ "grad_norm": 37.03125,
29072
+ "learning_rate": 9.879870458426277e-06,
29073
+ "loss": 19.2628,
29074
+ "step": 41430
29075
+ },
29076
+ {
29077
+ "epoch": 0.7690148620844033,
29078
+ "grad_norm": 34.875,
29079
+ "learning_rate": 9.879841462640235e-06,
29080
+ "loss": 19.4629,
29081
+ "step": 41440
29082
+ },
29083
+ {
29084
+ "epoch": 0.7692004351688831,
29085
+ "grad_norm": 35.96875,
29086
+ "learning_rate": 9.879812466854192e-06,
29087
+ "loss": 19.196,
29088
+ "step": 41450
29089
+ },
29090
+ {
29091
+ "epoch": 0.7693860082533629,
29092
+ "grad_norm": 38.15625,
29093
+ "learning_rate": 9.879783471068151e-06,
29094
+ "loss": 19.752,
29095
+ "step": 41460
29096
+ },
29097
+ {
29098
+ "epoch": 0.7695715813378428,
29099
+ "grad_norm": 38.75,
29100
+ "learning_rate": 9.879754475282109e-06,
29101
+ "loss": 19.2405,
29102
+ "step": 41470
29103
+ },
29104
+ {
29105
+ "epoch": 0.7697571544223226,
29106
+ "grad_norm": 37.71875,
29107
+ "learning_rate": 9.879725479496066e-06,
29108
+ "loss": 19.2407,
29109
+ "step": 41480
29110
+ },
29111
+ {
29112
+ "epoch": 0.7699427275068024,
29113
+ "grad_norm": 36.78125,
29114
+ "learning_rate": 9.879696483710023e-06,
29115
+ "loss": 19.1094,
29116
+ "step": 41490
29117
+ },
29118
+ {
29119
+ "epoch": 0.7701283005912822,
29120
+ "grad_norm": 36.1875,
29121
+ "learning_rate": 9.87966748792398e-06,
29122
+ "loss": 19.3451,
29123
+ "step": 41500
29124
+ },
29125
+ {
29126
+ "epoch": 0.7703138736757621,
29127
+ "grad_norm": 36.53125,
29128
+ "learning_rate": 9.879638492137938e-06,
29129
+ "loss": 18.9383,
29130
+ "step": 41510
29131
+ },
29132
+ {
29133
+ "epoch": 0.7704994467602418,
29134
+ "grad_norm": 34.0,
29135
+ "learning_rate": 9.879609496351896e-06,
29136
+ "loss": 19.115,
29137
+ "step": 41520
29138
+ },
29139
+ {
29140
+ "epoch": 0.7706850198447217,
29141
+ "grad_norm": 35.78125,
29142
+ "learning_rate": 9.879580500565855e-06,
29143
+ "loss": 19.4492,
29144
+ "step": 41530
29145
+ },
29146
+ {
29147
+ "epoch": 0.7708705929292016,
29148
+ "grad_norm": 37.40625,
29149
+ "learning_rate": 9.87955150477981e-06,
29150
+ "loss": 19.3029,
29151
+ "step": 41540
29152
+ },
29153
+ {
29154
+ "epoch": 0.7710561660136813,
29155
+ "grad_norm": 35.15625,
29156
+ "learning_rate": 9.879522508993768e-06,
29157
+ "loss": 18.9063,
29158
+ "step": 41550
29159
+ },
29160
+ {
29161
+ "epoch": 0.7712417390981612,
29162
+ "grad_norm": 35.71875,
29163
+ "learning_rate": 9.879493513207727e-06,
29164
+ "loss": 19.6495,
29165
+ "step": 41560
29166
+ },
29167
+ {
29168
+ "epoch": 0.771427312182641,
29169
+ "grad_norm": 36.1875,
29170
+ "learning_rate": 9.879464517421684e-06,
29171
+ "loss": 19.384,
29172
+ "step": 41570
29173
+ },
29174
+ {
29175
+ "epoch": 0.7716128852671209,
29176
+ "grad_norm": 35.375,
29177
+ "learning_rate": 9.879435521635642e-06,
29178
+ "loss": 19.2114,
29179
+ "step": 41580
29180
+ },
29181
+ {
29182
+ "epoch": 0.7717984583516007,
29183
+ "grad_norm": 34.53125,
29184
+ "learning_rate": 9.879406525849599e-06,
29185
+ "loss": 19.6205,
29186
+ "step": 41590
29187
+ },
29188
+ {
29189
+ "epoch": 0.7719840314360805,
29190
+ "grad_norm": 33.59375,
29191
+ "learning_rate": 9.879377530063557e-06,
29192
+ "loss": 19.7098,
29193
+ "step": 41600
29194
+ },
29195
+ {
29196
+ "epoch": 0.7721696045205604,
29197
+ "grad_norm": 36.25,
29198
+ "learning_rate": 9.879348534277514e-06,
29199
+ "loss": 19.5254,
29200
+ "step": 41610
29201
+ },
29202
+ {
29203
+ "epoch": 0.7723551776050401,
29204
+ "grad_norm": 33.375,
29205
+ "learning_rate": 9.879319538491471e-06,
29206
+ "loss": 19.4273,
29207
+ "step": 41620
29208
+ },
29209
+ {
29210
+ "epoch": 0.77254075068952,
29211
+ "grad_norm": 36.46875,
29212
+ "learning_rate": 9.87929054270543e-06,
29213
+ "loss": 19.7937,
29214
+ "step": 41630
29215
+ },
29216
+ {
29217
+ "epoch": 0.7727263237739999,
29218
+ "grad_norm": 37.375,
29219
+ "learning_rate": 9.879261546919388e-06,
29220
+ "loss": 19.3302,
29221
+ "step": 41640
29222
+ },
29223
+ {
29224
+ "epoch": 0.7729118968584796,
29225
+ "grad_norm": 34.75,
29226
+ "learning_rate": 9.879232551133344e-06,
29227
+ "loss": 19.7385,
29228
+ "step": 41650
29229
+ },
29230
+ {
29231
+ "epoch": 0.7730974699429595,
29232
+ "grad_norm": 35.15625,
29233
+ "learning_rate": 9.879203555347303e-06,
29234
+ "loss": 19.3012,
29235
+ "step": 41660
29236
+ },
29237
+ {
29238
+ "epoch": 0.7732830430274393,
29239
+ "grad_norm": 36.46875,
29240
+ "learning_rate": 9.87917455956126e-06,
29241
+ "loss": 19.6957,
29242
+ "step": 41670
29243
+ },
29244
+ {
29245
+ "epoch": 0.7734686161119191,
29246
+ "grad_norm": 35.5625,
29247
+ "learning_rate": 9.879145563775217e-06,
29248
+ "loss": 19.4753,
29249
+ "step": 41680
29250
+ },
29251
+ {
29252
+ "epoch": 0.773654189196399,
29253
+ "grad_norm": 36.09375,
29254
+ "learning_rate": 9.879116567989175e-06,
29255
+ "loss": 18.989,
29256
+ "step": 41690
29257
+ },
29258
+ {
29259
+ "epoch": 0.7738397622808788,
29260
+ "grad_norm": 35.53125,
29261
+ "learning_rate": 9.879087572203132e-06,
29262
+ "loss": 19.5413,
29263
+ "step": 41700
29264
+ },
29265
+ {
29266
+ "epoch": 0.7740253353653586,
29267
+ "grad_norm": 35.4375,
29268
+ "learning_rate": 9.87905857641709e-06,
29269
+ "loss": 19.1875,
29270
+ "step": 41710
29271
+ },
29272
+ {
29273
+ "epoch": 0.7742109084498384,
29274
+ "grad_norm": 35.875,
29275
+ "learning_rate": 9.879029580631047e-06,
29276
+ "loss": 18.9351,
29277
+ "step": 41720
29278
+ },
29279
+ {
29280
+ "epoch": 0.7743964815343183,
29281
+ "grad_norm": 35.1875,
29282
+ "learning_rate": 9.879000584845006e-06,
29283
+ "loss": 19.5943,
29284
+ "step": 41730
29285
+ },
29286
+ {
29287
+ "epoch": 0.7745820546187981,
29288
+ "grad_norm": 35.15625,
29289
+ "learning_rate": 9.878971589058964e-06,
29290
+ "loss": 19.2853,
29291
+ "step": 41740
29292
+ },
29293
+ {
29294
+ "epoch": 0.7747676277032779,
29295
+ "grad_norm": 36.65625,
29296
+ "learning_rate": 9.87894259327292e-06,
29297
+ "loss": 19.1505,
29298
+ "step": 41750
29299
+ },
29300
+ {
29301
+ "epoch": 0.7749532007877578,
29302
+ "grad_norm": 37.03125,
29303
+ "learning_rate": 9.878913597486878e-06,
29304
+ "loss": 19.5033,
29305
+ "step": 41760
29306
+ },
29307
+ {
29308
+ "epoch": 0.7751387738722376,
29309
+ "grad_norm": 35.78125,
29310
+ "learning_rate": 9.878884601700836e-06,
29311
+ "loss": 19.2482,
29312
+ "step": 41770
29313
+ },
29314
+ {
29315
+ "epoch": 0.7753243469567174,
29316
+ "grad_norm": 36.84375,
29317
+ "learning_rate": 9.878855605914793e-06,
29318
+ "loss": 19.6271,
29319
+ "step": 41780
29320
+ },
29321
+ {
29322
+ "epoch": 0.7755099200411972,
29323
+ "grad_norm": 36.375,
29324
+ "learning_rate": 9.87882661012875e-06,
29325
+ "loss": 19.6722,
29326
+ "step": 41790
29327
+ },
29328
+ {
29329
+ "epoch": 0.7756954931256771,
29330
+ "grad_norm": 36.09375,
29331
+ "learning_rate": 9.87879761434271e-06,
29332
+ "loss": 19.1386,
29333
+ "step": 41800
29334
+ },
29335
+ {
29336
+ "epoch": 0.7758810662101568,
29337
+ "grad_norm": 35.0,
29338
+ "learning_rate": 9.878768618556665e-06,
29339
+ "loss": 19.687,
29340
+ "step": 41810
29341
+ },
29342
+ {
29343
+ "epoch": 0.7760666392946367,
29344
+ "grad_norm": 35.5,
29345
+ "learning_rate": 9.878739622770623e-06,
29346
+ "loss": 19.3746,
29347
+ "step": 41820
29348
+ },
29349
+ {
29350
+ "epoch": 0.7762522123791166,
29351
+ "grad_norm": 36.71875,
29352
+ "learning_rate": 9.878710626984582e-06,
29353
+ "loss": 19.3001,
29354
+ "step": 41830
29355
+ },
29356
+ {
29357
+ "epoch": 0.7764377854635963,
29358
+ "grad_norm": 35.875,
29359
+ "learning_rate": 9.87868163119854e-06,
29360
+ "loss": 19.1276,
29361
+ "step": 41840
29362
+ },
29363
+ {
29364
+ "epoch": 0.7766233585480762,
29365
+ "grad_norm": 36.59375,
29366
+ "learning_rate": 9.878652635412497e-06,
29367
+ "loss": 19.1937,
29368
+ "step": 41850
29369
+ },
29370
+ {
29371
+ "epoch": 0.776808931632556,
29372
+ "grad_norm": 34.5,
29373
+ "learning_rate": 9.878623639626454e-06,
29374
+ "loss": 19.5688,
29375
+ "step": 41860
29376
+ },
29377
+ {
29378
+ "epoch": 0.7769945047170358,
29379
+ "grad_norm": 36.84375,
29380
+ "learning_rate": 9.878594643840412e-06,
29381
+ "loss": 18.9137,
29382
+ "step": 41870
29383
+ },
29384
+ {
29385
+ "epoch": 0.7771800778015157,
29386
+ "grad_norm": 36.625,
29387
+ "learning_rate": 9.878565648054369e-06,
29388
+ "loss": 19.4192,
29389
+ "step": 41880
29390
+ },
29391
+ {
29392
+ "epoch": 0.7773656508859955,
29393
+ "grad_norm": 37.34375,
29394
+ "learning_rate": 9.878536652268326e-06,
29395
+ "loss": 19.2262,
29396
+ "step": 41890
29397
+ },
29398
+ {
29399
+ "epoch": 0.7775512239704753,
29400
+ "grad_norm": 36.09375,
29401
+ "learning_rate": 9.878507656482284e-06,
29402
+ "loss": 18.9775,
29403
+ "step": 41900
29404
+ },
29405
+ {
29406
+ "epoch": 0.7777367970549551,
29407
+ "grad_norm": 35.25,
29408
+ "learning_rate": 9.878478660696241e-06,
29409
+ "loss": 19.2364,
29410
+ "step": 41910
29411
+ },
29412
+ {
29413
+ "epoch": 0.777922370139435,
29414
+ "grad_norm": 35.65625,
29415
+ "learning_rate": 9.878449664910199e-06,
29416
+ "loss": 19.5075,
29417
+ "step": 41920
29418
+ },
29419
+ {
29420
+ "epoch": 0.7781079432239149,
29421
+ "grad_norm": 37.15625,
29422
+ "learning_rate": 9.878420669124156e-06,
29423
+ "loss": 19.3397,
29424
+ "step": 41930
29425
+ },
29426
+ {
29427
+ "epoch": 0.7782935163083946,
29428
+ "grad_norm": 33.5625,
29429
+ "learning_rate": 9.878391673338115e-06,
29430
+ "loss": 19.0782,
29431
+ "step": 41940
29432
+ },
29433
+ {
29434
+ "epoch": 0.7784790893928745,
29435
+ "grad_norm": 35.5,
29436
+ "learning_rate": 9.878362677552073e-06,
29437
+ "loss": 19.4959,
29438
+ "step": 41950
29439
+ },
29440
+ {
29441
+ "epoch": 0.7786646624773543,
29442
+ "grad_norm": 35.0,
29443
+ "learning_rate": 9.87833368176603e-06,
29444
+ "loss": 19.4202,
29445
+ "step": 41960
29446
+ },
29447
+ {
29448
+ "epoch": 0.7788502355618341,
29449
+ "grad_norm": 38.875,
29450
+ "learning_rate": 9.878304685979987e-06,
29451
+ "loss": 19.3642,
29452
+ "step": 41970
29453
+ },
29454
+ {
29455
+ "epoch": 0.7790358086463139,
29456
+ "grad_norm": 35.0,
29457
+ "learning_rate": 9.878275690193945e-06,
29458
+ "loss": 19.0663,
29459
+ "step": 41980
29460
+ },
29461
+ {
29462
+ "epoch": 0.7792213817307938,
29463
+ "grad_norm": 36.8125,
29464
+ "learning_rate": 9.878246694407902e-06,
29465
+ "loss": 19.0382,
29466
+ "step": 41990
29467
+ },
29468
+ {
29469
+ "epoch": 0.7794069548152736,
29470
+ "grad_norm": 36.5625,
29471
+ "learning_rate": 9.87821769862186e-06,
29472
+ "loss": 19.1091,
29473
+ "step": 42000
29474
+ },
29475
+ {
29476
+ "epoch": 0.7795925278997534,
29477
+ "grad_norm": 34.6875,
29478
+ "learning_rate": 9.878188702835819e-06,
29479
+ "loss": 19.1205,
29480
+ "step": 42010
29481
+ },
29482
+ {
29483
+ "epoch": 0.7797781009842333,
29484
+ "grad_norm": 34.6875,
29485
+ "learning_rate": 9.878159707049774e-06,
29486
+ "loss": 19.4904,
29487
+ "step": 42020
29488
+ },
29489
+ {
29490
+ "epoch": 0.779963674068713,
29491
+ "grad_norm": 35.84375,
29492
+ "learning_rate": 9.878130711263732e-06,
29493
+ "loss": 18.6078,
29494
+ "step": 42030
29495
+ },
29496
+ {
29497
+ "epoch": 0.7801492471531929,
29498
+ "grad_norm": 36.84375,
29499
+ "learning_rate": 9.878101715477691e-06,
29500
+ "loss": 18.9939,
29501
+ "step": 42040
29502
+ },
29503
+ {
29504
+ "epoch": 0.7803348202376728,
29505
+ "grad_norm": 35.96875,
29506
+ "learning_rate": 9.878072719691648e-06,
29507
+ "loss": 19.1132,
29508
+ "step": 42050
29509
+ },
29510
+ {
29511
+ "epoch": 0.7805203933221525,
29512
+ "grad_norm": 35.59375,
29513
+ "learning_rate": 9.878043723905606e-06,
29514
+ "loss": 19.2332,
29515
+ "step": 42060
29516
+ },
29517
+ {
29518
+ "epoch": 0.7807059664066324,
29519
+ "grad_norm": 37.875,
29520
+ "learning_rate": 9.878014728119563e-06,
29521
+ "loss": 19.1597,
29522
+ "step": 42070
29523
+ },
29524
+ {
29525
+ "epoch": 0.7808915394911122,
29526
+ "grad_norm": 36.65625,
29527
+ "learning_rate": 9.87798573233352e-06,
29528
+ "loss": 19.1368,
29529
+ "step": 42080
29530
+ },
29531
+ {
29532
+ "epoch": 0.781077112575592,
29533
+ "grad_norm": 36.21875,
29534
+ "learning_rate": 9.877956736547478e-06,
29535
+ "loss": 19.6976,
29536
+ "step": 42090
29537
+ },
29538
+ {
29539
+ "epoch": 0.7812626856600718,
29540
+ "grad_norm": 38.0625,
29541
+ "learning_rate": 9.877927740761435e-06,
29542
+ "loss": 18.8875,
29543
+ "step": 42100
29544
+ },
29545
+ {
29546
+ "epoch": 0.7814482587445517,
29547
+ "grad_norm": 36.8125,
29548
+ "learning_rate": 9.877898744975394e-06,
29549
+ "loss": 19.1315,
29550
+ "step": 42110
29551
+ },
29552
+ {
29553
+ "epoch": 0.7816338318290316,
29554
+ "grad_norm": 36.0,
29555
+ "learning_rate": 9.877869749189352e-06,
29556
+ "loss": 19.1785,
29557
+ "step": 42120
29558
+ },
29559
+ {
29560
+ "epoch": 0.7818194049135113,
29561
+ "grad_norm": 36.65625,
29562
+ "learning_rate": 9.877840753403308e-06,
29563
+ "loss": 19.2687,
29564
+ "step": 42130
29565
+ },
29566
+ {
29567
+ "epoch": 0.7820049779979912,
29568
+ "grad_norm": 36.0,
29569
+ "learning_rate": 9.877811757617267e-06,
29570
+ "loss": 19.1766,
29571
+ "step": 42140
29572
+ },
29573
+ {
29574
+ "epoch": 0.782190551082471,
29575
+ "grad_norm": 35.75,
29576
+ "learning_rate": 9.877782761831224e-06,
29577
+ "loss": 19.1511,
29578
+ "step": 42150
29579
+ },
29580
+ {
29581
+ "epoch": 0.7823761241669508,
29582
+ "grad_norm": 37.3125,
29583
+ "learning_rate": 9.877753766045181e-06,
29584
+ "loss": 19.0198,
29585
+ "step": 42160
29586
+ },
29587
+ {
29588
+ "epoch": 0.7825616972514307,
29589
+ "grad_norm": 34.5,
29590
+ "learning_rate": 9.877724770259139e-06,
29591
+ "loss": 19.6603,
29592
+ "step": 42170
29593
+ },
29594
+ {
29595
+ "epoch": 0.7827472703359105,
29596
+ "grad_norm": 34.65625,
29597
+ "learning_rate": 9.877695774473096e-06,
29598
+ "loss": 19.1918,
29599
+ "step": 42180
29600
+ },
29601
+ {
29602
+ "epoch": 0.7829328434203903,
29603
+ "grad_norm": 35.84375,
29604
+ "learning_rate": 9.877666778687054e-06,
29605
+ "loss": 19.2404,
29606
+ "step": 42190
29607
+ },
29608
+ {
29609
+ "epoch": 0.7831184165048701,
29610
+ "grad_norm": 36.28125,
29611
+ "learning_rate": 9.877637782901011e-06,
29612
+ "loss": 19.3464,
29613
+ "step": 42200
29614
+ },
29615
+ {
29616
+ "epoch": 0.78330398958935,
29617
+ "grad_norm": 36.125,
29618
+ "learning_rate": 9.87760878711497e-06,
29619
+ "loss": 19.2844,
29620
+ "step": 42210
29621
+ },
29622
+ {
29623
+ "epoch": 0.7834895626738297,
29624
+ "grad_norm": 36.4375,
29625
+ "learning_rate": 9.877579791328928e-06,
29626
+ "loss": 18.8615,
29627
+ "step": 42220
29628
+ },
29629
+ {
29630
+ "epoch": 0.7836751357583096,
29631
+ "grad_norm": 34.96875,
29632
+ "learning_rate": 9.877550795542885e-06,
29633
+ "loss": 18.8328,
29634
+ "step": 42230
29635
+ },
29636
+ {
29637
+ "epoch": 0.7838607088427895,
29638
+ "grad_norm": 34.125,
29639
+ "learning_rate": 9.877521799756842e-06,
29640
+ "loss": 19.1479,
29641
+ "step": 42240
29642
+ },
29643
+ {
29644
+ "epoch": 0.7840462819272692,
29645
+ "grad_norm": 34.9375,
29646
+ "learning_rate": 9.8774928039708e-06,
29647
+ "loss": 19.2093,
29648
+ "step": 42250
29649
+ },
29650
+ {
29651
+ "epoch": 0.7842318550117491,
29652
+ "grad_norm": 36.6875,
29653
+ "learning_rate": 9.877463808184757e-06,
29654
+ "loss": 18.9336,
29655
+ "step": 42260
29656
+ },
29657
+ {
29658
+ "epoch": 0.7844174280962289,
29659
+ "grad_norm": 37.125,
29660
+ "learning_rate": 9.877434812398715e-06,
29661
+ "loss": 18.7095,
29662
+ "step": 42270
29663
+ },
29664
+ {
29665
+ "epoch": 0.7846030011807088,
29666
+ "grad_norm": 36.5,
29667
+ "learning_rate": 9.877405816612674e-06,
29668
+ "loss": 19.2101,
29669
+ "step": 42280
29670
+ },
29671
+ {
29672
+ "epoch": 0.7847885742651886,
29673
+ "grad_norm": 35.90625,
29674
+ "learning_rate": 9.87737682082663e-06,
29675
+ "loss": 19.6382,
29676
+ "step": 42290
29677
+ },
29678
+ {
29679
+ "epoch": 0.7849741473496684,
29680
+ "grad_norm": 34.0625,
29681
+ "learning_rate": 9.877347825040587e-06,
29682
+ "loss": 19.6099,
29683
+ "step": 42300
29684
+ },
29685
+ {
29686
+ "epoch": 0.7851597204341483,
29687
+ "grad_norm": 36.8125,
29688
+ "learning_rate": 9.877318829254546e-06,
29689
+ "loss": 19.2404,
29690
+ "step": 42310
29691
+ },
29692
+ {
29693
+ "epoch": 0.785345293518628,
29694
+ "grad_norm": 36.03125,
29695
+ "learning_rate": 9.877289833468503e-06,
29696
+ "loss": 19.0874,
29697
+ "step": 42320
29698
+ },
29699
+ {
29700
+ "epoch": 0.7855308666031079,
29701
+ "grad_norm": 35.46875,
29702
+ "learning_rate": 9.87726083768246e-06,
29703
+ "loss": 19.0245,
29704
+ "step": 42330
29705
+ },
29706
+ {
29707
+ "epoch": 0.7857164396875878,
29708
+ "grad_norm": 36.625,
29709
+ "learning_rate": 9.877231841896418e-06,
29710
+ "loss": 19.3828,
29711
+ "step": 42340
29712
+ },
29713
+ {
29714
+ "epoch": 0.7859020127720675,
29715
+ "grad_norm": 37.625,
29716
+ "learning_rate": 9.877202846110376e-06,
29717
+ "loss": 18.6832,
29718
+ "step": 42350
29719
+ },
29720
+ {
29721
+ "epoch": 0.7860875858565474,
29722
+ "grad_norm": 34.375,
29723
+ "learning_rate": 9.877173850324333e-06,
29724
+ "loss": 19.379,
29725
+ "step": 42360
29726
+ },
29727
+ {
29728
+ "epoch": 0.7862731589410272,
29729
+ "grad_norm": 36.75,
29730
+ "learning_rate": 9.87714485453829e-06,
29731
+ "loss": 19.779,
29732
+ "step": 42370
29733
+ },
29734
+ {
29735
+ "epoch": 0.786458732025507,
29736
+ "grad_norm": 36.75,
29737
+ "learning_rate": 9.877115858752248e-06,
29738
+ "loss": 19.2348,
29739
+ "step": 42380
29740
+ },
29741
+ {
29742
+ "epoch": 0.7866443051099868,
29743
+ "grad_norm": 35.375,
29744
+ "learning_rate": 9.877086862966207e-06,
29745
+ "loss": 19.3421,
29746
+ "step": 42390
29747
+ },
29748
+ {
29749
+ "epoch": 0.7868298781944667,
29750
+ "grad_norm": 35.15625,
29751
+ "learning_rate": 9.877057867180163e-06,
29752
+ "loss": 19.0024,
29753
+ "step": 42400
29754
+ },
29755
+ {
29756
+ "epoch": 0.7870154512789465,
29757
+ "grad_norm": 36.96875,
29758
+ "learning_rate": 9.877028871394122e-06,
29759
+ "loss": 18.825,
29760
+ "step": 42410
29761
+ },
29762
+ {
29763
+ "epoch": 0.7872010243634263,
29764
+ "grad_norm": 36.65625,
29765
+ "learning_rate": 9.876999875608079e-06,
29766
+ "loss": 19.4176,
29767
+ "step": 42420
29768
+ },
29769
+ {
29770
+ "epoch": 0.7873865974479062,
29771
+ "grad_norm": 36.4375,
29772
+ "learning_rate": 9.876970879822037e-06,
29773
+ "loss": 19.0654,
29774
+ "step": 42430
29775
+ },
29776
+ {
29777
+ "epoch": 0.7875721705323859,
29778
+ "grad_norm": 36.625,
29779
+ "learning_rate": 9.876941884035994e-06,
29780
+ "loss": 19.5037,
29781
+ "step": 42440
29782
+ },
29783
+ {
29784
+ "epoch": 0.7877577436168658,
29785
+ "grad_norm": 35.71875,
29786
+ "learning_rate": 9.876912888249951e-06,
29787
+ "loss": 18.6874,
29788
+ "step": 42450
29789
+ },
29790
+ {
29791
+ "epoch": 0.7879433167013457,
29792
+ "grad_norm": 36.40625,
29793
+ "learning_rate": 9.876883892463909e-06,
29794
+ "loss": 19.3074,
29795
+ "step": 42460
29796
+ },
29797
+ {
29798
+ "epoch": 0.7881288897858255,
29799
+ "grad_norm": 34.96875,
29800
+ "learning_rate": 9.876854896677866e-06,
29801
+ "loss": 18.9853,
29802
+ "step": 42470
29803
+ },
29804
+ {
29805
+ "epoch": 0.7883144628703053,
29806
+ "grad_norm": 35.3125,
29807
+ "learning_rate": 9.876825900891824e-06,
29808
+ "loss": 18.9235,
29809
+ "step": 42480
29810
+ },
29811
+ {
29812
+ "epoch": 0.7885000359547851,
29813
+ "grad_norm": 36.90625,
29814
+ "learning_rate": 9.876796905105783e-06,
29815
+ "loss": 19.4066,
29816
+ "step": 42490
29817
+ },
29818
+ {
29819
+ "epoch": 0.788685609039265,
29820
+ "grad_norm": 36.1875,
29821
+ "learning_rate": 9.876767909319738e-06,
29822
+ "loss": 19.2456,
29823
+ "step": 42500
29824
+ },
29825
+ {
29826
+ "epoch": 0.7888711821237447,
29827
+ "grad_norm": 36.59375,
29828
+ "learning_rate": 9.876738913533696e-06,
29829
+ "loss": 18.9103,
29830
+ "step": 42510
29831
+ },
29832
+ {
29833
+ "epoch": 0.7890567552082246,
29834
+ "grad_norm": 34.03125,
29835
+ "learning_rate": 9.876709917747655e-06,
29836
+ "loss": 19.2111,
29837
+ "step": 42520
29838
+ },
29839
+ {
29840
+ "epoch": 0.7892423282927045,
29841
+ "grad_norm": 35.21875,
29842
+ "learning_rate": 9.876680921961612e-06,
29843
+ "loss": 19.4737,
29844
+ "step": 42530
29845
+ },
29846
+ {
29847
+ "epoch": 0.7894279013771842,
29848
+ "grad_norm": 35.65625,
29849
+ "learning_rate": 9.87665192617557e-06,
29850
+ "loss": 19.1734,
29851
+ "step": 42540
29852
+ },
29853
+ {
29854
+ "epoch": 0.7896134744616641,
29855
+ "grad_norm": 38.78125,
29856
+ "learning_rate": 9.876622930389527e-06,
29857
+ "loss": 19.2003,
29858
+ "step": 42550
29859
+ },
29860
+ {
29861
+ "epoch": 0.7897990475461439,
29862
+ "grad_norm": 35.53125,
29863
+ "learning_rate": 9.876593934603485e-06,
29864
+ "loss": 19.1934,
29865
+ "step": 42560
29866
+ },
29867
+ {
29868
+ "epoch": 0.7899846206306237,
29869
+ "grad_norm": 36.0625,
29870
+ "learning_rate": 9.876564938817442e-06,
29871
+ "loss": 19.3247,
29872
+ "step": 42570
29873
+ },
29874
+ {
29875
+ "epoch": 0.7901701937151036,
29876
+ "grad_norm": 36.28125,
29877
+ "learning_rate": 9.8765359430314e-06,
29878
+ "loss": 19.6564,
29879
+ "step": 42580
29880
+ },
29881
+ {
29882
+ "epoch": 0.7903557667995834,
29883
+ "grad_norm": 36.625,
29884
+ "learning_rate": 9.876506947245358e-06,
29885
+ "loss": 19.0949,
29886
+ "step": 42590
29887
+ },
29888
+ {
29889
+ "epoch": 0.7905413398840632,
29890
+ "grad_norm": 34.9375,
29891
+ "learning_rate": 9.876477951459316e-06,
29892
+ "loss": 18.5577,
29893
+ "step": 42600
29894
+ },
29895
+ {
29896
+ "epoch": 0.790726912968543,
29897
+ "grad_norm": 35.9375,
29898
+ "learning_rate": 9.876448955673272e-06,
29899
+ "loss": 18.6629,
29900
+ "step": 42610
29901
+ },
29902
+ {
29903
+ "epoch": 0.7909124860530229,
29904
+ "grad_norm": 35.40625,
29905
+ "learning_rate": 9.87641995988723e-06,
29906
+ "loss": 19.3618,
29907
+ "step": 42620
29908
+ },
29909
+ {
29910
+ "epoch": 0.7910980591375028,
29911
+ "grad_norm": 34.3125,
29912
+ "learning_rate": 9.876390964101188e-06,
29913
+ "loss": 18.985,
29914
+ "step": 42630
29915
+ },
29916
+ {
29917
+ "epoch": 0.7912836322219825,
29918
+ "grad_norm": 37.25,
29919
+ "learning_rate": 9.876361968315145e-06,
29920
+ "loss": 19.3863,
29921
+ "step": 42640
29922
+ },
29923
+ {
29924
+ "epoch": 0.7914692053064624,
29925
+ "grad_norm": 35.84375,
29926
+ "learning_rate": 9.876332972529103e-06,
29927
+ "loss": 19.2617,
29928
+ "step": 42650
29929
+ },
29930
+ {
29931
+ "epoch": 0.7916547783909422,
29932
+ "grad_norm": 35.75,
29933
+ "learning_rate": 9.87630397674306e-06,
29934
+ "loss": 19.6245,
29935
+ "step": 42660
29936
+ },
29937
+ {
29938
+ "epoch": 0.791840351475422,
29939
+ "grad_norm": 37.1875,
29940
+ "learning_rate": 9.876274980957018e-06,
29941
+ "loss": 18.9741,
29942
+ "step": 42670
29943
+ },
29944
+ {
29945
+ "epoch": 0.7920259245599018,
29946
+ "grad_norm": 37.21875,
29947
+ "learning_rate": 9.876245985170975e-06,
29948
+ "loss": 19.222,
29949
+ "step": 42680
29950
+ },
29951
+ {
29952
+ "epoch": 0.7922114976443817,
29953
+ "grad_norm": 33.5625,
29954
+ "learning_rate": 9.876216989384934e-06,
29955
+ "loss": 19.3086,
29956
+ "step": 42690
29957
+ },
29958
+ {
29959
+ "epoch": 0.7923970707288615,
29960
+ "grad_norm": 36.59375,
29961
+ "learning_rate": 9.876187993598892e-06,
29962
+ "loss": 19.1836,
29963
+ "step": 42700
29964
+ },
29965
+ {
29966
+ "epoch": 0.7925826438133413,
29967
+ "grad_norm": 36.625,
29968
+ "learning_rate": 9.876158997812849e-06,
29969
+ "loss": 19.0206,
29970
+ "step": 42710
29971
+ },
29972
+ {
29973
+ "epoch": 0.7927682168978212,
29974
+ "grad_norm": 35.5,
29975
+ "learning_rate": 9.876130002026806e-06,
29976
+ "loss": 19.2777,
29977
+ "step": 42720
29978
+ },
29979
+ {
29980
+ "epoch": 0.7929537899823009,
29981
+ "grad_norm": 38.15625,
29982
+ "learning_rate": 9.876101006240764e-06,
29983
+ "loss": 19.4687,
29984
+ "step": 42730
29985
+ },
29986
+ {
29987
+ "epoch": 0.7931393630667808,
29988
+ "grad_norm": 36.78125,
29989
+ "learning_rate": 9.876072010454721e-06,
29990
+ "loss": 19.2891,
29991
+ "step": 42740
29992
+ },
29993
+ {
29994
+ "epoch": 0.7933249361512607,
29995
+ "grad_norm": 36.3125,
29996
+ "learning_rate": 9.876043014668679e-06,
29997
+ "loss": 19.1073,
29998
+ "step": 42750
29999
+ },
30000
+ {
30001
+ "epoch": 0.7935105092357404,
30002
+ "grad_norm": 37.15625,
30003
+ "learning_rate": 9.876014018882638e-06,
30004
+ "loss": 19.2614,
30005
+ "step": 42760
30006
+ },
30007
+ {
30008
+ "epoch": 0.7936960823202203,
30009
+ "grad_norm": 35.5625,
30010
+ "learning_rate": 9.875985023096593e-06,
30011
+ "loss": 19.5217,
30012
+ "step": 42770
30013
+ },
30014
+ {
30015
+ "epoch": 0.7938816554047001,
30016
+ "grad_norm": 35.5625,
30017
+ "learning_rate": 9.875956027310551e-06,
30018
+ "loss": 18.8875,
30019
+ "step": 42780
30020
+ },
30021
+ {
30022
+ "epoch": 0.7940672284891799,
30023
+ "grad_norm": 34.9375,
30024
+ "learning_rate": 9.87592703152451e-06,
30025
+ "loss": 18.9884,
30026
+ "step": 42790
30027
+ },
30028
+ {
30029
+ "epoch": 0.7942528015736597,
30030
+ "grad_norm": 34.59375,
30031
+ "learning_rate": 9.875898035738467e-06,
30032
+ "loss": 18.7005,
30033
+ "step": 42800
30034
+ },
30035
+ {
30036
+ "epoch": 0.7944383746581396,
30037
+ "grad_norm": 34.78125,
30038
+ "learning_rate": 9.875869039952425e-06,
30039
+ "loss": 19.4181,
30040
+ "step": 42810
30041
+ },
30042
+ {
30043
+ "epoch": 0.7946239477426195,
30044
+ "grad_norm": 35.1875,
30045
+ "learning_rate": 9.875840044166382e-06,
30046
+ "loss": 19.495,
30047
+ "step": 42820
30048
+ },
30049
+ {
30050
+ "epoch": 0.7948095208270992,
30051
+ "grad_norm": 36.21875,
30052
+ "learning_rate": 9.87581104838034e-06,
30053
+ "loss": 19.6189,
30054
+ "step": 42830
30055
+ },
30056
+ {
30057
+ "epoch": 0.7949950939115791,
30058
+ "grad_norm": 34.40625,
30059
+ "learning_rate": 9.875782052594297e-06,
30060
+ "loss": 19.131,
30061
+ "step": 42840
30062
+ },
30063
+ {
30064
+ "epoch": 0.7951806669960589,
30065
+ "grad_norm": 38.84375,
30066
+ "learning_rate": 9.875753056808254e-06,
30067
+ "loss": 19.5435,
30068
+ "step": 42850
30069
+ },
30070
+ {
30071
+ "epoch": 0.7953662400805387,
30072
+ "grad_norm": 36.46875,
30073
+ "learning_rate": 9.875724061022214e-06,
30074
+ "loss": 19.4428,
30075
+ "step": 42860
30076
+ },
30077
+ {
30078
+ "epoch": 0.7955518131650186,
30079
+ "grad_norm": 34.53125,
30080
+ "learning_rate": 9.875695065236171e-06,
30081
+ "loss": 19.3201,
30082
+ "step": 42870
30083
+ },
30084
+ {
30085
+ "epoch": 0.7957373862494984,
30086
+ "grad_norm": 37.0,
30087
+ "learning_rate": 9.875666069450127e-06,
30088
+ "loss": 19.0514,
30089
+ "step": 42880
30090
+ },
30091
+ {
30092
+ "epoch": 0.7959229593339782,
30093
+ "grad_norm": 36.25,
30094
+ "learning_rate": 9.875637073664086e-06,
30095
+ "loss": 18.7184,
30096
+ "step": 42890
30097
+ },
30098
+ {
30099
+ "epoch": 0.796108532418458,
30100
+ "grad_norm": 35.375,
30101
+ "learning_rate": 9.875608077878043e-06,
30102
+ "loss": 19.1007,
30103
+ "step": 42900
30104
+ },
30105
+ {
30106
+ "epoch": 0.7962941055029379,
30107
+ "grad_norm": 33.46875,
30108
+ "learning_rate": 9.875579082092e-06,
30109
+ "loss": 18.8951,
30110
+ "step": 42910
30111
+ },
30112
+ {
30113
+ "epoch": 0.7964796785874176,
30114
+ "grad_norm": 37.375,
30115
+ "learning_rate": 9.875550086305958e-06,
30116
+ "loss": 19.4892,
30117
+ "step": 42920
30118
+ },
30119
+ {
30120
+ "epoch": 0.7966652516718975,
30121
+ "grad_norm": 35.78125,
30122
+ "learning_rate": 9.875521090519915e-06,
30123
+ "loss": 19.6579,
30124
+ "step": 42930
30125
+ },
30126
+ {
30127
+ "epoch": 0.7968508247563774,
30128
+ "grad_norm": 36.375,
30129
+ "learning_rate": 9.875492094733873e-06,
30130
+ "loss": 19.2619,
30131
+ "step": 42940
30132
+ },
30133
+ {
30134
+ "epoch": 0.7970363978408571,
30135
+ "grad_norm": 38.0625,
30136
+ "learning_rate": 9.87546309894783e-06,
30137
+ "loss": 19.5734,
30138
+ "step": 42950
30139
+ },
30140
+ {
30141
+ "epoch": 0.797221970925337,
30142
+ "grad_norm": 36.21875,
30143
+ "learning_rate": 9.875434103161788e-06,
30144
+ "loss": 18.9373,
30145
+ "step": 42960
30146
+ },
30147
+ {
30148
+ "epoch": 0.7974075440098168,
30149
+ "grad_norm": 35.9375,
30150
+ "learning_rate": 9.875405107375747e-06,
30151
+ "loss": 19.2821,
30152
+ "step": 42970
30153
+ },
30154
+ {
30155
+ "epoch": 0.7975931170942966,
30156
+ "grad_norm": 35.4375,
30157
+ "learning_rate": 9.875376111589704e-06,
30158
+ "loss": 19.2418,
30159
+ "step": 42980
30160
+ },
30161
+ {
30162
+ "epoch": 0.7977786901787765,
30163
+ "grad_norm": 36.03125,
30164
+ "learning_rate": 9.875347115803662e-06,
30165
+ "loss": 19.0795,
30166
+ "step": 42990
30167
+ },
30168
+ {
30169
+ "epoch": 0.7979642632632563,
30170
+ "grad_norm": 36.40625,
30171
+ "learning_rate": 9.875318120017619e-06,
30172
+ "loss": 19.5488,
30173
+ "step": 43000
30174
+ },
30175
+ {
30176
+ "epoch": 0.7981498363477362,
30177
+ "grad_norm": 34.9375,
30178
+ "learning_rate": 9.875289124231576e-06,
30179
+ "loss": 19.3044,
30180
+ "step": 43010
30181
+ },
30182
+ {
30183
+ "epoch": 0.7983354094322159,
30184
+ "grad_norm": 36.53125,
30185
+ "learning_rate": 9.875260128445534e-06,
30186
+ "loss": 19.1615,
30187
+ "step": 43020
30188
+ },
30189
+ {
30190
+ "epoch": 0.7985209825166958,
30191
+ "grad_norm": 35.96875,
30192
+ "learning_rate": 9.875231132659491e-06,
30193
+ "loss": 19.5529,
30194
+ "step": 43030
30195
+ },
30196
+ {
30197
+ "epoch": 0.7987065556011756,
30198
+ "grad_norm": 36.59375,
30199
+ "learning_rate": 9.875202136873449e-06,
30200
+ "loss": 18.9387,
30201
+ "step": 43040
30202
+ },
30203
+ {
30204
+ "epoch": 0.7988921286856554,
30205
+ "grad_norm": 35.4375,
30206
+ "learning_rate": 9.875173141087406e-06,
30207
+ "loss": 19.0329,
30208
+ "step": 43050
30209
+ },
30210
+ {
30211
+ "epoch": 0.7990777017701353,
30212
+ "grad_norm": 36.8125,
30213
+ "learning_rate": 9.875144145301363e-06,
30214
+ "loss": 19.6343,
30215
+ "step": 43060
30216
+ },
30217
+ {
30218
+ "epoch": 0.7992632748546151,
30219
+ "grad_norm": 36.40625,
30220
+ "learning_rate": 9.875115149515322e-06,
30221
+ "loss": 19.1546,
30222
+ "step": 43070
30223
+ },
30224
+ {
30225
+ "epoch": 0.7994488479390949,
30226
+ "grad_norm": 35.96875,
30227
+ "learning_rate": 9.87508615372928e-06,
30228
+ "loss": 18.9671,
30229
+ "step": 43080
30230
+ },
30231
+ {
30232
+ "epoch": 0.7996344210235747,
30233
+ "grad_norm": 36.53125,
30234
+ "learning_rate": 9.875057157943236e-06,
30235
+ "loss": 19.1909,
30236
+ "step": 43090
30237
+ },
30238
+ {
30239
+ "epoch": 0.7998199941080546,
30240
+ "grad_norm": 36.28125,
30241
+ "learning_rate": 9.875028162157195e-06,
30242
+ "loss": 19.067,
30243
+ "step": 43100
30244
+ },
30245
+ {
30246
+ "epoch": 0.8000055671925344,
30247
+ "grad_norm": 35.375,
30248
+ "learning_rate": 9.874999166371152e-06,
30249
+ "loss": 18.7279,
30250
+ "step": 43110
30251
+ },
30252
+ {
30253
+ "epoch": 0.8001911402770142,
30254
+ "grad_norm": 36.40625,
30255
+ "learning_rate": 9.87497017058511e-06,
30256
+ "loss": 19.0467,
30257
+ "step": 43120
30258
+ },
30259
+ {
30260
+ "epoch": 0.8003767133614941,
30261
+ "grad_norm": 39.53125,
30262
+ "learning_rate": 9.874941174799067e-06,
30263
+ "loss": 19.5147,
30264
+ "step": 43130
30265
+ },
30266
+ {
30267
+ "epoch": 0.8005622864459738,
30268
+ "grad_norm": 36.625,
30269
+ "learning_rate": 9.874912179013026e-06,
30270
+ "loss": 18.7374,
30271
+ "step": 43140
30272
+ },
30273
+ {
30274
+ "epoch": 0.8007478595304537,
30275
+ "grad_norm": 34.3125,
30276
+ "learning_rate": 9.874883183226982e-06,
30277
+ "loss": 19.4482,
30278
+ "step": 43150
30279
+ },
30280
+ {
30281
+ "epoch": 0.8009334326149335,
30282
+ "grad_norm": 35.8125,
30283
+ "learning_rate": 9.874854187440939e-06,
30284
+ "loss": 19.1165,
30285
+ "step": 43160
30286
+ },
30287
+ {
30288
+ "epoch": 0.8011190056994134,
30289
+ "grad_norm": 35.65625,
30290
+ "learning_rate": 9.874825191654898e-06,
30291
+ "loss": 18.8476,
30292
+ "step": 43170
30293
+ },
30294
+ {
30295
+ "epoch": 0.8013045787838932,
30296
+ "grad_norm": 36.6875,
30297
+ "learning_rate": 9.874796195868856e-06,
30298
+ "loss": 18.6734,
30299
+ "step": 43180
30300
+ },
30301
+ {
30302
+ "epoch": 0.801490151868373,
30303
+ "grad_norm": 36.3125,
30304
+ "learning_rate": 9.874767200082813e-06,
30305
+ "loss": 19.189,
30306
+ "step": 43190
30307
+ },
30308
+ {
30309
+ "epoch": 0.8016757249528529,
30310
+ "grad_norm": 35.8125,
30311
+ "learning_rate": 9.87473820429677e-06,
30312
+ "loss": 19.4344,
30313
+ "step": 43200
30314
+ },
30315
+ {
30316
+ "epoch": 0.8018612980373326,
30317
+ "grad_norm": 38.03125,
30318
+ "learning_rate": 9.874709208510728e-06,
30319
+ "loss": 19.112,
30320
+ "step": 43210
30321
+ },
30322
+ {
30323
+ "epoch": 0.8020468711218125,
30324
+ "grad_norm": 37.0625,
30325
+ "learning_rate": 9.874680212724685e-06,
30326
+ "loss": 19.2927,
30327
+ "step": 43220
30328
+ },
30329
+ {
30330
+ "epoch": 0.8022324442062924,
30331
+ "grad_norm": 35.15625,
30332
+ "learning_rate": 9.874651216938643e-06,
30333
+ "loss": 19.2974,
30334
+ "step": 43230
30335
+ },
30336
+ {
30337
+ "epoch": 0.8024180172907721,
30338
+ "grad_norm": 34.84375,
30339
+ "learning_rate": 9.874622221152602e-06,
30340
+ "loss": 18.9434,
30341
+ "step": 43240
30342
+ },
30343
+ {
30344
+ "epoch": 0.802603590375252,
30345
+ "grad_norm": 37.84375,
30346
+ "learning_rate": 9.874593225366557e-06,
30347
+ "loss": 18.9947,
30348
+ "step": 43250
30349
+ },
30350
+ {
30351
+ "epoch": 0.8027891634597318,
30352
+ "grad_norm": 36.3125,
30353
+ "learning_rate": 9.874564229580515e-06,
30354
+ "loss": 19.3009,
30355
+ "step": 43260
30356
+ },
30357
+ {
30358
+ "epoch": 0.8029747365442116,
30359
+ "grad_norm": 33.3125,
30360
+ "learning_rate": 9.874535233794474e-06,
30361
+ "loss": 18.7793,
30362
+ "step": 43270
30363
+ },
30364
+ {
30365
+ "epoch": 0.8031603096286914,
30366
+ "grad_norm": 36.78125,
30367
+ "learning_rate": 9.874506238008431e-06,
30368
+ "loss": 19.2566,
30369
+ "step": 43280
30370
+ },
30371
+ {
30372
+ "epoch": 0.8033458827131713,
30373
+ "grad_norm": 36.59375,
30374
+ "learning_rate": 9.874477242222389e-06,
30375
+ "loss": 19.238,
30376
+ "step": 43290
30377
+ },
30378
+ {
30379
+ "epoch": 0.8035314557976511,
30380
+ "grad_norm": 36.4375,
30381
+ "learning_rate": 9.874448246436346e-06,
30382
+ "loss": 19.245,
30383
+ "step": 43300
30384
+ },
30385
+ {
30386
+ "epoch": 0.8037170288821309,
30387
+ "grad_norm": 35.25,
30388
+ "learning_rate": 9.874419250650304e-06,
30389
+ "loss": 19.1339,
30390
+ "step": 43310
30391
+ },
30392
+ {
30393
+ "epoch": 0.8039026019666108,
30394
+ "grad_norm": 34.9375,
30395
+ "learning_rate": 9.874390254864261e-06,
30396
+ "loss": 18.6303,
30397
+ "step": 43320
30398
+ },
30399
+ {
30400
+ "epoch": 0.8040881750510905,
30401
+ "grad_norm": 36.3125,
30402
+ "learning_rate": 9.874361259078218e-06,
30403
+ "loss": 18.7144,
30404
+ "step": 43330
30405
+ },
30406
+ {
30407
+ "epoch": 0.8042737481355704,
30408
+ "grad_norm": 37.4375,
30409
+ "learning_rate": 9.874332263292178e-06,
30410
+ "loss": 19.6017,
30411
+ "step": 43340
30412
+ },
30413
+ {
30414
+ "epoch": 0.8044593212200503,
30415
+ "grad_norm": 35.28125,
30416
+ "learning_rate": 9.874303267506135e-06,
30417
+ "loss": 19.0519,
30418
+ "step": 43350
30419
+ },
30420
+ {
30421
+ "epoch": 0.8046448943045301,
30422
+ "grad_norm": 36.53125,
30423
+ "learning_rate": 9.87427427172009e-06,
30424
+ "loss": 19.1844,
30425
+ "step": 43360
30426
+ },
30427
+ {
30428
+ "epoch": 0.8048304673890099,
30429
+ "grad_norm": 36.15625,
30430
+ "learning_rate": 9.87424527593405e-06,
30431
+ "loss": 18.981,
30432
+ "step": 43370
30433
+ },
30434
+ {
30435
+ "epoch": 0.8050160404734897,
30436
+ "grad_norm": 37.0625,
30437
+ "learning_rate": 9.874216280148007e-06,
30438
+ "loss": 19.1258,
30439
+ "step": 43380
30440
+ },
30441
+ {
30442
+ "epoch": 0.8052016135579696,
30443
+ "grad_norm": 35.21875,
30444
+ "learning_rate": 9.874187284361965e-06,
30445
+ "loss": 18.9282,
30446
+ "step": 43390
30447
+ },
30448
+ {
30449
+ "epoch": 0.8053871866424493,
30450
+ "grad_norm": 39.625,
30451
+ "learning_rate": 9.874158288575922e-06,
30452
+ "loss": 18.9325,
30453
+ "step": 43400
30454
+ },
30455
+ {
30456
+ "epoch": 0.8055727597269292,
30457
+ "grad_norm": 35.3125,
30458
+ "learning_rate": 9.87412929278988e-06,
30459
+ "loss": 18.5758,
30460
+ "step": 43410
30461
+ },
30462
+ {
30463
+ "epoch": 0.8057583328114091,
30464
+ "grad_norm": 35.09375,
30465
+ "learning_rate": 9.874100297003837e-06,
30466
+ "loss": 18.8267,
30467
+ "step": 43420
30468
+ },
30469
+ {
30470
+ "epoch": 0.8059439058958888,
30471
+ "grad_norm": 34.21875,
30472
+ "learning_rate": 9.874071301217794e-06,
30473
+ "loss": 19.2754,
30474
+ "step": 43430
30475
+ },
30476
+ {
30477
+ "epoch": 0.8061294789803687,
30478
+ "grad_norm": 36.65625,
30479
+ "learning_rate": 9.874042305431752e-06,
30480
+ "loss": 19.3909,
30481
+ "step": 43440
30482
+ },
30483
+ {
30484
+ "epoch": 0.8063150520648485,
30485
+ "grad_norm": 35.03125,
30486
+ "learning_rate": 9.87401330964571e-06,
30487
+ "loss": 19.1061,
30488
+ "step": 43450
30489
+ },
30490
+ {
30491
+ "epoch": 0.8065006251493283,
30492
+ "grad_norm": 37.875,
30493
+ "learning_rate": 9.873984313859668e-06,
30494
+ "loss": 19.1138,
30495
+ "step": 43460
30496
+ },
30497
+ {
30498
+ "epoch": 0.8066861982338082,
30499
+ "grad_norm": 36.71875,
30500
+ "learning_rate": 9.873955318073626e-06,
30501
+ "loss": 19.7451,
30502
+ "step": 43470
30503
+ },
30504
+ {
30505
+ "epoch": 0.806871771318288,
30506
+ "grad_norm": 36.5,
30507
+ "learning_rate": 9.873926322287583e-06,
30508
+ "loss": 19.0428,
30509
+ "step": 43480
30510
+ },
30511
+ {
30512
+ "epoch": 0.8070573444027678,
30513
+ "grad_norm": 36.8125,
30514
+ "learning_rate": 9.87389732650154e-06,
30515
+ "loss": 18.7527,
30516
+ "step": 43490
30517
+ },
30518
+ {
30519
+ "epoch": 0.8072429174872476,
30520
+ "grad_norm": 36.5625,
30521
+ "learning_rate": 9.873868330715498e-06,
30522
+ "loss": 19.3082,
30523
+ "step": 43500
30524
+ },
30525
+ {
30526
+ "epoch": 0.8074284905717275,
30527
+ "grad_norm": 35.9375,
30528
+ "learning_rate": 9.873839334929455e-06,
30529
+ "loss": 19.3901,
30530
+ "step": 43510
30531
+ },
30532
+ {
30533
+ "epoch": 0.8076140636562072,
30534
+ "grad_norm": 36.8125,
30535
+ "learning_rate": 9.873810339143413e-06,
30536
+ "loss": 19.0965,
30537
+ "step": 43520
30538
+ },
30539
+ {
30540
+ "epoch": 0.8077996367406871,
30541
+ "grad_norm": 36.75,
30542
+ "learning_rate": 9.87378134335737e-06,
30543
+ "loss": 19.2141,
30544
+ "step": 43530
30545
+ },
30546
+ {
30547
+ "epoch": 0.807985209825167,
30548
+ "grad_norm": 33.71875,
30549
+ "learning_rate": 9.873752347571327e-06,
30550
+ "loss": 19.1556,
30551
+ "step": 43540
30552
+ },
30553
+ {
30554
+ "epoch": 0.8081707829096468,
30555
+ "grad_norm": 37.71875,
30556
+ "learning_rate": 9.873723351785286e-06,
30557
+ "loss": 19.2163,
30558
+ "step": 43550
30559
+ },
30560
+ {
30561
+ "epoch": 0.8083563559941266,
30562
+ "grad_norm": 34.5,
30563
+ "learning_rate": 9.873694355999244e-06,
30564
+ "loss": 19.1022,
30565
+ "step": 43560
30566
+ },
30567
+ {
30568
+ "epoch": 0.8085419290786064,
30569
+ "grad_norm": 37.5,
30570
+ "learning_rate": 9.873665360213201e-06,
30571
+ "loss": 19.0576,
30572
+ "step": 43570
30573
+ },
30574
+ {
30575
+ "epoch": 0.8087275021630863,
30576
+ "grad_norm": 36.625,
30577
+ "learning_rate": 9.873636364427159e-06,
30578
+ "loss": 19.0831,
30579
+ "step": 43580
30580
+ },
30581
+ {
30582
+ "epoch": 0.8089130752475661,
30583
+ "grad_norm": 35.125,
30584
+ "learning_rate": 9.873607368641116e-06,
30585
+ "loss": 18.7494,
30586
+ "step": 43590
30587
+ },
30588
+ {
30589
+ "epoch": 0.8090986483320459,
30590
+ "grad_norm": 36.625,
30591
+ "learning_rate": 9.873578372855074e-06,
30592
+ "loss": 19.0648,
30593
+ "step": 43600
30594
+ },
30595
+ {
30596
+ "epoch": 0.8092842214165258,
30597
+ "grad_norm": 36.5,
30598
+ "learning_rate": 9.873549377069031e-06,
30599
+ "loss": 19.0214,
30600
+ "step": 43610
30601
+ },
30602
+ {
30603
+ "epoch": 0.8094697945010055,
30604
+ "grad_norm": 36.8125,
30605
+ "learning_rate": 9.87352038128299e-06,
30606
+ "loss": 18.9287,
30607
+ "step": 43620
30608
+ },
30609
+ {
30610
+ "epoch": 0.8096553675854854,
30611
+ "grad_norm": 35.15625,
30612
+ "learning_rate": 9.873491385496946e-06,
30613
+ "loss": 19.3647,
30614
+ "step": 43630
30615
+ },
30616
+ {
30617
+ "epoch": 0.8098409406699653,
30618
+ "grad_norm": 35.3125,
30619
+ "learning_rate": 9.873462389710903e-06,
30620
+ "loss": 18.8901,
30621
+ "step": 43640
30622
+ },
30623
+ {
30624
+ "epoch": 0.810026513754445,
30625
+ "grad_norm": 33.78125,
30626
+ "learning_rate": 9.873433393924862e-06,
30627
+ "loss": 19.4528,
30628
+ "step": 43650
30629
+ },
30630
+ {
30631
+ "epoch": 0.8102120868389249,
30632
+ "grad_norm": 36.75,
30633
+ "learning_rate": 9.87340439813882e-06,
30634
+ "loss": 18.7406,
30635
+ "step": 43660
30636
+ },
30637
+ {
30638
+ "epoch": 0.8103976599234047,
30639
+ "grad_norm": 37.125,
30640
+ "learning_rate": 9.873375402352777e-06,
30641
+ "loss": 19.3565,
30642
+ "step": 43670
30643
+ },
30644
+ {
30645
+ "epoch": 0.8105832330078845,
30646
+ "grad_norm": 36.9375,
30647
+ "learning_rate": 9.873346406566734e-06,
30648
+ "loss": 19.6584,
30649
+ "step": 43680
30650
+ },
30651
+ {
30652
+ "epoch": 0.8107688060923643,
30653
+ "grad_norm": 36.3125,
30654
+ "learning_rate": 9.873317410780692e-06,
30655
+ "loss": 19.1017,
30656
+ "step": 43690
30657
+ },
30658
+ {
30659
+ "epoch": 0.8109543791768442,
30660
+ "grad_norm": 36.5625,
30661
+ "learning_rate": 9.87328841499465e-06,
30662
+ "loss": 19.0014,
30663
+ "step": 43700
30664
+ },
30665
+ {
30666
+ "epoch": 0.8111399522613241,
30667
+ "grad_norm": 35.96875,
30668
+ "learning_rate": 9.873259419208607e-06,
30669
+ "loss": 19.361,
30670
+ "step": 43710
30671
+ },
30672
+ {
30673
+ "epoch": 0.8113255253458038,
30674
+ "grad_norm": 35.96875,
30675
+ "learning_rate": 9.873230423422566e-06,
30676
+ "loss": 18.9856,
30677
+ "step": 43720
30678
+ },
30679
+ {
30680
+ "epoch": 0.8115110984302837,
30681
+ "grad_norm": 34.25,
30682
+ "learning_rate": 9.873201427636523e-06,
30683
+ "loss": 19.1586,
30684
+ "step": 43730
30685
+ },
30686
+ {
30687
+ "epoch": 0.8116966715147635,
30688
+ "grad_norm": 35.59375,
30689
+ "learning_rate": 9.873172431850479e-06,
30690
+ "loss": 19.5068,
30691
+ "step": 43740
30692
+ },
30693
+ {
30694
+ "epoch": 0.8118822445992433,
30695
+ "grad_norm": 34.59375,
30696
+ "learning_rate": 9.873143436064438e-06,
30697
+ "loss": 19.1461,
30698
+ "step": 43750
30699
+ },
30700
+ {
30701
+ "epoch": 0.8120678176837232,
30702
+ "grad_norm": 37.71875,
30703
+ "learning_rate": 9.873114440278395e-06,
30704
+ "loss": 19.1016,
30705
+ "step": 43760
30706
+ },
30707
+ {
30708
+ "epoch": 0.812253390768203,
30709
+ "grad_norm": 34.34375,
30710
+ "learning_rate": 9.873085444492353e-06,
30711
+ "loss": 19.1093,
30712
+ "step": 43770
30713
+ },
30714
+ {
30715
+ "epoch": 0.8124389638526828,
30716
+ "grad_norm": 34.9375,
30717
+ "learning_rate": 9.87305644870631e-06,
30718
+ "loss": 18.9712,
30719
+ "step": 43780
30720
+ },
30721
+ {
30722
+ "epoch": 0.8126245369371626,
30723
+ "grad_norm": 36.6875,
30724
+ "learning_rate": 9.873027452920268e-06,
30725
+ "loss": 19.6167,
30726
+ "step": 43790
30727
+ },
30728
+ {
30729
+ "epoch": 0.8128101100216425,
30730
+ "grad_norm": 36.53125,
30731
+ "learning_rate": 9.872998457134225e-06,
30732
+ "loss": 19.0376,
30733
+ "step": 43800
30734
+ },
30735
+ {
30736
+ "epoch": 0.8129956831061222,
30737
+ "grad_norm": 35.46875,
30738
+ "learning_rate": 9.872969461348182e-06,
30739
+ "loss": 19.1814,
30740
+ "step": 43810
30741
+ },
30742
+ {
30743
+ "epoch": 0.8131812561906021,
30744
+ "grad_norm": 36.0,
30745
+ "learning_rate": 9.872940465562142e-06,
30746
+ "loss": 19.1973,
30747
+ "step": 43820
30748
+ },
30749
+ {
30750
+ "epoch": 0.813366829275082,
30751
+ "grad_norm": 38.5625,
30752
+ "learning_rate": 9.872911469776099e-06,
30753
+ "loss": 19.2537,
30754
+ "step": 43830
30755
+ },
30756
+ {
30757
+ "epoch": 0.8135524023595617,
30758
+ "grad_norm": 38.25,
30759
+ "learning_rate": 9.872882473990055e-06,
30760
+ "loss": 19.3154,
30761
+ "step": 43840
30762
+ },
30763
+ {
30764
+ "epoch": 0.8137379754440416,
30765
+ "grad_norm": 37.03125,
30766
+ "learning_rate": 9.872853478204014e-06,
30767
+ "loss": 19.3999,
30768
+ "step": 43850
30769
+ },
30770
+ {
30771
+ "epoch": 0.8139235485285214,
30772
+ "grad_norm": 34.375,
30773
+ "learning_rate": 9.872824482417971e-06,
30774
+ "loss": 18.8485,
30775
+ "step": 43860
30776
+ },
30777
+ {
30778
+ "epoch": 0.8141091216130012,
30779
+ "grad_norm": 35.25,
30780
+ "learning_rate": 9.872795486631929e-06,
30781
+ "loss": 19.1239,
30782
+ "step": 43870
30783
+ },
30784
+ {
30785
+ "epoch": 0.8142946946974811,
30786
+ "grad_norm": 38.3125,
30787
+ "learning_rate": 9.872766490845886e-06,
30788
+ "loss": 19.4616,
30789
+ "step": 43880
30790
+ },
30791
+ {
30792
+ "epoch": 0.8144802677819609,
30793
+ "grad_norm": 37.625,
30794
+ "learning_rate": 9.872737495059843e-06,
30795
+ "loss": 19.0829,
30796
+ "step": 43890
30797
+ },
30798
+ {
30799
+ "epoch": 0.8146658408664408,
30800
+ "grad_norm": 37.8125,
30801
+ "learning_rate": 9.8727084992738e-06,
30802
+ "loss": 19.356,
30803
+ "step": 43900
30804
+ },
30805
+ {
30806
+ "epoch": 0.8148514139509205,
30807
+ "grad_norm": 39.5625,
30808
+ "learning_rate": 9.872679503487758e-06,
30809
+ "loss": 19.0015,
30810
+ "step": 43910
30811
+ },
30812
+ {
30813
+ "epoch": 0.8150369870354004,
30814
+ "grad_norm": 35.53125,
30815
+ "learning_rate": 9.872650507701717e-06,
30816
+ "loss": 19.1196,
30817
+ "step": 43920
30818
+ },
30819
+ {
30820
+ "epoch": 0.8152225601198803,
30821
+ "grad_norm": 37.625,
30822
+ "learning_rate": 9.872621511915675e-06,
30823
+ "loss": 19.5022,
30824
+ "step": 43930
30825
+ },
30826
+ {
30827
+ "epoch": 0.81540813320436,
30828
+ "grad_norm": 35.25,
30829
+ "learning_rate": 9.872592516129632e-06,
30830
+ "loss": 18.9608,
30831
+ "step": 43940
30832
+ },
30833
+ {
30834
+ "epoch": 0.8155937062888399,
30835
+ "grad_norm": 35.25,
30836
+ "learning_rate": 9.87256352034359e-06,
30837
+ "loss": 19.2786,
30838
+ "step": 43950
30839
+ },
30840
+ {
30841
+ "epoch": 0.8157792793733197,
30842
+ "grad_norm": 35.46875,
30843
+ "learning_rate": 9.872534524557547e-06,
30844
+ "loss": 19.0429,
30845
+ "step": 43960
30846
+ },
30847
+ {
30848
+ "epoch": 0.8159648524577995,
30849
+ "grad_norm": 36.9375,
30850
+ "learning_rate": 9.872505528771504e-06,
30851
+ "loss": 19.0499,
30852
+ "step": 43970
30853
+ },
30854
+ {
30855
+ "epoch": 0.8161504255422793,
30856
+ "grad_norm": 35.1875,
30857
+ "learning_rate": 9.872476532985462e-06,
30858
+ "loss": 19.377,
30859
+ "step": 43980
30860
+ },
30861
+ {
30862
+ "epoch": 0.8163359986267592,
30863
+ "grad_norm": 34.71875,
30864
+ "learning_rate": 9.87244753719942e-06,
30865
+ "loss": 19.3778,
30866
+ "step": 43990
30867
+ },
30868
+ {
30869
+ "epoch": 0.816521571711239,
30870
+ "grad_norm": 37.625,
30871
+ "learning_rate": 9.872418541413378e-06,
30872
+ "loss": 19.1619,
30873
+ "step": 44000
30874
+ },
30875
+ {
30876
+ "epoch": 0.8167071447957188,
30877
+ "grad_norm": 34.96875,
30878
+ "learning_rate": 9.872389545627334e-06,
30879
+ "loss": 19.2044,
30880
+ "step": 44010
30881
+ },
30882
+ {
30883
+ "epoch": 0.8168927178801987,
30884
+ "grad_norm": 36.46875,
30885
+ "learning_rate": 9.872360549841291e-06,
30886
+ "loss": 18.8557,
30887
+ "step": 44020
30888
+ },
30889
+ {
30890
+ "epoch": 0.8170782909646784,
30891
+ "grad_norm": 37.0625,
30892
+ "learning_rate": 9.87233155405525e-06,
30893
+ "loss": 19.3249,
30894
+ "step": 44030
30895
+ },
30896
+ {
30897
+ "epoch": 0.8172638640491583,
30898
+ "grad_norm": 33.6875,
30899
+ "learning_rate": 9.872302558269208e-06,
30900
+ "loss": 19.2119,
30901
+ "step": 44040
30902
+ },
30903
+ {
30904
+ "epoch": 0.8174494371336382,
30905
+ "grad_norm": 36.375,
30906
+ "learning_rate": 9.872273562483165e-06,
30907
+ "loss": 18.7741,
30908
+ "step": 44050
30909
+ },
30910
+ {
30911
+ "epoch": 0.817635010218118,
30912
+ "grad_norm": 38.40625,
30913
+ "learning_rate": 9.872244566697123e-06,
30914
+ "loss": 18.7126,
30915
+ "step": 44060
30916
+ },
30917
+ {
30918
+ "epoch": 0.8178205833025978,
30919
+ "grad_norm": 36.78125,
30920
+ "learning_rate": 9.87221557091108e-06,
30921
+ "loss": 19.2019,
30922
+ "step": 44070
30923
+ },
30924
+ {
30925
+ "epoch": 0.8180061563870776,
30926
+ "grad_norm": 36.0625,
30927
+ "learning_rate": 9.872186575125038e-06,
30928
+ "loss": 19.5095,
30929
+ "step": 44080
30930
+ },
30931
+ {
30932
+ "epoch": 0.8181917294715575,
30933
+ "grad_norm": 37.53125,
30934
+ "learning_rate": 9.872157579338995e-06,
30935
+ "loss": 19.4252,
30936
+ "step": 44090
30937
+ },
30938
+ {
30939
+ "epoch": 0.8183773025560372,
30940
+ "grad_norm": 37.21875,
30941
+ "learning_rate": 9.872128583552954e-06,
30942
+ "loss": 19.17,
30943
+ "step": 44100
30944
+ },
30945
+ {
30946
+ "epoch": 0.8185628756405171,
30947
+ "grad_norm": 33.5,
30948
+ "learning_rate": 9.87209958776691e-06,
30949
+ "loss": 19.4603,
30950
+ "step": 44110
30951
+ },
30952
+ {
30953
+ "epoch": 0.818748448724997,
30954
+ "grad_norm": 36.71875,
30955
+ "learning_rate": 9.872070591980867e-06,
30956
+ "loss": 19.2598,
30957
+ "step": 44120
30958
+ },
30959
+ {
30960
+ "epoch": 0.8189340218094767,
30961
+ "grad_norm": 34.5625,
30962
+ "learning_rate": 9.872041596194826e-06,
30963
+ "loss": 18.7924,
30964
+ "step": 44130
30965
+ },
30966
+ {
30967
+ "epoch": 0.8191195948939566,
30968
+ "grad_norm": 37.40625,
30969
+ "learning_rate": 9.872012600408784e-06,
30970
+ "loss": 19.4831,
30971
+ "step": 44140
30972
+ },
30973
+ {
30974
+ "epoch": 0.8193051679784364,
30975
+ "grad_norm": 35.03125,
30976
+ "learning_rate": 9.871983604622741e-06,
30977
+ "loss": 18.8814,
30978
+ "step": 44150
30979
+ },
30980
+ {
30981
+ "epoch": 0.8194907410629162,
30982
+ "grad_norm": 36.1875,
30983
+ "learning_rate": 9.871954608836698e-06,
30984
+ "loss": 19.2846,
30985
+ "step": 44160
30986
+ },
30987
+ {
30988
+ "epoch": 0.8196763141473961,
30989
+ "grad_norm": 35.46875,
30990
+ "learning_rate": 9.871925613050656e-06,
30991
+ "loss": 18.9267,
30992
+ "step": 44170
30993
+ },
30994
+ {
30995
+ "epoch": 0.8198618872318759,
30996
+ "grad_norm": 36.875,
30997
+ "learning_rate": 9.871896617264613e-06,
30998
+ "loss": 18.9907,
30999
+ "step": 44180
31000
+ },
31001
+ {
31002
+ "epoch": 0.8200474603163557,
31003
+ "grad_norm": 38.53125,
31004
+ "learning_rate": 9.87186762147857e-06,
31005
+ "loss": 19.8266,
31006
+ "step": 44190
31007
+ },
31008
+ {
31009
+ "epoch": 0.8202330334008355,
31010
+ "grad_norm": 37.1875,
31011
+ "learning_rate": 9.87183862569253e-06,
31012
+ "loss": 19.0446,
31013
+ "step": 44200
31014
+ },
31015
+ {
31016
+ "epoch": 0.8204186064853154,
31017
+ "grad_norm": 35.375,
31018
+ "learning_rate": 9.871809629906487e-06,
31019
+ "loss": 19.2261,
31020
+ "step": 44210
31021
+ },
31022
+ {
31023
+ "epoch": 0.8206041795697951,
31024
+ "grad_norm": 34.9375,
31025
+ "learning_rate": 9.871780634120443e-06,
31026
+ "loss": 19.3818,
31027
+ "step": 44220
31028
+ },
31029
+ {
31030
+ "epoch": 0.820789752654275,
31031
+ "grad_norm": 36.3125,
31032
+ "learning_rate": 9.871751638334402e-06,
31033
+ "loss": 19.2278,
31034
+ "step": 44230
31035
+ },
31036
+ {
31037
+ "epoch": 0.8209753257387549,
31038
+ "grad_norm": 38.21875,
31039
+ "learning_rate": 9.87172264254836e-06,
31040
+ "loss": 19.2026,
31041
+ "step": 44240
31042
+ },
31043
+ {
31044
+ "epoch": 0.8211608988232347,
31045
+ "grad_norm": 37.21875,
31046
+ "learning_rate": 9.871693646762317e-06,
31047
+ "loss": 19.0903,
31048
+ "step": 44250
31049
+ },
31050
+ {
31051
+ "epoch": 0.8213464719077145,
31052
+ "grad_norm": 37.28125,
31053
+ "learning_rate": 9.871664650976274e-06,
31054
+ "loss": 18.8226,
31055
+ "step": 44260
31056
+ },
31057
+ {
31058
+ "epoch": 0.8215320449921943,
31059
+ "grad_norm": 35.40625,
31060
+ "learning_rate": 9.871635655190232e-06,
31061
+ "loss": 19.3683,
31062
+ "step": 44270
31063
+ },
31064
+ {
31065
+ "epoch": 0.8217176180766742,
31066
+ "grad_norm": 35.21875,
31067
+ "learning_rate": 9.871606659404189e-06,
31068
+ "loss": 18.8462,
31069
+ "step": 44280
31070
+ },
31071
+ {
31072
+ "epoch": 0.821903191161154,
31073
+ "grad_norm": 38.71875,
31074
+ "learning_rate": 9.871577663618146e-06,
31075
+ "loss": 19.0862,
31076
+ "step": 44290
31077
+ },
31078
+ {
31079
+ "epoch": 0.8220887642456338,
31080
+ "grad_norm": 38.125,
31081
+ "learning_rate": 9.871548667832106e-06,
31082
+ "loss": 19.0924,
31083
+ "step": 44300
31084
+ },
31085
+ {
31086
+ "epoch": 0.8222743373301137,
31087
+ "grad_norm": 35.53125,
31088
+ "learning_rate": 9.871519672046063e-06,
31089
+ "loss": 18.8592,
31090
+ "step": 44310
31091
+ },
31092
+ {
31093
+ "epoch": 0.8224599104145934,
31094
+ "grad_norm": 36.875,
31095
+ "learning_rate": 9.87149067626002e-06,
31096
+ "loss": 19.0449,
31097
+ "step": 44320
31098
+ },
31099
+ {
31100
+ "epoch": 0.8226454834990733,
31101
+ "grad_norm": 32.75,
31102
+ "learning_rate": 9.871461680473978e-06,
31103
+ "loss": 18.9665,
31104
+ "step": 44330
31105
+ },
31106
+ {
31107
+ "epoch": 0.8228310565835532,
31108
+ "grad_norm": 34.0,
31109
+ "learning_rate": 9.871432684687935e-06,
31110
+ "loss": 18.4968,
31111
+ "step": 44340
31112
+ },
31113
+ {
31114
+ "epoch": 0.8230166296680329,
31115
+ "grad_norm": 34.6875,
31116
+ "learning_rate": 9.871403688901893e-06,
31117
+ "loss": 19.4038,
31118
+ "step": 44350
31119
+ },
31120
+ {
31121
+ "epoch": 0.8232022027525128,
31122
+ "grad_norm": 38.4375,
31123
+ "learning_rate": 9.87137469311585e-06,
31124
+ "loss": 19.172,
31125
+ "step": 44360
31126
+ },
31127
+ {
31128
+ "epoch": 0.8233877758369926,
31129
+ "grad_norm": 34.90625,
31130
+ "learning_rate": 9.871345697329809e-06,
31131
+ "loss": 19.3212,
31132
+ "step": 44370
31133
+ },
31134
+ {
31135
+ "epoch": 0.8235733489214724,
31136
+ "grad_norm": 35.03125,
31137
+ "learning_rate": 9.871316701543765e-06,
31138
+ "loss": 18.7932,
31139
+ "step": 44380
31140
+ },
31141
+ {
31142
+ "epoch": 0.8237589220059522,
31143
+ "grad_norm": 35.46875,
31144
+ "learning_rate": 9.871287705757722e-06,
31145
+ "loss": 19.2297,
31146
+ "step": 44390
31147
+ },
31148
+ {
31149
+ "epoch": 0.8239444950904321,
31150
+ "grad_norm": 36.78125,
31151
+ "learning_rate": 9.871258709971681e-06,
31152
+ "loss": 18.9214,
31153
+ "step": 44400
31154
+ },
31155
+ {
31156
+ "epoch": 0.8241300681749119,
31157
+ "grad_norm": 37.0,
31158
+ "learning_rate": 9.871229714185639e-06,
31159
+ "loss": 19.1307,
31160
+ "step": 44410
31161
+ },
31162
+ {
31163
+ "epoch": 0.8243156412593917,
31164
+ "grad_norm": 35.78125,
31165
+ "learning_rate": 9.871200718399596e-06,
31166
+ "loss": 19.2335,
31167
+ "step": 44420
31168
+ },
31169
+ {
31170
+ "epoch": 0.8245012143438716,
31171
+ "grad_norm": 35.90625,
31172
+ "learning_rate": 9.871171722613554e-06,
31173
+ "loss": 19.2364,
31174
+ "step": 44430
31175
+ },
31176
+ {
31177
+ "epoch": 0.8246867874283514,
31178
+ "grad_norm": 35.65625,
31179
+ "learning_rate": 9.871142726827511e-06,
31180
+ "loss": 18.697,
31181
+ "step": 44440
31182
+ },
31183
+ {
31184
+ "epoch": 0.8248723605128312,
31185
+ "grad_norm": 37.125,
31186
+ "learning_rate": 9.871113731041468e-06,
31187
+ "loss": 19.2022,
31188
+ "step": 44450
31189
+ },
31190
+ {
31191
+ "epoch": 0.825057933597311,
31192
+ "grad_norm": 35.90625,
31193
+ "learning_rate": 9.871084735255426e-06,
31194
+ "loss": 19.0279,
31195
+ "step": 44460
31196
+ },
31197
+ {
31198
+ "epoch": 0.8252435066817909,
31199
+ "grad_norm": 35.25,
31200
+ "learning_rate": 9.871055739469383e-06,
31201
+ "loss": 18.9795,
31202
+ "step": 44470
31203
+ },
31204
+ {
31205
+ "epoch": 0.8254290797662707,
31206
+ "grad_norm": 36.84375,
31207
+ "learning_rate": 9.871026743683342e-06,
31208
+ "loss": 19.2522,
31209
+ "step": 44480
31210
+ },
31211
+ {
31212
+ "epoch": 0.8256146528507505,
31213
+ "grad_norm": 36.1875,
31214
+ "learning_rate": 9.870997747897298e-06,
31215
+ "loss": 18.8613,
31216
+ "step": 44490
31217
+ },
31218
+ {
31219
+ "epoch": 0.8258002259352304,
31220
+ "grad_norm": 36.65625,
31221
+ "learning_rate": 9.870968752111257e-06,
31222
+ "loss": 19.3883,
31223
+ "step": 44500
31224
+ },
31225
+ {
31226
+ "epoch": 0.8259857990197101,
31227
+ "grad_norm": 36.09375,
31228
+ "learning_rate": 9.870939756325215e-06,
31229
+ "loss": 19.3544,
31230
+ "step": 44510
31231
+ },
31232
+ {
31233
+ "epoch": 0.82617137210419,
31234
+ "grad_norm": 38.5,
31235
+ "learning_rate": 9.870910760539172e-06,
31236
+ "loss": 19.3535,
31237
+ "step": 44520
31238
+ },
31239
+ {
31240
+ "epoch": 0.8263569451886699,
31241
+ "grad_norm": 34.75,
31242
+ "learning_rate": 9.87088176475313e-06,
31243
+ "loss": 18.8685,
31244
+ "step": 44530
31245
+ },
31246
+ {
31247
+ "epoch": 0.8265425182731496,
31248
+ "grad_norm": 38.8125,
31249
+ "learning_rate": 9.870852768967087e-06,
31250
+ "loss": 19.0754,
31251
+ "step": 44540
31252
+ },
31253
+ {
31254
+ "epoch": 0.8267280913576295,
31255
+ "grad_norm": 36.46875,
31256
+ "learning_rate": 9.870823773181044e-06,
31257
+ "loss": 19.056,
31258
+ "step": 44550
31259
+ },
31260
+ {
31261
+ "epoch": 0.8269136644421093,
31262
+ "grad_norm": 36.1875,
31263
+ "learning_rate": 9.870794777395002e-06,
31264
+ "loss": 18.5412,
31265
+ "step": 44560
31266
+ },
31267
+ {
31268
+ "epoch": 0.8270992375265891,
31269
+ "grad_norm": 34.15625,
31270
+ "learning_rate": 9.870765781608959e-06,
31271
+ "loss": 18.657,
31272
+ "step": 44570
31273
+ },
31274
+ {
31275
+ "epoch": 0.827284810611069,
31276
+ "grad_norm": 37.0,
31277
+ "learning_rate": 9.870736785822918e-06,
31278
+ "loss": 19.3666,
31279
+ "step": 44580
31280
+ },
31281
+ {
31282
+ "epoch": 0.8274703836955488,
31283
+ "grad_norm": 36.65625,
31284
+ "learning_rate": 9.870707790036875e-06,
31285
+ "loss": 19.1981,
31286
+ "step": 44590
31287
+ },
31288
+ {
31289
+ "epoch": 0.8276559567800287,
31290
+ "grad_norm": 35.4375,
31291
+ "learning_rate": 9.870678794250831e-06,
31292
+ "loss": 19.325,
31293
+ "step": 44600
31294
+ },
31295
+ {
31296
+ "epoch": 0.8278415298645084,
31297
+ "grad_norm": 36.375,
31298
+ "learning_rate": 9.87064979846479e-06,
31299
+ "loss": 19.4554,
31300
+ "step": 44610
31301
+ },
31302
+ {
31303
+ "epoch": 0.8280271029489883,
31304
+ "grad_norm": 36.78125,
31305
+ "learning_rate": 9.870620802678748e-06,
31306
+ "loss": 18.9955,
31307
+ "step": 44620
31308
+ },
31309
+ {
31310
+ "epoch": 0.8282126760334682,
31311
+ "grad_norm": 38.1875,
31312
+ "learning_rate": 9.870591806892705e-06,
31313
+ "loss": 19.2107,
31314
+ "step": 44630
31315
+ },
31316
+ {
31317
+ "epoch": 0.8283982491179479,
31318
+ "grad_norm": 37.5,
31319
+ "learning_rate": 9.870562811106662e-06,
31320
+ "loss": 18.8142,
31321
+ "step": 44640
31322
+ },
31323
+ {
31324
+ "epoch": 0.8285838222024278,
31325
+ "grad_norm": 38.34375,
31326
+ "learning_rate": 9.87053381532062e-06,
31327
+ "loss": 19.3055,
31328
+ "step": 44650
31329
+ },
31330
+ {
31331
+ "epoch": 0.8287693952869076,
31332
+ "grad_norm": 36.0,
31333
+ "learning_rate": 9.870504819534577e-06,
31334
+ "loss": 18.8541,
31335
+ "step": 44660
31336
+ },
31337
+ {
31338
+ "epoch": 0.8289549683713874,
31339
+ "grad_norm": 39.78125,
31340
+ "learning_rate": 9.870475823748535e-06,
31341
+ "loss": 19.1764,
31342
+ "step": 44670
31343
+ },
31344
+ {
31345
+ "epoch": 0.8291405414558672,
31346
+ "grad_norm": 36.03125,
31347
+ "learning_rate": 9.870446827962494e-06,
31348
+ "loss": 18.7747,
31349
+ "step": 44680
31350
+ },
31351
+ {
31352
+ "epoch": 0.8293261145403471,
31353
+ "grad_norm": 34.875,
31354
+ "learning_rate": 9.870417832176451e-06,
31355
+ "loss": 19.1716,
31356
+ "step": 44690
31357
+ },
31358
+ {
31359
+ "epoch": 0.8295116876248269,
31360
+ "grad_norm": 36.4375,
31361
+ "learning_rate": 9.870388836390407e-06,
31362
+ "loss": 19.152,
31363
+ "step": 44700
31364
+ },
31365
+ {
31366
+ "epoch": 0.8296972607093067,
31367
+ "grad_norm": 37.46875,
31368
+ "learning_rate": 9.870359840604366e-06,
31369
+ "loss": 18.8802,
31370
+ "step": 44710
31371
+ },
31372
+ {
31373
+ "epoch": 0.8298828337937866,
31374
+ "grad_norm": 35.5625,
31375
+ "learning_rate": 9.870330844818323e-06,
31376
+ "loss": 19.1369,
31377
+ "step": 44720
31378
+ },
31379
+ {
31380
+ "epoch": 0.8300684068782663,
31381
+ "grad_norm": 35.75,
31382
+ "learning_rate": 9.870301849032281e-06,
31383
+ "loss": 19.0659,
31384
+ "step": 44730
31385
+ },
31386
+ {
31387
+ "epoch": 0.8302539799627462,
31388
+ "grad_norm": 36.3125,
31389
+ "learning_rate": 9.870272853246238e-06,
31390
+ "loss": 19.2007,
31391
+ "step": 44740
31392
+ },
31393
+ {
31394
+ "epoch": 0.830439553047226,
31395
+ "grad_norm": 37.09375,
31396
+ "learning_rate": 9.870243857460197e-06,
31397
+ "loss": 19.3437,
31398
+ "step": 44750
31399
+ },
31400
+ {
31401
+ "epoch": 0.8306251261317058,
31402
+ "grad_norm": 36.28125,
31403
+ "learning_rate": 9.870214861674153e-06,
31404
+ "loss": 19.456,
31405
+ "step": 44760
31406
+ },
31407
+ {
31408
+ "epoch": 0.8308106992161857,
31409
+ "grad_norm": 35.625,
31410
+ "learning_rate": 9.87018586588811e-06,
31411
+ "loss": 18.8098,
31412
+ "step": 44770
31413
+ },
31414
+ {
31415
+ "epoch": 0.8309962723006655,
31416
+ "grad_norm": 36.0,
31417
+ "learning_rate": 9.87015687010207e-06,
31418
+ "loss": 19.4965,
31419
+ "step": 44780
31420
+ },
31421
+ {
31422
+ "epoch": 0.8311818453851454,
31423
+ "grad_norm": 36.125,
31424
+ "learning_rate": 9.870127874316027e-06,
31425
+ "loss": 19.1793,
31426
+ "step": 44790
31427
+ },
31428
+ {
31429
+ "epoch": 0.8313674184696251,
31430
+ "grad_norm": 36.78125,
31431
+ "learning_rate": 9.870098878529984e-06,
31432
+ "loss": 19.0221,
31433
+ "step": 44800
31434
+ },
31435
+ {
31436
+ "epoch": 0.831552991554105,
31437
+ "grad_norm": 36.15625,
31438
+ "learning_rate": 9.870069882743942e-06,
31439
+ "loss": 18.9155,
31440
+ "step": 44810
31441
+ },
31442
+ {
31443
+ "epoch": 0.8317385646385849,
31444
+ "grad_norm": 36.09375,
31445
+ "learning_rate": 9.8700408869579e-06,
31446
+ "loss": 18.9602,
31447
+ "step": 44820
31448
+ },
31449
+ {
31450
+ "epoch": 0.8319241377230646,
31451
+ "grad_norm": 37.4375,
31452
+ "learning_rate": 9.870011891171857e-06,
31453
+ "loss": 19.3968,
31454
+ "step": 44830
31455
+ },
31456
+ {
31457
+ "epoch": 0.8321097108075445,
31458
+ "grad_norm": 38.1875,
31459
+ "learning_rate": 9.869982895385814e-06,
31460
+ "loss": 18.7445,
31461
+ "step": 44840
31462
+ },
31463
+ {
31464
+ "epoch": 0.8322952838920243,
31465
+ "grad_norm": 34.875,
31466
+ "learning_rate": 9.869953899599773e-06,
31467
+ "loss": 19.3119,
31468
+ "step": 44850
31469
+ },
31470
+ {
31471
+ "epoch": 0.8324808569765041,
31472
+ "grad_norm": 36.0625,
31473
+ "learning_rate": 9.869924903813729e-06,
31474
+ "loss": 18.8227,
31475
+ "step": 44860
31476
+ },
31477
+ {
31478
+ "epoch": 0.832666430060984,
31479
+ "grad_norm": 35.15625,
31480
+ "learning_rate": 9.869895908027686e-06,
31481
+ "loss": 18.7788,
31482
+ "step": 44870
31483
+ },
31484
+ {
31485
+ "epoch": 0.8328520031454638,
31486
+ "grad_norm": 34.03125,
31487
+ "learning_rate": 9.869866912241645e-06,
31488
+ "loss": 19.126,
31489
+ "step": 44880
31490
+ },
31491
+ {
31492
+ "epoch": 0.8330375762299436,
31493
+ "grad_norm": 38.15625,
31494
+ "learning_rate": 9.869837916455603e-06,
31495
+ "loss": 18.9767,
31496
+ "step": 44890
31497
+ },
31498
+ {
31499
+ "epoch": 0.8332231493144234,
31500
+ "grad_norm": 37.15625,
31501
+ "learning_rate": 9.86980892066956e-06,
31502
+ "loss": 18.608,
31503
+ "step": 44900
31504
+ },
31505
+ {
31506
+ "epoch": 0.8334087223989033,
31507
+ "grad_norm": 37.03125,
31508
+ "learning_rate": 9.869779924883518e-06,
31509
+ "loss": 19.1132,
31510
+ "step": 44910
31511
+ },
31512
+ {
31513
+ "epoch": 0.833594295483383,
31514
+ "grad_norm": 35.0625,
31515
+ "learning_rate": 9.869750929097475e-06,
31516
+ "loss": 18.8629,
31517
+ "step": 44920
31518
+ },
31519
+ {
31520
+ "epoch": 0.8337798685678629,
31521
+ "grad_norm": 36.6875,
31522
+ "learning_rate": 9.869721933311432e-06,
31523
+ "loss": 19.265,
31524
+ "step": 44930
31525
+ },
31526
+ {
31527
+ "epoch": 0.8339654416523428,
31528
+ "grad_norm": 34.09375,
31529
+ "learning_rate": 9.86969293752539e-06,
31530
+ "loss": 19.1547,
31531
+ "step": 44940
31532
+ },
31533
+ {
31534
+ "epoch": 0.8341510147368225,
31535
+ "grad_norm": 34.78125,
31536
+ "learning_rate": 9.869663941739349e-06,
31537
+ "loss": 19.0749,
31538
+ "step": 44950
31539
+ },
31540
+ {
31541
+ "epoch": 0.8343365878213024,
31542
+ "grad_norm": 36.28125,
31543
+ "learning_rate": 9.869634945953306e-06,
31544
+ "loss": 19.0023,
31545
+ "step": 44960
31546
+ },
31547
+ {
31548
+ "epoch": 0.8345221609057822,
31549
+ "grad_norm": 37.09375,
31550
+ "learning_rate": 9.869605950167262e-06,
31551
+ "loss": 19.0009,
31552
+ "step": 44970
31553
+ },
31554
+ {
31555
+ "epoch": 0.8347077339902621,
31556
+ "grad_norm": 34.96875,
31557
+ "learning_rate": 9.869576954381221e-06,
31558
+ "loss": 18.7841,
31559
+ "step": 44980
31560
+ },
31561
+ {
31562
+ "epoch": 0.8348933070747419,
31563
+ "grad_norm": 34.9375,
31564
+ "learning_rate": 9.869547958595179e-06,
31565
+ "loss": 18.6512,
31566
+ "step": 44990
31567
+ },
31568
+ {
31569
+ "epoch": 0.8350788801592217,
31570
+ "grad_norm": 36.5,
31571
+ "learning_rate": 9.869518962809136e-06,
31572
+ "loss": 19.053,
31573
+ "step": 45000
31574
+ },
31575
+ {
31576
+ "epoch": 0.8350788801592217,
31577
+ "eval_loss": 2.3824570178985596,
31578
+ "eval_runtime": 454.6243,
31579
+ "eval_samples_per_second": 3194.103,
31580
+ "eval_steps_per_second": 49.909,
31581
+ "step": 45000
31582
  }
31583
  ],
31584
  "logging_steps": 10,
 
31598
  "attributes": {}
31599
  }
31600
  },
31601
+ "total_flos": 7.854852415684608e+18,
31602
  "train_batch_size": 8,
31603
  "trial_name": null,
31604
  "trial_params": null