tranhuyHoang commited on
Commit
37f695b
·
verified ·
1 Parent(s): 9f3e0ba

Training in progress, step 6000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e3a43ae5b3f3682613c7ad9cc7e4227c0dff7d59f6a1aa485003f5e58962d85
3
  size 91951912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed9777fa1a77a56e9dd3241ee3dbd254aa1cf90fdeb5edfb1fcce31ac82e94fb
3
  size 91951912
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff01ee30401958db89536b92926f9bc5a00f7fb3ffe30da4675a5a14b4da9798
3
  size 183991627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9637bb35b55b1113901601f4621b591b014ca6a043584b2d2d2c061ad16c07c2
3
  size 183991627
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:718a0f3db00824213036a2c0441849791319b7d9cf189065873bb26a7020738e
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:098b29492211804ab324a36f37466821d948280bb74fce4ba895c03f13ecd878
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7af1ff25c4ed7f9575dcc62f1797b96a6fa130cba9fc7b7aad57b3821420102d
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79980175536a1928569d00db36b920754a385741f60dcf609f0aba7a8a424e74
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.00055,
6
  "eval_steps": 500,
7
- "global_step": 5500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -38596,6 +38596,3514 @@
38596
  "eval_samples_per_second": 27.364,
38597
  "eval_steps_per_second": 1.71,
38598
  "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38599
  }
38600
  ],
38601
  "logging_steps": 1,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.0006,
6
  "eval_steps": 500,
7
+ "global_step": 6000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
38596
  "eval_samples_per_second": 27.364,
38597
  "eval_steps_per_second": 1.71,
38598
  "step": 5500
38599
+ },
38600
+ {
38601
+ "epoch": 0.0005501,
38602
+ "grad_norm": 2.1657469272613525,
38603
+ "learning_rate": 5.5e-05,
38604
+ "loss": 1.375,
38605
+ "step": 5501
38606
+ },
38607
+ {
38608
+ "epoch": 0.0005502,
38609
+ "grad_norm": 2.278029680252075,
38610
+ "learning_rate": 5.5010000000000004e-05,
38611
+ "loss": 1.375,
38612
+ "step": 5502
38613
+ },
38614
+ {
38615
+ "epoch": 0.0005503,
38616
+ "grad_norm": 2.653407335281372,
38617
+ "learning_rate": 5.502e-05,
38618
+ "loss": 1.7002,
38619
+ "step": 5503
38620
+ },
38621
+ {
38622
+ "epoch": 0.0005504,
38623
+ "grad_norm": 2.2126805782318115,
38624
+ "learning_rate": 5.503e-05,
38625
+ "loss": 1.3184,
38626
+ "step": 5504
38627
+ },
38628
+ {
38629
+ "epoch": 0.0005505,
38630
+ "grad_norm": 2.2534098625183105,
38631
+ "learning_rate": 5.504e-05,
38632
+ "loss": 1.3574,
38633
+ "step": 5505
38634
+ },
38635
+ {
38636
+ "epoch": 0.0005506,
38637
+ "grad_norm": 2.7865779399871826,
38638
+ "learning_rate": 5.505e-05,
38639
+ "loss": 1.6641,
38640
+ "step": 5506
38641
+ },
38642
+ {
38643
+ "epoch": 0.0005507,
38644
+ "grad_norm": 3.0122759342193604,
38645
+ "learning_rate": 5.506e-05,
38646
+ "loss": 1.7051,
38647
+ "step": 5507
38648
+ },
38649
+ {
38650
+ "epoch": 0.0005508,
38651
+ "grad_norm": 2.089824914932251,
38652
+ "learning_rate": 5.507e-05,
38653
+ "loss": 1.2598,
38654
+ "step": 5508
38655
+ },
38656
+ {
38657
+ "epoch": 0.0005509,
38658
+ "grad_norm": 3.3949615955352783,
38659
+ "learning_rate": 5.508000000000001e-05,
38660
+ "loss": 1.8682,
38661
+ "step": 5509
38662
+ },
38663
+ {
38664
+ "epoch": 0.000551,
38665
+ "grad_norm": 5.518685817718506,
38666
+ "learning_rate": 5.5089999999999996e-05,
38667
+ "loss": 1.5371,
38668
+ "step": 5510
38669
+ },
38670
+ {
38671
+ "epoch": 0.0005511,
38672
+ "grad_norm": 3.192974328994751,
38673
+ "learning_rate": 5.51e-05,
38674
+ "loss": 1.6025,
38675
+ "step": 5511
38676
+ },
38677
+ {
38678
+ "epoch": 0.0005512,
38679
+ "grad_norm": 2.699589967727661,
38680
+ "learning_rate": 5.5110000000000006e-05,
38681
+ "loss": 1.4658,
38682
+ "step": 5512
38683
+ },
38684
+ {
38685
+ "epoch": 0.0005513,
38686
+ "grad_norm": 2.347198486328125,
38687
+ "learning_rate": 5.5119999999999994e-05,
38688
+ "loss": 1.4316,
38689
+ "step": 5513
38690
+ },
38691
+ {
38692
+ "epoch": 0.0005514,
38693
+ "grad_norm": 2.406765937805176,
38694
+ "learning_rate": 5.513e-05,
38695
+ "loss": 1.375,
38696
+ "step": 5514
38697
+ },
38698
+ {
38699
+ "epoch": 0.0005515,
38700
+ "grad_norm": 2.5467593669891357,
38701
+ "learning_rate": 5.5140000000000004e-05,
38702
+ "loss": 1.4297,
38703
+ "step": 5515
38704
+ },
38705
+ {
38706
+ "epoch": 0.0005516,
38707
+ "grad_norm": 2.204380512237549,
38708
+ "learning_rate": 5.5150000000000006e-05,
38709
+ "loss": 1.2383,
38710
+ "step": 5516
38711
+ },
38712
+ {
38713
+ "epoch": 0.0005517,
38714
+ "grad_norm": 2.2602663040161133,
38715
+ "learning_rate": 5.516e-05,
38716
+ "loss": 1.3105,
38717
+ "step": 5517
38718
+ },
38719
+ {
38720
+ "epoch": 0.0005518,
38721
+ "grad_norm": 4.868964672088623,
38722
+ "learning_rate": 5.517e-05,
38723
+ "loss": 1.9258,
38724
+ "step": 5518
38725
+ },
38726
+ {
38727
+ "epoch": 0.0005519,
38728
+ "grad_norm": 2.5829179286956787,
38729
+ "learning_rate": 5.5180000000000004e-05,
38730
+ "loss": 1.4307,
38731
+ "step": 5519
38732
+ },
38733
+ {
38734
+ "epoch": 0.000552,
38735
+ "grad_norm": 2.0237176418304443,
38736
+ "learning_rate": 5.519e-05,
38737
+ "loss": 1.2646,
38738
+ "step": 5520
38739
+ },
38740
+ {
38741
+ "epoch": 0.0005521,
38742
+ "grad_norm": 1.967319130897522,
38743
+ "learning_rate": 5.52e-05,
38744
+ "loss": 1.1914,
38745
+ "step": 5521
38746
+ },
38747
+ {
38748
+ "epoch": 0.0005522,
38749
+ "grad_norm": 2.0111095905303955,
38750
+ "learning_rate": 5.521e-05,
38751
+ "loss": 1.293,
38752
+ "step": 5522
38753
+ },
38754
+ {
38755
+ "epoch": 0.0005523,
38756
+ "grad_norm": 2.2081053256988525,
38757
+ "learning_rate": 5.522e-05,
38758
+ "loss": 1.418,
38759
+ "step": 5523
38760
+ },
38761
+ {
38762
+ "epoch": 0.0005524,
38763
+ "grad_norm": 2.613236904144287,
38764
+ "learning_rate": 5.523e-05,
38765
+ "loss": 1.4785,
38766
+ "step": 5524
38767
+ },
38768
+ {
38769
+ "epoch": 0.0005525,
38770
+ "grad_norm": 3.4059560298919678,
38771
+ "learning_rate": 5.524e-05,
38772
+ "loss": 1.6406,
38773
+ "step": 5525
38774
+ },
38775
+ {
38776
+ "epoch": 0.0005526,
38777
+ "grad_norm": 2.1261260509490967,
38778
+ "learning_rate": 5.525000000000001e-05,
38779
+ "loss": 1.3711,
38780
+ "step": 5526
38781
+ },
38782
+ {
38783
+ "epoch": 0.0005527,
38784
+ "grad_norm": 3.0109899044036865,
38785
+ "learning_rate": 5.5259999999999996e-05,
38786
+ "loss": 1.4688,
38787
+ "step": 5527
38788
+ },
38789
+ {
38790
+ "epoch": 0.0005528,
38791
+ "grad_norm": 2.294259786605835,
38792
+ "learning_rate": 5.527e-05,
38793
+ "loss": 1.4619,
38794
+ "step": 5528
38795
+ },
38796
+ {
38797
+ "epoch": 0.0005529,
38798
+ "grad_norm": 2.180101156234741,
38799
+ "learning_rate": 5.5280000000000006e-05,
38800
+ "loss": 1.4219,
38801
+ "step": 5529
38802
+ },
38803
+ {
38804
+ "epoch": 0.000553,
38805
+ "grad_norm": 2.065366268157959,
38806
+ "learning_rate": 5.5289999999999994e-05,
38807
+ "loss": 1.3633,
38808
+ "step": 5530
38809
+ },
38810
+ {
38811
+ "epoch": 0.0005531,
38812
+ "grad_norm": 2.6592462062835693,
38813
+ "learning_rate": 5.53e-05,
38814
+ "loss": 1.2856,
38815
+ "step": 5531
38816
+ },
38817
+ {
38818
+ "epoch": 0.0005532,
38819
+ "grad_norm": 2.605691432952881,
38820
+ "learning_rate": 5.5310000000000004e-05,
38821
+ "loss": 1.498,
38822
+ "step": 5532
38823
+ },
38824
+ {
38825
+ "epoch": 0.0005533,
38826
+ "grad_norm": 2.811755418777466,
38827
+ "learning_rate": 5.5320000000000006e-05,
38828
+ "loss": 1.7549,
38829
+ "step": 5533
38830
+ },
38831
+ {
38832
+ "epoch": 0.0005534,
38833
+ "grad_norm": 2.2970316410064697,
38834
+ "learning_rate": 5.533e-05,
38835
+ "loss": 1.4023,
38836
+ "step": 5534
38837
+ },
38838
+ {
38839
+ "epoch": 0.0005535,
38840
+ "grad_norm": 2.2041218280792236,
38841
+ "learning_rate": 5.534e-05,
38842
+ "loss": 1.4492,
38843
+ "step": 5535
38844
+ },
38845
+ {
38846
+ "epoch": 0.0005536,
38847
+ "grad_norm": 3.6363894939422607,
38848
+ "learning_rate": 5.5350000000000004e-05,
38849
+ "loss": 2.0703,
38850
+ "step": 5536
38851
+ },
38852
+ {
38853
+ "epoch": 0.0005537,
38854
+ "grad_norm": 2.3322548866271973,
38855
+ "learning_rate": 5.536e-05,
38856
+ "loss": 1.3018,
38857
+ "step": 5537
38858
+ },
38859
+ {
38860
+ "epoch": 0.0005538,
38861
+ "grad_norm": 2.0842840671539307,
38862
+ "learning_rate": 5.537e-05,
38863
+ "loss": 1.3398,
38864
+ "step": 5538
38865
+ },
38866
+ {
38867
+ "epoch": 0.0005539,
38868
+ "grad_norm": 2.181790828704834,
38869
+ "learning_rate": 5.538e-05,
38870
+ "loss": 1.333,
38871
+ "step": 5539
38872
+ },
38873
+ {
38874
+ "epoch": 0.000554,
38875
+ "grad_norm": 2.298056125640869,
38876
+ "learning_rate": 5.539e-05,
38877
+ "loss": 1.416,
38878
+ "step": 5540
38879
+ },
38880
+ {
38881
+ "epoch": 0.0005541,
38882
+ "grad_norm": 3.197896718978882,
38883
+ "learning_rate": 5.54e-05,
38884
+ "loss": 1.2461,
38885
+ "step": 5541
38886
+ },
38887
+ {
38888
+ "epoch": 0.0005542,
38889
+ "grad_norm": 2.2699851989746094,
38890
+ "learning_rate": 5.541e-05,
38891
+ "loss": 1.2783,
38892
+ "step": 5542
38893
+ },
38894
+ {
38895
+ "epoch": 0.0005543,
38896
+ "grad_norm": 2.420722484588623,
38897
+ "learning_rate": 5.542000000000001e-05,
38898
+ "loss": 1.5762,
38899
+ "step": 5543
38900
+ },
38901
+ {
38902
+ "epoch": 0.0005544,
38903
+ "grad_norm": 2.019577980041504,
38904
+ "learning_rate": 5.5429999999999996e-05,
38905
+ "loss": 1.2617,
38906
+ "step": 5544
38907
+ },
38908
+ {
38909
+ "epoch": 0.0005545,
38910
+ "grad_norm": 2.4632229804992676,
38911
+ "learning_rate": 5.544e-05,
38912
+ "loss": 1.396,
38913
+ "step": 5545
38914
+ },
38915
+ {
38916
+ "epoch": 0.0005546,
38917
+ "grad_norm": 2.2937636375427246,
38918
+ "learning_rate": 5.5450000000000006e-05,
38919
+ "loss": 1.3311,
38920
+ "step": 5546
38921
+ },
38922
+ {
38923
+ "epoch": 0.0005547,
38924
+ "grad_norm": 2.050861120223999,
38925
+ "learning_rate": 5.5459999999999994e-05,
38926
+ "loss": 1.2666,
38927
+ "step": 5547
38928
+ },
38929
+ {
38930
+ "epoch": 0.0005548,
38931
+ "grad_norm": 2.0719165802001953,
38932
+ "learning_rate": 5.547e-05,
38933
+ "loss": 1.3037,
38934
+ "step": 5548
38935
+ },
38936
+ {
38937
+ "epoch": 0.0005549,
38938
+ "grad_norm": 2.0907227993011475,
38939
+ "learning_rate": 5.5480000000000004e-05,
38940
+ "loss": 1.2842,
38941
+ "step": 5549
38942
+ },
38943
+ {
38944
+ "epoch": 0.000555,
38945
+ "grad_norm": 2.092015504837036,
38946
+ "learning_rate": 5.5490000000000006e-05,
38947
+ "loss": 1.2529,
38948
+ "step": 5550
38949
+ },
38950
+ {
38951
+ "epoch": 0.0005551,
38952
+ "grad_norm": 2.0380799770355225,
38953
+ "learning_rate": 5.55e-05,
38954
+ "loss": 1.2295,
38955
+ "step": 5551
38956
+ },
38957
+ {
38958
+ "epoch": 0.0005552,
38959
+ "grad_norm": 2.0194711685180664,
38960
+ "learning_rate": 5.551e-05,
38961
+ "loss": 1.1826,
38962
+ "step": 5552
38963
+ },
38964
+ {
38965
+ "epoch": 0.0005553,
38966
+ "grad_norm": 1.984717845916748,
38967
+ "learning_rate": 5.5520000000000004e-05,
38968
+ "loss": 1.2383,
38969
+ "step": 5553
38970
+ },
38971
+ {
38972
+ "epoch": 0.0005554,
38973
+ "grad_norm": 1.9688911437988281,
38974
+ "learning_rate": 5.553e-05,
38975
+ "loss": 1.2822,
38976
+ "step": 5554
38977
+ },
38978
+ {
38979
+ "epoch": 0.0005555,
38980
+ "grad_norm": 3.0318920612335205,
38981
+ "learning_rate": 5.554e-05,
38982
+ "loss": 1.8086,
38983
+ "step": 5555
38984
+ },
38985
+ {
38986
+ "epoch": 0.0005556,
38987
+ "grad_norm": 2.0116493701934814,
38988
+ "learning_rate": 5.555e-05,
38989
+ "loss": 1.1943,
38990
+ "step": 5556
38991
+ },
38992
+ {
38993
+ "epoch": 0.0005557,
38994
+ "grad_norm": 2.2734971046447754,
38995
+ "learning_rate": 5.5559999999999997e-05,
38996
+ "loss": 1.4131,
38997
+ "step": 5557
38998
+ },
38999
+ {
39000
+ "epoch": 0.0005558,
39001
+ "grad_norm": 2.3707101345062256,
39002
+ "learning_rate": 5.557e-05,
39003
+ "loss": 1.3682,
39004
+ "step": 5558
39005
+ },
39006
+ {
39007
+ "epoch": 0.0005559,
39008
+ "grad_norm": 1.9980303049087524,
39009
+ "learning_rate": 5.558e-05,
39010
+ "loss": 1.2549,
39011
+ "step": 5559
39012
+ },
39013
+ {
39014
+ "epoch": 0.000556,
39015
+ "grad_norm": 2.1238646507263184,
39016
+ "learning_rate": 5.559000000000001e-05,
39017
+ "loss": 1.2539,
39018
+ "step": 5560
39019
+ },
39020
+ {
39021
+ "epoch": 0.0005561,
39022
+ "grad_norm": 2.268760919570923,
39023
+ "learning_rate": 5.5599999999999996e-05,
39024
+ "loss": 1.4111,
39025
+ "step": 5561
39026
+ },
39027
+ {
39028
+ "epoch": 0.0005562,
39029
+ "grad_norm": 2.3243017196655273,
39030
+ "learning_rate": 5.561e-05,
39031
+ "loss": 1.373,
39032
+ "step": 5562
39033
+ },
39034
+ {
39035
+ "epoch": 0.0005563,
39036
+ "grad_norm": 2.0662007331848145,
39037
+ "learning_rate": 5.5620000000000006e-05,
39038
+ "loss": 1.3096,
39039
+ "step": 5563
39040
+ },
39041
+ {
39042
+ "epoch": 0.0005564,
39043
+ "grad_norm": 2.231890916824341,
39044
+ "learning_rate": 5.5629999999999994e-05,
39045
+ "loss": 1.373,
39046
+ "step": 5564
39047
+ },
39048
+ {
39049
+ "epoch": 0.0005565,
39050
+ "grad_norm": 2.1430857181549072,
39051
+ "learning_rate": 5.564e-05,
39052
+ "loss": 1.1699,
39053
+ "step": 5565
39054
+ },
39055
+ {
39056
+ "epoch": 0.0005566,
39057
+ "grad_norm": 1.9390007257461548,
39058
+ "learning_rate": 5.5650000000000004e-05,
39059
+ "loss": 1.2163,
39060
+ "step": 5566
39061
+ },
39062
+ {
39063
+ "epoch": 0.0005567,
39064
+ "grad_norm": 3.218642473220825,
39065
+ "learning_rate": 5.5660000000000006e-05,
39066
+ "loss": 1.9434,
39067
+ "step": 5567
39068
+ },
39069
+ {
39070
+ "epoch": 0.0005568,
39071
+ "grad_norm": 2.7889389991760254,
39072
+ "learning_rate": 5.567e-05,
39073
+ "loss": 1.7451,
39074
+ "step": 5568
39075
+ },
39076
+ {
39077
+ "epoch": 0.0005569,
39078
+ "grad_norm": 2.129279613494873,
39079
+ "learning_rate": 5.568e-05,
39080
+ "loss": 1.2842,
39081
+ "step": 5569
39082
+ },
39083
+ {
39084
+ "epoch": 0.000557,
39085
+ "grad_norm": 2.407432794570923,
39086
+ "learning_rate": 5.5690000000000004e-05,
39087
+ "loss": 1.4795,
39088
+ "step": 5570
39089
+ },
39090
+ {
39091
+ "epoch": 0.0005571,
39092
+ "grad_norm": 2.932892084121704,
39093
+ "learning_rate": 5.57e-05,
39094
+ "loss": 1.2832,
39095
+ "step": 5571
39096
+ },
39097
+ {
39098
+ "epoch": 0.0005572,
39099
+ "grad_norm": 2.2704365253448486,
39100
+ "learning_rate": 5.571e-05,
39101
+ "loss": 1.3706,
39102
+ "step": 5572
39103
+ },
39104
+ {
39105
+ "epoch": 0.0005573,
39106
+ "grad_norm": 2.6265451908111572,
39107
+ "learning_rate": 5.572e-05,
39108
+ "loss": 1.2607,
39109
+ "step": 5573
39110
+ },
39111
+ {
39112
+ "epoch": 0.0005574,
39113
+ "grad_norm": 2.1422863006591797,
39114
+ "learning_rate": 5.573e-05,
39115
+ "loss": 1.2617,
39116
+ "step": 5574
39117
+ },
39118
+ {
39119
+ "epoch": 0.0005575,
39120
+ "grad_norm": 2.9647858142852783,
39121
+ "learning_rate": 5.574e-05,
39122
+ "loss": 1.3901,
39123
+ "step": 5575
39124
+ },
39125
+ {
39126
+ "epoch": 0.0005576,
39127
+ "grad_norm": 2.489037036895752,
39128
+ "learning_rate": 5.575e-05,
39129
+ "loss": 1.3994,
39130
+ "step": 5576
39131
+ },
39132
+ {
39133
+ "epoch": 0.0005577,
39134
+ "grad_norm": 2.4596824645996094,
39135
+ "learning_rate": 5.576000000000001e-05,
39136
+ "loss": 1.3701,
39137
+ "step": 5577
39138
+ },
39139
+ {
39140
+ "epoch": 0.0005578,
39141
+ "grad_norm": 2.2265207767486572,
39142
+ "learning_rate": 5.5769999999999996e-05,
39143
+ "loss": 1.248,
39144
+ "step": 5578
39145
+ },
39146
+ {
39147
+ "epoch": 0.0005579,
39148
+ "grad_norm": 2.3037335872650146,
39149
+ "learning_rate": 5.578e-05,
39150
+ "loss": 1.333,
39151
+ "step": 5579
39152
+ },
39153
+ {
39154
+ "epoch": 0.000558,
39155
+ "grad_norm": 2.651296615600586,
39156
+ "learning_rate": 5.5790000000000006e-05,
39157
+ "loss": 1.4697,
39158
+ "step": 5580
39159
+ },
39160
+ {
39161
+ "epoch": 0.0005581,
39162
+ "grad_norm": 2.3579533100128174,
39163
+ "learning_rate": 5.5799999999999994e-05,
39164
+ "loss": 1.3623,
39165
+ "step": 5581
39166
+ },
39167
+ {
39168
+ "epoch": 0.0005582,
39169
+ "grad_norm": 2.523846387863159,
39170
+ "learning_rate": 5.581e-05,
39171
+ "loss": 1.415,
39172
+ "step": 5582
39173
+ },
39174
+ {
39175
+ "epoch": 0.0005583,
39176
+ "grad_norm": 2.5580360889434814,
39177
+ "learning_rate": 5.5820000000000004e-05,
39178
+ "loss": 1.4512,
39179
+ "step": 5583
39180
+ },
39181
+ {
39182
+ "epoch": 0.0005584,
39183
+ "grad_norm": 2.344586133956909,
39184
+ "learning_rate": 5.5830000000000006e-05,
39185
+ "loss": 1.3662,
39186
+ "step": 5584
39187
+ },
39188
+ {
39189
+ "epoch": 0.0005585,
39190
+ "grad_norm": 2.040177822113037,
39191
+ "learning_rate": 5.584e-05,
39192
+ "loss": 1.2173,
39193
+ "step": 5585
39194
+ },
39195
+ {
39196
+ "epoch": 0.0005586,
39197
+ "grad_norm": 1.9751789569854736,
39198
+ "learning_rate": 5.585e-05,
39199
+ "loss": 1.1479,
39200
+ "step": 5586
39201
+ },
39202
+ {
39203
+ "epoch": 0.0005587,
39204
+ "grad_norm": 2.2699475288391113,
39205
+ "learning_rate": 5.5860000000000004e-05,
39206
+ "loss": 1.2803,
39207
+ "step": 5587
39208
+ },
39209
+ {
39210
+ "epoch": 0.0005588,
39211
+ "grad_norm": 2.3292815685272217,
39212
+ "learning_rate": 5.587e-05,
39213
+ "loss": 1.3145,
39214
+ "step": 5588
39215
+ },
39216
+ {
39217
+ "epoch": 0.0005589,
39218
+ "grad_norm": 2.4007952213287354,
39219
+ "learning_rate": 5.588e-05,
39220
+ "loss": 1.2822,
39221
+ "step": 5589
39222
+ },
39223
+ {
39224
+ "epoch": 0.000559,
39225
+ "grad_norm": 2.313913345336914,
39226
+ "learning_rate": 5.589e-05,
39227
+ "loss": 1.2842,
39228
+ "step": 5590
39229
+ },
39230
+ {
39231
+ "epoch": 0.0005591,
39232
+ "grad_norm": 2.481891632080078,
39233
+ "learning_rate": 5.59e-05,
39234
+ "loss": 1.5146,
39235
+ "step": 5591
39236
+ },
39237
+ {
39238
+ "epoch": 0.0005592,
39239
+ "grad_norm": 2.1890406608581543,
39240
+ "learning_rate": 5.591e-05,
39241
+ "loss": 1.2607,
39242
+ "step": 5592
39243
+ },
39244
+ {
39245
+ "epoch": 0.0005593,
39246
+ "grad_norm": 2.6850531101226807,
39247
+ "learning_rate": 5.592e-05,
39248
+ "loss": 1.4482,
39249
+ "step": 5593
39250
+ },
39251
+ {
39252
+ "epoch": 0.0005594,
39253
+ "grad_norm": 2.2254996299743652,
39254
+ "learning_rate": 5.593000000000001e-05,
39255
+ "loss": 1.291,
39256
+ "step": 5594
39257
+ },
39258
+ {
39259
+ "epoch": 0.0005595,
39260
+ "grad_norm": 2.0163609981536865,
39261
+ "learning_rate": 5.5939999999999996e-05,
39262
+ "loss": 1.2124,
39263
+ "step": 5595
39264
+ },
39265
+ {
39266
+ "epoch": 0.0005596,
39267
+ "grad_norm": 2.391115188598633,
39268
+ "learning_rate": 5.595e-05,
39269
+ "loss": 1.4141,
39270
+ "step": 5596
39271
+ },
39272
+ {
39273
+ "epoch": 0.0005597,
39274
+ "grad_norm": 2.7599549293518066,
39275
+ "learning_rate": 5.5960000000000006e-05,
39276
+ "loss": 1.5264,
39277
+ "step": 5597
39278
+ },
39279
+ {
39280
+ "epoch": 0.0005598,
39281
+ "grad_norm": 2.434512138366699,
39282
+ "learning_rate": 5.5969999999999994e-05,
39283
+ "loss": 1.3662,
39284
+ "step": 5598
39285
+ },
39286
+ {
39287
+ "epoch": 0.0005599,
39288
+ "grad_norm": 2.0736303329467773,
39289
+ "learning_rate": 5.598e-05,
39290
+ "loss": 1.1714,
39291
+ "step": 5599
39292
+ },
39293
+ {
39294
+ "epoch": 0.00056,
39295
+ "grad_norm": 2.0648913383483887,
39296
+ "learning_rate": 5.5990000000000004e-05,
39297
+ "loss": 1.1797,
39298
+ "step": 5600
39299
+ },
39300
+ {
39301
+ "epoch": 0.0005601,
39302
+ "grad_norm": 4.771807670593262,
39303
+ "learning_rate": 5.6000000000000006e-05,
39304
+ "loss": 1.6016,
39305
+ "step": 5601
39306
+ },
39307
+ {
39308
+ "epoch": 0.0005602,
39309
+ "grad_norm": 2.7045605182647705,
39310
+ "learning_rate": 5.601e-05,
39311
+ "loss": 1.4512,
39312
+ "step": 5602
39313
+ },
39314
+ {
39315
+ "epoch": 0.0005603,
39316
+ "grad_norm": 2.5183730125427246,
39317
+ "learning_rate": 5.602e-05,
39318
+ "loss": 1.376,
39319
+ "step": 5603
39320
+ },
39321
+ {
39322
+ "epoch": 0.0005604,
39323
+ "grad_norm": 2.135667085647583,
39324
+ "learning_rate": 5.6030000000000004e-05,
39325
+ "loss": 1.3174,
39326
+ "step": 5604
39327
+ },
39328
+ {
39329
+ "epoch": 0.0005605,
39330
+ "grad_norm": 2.1617133617401123,
39331
+ "learning_rate": 5.604e-05,
39332
+ "loss": 1.248,
39333
+ "step": 5605
39334
+ },
39335
+ {
39336
+ "epoch": 0.0005606,
39337
+ "grad_norm": 2.7303905487060547,
39338
+ "learning_rate": 5.605e-05,
39339
+ "loss": 1.3955,
39340
+ "step": 5606
39341
+ },
39342
+ {
39343
+ "epoch": 0.0005607,
39344
+ "grad_norm": 2.1086387634277344,
39345
+ "learning_rate": 5.606e-05,
39346
+ "loss": 1.3164,
39347
+ "step": 5607
39348
+ },
39349
+ {
39350
+ "epoch": 0.0005608,
39351
+ "grad_norm": 2.3741819858551025,
39352
+ "learning_rate": 5.607e-05,
39353
+ "loss": 1.4766,
39354
+ "step": 5608
39355
+ },
39356
+ {
39357
+ "epoch": 0.0005609,
39358
+ "grad_norm": 2.0160882472991943,
39359
+ "learning_rate": 5.608e-05,
39360
+ "loss": 1.2295,
39361
+ "step": 5609
39362
+ },
39363
+ {
39364
+ "epoch": 0.000561,
39365
+ "grad_norm": 1.9973747730255127,
39366
+ "learning_rate": 5.609e-05,
39367
+ "loss": 1.1104,
39368
+ "step": 5610
39369
+ },
39370
+ {
39371
+ "epoch": 0.0005611,
39372
+ "grad_norm": 2.368208885192871,
39373
+ "learning_rate": 5.610000000000001e-05,
39374
+ "loss": 1.6182,
39375
+ "step": 5611
39376
+ },
39377
+ {
39378
+ "epoch": 0.0005612,
39379
+ "grad_norm": 3.3395214080810547,
39380
+ "learning_rate": 5.6109999999999996e-05,
39381
+ "loss": 2.1504,
39382
+ "step": 5612
39383
+ },
39384
+ {
39385
+ "epoch": 0.0005613,
39386
+ "grad_norm": 2.3721203804016113,
39387
+ "learning_rate": 5.612e-05,
39388
+ "loss": 1.3037,
39389
+ "step": 5613
39390
+ },
39391
+ {
39392
+ "epoch": 0.0005614,
39393
+ "grad_norm": 2.190359115600586,
39394
+ "learning_rate": 5.6130000000000006e-05,
39395
+ "loss": 1.167,
39396
+ "step": 5614
39397
+ },
39398
+ {
39399
+ "epoch": 0.0005615,
39400
+ "grad_norm": 2.519402503967285,
39401
+ "learning_rate": 5.6139999999999994e-05,
39402
+ "loss": 1.3716,
39403
+ "step": 5615
39404
+ },
39405
+ {
39406
+ "epoch": 0.0005616,
39407
+ "grad_norm": 2.859174966812134,
39408
+ "learning_rate": 5.615e-05,
39409
+ "loss": 1.5605,
39410
+ "step": 5616
39411
+ },
39412
+ {
39413
+ "epoch": 0.0005617,
39414
+ "grad_norm": 2.296678304672241,
39415
+ "learning_rate": 5.6160000000000004e-05,
39416
+ "loss": 1.1484,
39417
+ "step": 5617
39418
+ },
39419
+ {
39420
+ "epoch": 0.0005618,
39421
+ "grad_norm": 2.1283135414123535,
39422
+ "learning_rate": 5.6170000000000006e-05,
39423
+ "loss": 1.1885,
39424
+ "step": 5618
39425
+ },
39426
+ {
39427
+ "epoch": 0.0005619,
39428
+ "grad_norm": 2.090508222579956,
39429
+ "learning_rate": 5.618e-05,
39430
+ "loss": 1.2754,
39431
+ "step": 5619
39432
+ },
39433
+ {
39434
+ "epoch": 0.000562,
39435
+ "grad_norm": 2.2708656787872314,
39436
+ "learning_rate": 5.619e-05,
39437
+ "loss": 1.458,
39438
+ "step": 5620
39439
+ },
39440
+ {
39441
+ "epoch": 0.0005621,
39442
+ "grad_norm": 1.9705712795257568,
39443
+ "learning_rate": 5.6200000000000004e-05,
39444
+ "loss": 1.082,
39445
+ "step": 5621
39446
+ },
39447
+ {
39448
+ "epoch": 0.0005622,
39449
+ "grad_norm": 1.862807273864746,
39450
+ "learning_rate": 5.621e-05,
39451
+ "loss": 1.1211,
39452
+ "step": 5622
39453
+ },
39454
+ {
39455
+ "epoch": 0.0005623,
39456
+ "grad_norm": 2.0587210655212402,
39457
+ "learning_rate": 5.622e-05,
39458
+ "loss": 1.2495,
39459
+ "step": 5623
39460
+ },
39461
+ {
39462
+ "epoch": 0.0005624,
39463
+ "grad_norm": 1.9742335081100464,
39464
+ "learning_rate": 5.623e-05,
39465
+ "loss": 1.165,
39466
+ "step": 5624
39467
+ },
39468
+ {
39469
+ "epoch": 0.0005625,
39470
+ "grad_norm": 2.1360883712768555,
39471
+ "learning_rate": 5.624e-05,
39472
+ "loss": 1.1914,
39473
+ "step": 5625
39474
+ },
39475
+ {
39476
+ "epoch": 0.0005626,
39477
+ "grad_norm": 2.0543458461761475,
39478
+ "learning_rate": 5.625e-05,
39479
+ "loss": 1.1826,
39480
+ "step": 5626
39481
+ },
39482
+ {
39483
+ "epoch": 0.0005627,
39484
+ "grad_norm": 2.346017360687256,
39485
+ "learning_rate": 5.626e-05,
39486
+ "loss": 1.3242,
39487
+ "step": 5627
39488
+ },
39489
+ {
39490
+ "epoch": 0.0005628,
39491
+ "grad_norm": 2.727350950241089,
39492
+ "learning_rate": 5.627000000000001e-05,
39493
+ "loss": 1.417,
39494
+ "step": 5628
39495
+ },
39496
+ {
39497
+ "epoch": 0.0005629,
39498
+ "grad_norm": 2.4296886920928955,
39499
+ "learning_rate": 5.6279999999999996e-05,
39500
+ "loss": 1.188,
39501
+ "step": 5629
39502
+ },
39503
+ {
39504
+ "epoch": 0.000563,
39505
+ "grad_norm": 2.0857579708099365,
39506
+ "learning_rate": 5.629e-05,
39507
+ "loss": 1.1699,
39508
+ "step": 5630
39509
+ },
39510
+ {
39511
+ "epoch": 0.0005631,
39512
+ "grad_norm": 2.0088858604431152,
39513
+ "learning_rate": 5.6300000000000006e-05,
39514
+ "loss": 1.2188,
39515
+ "step": 5631
39516
+ },
39517
+ {
39518
+ "epoch": 0.0005632,
39519
+ "grad_norm": 2.1220197677612305,
39520
+ "learning_rate": 5.6309999999999994e-05,
39521
+ "loss": 1.2217,
39522
+ "step": 5632
39523
+ },
39524
+ {
39525
+ "epoch": 0.0005633,
39526
+ "grad_norm": 2.684288740158081,
39527
+ "learning_rate": 5.632e-05,
39528
+ "loss": 1.3774,
39529
+ "step": 5633
39530
+ },
39531
+ {
39532
+ "epoch": 0.0005634,
39533
+ "grad_norm": 2.1442840099334717,
39534
+ "learning_rate": 5.6330000000000004e-05,
39535
+ "loss": 1.2217,
39536
+ "step": 5634
39537
+ },
39538
+ {
39539
+ "epoch": 0.0005635,
39540
+ "grad_norm": 2.0995047092437744,
39541
+ "learning_rate": 5.6340000000000006e-05,
39542
+ "loss": 1.2295,
39543
+ "step": 5635
39544
+ },
39545
+ {
39546
+ "epoch": 0.0005636,
39547
+ "grad_norm": 2.410370111465454,
39548
+ "learning_rate": 5.635e-05,
39549
+ "loss": 1.2529,
39550
+ "step": 5636
39551
+ },
39552
+ {
39553
+ "epoch": 0.0005637,
39554
+ "grad_norm": 2.588259220123291,
39555
+ "learning_rate": 5.636e-05,
39556
+ "loss": 1.3447,
39557
+ "step": 5637
39558
+ },
39559
+ {
39560
+ "epoch": 0.0005638,
39561
+ "grad_norm": 2.3008322715759277,
39562
+ "learning_rate": 5.6370000000000004e-05,
39563
+ "loss": 1.2861,
39564
+ "step": 5638
39565
+ },
39566
+ {
39567
+ "epoch": 0.0005639,
39568
+ "grad_norm": 2.134218215942383,
39569
+ "learning_rate": 5.638e-05,
39570
+ "loss": 1.1191,
39571
+ "step": 5639
39572
+ },
39573
+ {
39574
+ "epoch": 0.000564,
39575
+ "grad_norm": 2.9954631328582764,
39576
+ "learning_rate": 5.639e-05,
39577
+ "loss": 1.3828,
39578
+ "step": 5640
39579
+ },
39580
+ {
39581
+ "epoch": 0.0005641,
39582
+ "grad_norm": 2.734889030456543,
39583
+ "learning_rate": 5.64e-05,
39584
+ "loss": 1.6357,
39585
+ "step": 5641
39586
+ },
39587
+ {
39588
+ "epoch": 0.0005642,
39589
+ "grad_norm": 2.154879093170166,
39590
+ "learning_rate": 5.641e-05,
39591
+ "loss": 1.2168,
39592
+ "step": 5642
39593
+ },
39594
+ {
39595
+ "epoch": 0.0005643,
39596
+ "grad_norm": 2.6774098873138428,
39597
+ "learning_rate": 5.642e-05,
39598
+ "loss": 1.5977,
39599
+ "step": 5643
39600
+ },
39601
+ {
39602
+ "epoch": 0.0005644,
39603
+ "grad_norm": 2.5120511054992676,
39604
+ "learning_rate": 5.643e-05,
39605
+ "loss": 1.3242,
39606
+ "step": 5644
39607
+ },
39608
+ {
39609
+ "epoch": 0.0005645,
39610
+ "grad_norm": 2.580930233001709,
39611
+ "learning_rate": 5.644000000000001e-05,
39612
+ "loss": 1.4297,
39613
+ "step": 5645
39614
+ },
39615
+ {
39616
+ "epoch": 0.0005646,
39617
+ "grad_norm": 2.058643341064453,
39618
+ "learning_rate": 5.6449999999999997e-05,
39619
+ "loss": 1.1025,
39620
+ "step": 5646
39621
+ },
39622
+ {
39623
+ "epoch": 0.0005647,
39624
+ "grad_norm": 1.924004077911377,
39625
+ "learning_rate": 5.646e-05,
39626
+ "loss": 1.1406,
39627
+ "step": 5647
39628
+ },
39629
+ {
39630
+ "epoch": 0.0005648,
39631
+ "grad_norm": 2.348186492919922,
39632
+ "learning_rate": 5.6470000000000007e-05,
39633
+ "loss": 1.2178,
39634
+ "step": 5648
39635
+ },
39636
+ {
39637
+ "epoch": 0.0005649,
39638
+ "grad_norm": 2.400103807449341,
39639
+ "learning_rate": 5.6479999999999995e-05,
39640
+ "loss": 1.333,
39641
+ "step": 5649
39642
+ },
39643
+ {
39644
+ "epoch": 0.000565,
39645
+ "grad_norm": 2.0180234909057617,
39646
+ "learning_rate": 5.649e-05,
39647
+ "loss": 1.0439,
39648
+ "step": 5650
39649
+ },
39650
+ {
39651
+ "epoch": 0.0005651,
39652
+ "grad_norm": 2.074004650115967,
39653
+ "learning_rate": 5.6500000000000005e-05,
39654
+ "loss": 1.1084,
39655
+ "step": 5651
39656
+ },
39657
+ {
39658
+ "epoch": 0.0005652,
39659
+ "grad_norm": 2.834411144256592,
39660
+ "learning_rate": 5.6510000000000006e-05,
39661
+ "loss": 1.3525,
39662
+ "step": 5652
39663
+ },
39664
+ {
39665
+ "epoch": 0.0005653,
39666
+ "grad_norm": 2.1253347396850586,
39667
+ "learning_rate": 5.652e-05,
39668
+ "loss": 1.1514,
39669
+ "step": 5653
39670
+ },
39671
+ {
39672
+ "epoch": 0.0005654,
39673
+ "grad_norm": 2.0319814682006836,
39674
+ "learning_rate": 5.653e-05,
39675
+ "loss": 1.1621,
39676
+ "step": 5654
39677
+ },
39678
+ {
39679
+ "epoch": 0.0005655,
39680
+ "grad_norm": 1.959982991218567,
39681
+ "learning_rate": 5.6540000000000004e-05,
39682
+ "loss": 1.1929,
39683
+ "step": 5655
39684
+ },
39685
+ {
39686
+ "epoch": 0.0005656,
39687
+ "grad_norm": 2.5499229431152344,
39688
+ "learning_rate": 5.655e-05,
39689
+ "loss": 1.4463,
39690
+ "step": 5656
39691
+ },
39692
+ {
39693
+ "epoch": 0.0005657,
39694
+ "grad_norm": 2.4173882007598877,
39695
+ "learning_rate": 5.656e-05,
39696
+ "loss": 1.2705,
39697
+ "step": 5657
39698
+ },
39699
+ {
39700
+ "epoch": 0.0005658,
39701
+ "grad_norm": 2.9077694416046143,
39702
+ "learning_rate": 5.657e-05,
39703
+ "loss": 1.418,
39704
+ "step": 5658
39705
+ },
39706
+ {
39707
+ "epoch": 0.0005659,
39708
+ "grad_norm": 2.3227171897888184,
39709
+ "learning_rate": 5.658e-05,
39710
+ "loss": 1.3213,
39711
+ "step": 5659
39712
+ },
39713
+ {
39714
+ "epoch": 0.000566,
39715
+ "grad_norm": 2.192533016204834,
39716
+ "learning_rate": 5.659e-05,
39717
+ "loss": 1.1631,
39718
+ "step": 5660
39719
+ },
39720
+ {
39721
+ "epoch": 0.0005661,
39722
+ "grad_norm": 2.154204845428467,
39723
+ "learning_rate": 5.66e-05,
39724
+ "loss": 1.2314,
39725
+ "step": 5661
39726
+ },
39727
+ {
39728
+ "epoch": 0.0005662,
39729
+ "grad_norm": 2.6472861766815186,
39730
+ "learning_rate": 5.661000000000001e-05,
39731
+ "loss": 1.4326,
39732
+ "step": 5662
39733
+ },
39734
+ {
39735
+ "epoch": 0.0005663,
39736
+ "grad_norm": 2.488671064376831,
39737
+ "learning_rate": 5.662e-05,
39738
+ "loss": 1.2031,
39739
+ "step": 5663
39740
+ },
39741
+ {
39742
+ "epoch": 0.0005664,
39743
+ "grad_norm": 2.7378642559051514,
39744
+ "learning_rate": 5.663e-05,
39745
+ "loss": 1.4014,
39746
+ "step": 5664
39747
+ },
39748
+ {
39749
+ "epoch": 0.0005665,
39750
+ "grad_norm": 2.089963436126709,
39751
+ "learning_rate": 5.6640000000000007e-05,
39752
+ "loss": 1.1357,
39753
+ "step": 5665
39754
+ },
39755
+ {
39756
+ "epoch": 0.0005666,
39757
+ "grad_norm": 2.295555353164673,
39758
+ "learning_rate": 5.6649999999999995e-05,
39759
+ "loss": 1.2432,
39760
+ "step": 5666
39761
+ },
39762
+ {
39763
+ "epoch": 0.0005667,
39764
+ "grad_norm": 3.187014102935791,
39765
+ "learning_rate": 5.666e-05,
39766
+ "loss": 1.2603,
39767
+ "step": 5667
39768
+ },
39769
+ {
39770
+ "epoch": 0.0005668,
39771
+ "grad_norm": 2.1170108318328857,
39772
+ "learning_rate": 5.6670000000000005e-05,
39773
+ "loss": 1.125,
39774
+ "step": 5668
39775
+ },
39776
+ {
39777
+ "epoch": 0.0005669,
39778
+ "grad_norm": 2.15708327293396,
39779
+ "learning_rate": 5.6680000000000006e-05,
39780
+ "loss": 1.1699,
39781
+ "step": 5669
39782
+ },
39783
+ {
39784
+ "epoch": 0.000567,
39785
+ "grad_norm": 1.9517823457717896,
39786
+ "learning_rate": 5.669e-05,
39787
+ "loss": 1.1523,
39788
+ "step": 5670
39789
+ },
39790
+ {
39791
+ "epoch": 0.0005671,
39792
+ "grad_norm": 2.6591954231262207,
39793
+ "learning_rate": 5.67e-05,
39794
+ "loss": 1.2305,
39795
+ "step": 5671
39796
+ },
39797
+ {
39798
+ "epoch": 0.0005672,
39799
+ "grad_norm": 1.9791878461837769,
39800
+ "learning_rate": 5.6710000000000004e-05,
39801
+ "loss": 1.0776,
39802
+ "step": 5672
39803
+ },
39804
+ {
39805
+ "epoch": 0.0005673,
39806
+ "grad_norm": 2.0013673305511475,
39807
+ "learning_rate": 5.672e-05,
39808
+ "loss": 1.1182,
39809
+ "step": 5673
39810
+ },
39811
+ {
39812
+ "epoch": 0.0005674,
39813
+ "grad_norm": 2.209170341491699,
39814
+ "learning_rate": 5.673e-05,
39815
+ "loss": 1.2578,
39816
+ "step": 5674
39817
+ },
39818
+ {
39819
+ "epoch": 0.0005675,
39820
+ "grad_norm": 2.586726665496826,
39821
+ "learning_rate": 5.674e-05,
39822
+ "loss": 1.3379,
39823
+ "step": 5675
39824
+ },
39825
+ {
39826
+ "epoch": 0.0005676,
39827
+ "grad_norm": 1.9457072019577026,
39828
+ "learning_rate": 5.675e-05,
39829
+ "loss": 1.0288,
39830
+ "step": 5676
39831
+ },
39832
+ {
39833
+ "epoch": 0.0005677,
39834
+ "grad_norm": 2.2997748851776123,
39835
+ "learning_rate": 5.676e-05,
39836
+ "loss": 1.2896,
39837
+ "step": 5677
39838
+ },
39839
+ {
39840
+ "epoch": 0.0005678,
39841
+ "grad_norm": 1.9310775995254517,
39842
+ "learning_rate": 5.677e-05,
39843
+ "loss": 1.2266,
39844
+ "step": 5678
39845
+ },
39846
+ {
39847
+ "epoch": 0.0005679,
39848
+ "grad_norm": 1.9865697622299194,
39849
+ "learning_rate": 5.678000000000001e-05,
39850
+ "loss": 1.1147,
39851
+ "step": 5679
39852
+ },
39853
+ {
39854
+ "epoch": 0.000568,
39855
+ "grad_norm": 1.958775281906128,
39856
+ "learning_rate": 5.679e-05,
39857
+ "loss": 1.0835,
39858
+ "step": 5680
39859
+ },
39860
+ {
39861
+ "epoch": 0.0005681,
39862
+ "grad_norm": 1.8496625423431396,
39863
+ "learning_rate": 5.68e-05,
39864
+ "loss": 1.0537,
39865
+ "step": 5681
39866
+ },
39867
+ {
39868
+ "epoch": 0.0005682,
39869
+ "grad_norm": 1.922885537147522,
39870
+ "learning_rate": 5.681000000000001e-05,
39871
+ "loss": 1.1465,
39872
+ "step": 5682
39873
+ },
39874
+ {
39875
+ "epoch": 0.0005683,
39876
+ "grad_norm": 1.9769484996795654,
39877
+ "learning_rate": 5.6819999999999995e-05,
39878
+ "loss": 1.0781,
39879
+ "step": 5683
39880
+ },
39881
+ {
39882
+ "epoch": 0.0005684,
39883
+ "grad_norm": 1.9770240783691406,
39884
+ "learning_rate": 5.683e-05,
39885
+ "loss": 1.1118,
39886
+ "step": 5684
39887
+ },
39888
+ {
39889
+ "epoch": 0.0005685,
39890
+ "grad_norm": 1.9034974575042725,
39891
+ "learning_rate": 5.6840000000000005e-05,
39892
+ "loss": 1.0576,
39893
+ "step": 5685
39894
+ },
39895
+ {
39896
+ "epoch": 0.0005686,
39897
+ "grad_norm": 2.0151827335357666,
39898
+ "learning_rate": 5.6850000000000006e-05,
39899
+ "loss": 1.0386,
39900
+ "step": 5686
39901
+ },
39902
+ {
39903
+ "epoch": 0.0005687,
39904
+ "grad_norm": 3.5754876136779785,
39905
+ "learning_rate": 5.686e-05,
39906
+ "loss": 1.6006,
39907
+ "step": 5687
39908
+ },
39909
+ {
39910
+ "epoch": 0.0005688,
39911
+ "grad_norm": 2.2252042293548584,
39912
+ "learning_rate": 5.687e-05,
39913
+ "loss": 1.1045,
39914
+ "step": 5688
39915
+ },
39916
+ {
39917
+ "epoch": 0.0005689,
39918
+ "grad_norm": 2.010793447494507,
39919
+ "learning_rate": 5.6880000000000004e-05,
39920
+ "loss": 1.0869,
39921
+ "step": 5689
39922
+ },
39923
+ {
39924
+ "epoch": 0.000569,
39925
+ "grad_norm": 1.8892912864685059,
39926
+ "learning_rate": 5.689e-05,
39927
+ "loss": 1.1279,
39928
+ "step": 5690
39929
+ },
39930
+ {
39931
+ "epoch": 0.0005691,
39932
+ "grad_norm": 1.9598063230514526,
39933
+ "learning_rate": 5.69e-05,
39934
+ "loss": 1.0938,
39935
+ "step": 5691
39936
+ },
39937
+ {
39938
+ "epoch": 0.0005692,
39939
+ "grad_norm": 1.9675894975662231,
39940
+ "learning_rate": 5.691e-05,
39941
+ "loss": 1.0811,
39942
+ "step": 5692
39943
+ },
39944
+ {
39945
+ "epoch": 0.0005693,
39946
+ "grad_norm": 1.9908654689788818,
39947
+ "learning_rate": 5.692e-05,
39948
+ "loss": 1.165,
39949
+ "step": 5693
39950
+ },
39951
+ {
39952
+ "epoch": 0.0005694,
39953
+ "grad_norm": 1.8950060606002808,
39954
+ "learning_rate": 5.693e-05,
39955
+ "loss": 1.1211,
39956
+ "step": 5694
39957
+ },
39958
+ {
39959
+ "epoch": 0.0005695,
39960
+ "grad_norm": 2.0623393058776855,
39961
+ "learning_rate": 5.694e-05,
39962
+ "loss": 1.124,
39963
+ "step": 5695
39964
+ },
39965
+ {
39966
+ "epoch": 0.0005696,
39967
+ "grad_norm": 2.491461753845215,
39968
+ "learning_rate": 5.695e-05,
39969
+ "loss": 1.3096,
39970
+ "step": 5696
39971
+ },
39972
+ {
39973
+ "epoch": 0.0005697,
39974
+ "grad_norm": 2.5069475173950195,
39975
+ "learning_rate": 5.696e-05,
39976
+ "loss": 1.3896,
39977
+ "step": 5697
39978
+ },
39979
+ {
39980
+ "epoch": 0.0005698,
39981
+ "grad_norm": 1.8650083541870117,
39982
+ "learning_rate": 5.697e-05,
39983
+ "loss": 1.0679,
39984
+ "step": 5698
39985
+ },
39986
+ {
39987
+ "epoch": 0.0005699,
39988
+ "grad_norm": 1.7204188108444214,
39989
+ "learning_rate": 5.698000000000001e-05,
39990
+ "loss": 1.0107,
39991
+ "step": 5699
39992
+ },
39993
+ {
39994
+ "epoch": 0.00057,
39995
+ "grad_norm": 2.277846336364746,
39996
+ "learning_rate": 5.6989999999999995e-05,
39997
+ "loss": 1.2178,
39998
+ "step": 5700
39999
+ },
40000
+ {
40001
+ "epoch": 0.0005701,
40002
+ "grad_norm": 2.155337333679199,
40003
+ "learning_rate": 5.7e-05,
40004
+ "loss": 1.3115,
40005
+ "step": 5701
40006
+ },
40007
+ {
40008
+ "epoch": 0.0005702,
40009
+ "grad_norm": 2.2528960704803467,
40010
+ "learning_rate": 5.7010000000000005e-05,
40011
+ "loss": 1.2407,
40012
+ "step": 5702
40013
+ },
40014
+ {
40015
+ "epoch": 0.0005703,
40016
+ "grad_norm": 1.8725662231445312,
40017
+ "learning_rate": 5.7020000000000006e-05,
40018
+ "loss": 1.1646,
40019
+ "step": 5703
40020
+ },
40021
+ {
40022
+ "epoch": 0.0005704,
40023
+ "grad_norm": 2.5592408180236816,
40024
+ "learning_rate": 5.703e-05,
40025
+ "loss": 1.3496,
40026
+ "step": 5704
40027
+ },
40028
+ {
40029
+ "epoch": 0.0005705,
40030
+ "grad_norm": 2.1637556552886963,
40031
+ "learning_rate": 5.704e-05,
40032
+ "loss": 1.249,
40033
+ "step": 5705
40034
+ },
40035
+ {
40036
+ "epoch": 0.0005706,
40037
+ "grad_norm": 2.220670461654663,
40038
+ "learning_rate": 5.7050000000000004e-05,
40039
+ "loss": 1.1992,
40040
+ "step": 5706
40041
+ },
40042
+ {
40043
+ "epoch": 0.0005707,
40044
+ "grad_norm": 1.9339892864227295,
40045
+ "learning_rate": 5.706e-05,
40046
+ "loss": 1.1338,
40047
+ "step": 5707
40048
+ },
40049
+ {
40050
+ "epoch": 0.0005708,
40051
+ "grad_norm": 2.1802027225494385,
40052
+ "learning_rate": 5.707e-05,
40053
+ "loss": 1.2393,
40054
+ "step": 5708
40055
+ },
40056
+ {
40057
+ "epoch": 0.0005709,
40058
+ "grad_norm": 1.7978614568710327,
40059
+ "learning_rate": 5.708e-05,
40060
+ "loss": 1.0391,
40061
+ "step": 5709
40062
+ },
40063
+ {
40064
+ "epoch": 0.000571,
40065
+ "grad_norm": 3.548150062561035,
40066
+ "learning_rate": 5.709e-05,
40067
+ "loss": 1.1035,
40068
+ "step": 5710
40069
+ },
40070
+ {
40071
+ "epoch": 0.0005711,
40072
+ "grad_norm": 2.054603099822998,
40073
+ "learning_rate": 5.71e-05,
40074
+ "loss": 1.0298,
40075
+ "step": 5711
40076
+ },
40077
+ {
40078
+ "epoch": 0.0005712,
40079
+ "grad_norm": 2.206544876098633,
40080
+ "learning_rate": 5.711e-05,
40081
+ "loss": 1.2344,
40082
+ "step": 5712
40083
+ },
40084
+ {
40085
+ "epoch": 0.0005713,
40086
+ "grad_norm": 4.416079521179199,
40087
+ "learning_rate": 5.712e-05,
40088
+ "loss": 1.4077,
40089
+ "step": 5713
40090
+ },
40091
+ {
40092
+ "epoch": 0.0005714,
40093
+ "grad_norm": 2.3344037532806396,
40094
+ "learning_rate": 5.713e-05,
40095
+ "loss": 1.0947,
40096
+ "step": 5714
40097
+ },
40098
+ {
40099
+ "epoch": 0.0005715,
40100
+ "grad_norm": 2.2653141021728516,
40101
+ "learning_rate": 5.714e-05,
40102
+ "loss": 1.0908,
40103
+ "step": 5715
40104
+ },
40105
+ {
40106
+ "epoch": 0.0005716,
40107
+ "grad_norm": 2.118607997894287,
40108
+ "learning_rate": 5.715000000000001e-05,
40109
+ "loss": 1.2393,
40110
+ "step": 5716
40111
+ },
40112
+ {
40113
+ "epoch": 0.0005717,
40114
+ "grad_norm": 2.184203863143921,
40115
+ "learning_rate": 5.7159999999999995e-05,
40116
+ "loss": 1.147,
40117
+ "step": 5717
40118
+ },
40119
+ {
40120
+ "epoch": 0.0005718,
40121
+ "grad_norm": 2.034911870956421,
40122
+ "learning_rate": 5.7169999999999996e-05,
40123
+ "loss": 1.1333,
40124
+ "step": 5718
40125
+ },
40126
+ {
40127
+ "epoch": 0.0005719,
40128
+ "grad_norm": 1.8440461158752441,
40129
+ "learning_rate": 5.7180000000000005e-05,
40130
+ "loss": 0.9668,
40131
+ "step": 5719
40132
+ },
40133
+ {
40134
+ "epoch": 0.000572,
40135
+ "grad_norm": 2.0628461837768555,
40136
+ "learning_rate": 5.7190000000000006e-05,
40137
+ "loss": 1.293,
40138
+ "step": 5720
40139
+ },
40140
+ {
40141
+ "epoch": 0.0005721,
40142
+ "grad_norm": 2.522508382797241,
40143
+ "learning_rate": 5.72e-05,
40144
+ "loss": 1.2866,
40145
+ "step": 5721
40146
+ },
40147
+ {
40148
+ "epoch": 0.0005722,
40149
+ "grad_norm": 2.229628324508667,
40150
+ "learning_rate": 5.721e-05,
40151
+ "loss": 1.1309,
40152
+ "step": 5722
40153
+ },
40154
+ {
40155
+ "epoch": 0.0005723,
40156
+ "grad_norm": 5.912186622619629,
40157
+ "learning_rate": 5.7220000000000004e-05,
40158
+ "loss": 1.8047,
40159
+ "step": 5723
40160
+ },
40161
+ {
40162
+ "epoch": 0.0005724,
40163
+ "grad_norm": 3.1335630416870117,
40164
+ "learning_rate": 5.723e-05,
40165
+ "loss": 1.1792,
40166
+ "step": 5724
40167
+ },
40168
+ {
40169
+ "epoch": 0.0005725,
40170
+ "grad_norm": 2.22836971282959,
40171
+ "learning_rate": 5.724e-05,
40172
+ "loss": 1.0972,
40173
+ "step": 5725
40174
+ },
40175
+ {
40176
+ "epoch": 0.0005726,
40177
+ "grad_norm": 2.203935146331787,
40178
+ "learning_rate": 5.725e-05,
40179
+ "loss": 1.186,
40180
+ "step": 5726
40181
+ },
40182
+ {
40183
+ "epoch": 0.0005727,
40184
+ "grad_norm": 1.68068528175354,
40185
+ "learning_rate": 5.726e-05,
40186
+ "loss": 1.0049,
40187
+ "step": 5727
40188
+ },
40189
+ {
40190
+ "epoch": 0.0005728,
40191
+ "grad_norm": 2.190138339996338,
40192
+ "learning_rate": 5.727e-05,
40193
+ "loss": 1.1567,
40194
+ "step": 5728
40195
+ },
40196
+ {
40197
+ "epoch": 0.0005729,
40198
+ "grad_norm": 1.9706114530563354,
40199
+ "learning_rate": 5.728e-05,
40200
+ "loss": 1.0264,
40201
+ "step": 5729
40202
+ },
40203
+ {
40204
+ "epoch": 0.000573,
40205
+ "grad_norm": 1.7482571601867676,
40206
+ "learning_rate": 5.729e-05,
40207
+ "loss": 1.0039,
40208
+ "step": 5730
40209
+ },
40210
+ {
40211
+ "epoch": 0.0005731,
40212
+ "grad_norm": 2.326173782348633,
40213
+ "learning_rate": 5.73e-05,
40214
+ "loss": 1.0903,
40215
+ "step": 5731
40216
+ },
40217
+ {
40218
+ "epoch": 0.0005732,
40219
+ "grad_norm": 1.7929534912109375,
40220
+ "learning_rate": 5.731e-05,
40221
+ "loss": 0.9854,
40222
+ "step": 5732
40223
+ },
40224
+ {
40225
+ "epoch": 0.0005733,
40226
+ "grad_norm": 1.764266848564148,
40227
+ "learning_rate": 5.732000000000001e-05,
40228
+ "loss": 0.9575,
40229
+ "step": 5733
40230
+ },
40231
+ {
40232
+ "epoch": 0.0005734,
40233
+ "grad_norm": 2.5840072631835938,
40234
+ "learning_rate": 5.7329999999999995e-05,
40235
+ "loss": 1.3408,
40236
+ "step": 5734
40237
+ },
40238
+ {
40239
+ "epoch": 0.0005735,
40240
+ "grad_norm": 2.9217374324798584,
40241
+ "learning_rate": 5.7339999999999996e-05,
40242
+ "loss": 1.3711,
40243
+ "step": 5735
40244
+ },
40245
+ {
40246
+ "epoch": 0.0005736,
40247
+ "grad_norm": 3.844555616378784,
40248
+ "learning_rate": 5.7350000000000005e-05,
40249
+ "loss": 2.0879,
40250
+ "step": 5736
40251
+ },
40252
+ {
40253
+ "epoch": 0.0005737,
40254
+ "grad_norm": 2.118861198425293,
40255
+ "learning_rate": 5.7360000000000006e-05,
40256
+ "loss": 1.0342,
40257
+ "step": 5737
40258
+ },
40259
+ {
40260
+ "epoch": 0.0005738,
40261
+ "grad_norm": 3.1677181720733643,
40262
+ "learning_rate": 5.737e-05,
40263
+ "loss": 1.3975,
40264
+ "step": 5738
40265
+ },
40266
+ {
40267
+ "epoch": 0.0005739,
40268
+ "grad_norm": 2.005305051803589,
40269
+ "learning_rate": 5.738e-05,
40270
+ "loss": 1.063,
40271
+ "step": 5739
40272
+ },
40273
+ {
40274
+ "epoch": 0.000574,
40275
+ "grad_norm": 2.0784804821014404,
40276
+ "learning_rate": 5.7390000000000004e-05,
40277
+ "loss": 1.0684,
40278
+ "step": 5740
40279
+ },
40280
+ {
40281
+ "epoch": 0.0005741,
40282
+ "grad_norm": 3.071539878845215,
40283
+ "learning_rate": 5.74e-05,
40284
+ "loss": 1.2676,
40285
+ "step": 5741
40286
+ },
40287
+ {
40288
+ "epoch": 0.0005742,
40289
+ "grad_norm": 2.1717145442962646,
40290
+ "learning_rate": 5.741e-05,
40291
+ "loss": 1.0571,
40292
+ "step": 5742
40293
+ },
40294
+ {
40295
+ "epoch": 0.0005743,
40296
+ "grad_norm": 2.0021402835845947,
40297
+ "learning_rate": 5.742e-05,
40298
+ "loss": 1.0229,
40299
+ "step": 5743
40300
+ },
40301
+ {
40302
+ "epoch": 0.0005744,
40303
+ "grad_norm": 1.7721225023269653,
40304
+ "learning_rate": 5.743e-05,
40305
+ "loss": 0.9575,
40306
+ "step": 5744
40307
+ },
40308
+ {
40309
+ "epoch": 0.0005745,
40310
+ "grad_norm": 2.241121530532837,
40311
+ "learning_rate": 5.744e-05,
40312
+ "loss": 1.2036,
40313
+ "step": 5745
40314
+ },
40315
+ {
40316
+ "epoch": 0.0005746,
40317
+ "grad_norm": 2.1638896465301514,
40318
+ "learning_rate": 5.745e-05,
40319
+ "loss": 1.064,
40320
+ "step": 5746
40321
+ },
40322
+ {
40323
+ "epoch": 0.0005747,
40324
+ "grad_norm": 2.39729642868042,
40325
+ "learning_rate": 5.746e-05,
40326
+ "loss": 1.2822,
40327
+ "step": 5747
40328
+ },
40329
+ {
40330
+ "epoch": 0.0005748,
40331
+ "grad_norm": 2.052424907684326,
40332
+ "learning_rate": 5.747e-05,
40333
+ "loss": 1.0215,
40334
+ "step": 5748
40335
+ },
40336
+ {
40337
+ "epoch": 0.0005749,
40338
+ "grad_norm": 1.981493592262268,
40339
+ "learning_rate": 5.748e-05,
40340
+ "loss": 1.0049,
40341
+ "step": 5749
40342
+ },
40343
+ {
40344
+ "epoch": 0.000575,
40345
+ "grad_norm": 1.9867534637451172,
40346
+ "learning_rate": 5.749000000000001e-05,
40347
+ "loss": 1.0015,
40348
+ "step": 5750
40349
+ },
40350
+ {
40351
+ "epoch": 0.0005751,
40352
+ "grad_norm": 3.0150108337402344,
40353
+ "learning_rate": 5.7499999999999995e-05,
40354
+ "loss": 1.2261,
40355
+ "step": 5751
40356
+ },
40357
+ {
40358
+ "epoch": 0.0005752,
40359
+ "grad_norm": 1.9989426136016846,
40360
+ "learning_rate": 5.7509999999999997e-05,
40361
+ "loss": 0.9468,
40362
+ "step": 5752
40363
+ },
40364
+ {
40365
+ "epoch": 0.0005753,
40366
+ "grad_norm": 1.9769320487976074,
40367
+ "learning_rate": 5.7520000000000005e-05,
40368
+ "loss": 0.9893,
40369
+ "step": 5753
40370
+ },
40371
+ {
40372
+ "epoch": 0.0005754,
40373
+ "grad_norm": 1.8832272291183472,
40374
+ "learning_rate": 5.7530000000000007e-05,
40375
+ "loss": 0.9966,
40376
+ "step": 5754
40377
+ },
40378
+ {
40379
+ "epoch": 0.0005755,
40380
+ "grad_norm": 1.9896249771118164,
40381
+ "learning_rate": 5.754e-05,
40382
+ "loss": 1.0957,
40383
+ "step": 5755
40384
+ },
40385
+ {
40386
+ "epoch": 0.0005756,
40387
+ "grad_norm": 2.0165281295776367,
40388
+ "learning_rate": 5.755e-05,
40389
+ "loss": 1.061,
40390
+ "step": 5756
40391
+ },
40392
+ {
40393
+ "epoch": 0.0005757,
40394
+ "grad_norm": 4.255472660064697,
40395
+ "learning_rate": 5.7560000000000005e-05,
40396
+ "loss": 1.168,
40397
+ "step": 5757
40398
+ },
40399
+ {
40400
+ "epoch": 0.0005758,
40401
+ "grad_norm": 1.8657300472259521,
40402
+ "learning_rate": 5.757e-05,
40403
+ "loss": 0.9268,
40404
+ "step": 5758
40405
+ },
40406
+ {
40407
+ "epoch": 0.0005759,
40408
+ "grad_norm": 1.934298038482666,
40409
+ "learning_rate": 5.758e-05,
40410
+ "loss": 1.0547,
40411
+ "step": 5759
40412
+ },
40413
+ {
40414
+ "epoch": 0.000576,
40415
+ "grad_norm": 1.8616300821304321,
40416
+ "learning_rate": 5.759e-05,
40417
+ "loss": 0.9517,
40418
+ "step": 5760
40419
+ },
40420
+ {
40421
+ "epoch": 0.0005761,
40422
+ "grad_norm": 1.7131308317184448,
40423
+ "learning_rate": 5.76e-05,
40424
+ "loss": 0.9521,
40425
+ "step": 5761
40426
+ },
40427
+ {
40428
+ "epoch": 0.0005762,
40429
+ "grad_norm": 1.90117609500885,
40430
+ "learning_rate": 5.761e-05,
40431
+ "loss": 1.0596,
40432
+ "step": 5762
40433
+ },
40434
+ {
40435
+ "epoch": 0.0005763,
40436
+ "grad_norm": 2.66219425201416,
40437
+ "learning_rate": 5.762e-05,
40438
+ "loss": 1.1758,
40439
+ "step": 5763
40440
+ },
40441
+ {
40442
+ "epoch": 0.0005764,
40443
+ "grad_norm": 2.0648040771484375,
40444
+ "learning_rate": 5.763e-05,
40445
+ "loss": 1.2681,
40446
+ "step": 5764
40447
+ },
40448
+ {
40449
+ "epoch": 0.0005765,
40450
+ "grad_norm": 2.287407636642456,
40451
+ "learning_rate": 5.764e-05,
40452
+ "loss": 1.0923,
40453
+ "step": 5765
40454
+ },
40455
+ {
40456
+ "epoch": 0.0005766,
40457
+ "grad_norm": 2.0532753467559814,
40458
+ "learning_rate": 5.765e-05,
40459
+ "loss": 1.0767,
40460
+ "step": 5766
40461
+ },
40462
+ {
40463
+ "epoch": 0.0005767,
40464
+ "grad_norm": 1.9127421379089355,
40465
+ "learning_rate": 5.766000000000001e-05,
40466
+ "loss": 0.9902,
40467
+ "step": 5767
40468
+ },
40469
+ {
40470
+ "epoch": 0.0005768,
40471
+ "grad_norm": 1.6706316471099854,
40472
+ "learning_rate": 5.7669999999999995e-05,
40473
+ "loss": 0.915,
40474
+ "step": 5768
40475
+ },
40476
+ {
40477
+ "epoch": 0.0005769,
40478
+ "grad_norm": 2.3771395683288574,
40479
+ "learning_rate": 5.768e-05,
40480
+ "loss": 1.0625,
40481
+ "step": 5769
40482
+ },
40483
+ {
40484
+ "epoch": 0.000577,
40485
+ "grad_norm": 1.9100340604782104,
40486
+ "learning_rate": 5.7690000000000005e-05,
40487
+ "loss": 0.9658,
40488
+ "step": 5770
40489
+ },
40490
+ {
40491
+ "epoch": 0.0005771,
40492
+ "grad_norm": 1.868527889251709,
40493
+ "learning_rate": 5.7700000000000007e-05,
40494
+ "loss": 1.0156,
40495
+ "step": 5771
40496
+ },
40497
+ {
40498
+ "epoch": 0.0005772,
40499
+ "grad_norm": 1.7585581541061401,
40500
+ "learning_rate": 5.771e-05,
40501
+ "loss": 0.9268,
40502
+ "step": 5772
40503
+ },
40504
+ {
40505
+ "epoch": 0.0005773,
40506
+ "grad_norm": 1.8959033489227295,
40507
+ "learning_rate": 5.772e-05,
40508
+ "loss": 0.9536,
40509
+ "step": 5773
40510
+ },
40511
+ {
40512
+ "epoch": 0.0005774,
40513
+ "grad_norm": 1.867139458656311,
40514
+ "learning_rate": 5.7730000000000005e-05,
40515
+ "loss": 1.0,
40516
+ "step": 5774
40517
+ },
40518
+ {
40519
+ "epoch": 0.0005775,
40520
+ "grad_norm": 1.6358270645141602,
40521
+ "learning_rate": 5.774e-05,
40522
+ "loss": 0.9214,
40523
+ "step": 5775
40524
+ },
40525
+ {
40526
+ "epoch": 0.0005776,
40527
+ "grad_norm": 2.010411024093628,
40528
+ "learning_rate": 5.775e-05,
40529
+ "loss": 0.9609,
40530
+ "step": 5776
40531
+ },
40532
+ {
40533
+ "epoch": 0.0005777,
40534
+ "grad_norm": 1.5704444646835327,
40535
+ "learning_rate": 5.776e-05,
40536
+ "loss": 0.8735,
40537
+ "step": 5777
40538
+ },
40539
+ {
40540
+ "epoch": 0.0005778,
40541
+ "grad_norm": 1.827335000038147,
40542
+ "learning_rate": 5.777e-05,
40543
+ "loss": 0.9409,
40544
+ "step": 5778
40545
+ },
40546
+ {
40547
+ "epoch": 0.0005779,
40548
+ "grad_norm": 1.7069824934005737,
40549
+ "learning_rate": 5.778e-05,
40550
+ "loss": 0.8984,
40551
+ "step": 5779
40552
+ },
40553
+ {
40554
+ "epoch": 0.000578,
40555
+ "grad_norm": 1.6686686277389526,
40556
+ "learning_rate": 5.779e-05,
40557
+ "loss": 0.8862,
40558
+ "step": 5780
40559
+ },
40560
+ {
40561
+ "epoch": 0.0005781,
40562
+ "grad_norm": 2.480184316635132,
40563
+ "learning_rate": 5.78e-05,
40564
+ "loss": 1.0791,
40565
+ "step": 5781
40566
+ },
40567
+ {
40568
+ "epoch": 0.0005782,
40569
+ "grad_norm": 1.9088640213012695,
40570
+ "learning_rate": 5.781e-05,
40571
+ "loss": 1.0312,
40572
+ "step": 5782
40573
+ },
40574
+ {
40575
+ "epoch": 0.0005783,
40576
+ "grad_norm": 1.7736129760742188,
40577
+ "learning_rate": 5.782e-05,
40578
+ "loss": 0.9204,
40579
+ "step": 5783
40580
+ },
40581
+ {
40582
+ "epoch": 0.0005784,
40583
+ "grad_norm": 1.7554395198822021,
40584
+ "learning_rate": 5.783000000000001e-05,
40585
+ "loss": 0.9634,
40586
+ "step": 5784
40587
+ },
40588
+ {
40589
+ "epoch": 0.0005785,
40590
+ "grad_norm": 2.0152337551116943,
40591
+ "learning_rate": 5.7839999999999995e-05,
40592
+ "loss": 0.874,
40593
+ "step": 5785
40594
+ },
40595
+ {
40596
+ "epoch": 0.0005786,
40597
+ "grad_norm": 1.6878819465637207,
40598
+ "learning_rate": 5.785e-05,
40599
+ "loss": 0.9248,
40600
+ "step": 5786
40601
+ },
40602
+ {
40603
+ "epoch": 0.0005787,
40604
+ "grad_norm": 1.9107030630111694,
40605
+ "learning_rate": 5.7860000000000005e-05,
40606
+ "loss": 0.9609,
40607
+ "step": 5787
40608
+ },
40609
+ {
40610
+ "epoch": 0.0005788,
40611
+ "grad_norm": 1.958961009979248,
40612
+ "learning_rate": 5.787000000000001e-05,
40613
+ "loss": 1.0776,
40614
+ "step": 5788
40615
+ },
40616
+ {
40617
+ "epoch": 0.0005789,
40618
+ "grad_norm": 1.6951417922973633,
40619
+ "learning_rate": 5.788e-05,
40620
+ "loss": 0.8779,
40621
+ "step": 5789
40622
+ },
40623
+ {
40624
+ "epoch": 0.000579,
40625
+ "grad_norm": 2.0156352519989014,
40626
+ "learning_rate": 5.789e-05,
40627
+ "loss": 1.0054,
40628
+ "step": 5790
40629
+ },
40630
+ {
40631
+ "epoch": 0.0005791,
40632
+ "grad_norm": 1.7576463222503662,
40633
+ "learning_rate": 5.7900000000000005e-05,
40634
+ "loss": 0.855,
40635
+ "step": 5791
40636
+ },
40637
+ {
40638
+ "epoch": 0.0005792,
40639
+ "grad_norm": 1.4828929901123047,
40640
+ "learning_rate": 5.791e-05,
40641
+ "loss": 0.8442,
40642
+ "step": 5792
40643
+ },
40644
+ {
40645
+ "epoch": 0.0005793,
40646
+ "grad_norm": 1.69581937789917,
40647
+ "learning_rate": 5.792e-05,
40648
+ "loss": 0.9282,
40649
+ "step": 5793
40650
+ },
40651
+ {
40652
+ "epoch": 0.0005794,
40653
+ "grad_norm": 1.922916054725647,
40654
+ "learning_rate": 5.793e-05,
40655
+ "loss": 0.9702,
40656
+ "step": 5794
40657
+ },
40658
+ {
40659
+ "epoch": 0.0005795,
40660
+ "grad_norm": 2.0185506343841553,
40661
+ "learning_rate": 5.794e-05,
40662
+ "loss": 1.0571,
40663
+ "step": 5795
40664
+ },
40665
+ {
40666
+ "epoch": 0.0005796,
40667
+ "grad_norm": 1.7874513864517212,
40668
+ "learning_rate": 5.795e-05,
40669
+ "loss": 0.9526,
40670
+ "step": 5796
40671
+ },
40672
+ {
40673
+ "epoch": 0.0005797,
40674
+ "grad_norm": 1.6726452112197876,
40675
+ "learning_rate": 5.796e-05,
40676
+ "loss": 0.8789,
40677
+ "step": 5797
40678
+ },
40679
+ {
40680
+ "epoch": 0.0005798,
40681
+ "grad_norm": 1.81720769405365,
40682
+ "learning_rate": 5.797e-05,
40683
+ "loss": 0.9414,
40684
+ "step": 5798
40685
+ },
40686
+ {
40687
+ "epoch": 0.0005799,
40688
+ "grad_norm": 1.6245219707489014,
40689
+ "learning_rate": 5.798e-05,
40690
+ "loss": 0.8921,
40691
+ "step": 5799
40692
+ },
40693
+ {
40694
+ "epoch": 0.00058,
40695
+ "grad_norm": 1.8347564935684204,
40696
+ "learning_rate": 5.799e-05,
40697
+ "loss": 0.8745,
40698
+ "step": 5800
40699
+ },
40700
+ {
40701
+ "epoch": 0.0005801,
40702
+ "grad_norm": 1.843480110168457,
40703
+ "learning_rate": 5.800000000000001e-05,
40704
+ "loss": 0.9004,
40705
+ "step": 5801
40706
+ },
40707
+ {
40708
+ "epoch": 0.0005802,
40709
+ "grad_norm": 1.781453013420105,
40710
+ "learning_rate": 5.8009999999999995e-05,
40711
+ "loss": 0.9199,
40712
+ "step": 5802
40713
+ },
40714
+ {
40715
+ "epoch": 0.0005803,
40716
+ "grad_norm": 1.734071135520935,
40717
+ "learning_rate": 5.802e-05,
40718
+ "loss": 0.9146,
40719
+ "step": 5803
40720
+ },
40721
+ {
40722
+ "epoch": 0.0005804,
40723
+ "grad_norm": 1.772889256477356,
40724
+ "learning_rate": 5.8030000000000005e-05,
40725
+ "loss": 0.8794,
40726
+ "step": 5804
40727
+ },
40728
+ {
40729
+ "epoch": 0.0005805,
40730
+ "grad_norm": 2.4896934032440186,
40731
+ "learning_rate": 5.804000000000001e-05,
40732
+ "loss": 0.9492,
40733
+ "step": 5805
40734
+ },
40735
+ {
40736
+ "epoch": 0.0005806,
40737
+ "grad_norm": 1.6063295602798462,
40738
+ "learning_rate": 5.805e-05,
40739
+ "loss": 0.9087,
40740
+ "step": 5806
40741
+ },
40742
+ {
40743
+ "epoch": 0.0005807,
40744
+ "grad_norm": 1.9447569847106934,
40745
+ "learning_rate": 5.806e-05,
40746
+ "loss": 0.9951,
40747
+ "step": 5807
40748
+ },
40749
+ {
40750
+ "epoch": 0.0005808,
40751
+ "grad_norm": 1.6404564380645752,
40752
+ "learning_rate": 5.8070000000000005e-05,
40753
+ "loss": 0.8506,
40754
+ "step": 5808
40755
+ },
40756
+ {
40757
+ "epoch": 0.0005809,
40758
+ "grad_norm": 1.6724512577056885,
40759
+ "learning_rate": 5.808e-05,
40760
+ "loss": 0.8447,
40761
+ "step": 5809
40762
+ },
40763
+ {
40764
+ "epoch": 0.000581,
40765
+ "grad_norm": 1.8658446073532104,
40766
+ "learning_rate": 5.809e-05,
40767
+ "loss": 0.8677,
40768
+ "step": 5810
40769
+ },
40770
+ {
40771
+ "epoch": 0.0005811,
40772
+ "grad_norm": 1.774506688117981,
40773
+ "learning_rate": 5.81e-05,
40774
+ "loss": 0.8779,
40775
+ "step": 5811
40776
+ },
40777
+ {
40778
+ "epoch": 0.0005812,
40779
+ "grad_norm": 1.6786613464355469,
40780
+ "learning_rate": 5.811e-05,
40781
+ "loss": 0.8633,
40782
+ "step": 5812
40783
+ },
40784
+ {
40785
+ "epoch": 0.0005813,
40786
+ "grad_norm": 1.9051158428192139,
40787
+ "learning_rate": 5.812e-05,
40788
+ "loss": 0.9424,
40789
+ "step": 5813
40790
+ },
40791
+ {
40792
+ "epoch": 0.0005814,
40793
+ "grad_norm": 1.724073052406311,
40794
+ "learning_rate": 5.813e-05,
40795
+ "loss": 0.9126,
40796
+ "step": 5814
40797
+ },
40798
+ {
40799
+ "epoch": 0.0005815,
40800
+ "grad_norm": 1.6745671033859253,
40801
+ "learning_rate": 5.814e-05,
40802
+ "loss": 0.8604,
40803
+ "step": 5815
40804
+ },
40805
+ {
40806
+ "epoch": 0.0005816,
40807
+ "grad_norm": 1.6149710416793823,
40808
+ "learning_rate": 5.815e-05,
40809
+ "loss": 0.8623,
40810
+ "step": 5816
40811
+ },
40812
+ {
40813
+ "epoch": 0.0005817,
40814
+ "grad_norm": 1.640714168548584,
40815
+ "learning_rate": 5.816e-05,
40816
+ "loss": 0.8633,
40817
+ "step": 5817
40818
+ },
40819
+ {
40820
+ "epoch": 0.0005818,
40821
+ "grad_norm": 1.8054769039154053,
40822
+ "learning_rate": 5.817000000000001e-05,
40823
+ "loss": 0.8481,
40824
+ "step": 5818
40825
+ },
40826
+ {
40827
+ "epoch": 0.0005819,
40828
+ "grad_norm": 1.6028062105178833,
40829
+ "learning_rate": 5.8179999999999995e-05,
40830
+ "loss": 0.8555,
40831
+ "step": 5819
40832
+ },
40833
+ {
40834
+ "epoch": 0.000582,
40835
+ "grad_norm": 1.8541170358657837,
40836
+ "learning_rate": 5.819e-05,
40837
+ "loss": 0.9297,
40838
+ "step": 5820
40839
+ },
40840
+ {
40841
+ "epoch": 0.0005821,
40842
+ "grad_norm": 1.7812132835388184,
40843
+ "learning_rate": 5.8200000000000005e-05,
40844
+ "loss": 0.916,
40845
+ "step": 5821
40846
+ },
40847
+ {
40848
+ "epoch": 0.0005822,
40849
+ "grad_norm": 1.6715450286865234,
40850
+ "learning_rate": 5.821000000000001e-05,
40851
+ "loss": 0.876,
40852
+ "step": 5822
40853
+ },
40854
+ {
40855
+ "epoch": 0.0005823,
40856
+ "grad_norm": 1.883423089981079,
40857
+ "learning_rate": 5.822e-05,
40858
+ "loss": 1.061,
40859
+ "step": 5823
40860
+ },
40861
+ {
40862
+ "epoch": 0.0005824,
40863
+ "grad_norm": 1.6956429481506348,
40864
+ "learning_rate": 5.823e-05,
40865
+ "loss": 0.8438,
40866
+ "step": 5824
40867
+ },
40868
+ {
40869
+ "epoch": 0.0005825,
40870
+ "grad_norm": 1.6848008632659912,
40871
+ "learning_rate": 5.8240000000000005e-05,
40872
+ "loss": 0.8887,
40873
+ "step": 5825
40874
+ },
40875
+ {
40876
+ "epoch": 0.0005826,
40877
+ "grad_norm": 1.761123776435852,
40878
+ "learning_rate": 5.825e-05,
40879
+ "loss": 0.9277,
40880
+ "step": 5826
40881
+ },
40882
+ {
40883
+ "epoch": 0.0005827,
40884
+ "grad_norm": 4.005990028381348,
40885
+ "learning_rate": 5.826e-05,
40886
+ "loss": 1.5615,
40887
+ "step": 5827
40888
+ },
40889
+ {
40890
+ "epoch": 0.0005828,
40891
+ "grad_norm": 2.3055953979492188,
40892
+ "learning_rate": 5.827e-05,
40893
+ "loss": 0.9722,
40894
+ "step": 5828
40895
+ },
40896
+ {
40897
+ "epoch": 0.0005829,
40898
+ "grad_norm": 3.2178542613983154,
40899
+ "learning_rate": 5.828e-05,
40900
+ "loss": 1.0449,
40901
+ "step": 5829
40902
+ },
40903
+ {
40904
+ "epoch": 0.000583,
40905
+ "grad_norm": 2.7492799758911133,
40906
+ "learning_rate": 5.829e-05,
40907
+ "loss": 0.9448,
40908
+ "step": 5830
40909
+ },
40910
+ {
40911
+ "epoch": 0.0005831,
40912
+ "grad_norm": 1.996016502380371,
40913
+ "learning_rate": 5.83e-05,
40914
+ "loss": 0.8779,
40915
+ "step": 5831
40916
+ },
40917
+ {
40918
+ "epoch": 0.0005832,
40919
+ "grad_norm": 1.6616058349609375,
40920
+ "learning_rate": 5.831e-05,
40921
+ "loss": 0.7661,
40922
+ "step": 5832
40923
+ },
40924
+ {
40925
+ "epoch": 0.0005833,
40926
+ "grad_norm": 1.789231300354004,
40927
+ "learning_rate": 5.832e-05,
40928
+ "loss": 0.835,
40929
+ "step": 5833
40930
+ },
40931
+ {
40932
+ "epoch": 0.0005834,
40933
+ "grad_norm": 1.9673794507980347,
40934
+ "learning_rate": 5.833e-05,
40935
+ "loss": 0.8794,
40936
+ "step": 5834
40937
+ },
40938
+ {
40939
+ "epoch": 0.0005835,
40940
+ "grad_norm": 1.7308722734451294,
40941
+ "learning_rate": 5.834000000000001e-05,
40942
+ "loss": 0.8584,
40943
+ "step": 5835
40944
+ },
40945
+ {
40946
+ "epoch": 0.0005836,
40947
+ "grad_norm": 1.6586679220199585,
40948
+ "learning_rate": 5.8349999999999995e-05,
40949
+ "loss": 0.8403,
40950
+ "step": 5836
40951
+ },
40952
+ {
40953
+ "epoch": 0.0005837,
40954
+ "grad_norm": 1.7169954776763916,
40955
+ "learning_rate": 5.836e-05,
40956
+ "loss": 0.8477,
40957
+ "step": 5837
40958
+ },
40959
+ {
40960
+ "epoch": 0.0005838,
40961
+ "grad_norm": 1.6661771535873413,
40962
+ "learning_rate": 5.8370000000000005e-05,
40963
+ "loss": 0.8574,
40964
+ "step": 5838
40965
+ },
40966
+ {
40967
+ "epoch": 0.0005839,
40968
+ "grad_norm": 1.6541742086410522,
40969
+ "learning_rate": 5.838000000000001e-05,
40970
+ "loss": 0.7949,
40971
+ "step": 5839
40972
+ },
40973
+ {
40974
+ "epoch": 0.000584,
40975
+ "grad_norm": 1.5927313566207886,
40976
+ "learning_rate": 5.839e-05,
40977
+ "loss": 0.7852,
40978
+ "step": 5840
40979
+ },
40980
+ {
40981
+ "epoch": 0.0005841,
40982
+ "grad_norm": 1.6425414085388184,
40983
+ "learning_rate": 5.84e-05,
40984
+ "loss": 0.8096,
40985
+ "step": 5841
40986
+ },
40987
+ {
40988
+ "epoch": 0.0005842,
40989
+ "grad_norm": 1.7989174127578735,
40990
+ "learning_rate": 5.8410000000000005e-05,
40991
+ "loss": 0.8882,
40992
+ "step": 5842
40993
+ },
40994
+ {
40995
+ "epoch": 0.0005843,
40996
+ "grad_norm": 1.8322278261184692,
40997
+ "learning_rate": 5.842e-05,
40998
+ "loss": 1.001,
40999
+ "step": 5843
41000
+ },
41001
+ {
41002
+ "epoch": 0.0005844,
41003
+ "grad_norm": 1.8731627464294434,
41004
+ "learning_rate": 5.843e-05,
41005
+ "loss": 0.8843,
41006
+ "step": 5844
41007
+ },
41008
+ {
41009
+ "epoch": 0.0005845,
41010
+ "grad_norm": 1.5437366962432861,
41011
+ "learning_rate": 5.844e-05,
41012
+ "loss": 0.7725,
41013
+ "step": 5845
41014
+ },
41015
+ {
41016
+ "epoch": 0.0005846,
41017
+ "grad_norm": 2.8317415714263916,
41018
+ "learning_rate": 5.845e-05,
41019
+ "loss": 1.0942,
41020
+ "step": 5846
41021
+ },
41022
+ {
41023
+ "epoch": 0.0005847,
41024
+ "grad_norm": 1.7654151916503906,
41025
+ "learning_rate": 5.846e-05,
41026
+ "loss": 0.8237,
41027
+ "step": 5847
41028
+ },
41029
+ {
41030
+ "epoch": 0.0005848,
41031
+ "grad_norm": 1.8651440143585205,
41032
+ "learning_rate": 5.847e-05,
41033
+ "loss": 0.812,
41034
+ "step": 5848
41035
+ },
41036
+ {
41037
+ "epoch": 0.0005849,
41038
+ "grad_norm": 1.6400736570358276,
41039
+ "learning_rate": 5.848e-05,
41040
+ "loss": 0.7842,
41041
+ "step": 5849
41042
+ },
41043
+ {
41044
+ "epoch": 0.000585,
41045
+ "grad_norm": 1.6788883209228516,
41046
+ "learning_rate": 5.849e-05,
41047
+ "loss": 0.8096,
41048
+ "step": 5850
41049
+ },
41050
+ {
41051
+ "epoch": 0.0005851,
41052
+ "grad_norm": 2.2326557636260986,
41053
+ "learning_rate": 5.85e-05,
41054
+ "loss": 0.96,
41055
+ "step": 5851
41056
+ },
41057
+ {
41058
+ "epoch": 0.0005852,
41059
+ "grad_norm": 1.8667100667953491,
41060
+ "learning_rate": 5.851000000000001e-05,
41061
+ "loss": 0.8208,
41062
+ "step": 5852
41063
+ },
41064
+ {
41065
+ "epoch": 0.0005853,
41066
+ "grad_norm": 1.6106916666030884,
41067
+ "learning_rate": 5.8519999999999995e-05,
41068
+ "loss": 0.7939,
41069
+ "step": 5853
41070
+ },
41071
+ {
41072
+ "epoch": 0.0005854,
41073
+ "grad_norm": 1.6850008964538574,
41074
+ "learning_rate": 5.853e-05,
41075
+ "loss": 0.8037,
41076
+ "step": 5854
41077
+ },
41078
+ {
41079
+ "epoch": 0.0005855,
41080
+ "grad_norm": 1.6820380687713623,
41081
+ "learning_rate": 5.8540000000000005e-05,
41082
+ "loss": 0.7905,
41083
+ "step": 5855
41084
+ },
41085
+ {
41086
+ "epoch": 0.0005856,
41087
+ "grad_norm": 1.6166424751281738,
41088
+ "learning_rate": 5.855000000000001e-05,
41089
+ "loss": 0.7661,
41090
+ "step": 5856
41091
+ },
41092
+ {
41093
+ "epoch": 0.0005857,
41094
+ "grad_norm": 1.606998085975647,
41095
+ "learning_rate": 5.856e-05,
41096
+ "loss": 0.8359,
41097
+ "step": 5857
41098
+ },
41099
+ {
41100
+ "epoch": 0.0005858,
41101
+ "grad_norm": 1.6939175128936768,
41102
+ "learning_rate": 5.857e-05,
41103
+ "loss": 0.8188,
41104
+ "step": 5858
41105
+ },
41106
+ {
41107
+ "epoch": 0.0005859,
41108
+ "grad_norm": 1.7543524503707886,
41109
+ "learning_rate": 5.8580000000000005e-05,
41110
+ "loss": 0.8491,
41111
+ "step": 5859
41112
+ },
41113
+ {
41114
+ "epoch": 0.000586,
41115
+ "grad_norm": 1.7315897941589355,
41116
+ "learning_rate": 5.859e-05,
41117
+ "loss": 0.8999,
41118
+ "step": 5860
41119
+ },
41120
+ {
41121
+ "epoch": 0.0005861,
41122
+ "grad_norm": 2.3954813480377197,
41123
+ "learning_rate": 5.86e-05,
41124
+ "loss": 1.1616,
41125
+ "step": 5861
41126
+ },
41127
+ {
41128
+ "epoch": 0.0005862,
41129
+ "grad_norm": 1.6844712495803833,
41130
+ "learning_rate": 5.861e-05,
41131
+ "loss": 0.772,
41132
+ "step": 5862
41133
+ },
41134
+ {
41135
+ "epoch": 0.0005863,
41136
+ "grad_norm": 1.5823957920074463,
41137
+ "learning_rate": 5.862e-05,
41138
+ "loss": 0.8071,
41139
+ "step": 5863
41140
+ },
41141
+ {
41142
+ "epoch": 0.0005864,
41143
+ "grad_norm": 1.5627663135528564,
41144
+ "learning_rate": 5.863e-05,
41145
+ "loss": 0.812,
41146
+ "step": 5864
41147
+ },
41148
+ {
41149
+ "epoch": 0.0005865,
41150
+ "grad_norm": 1.6954442262649536,
41151
+ "learning_rate": 5.864e-05,
41152
+ "loss": 0.7988,
41153
+ "step": 5865
41154
+ },
41155
+ {
41156
+ "epoch": 0.0005866,
41157
+ "grad_norm": 1.7571743726730347,
41158
+ "learning_rate": 5.865e-05,
41159
+ "loss": 0.7607,
41160
+ "step": 5866
41161
+ },
41162
+ {
41163
+ "epoch": 0.0005867,
41164
+ "grad_norm": 1.6370595693588257,
41165
+ "learning_rate": 5.866e-05,
41166
+ "loss": 0.7529,
41167
+ "step": 5867
41168
+ },
41169
+ {
41170
+ "epoch": 0.0005868,
41171
+ "grad_norm": 1.5386178493499756,
41172
+ "learning_rate": 5.867e-05,
41173
+ "loss": 0.8086,
41174
+ "step": 5868
41175
+ },
41176
+ {
41177
+ "epoch": 0.0005869,
41178
+ "grad_norm": 1.5495586395263672,
41179
+ "learning_rate": 5.868000000000001e-05,
41180
+ "loss": 0.7627,
41181
+ "step": 5869
41182
+ },
41183
+ {
41184
+ "epoch": 0.000587,
41185
+ "grad_norm": 1.72905695438385,
41186
+ "learning_rate": 5.8689999999999995e-05,
41187
+ "loss": 0.8296,
41188
+ "step": 5870
41189
+ },
41190
+ {
41191
+ "epoch": 0.0005871,
41192
+ "grad_norm": 1.6074451208114624,
41193
+ "learning_rate": 5.87e-05,
41194
+ "loss": 0.876,
41195
+ "step": 5871
41196
+ },
41197
+ {
41198
+ "epoch": 0.0005872,
41199
+ "grad_norm": 1.5799434185028076,
41200
+ "learning_rate": 5.8710000000000005e-05,
41201
+ "loss": 0.8086,
41202
+ "step": 5872
41203
+ },
41204
+ {
41205
+ "epoch": 0.0005873,
41206
+ "grad_norm": 1.7185566425323486,
41207
+ "learning_rate": 5.872000000000001e-05,
41208
+ "loss": 0.873,
41209
+ "step": 5873
41210
+ },
41211
+ {
41212
+ "epoch": 0.0005874,
41213
+ "grad_norm": 1.6175681352615356,
41214
+ "learning_rate": 5.873e-05,
41215
+ "loss": 0.7603,
41216
+ "step": 5874
41217
+ },
41218
+ {
41219
+ "epoch": 0.0005875,
41220
+ "grad_norm": 1.3885085582733154,
41221
+ "learning_rate": 5.8740000000000003e-05,
41222
+ "loss": 0.8052,
41223
+ "step": 5875
41224
+ },
41225
+ {
41226
+ "epoch": 0.0005876,
41227
+ "grad_norm": 1.5084010362625122,
41228
+ "learning_rate": 5.8750000000000005e-05,
41229
+ "loss": 0.8022,
41230
+ "step": 5876
41231
+ },
41232
+ {
41233
+ "epoch": 0.0005877,
41234
+ "grad_norm": 1.5180604457855225,
41235
+ "learning_rate": 5.876e-05,
41236
+ "loss": 0.7681,
41237
+ "step": 5877
41238
+ },
41239
+ {
41240
+ "epoch": 0.0005878,
41241
+ "grad_norm": 1.5178165435791016,
41242
+ "learning_rate": 5.877e-05,
41243
+ "loss": 0.7559,
41244
+ "step": 5878
41245
+ },
41246
+ {
41247
+ "epoch": 0.0005879,
41248
+ "grad_norm": 1.4072206020355225,
41249
+ "learning_rate": 5.878e-05,
41250
+ "loss": 0.7432,
41251
+ "step": 5879
41252
+ },
41253
+ {
41254
+ "epoch": 0.000588,
41255
+ "grad_norm": 1.6438937187194824,
41256
+ "learning_rate": 5.879e-05,
41257
+ "loss": 0.8389,
41258
+ "step": 5880
41259
+ },
41260
+ {
41261
+ "epoch": 0.0005881,
41262
+ "grad_norm": 1.6769683361053467,
41263
+ "learning_rate": 5.88e-05,
41264
+ "loss": 0.8018,
41265
+ "step": 5881
41266
+ },
41267
+ {
41268
+ "epoch": 0.0005882,
41269
+ "grad_norm": 1.623231291770935,
41270
+ "learning_rate": 5.881e-05,
41271
+ "loss": 0.7739,
41272
+ "step": 5882
41273
+ },
41274
+ {
41275
+ "epoch": 0.0005883,
41276
+ "grad_norm": 1.6477833986282349,
41277
+ "learning_rate": 5.882e-05,
41278
+ "loss": 0.7993,
41279
+ "step": 5883
41280
+ },
41281
+ {
41282
+ "epoch": 0.0005884,
41283
+ "grad_norm": 1.6617273092269897,
41284
+ "learning_rate": 5.883e-05,
41285
+ "loss": 0.8618,
41286
+ "step": 5884
41287
+ },
41288
+ {
41289
+ "epoch": 0.0005885,
41290
+ "grad_norm": 2.173892021179199,
41291
+ "learning_rate": 5.884e-05,
41292
+ "loss": 1.0137,
41293
+ "step": 5885
41294
+ },
41295
+ {
41296
+ "epoch": 0.0005886,
41297
+ "grad_norm": 1.5440952777862549,
41298
+ "learning_rate": 5.885000000000001e-05,
41299
+ "loss": 0.7686,
41300
+ "step": 5886
41301
+ },
41302
+ {
41303
+ "epoch": 0.0005887,
41304
+ "grad_norm": 1.2644119262695312,
41305
+ "learning_rate": 5.8859999999999995e-05,
41306
+ "loss": 0.7139,
41307
+ "step": 5887
41308
+ },
41309
+ {
41310
+ "epoch": 0.0005888,
41311
+ "grad_norm": 1.484767198562622,
41312
+ "learning_rate": 5.887e-05,
41313
+ "loss": 0.7417,
41314
+ "step": 5888
41315
+ },
41316
+ {
41317
+ "epoch": 0.0005889,
41318
+ "grad_norm": 1.5068488121032715,
41319
+ "learning_rate": 5.8880000000000005e-05,
41320
+ "loss": 0.7974,
41321
+ "step": 5889
41322
+ },
41323
+ {
41324
+ "epoch": 0.000589,
41325
+ "grad_norm": 3.0165534019470215,
41326
+ "learning_rate": 5.889000000000001e-05,
41327
+ "loss": 0.812,
41328
+ "step": 5890
41329
+ },
41330
+ {
41331
+ "epoch": 0.0005891,
41332
+ "grad_norm": 1.8161137104034424,
41333
+ "learning_rate": 5.89e-05,
41334
+ "loss": 0.8296,
41335
+ "step": 5891
41336
+ },
41337
+ {
41338
+ "epoch": 0.0005892,
41339
+ "grad_norm": 1.7120927572250366,
41340
+ "learning_rate": 5.8910000000000003e-05,
41341
+ "loss": 0.7227,
41342
+ "step": 5892
41343
+ },
41344
+ {
41345
+ "epoch": 0.0005893,
41346
+ "grad_norm": 1.631578803062439,
41347
+ "learning_rate": 5.8920000000000005e-05,
41348
+ "loss": 0.7925,
41349
+ "step": 5893
41350
+ },
41351
+ {
41352
+ "epoch": 0.0005894,
41353
+ "grad_norm": 1.703325629234314,
41354
+ "learning_rate": 5.893e-05,
41355
+ "loss": 0.8032,
41356
+ "step": 5894
41357
+ },
41358
+ {
41359
+ "epoch": 0.0005895,
41360
+ "grad_norm": 1.7086197137832642,
41361
+ "learning_rate": 5.894e-05,
41362
+ "loss": 0.8003,
41363
+ "step": 5895
41364
+ },
41365
+ {
41366
+ "epoch": 0.0005896,
41367
+ "grad_norm": 5.005209922790527,
41368
+ "learning_rate": 5.895e-05,
41369
+ "loss": 1.2173,
41370
+ "step": 5896
41371
+ },
41372
+ {
41373
+ "epoch": 0.0005897,
41374
+ "grad_norm": 1.6591870784759521,
41375
+ "learning_rate": 5.896e-05,
41376
+ "loss": 0.7886,
41377
+ "step": 5897
41378
+ },
41379
+ {
41380
+ "epoch": 0.0005898,
41381
+ "grad_norm": 1.8780789375305176,
41382
+ "learning_rate": 5.897e-05,
41383
+ "loss": 0.832,
41384
+ "step": 5898
41385
+ },
41386
+ {
41387
+ "epoch": 0.0005899,
41388
+ "grad_norm": 1.866897702217102,
41389
+ "learning_rate": 5.898e-05,
41390
+ "loss": 0.8823,
41391
+ "step": 5899
41392
+ },
41393
+ {
41394
+ "epoch": 0.00059,
41395
+ "grad_norm": 1.6844897270202637,
41396
+ "learning_rate": 5.899e-05,
41397
+ "loss": 0.856,
41398
+ "step": 5900
41399
+ },
41400
+ {
41401
+ "epoch": 0.0005901,
41402
+ "grad_norm": 1.6563727855682373,
41403
+ "learning_rate": 5.9e-05,
41404
+ "loss": 0.8486,
41405
+ "step": 5901
41406
+ },
41407
+ {
41408
+ "epoch": 0.0005902,
41409
+ "grad_norm": 1.4859638214111328,
41410
+ "learning_rate": 5.901e-05,
41411
+ "loss": 0.7632,
41412
+ "step": 5902
41413
+ },
41414
+ {
41415
+ "epoch": 0.0005903,
41416
+ "grad_norm": 1.5815120935440063,
41417
+ "learning_rate": 5.902000000000001e-05,
41418
+ "loss": 0.7725,
41419
+ "step": 5903
41420
+ },
41421
+ {
41422
+ "epoch": 0.0005904,
41423
+ "grad_norm": 2.134547710418701,
41424
+ "learning_rate": 5.9029999999999996e-05,
41425
+ "loss": 0.9082,
41426
+ "step": 5904
41427
+ },
41428
+ {
41429
+ "epoch": 0.0005905,
41430
+ "grad_norm": 1.654038906097412,
41431
+ "learning_rate": 5.904e-05,
41432
+ "loss": 0.7949,
41433
+ "step": 5905
41434
+ },
41435
+ {
41436
+ "epoch": 0.0005906,
41437
+ "grad_norm": 1.5547631978988647,
41438
+ "learning_rate": 5.9050000000000006e-05,
41439
+ "loss": 0.752,
41440
+ "step": 5906
41441
+ },
41442
+ {
41443
+ "epoch": 0.0005907,
41444
+ "grad_norm": 1.5662357807159424,
41445
+ "learning_rate": 5.906000000000001e-05,
41446
+ "loss": 0.8008,
41447
+ "step": 5907
41448
+ },
41449
+ {
41450
+ "epoch": 0.0005908,
41451
+ "grad_norm": 1.57384192943573,
41452
+ "learning_rate": 5.907e-05,
41453
+ "loss": 0.7471,
41454
+ "step": 5908
41455
+ },
41456
+ {
41457
+ "epoch": 0.0005909,
41458
+ "grad_norm": 1.4176363945007324,
41459
+ "learning_rate": 5.9080000000000004e-05,
41460
+ "loss": 0.7207,
41461
+ "step": 5909
41462
+ },
41463
+ {
41464
+ "epoch": 0.000591,
41465
+ "grad_norm": 2.020887851715088,
41466
+ "learning_rate": 5.9090000000000005e-05,
41467
+ "loss": 1.1572,
41468
+ "step": 5910
41469
+ },
41470
+ {
41471
+ "epoch": 0.0005911,
41472
+ "grad_norm": 1.6313410997390747,
41473
+ "learning_rate": 5.91e-05,
41474
+ "loss": 0.7993,
41475
+ "step": 5911
41476
+ },
41477
+ {
41478
+ "epoch": 0.0005912,
41479
+ "grad_norm": 1.5192641019821167,
41480
+ "learning_rate": 5.911e-05,
41481
+ "loss": 0.7412,
41482
+ "step": 5912
41483
+ },
41484
+ {
41485
+ "epoch": 0.0005913,
41486
+ "grad_norm": 1.6517242193222046,
41487
+ "learning_rate": 5.912e-05,
41488
+ "loss": 0.793,
41489
+ "step": 5913
41490
+ },
41491
+ {
41492
+ "epoch": 0.0005914,
41493
+ "grad_norm": 1.642682671546936,
41494
+ "learning_rate": 5.913e-05,
41495
+ "loss": 0.7759,
41496
+ "step": 5914
41497
+ },
41498
+ {
41499
+ "epoch": 0.0005915,
41500
+ "grad_norm": 1.656015157699585,
41501
+ "learning_rate": 5.914e-05,
41502
+ "loss": 0.7896,
41503
+ "step": 5915
41504
+ },
41505
+ {
41506
+ "epoch": 0.0005916,
41507
+ "grad_norm": 1.924726963043213,
41508
+ "learning_rate": 5.915e-05,
41509
+ "loss": 0.8882,
41510
+ "step": 5916
41511
+ },
41512
+ {
41513
+ "epoch": 0.0005917,
41514
+ "grad_norm": 1.5693986415863037,
41515
+ "learning_rate": 5.916e-05,
41516
+ "loss": 0.7817,
41517
+ "step": 5917
41518
+ },
41519
+ {
41520
+ "epoch": 0.0005918,
41521
+ "grad_norm": 1.9120745658874512,
41522
+ "learning_rate": 5.917e-05,
41523
+ "loss": 0.8506,
41524
+ "step": 5918
41525
+ },
41526
+ {
41527
+ "epoch": 0.0005919,
41528
+ "grad_norm": 1.9217987060546875,
41529
+ "learning_rate": 5.918e-05,
41530
+ "loss": 0.8892,
41531
+ "step": 5919
41532
+ },
41533
+ {
41534
+ "epoch": 0.000592,
41535
+ "grad_norm": 2.201404094696045,
41536
+ "learning_rate": 5.919000000000001e-05,
41537
+ "loss": 0.9932,
41538
+ "step": 5920
41539
+ },
41540
+ {
41541
+ "epoch": 0.0005921,
41542
+ "grad_norm": 1.560445785522461,
41543
+ "learning_rate": 5.9199999999999996e-05,
41544
+ "loss": 0.7314,
41545
+ "step": 5921
41546
+ },
41547
+ {
41548
+ "epoch": 0.0005922,
41549
+ "grad_norm": 1.4320646524429321,
41550
+ "learning_rate": 5.921e-05,
41551
+ "loss": 0.7236,
41552
+ "step": 5922
41553
+ },
41554
+ {
41555
+ "epoch": 0.0005923,
41556
+ "grad_norm": 1.3355786800384521,
41557
+ "learning_rate": 5.9220000000000006e-05,
41558
+ "loss": 0.6875,
41559
+ "step": 5923
41560
+ },
41561
+ {
41562
+ "epoch": 0.0005924,
41563
+ "grad_norm": 1.5497984886169434,
41564
+ "learning_rate": 5.923000000000001e-05,
41565
+ "loss": 0.7876,
41566
+ "step": 5924
41567
+ },
41568
+ {
41569
+ "epoch": 0.0005925,
41570
+ "grad_norm": 1.6212843656539917,
41571
+ "learning_rate": 5.924e-05,
41572
+ "loss": 0.7842,
41573
+ "step": 5925
41574
+ },
41575
+ {
41576
+ "epoch": 0.0005926,
41577
+ "grad_norm": 1.6754783391952515,
41578
+ "learning_rate": 5.9250000000000004e-05,
41579
+ "loss": 0.811,
41580
+ "step": 5926
41581
+ },
41582
+ {
41583
+ "epoch": 0.0005927,
41584
+ "grad_norm": 1.5971804857254028,
41585
+ "learning_rate": 5.9260000000000005e-05,
41586
+ "loss": 0.7827,
41587
+ "step": 5927
41588
+ },
41589
+ {
41590
+ "epoch": 0.0005928,
41591
+ "grad_norm": 1.6103317737579346,
41592
+ "learning_rate": 5.927e-05,
41593
+ "loss": 0.8335,
41594
+ "step": 5928
41595
+ },
41596
+ {
41597
+ "epoch": 0.0005929,
41598
+ "grad_norm": 1.4968230724334717,
41599
+ "learning_rate": 5.928e-05,
41600
+ "loss": 0.7837,
41601
+ "step": 5929
41602
+ },
41603
+ {
41604
+ "epoch": 0.000593,
41605
+ "grad_norm": 2.0906319618225098,
41606
+ "learning_rate": 5.929e-05,
41607
+ "loss": 0.9165,
41608
+ "step": 5930
41609
+ },
41610
+ {
41611
+ "epoch": 0.0005931,
41612
+ "grad_norm": 1.530055046081543,
41613
+ "learning_rate": 5.93e-05,
41614
+ "loss": 0.7358,
41615
+ "step": 5931
41616
+ },
41617
+ {
41618
+ "epoch": 0.0005932,
41619
+ "grad_norm": 1.9216514825820923,
41620
+ "learning_rate": 5.931e-05,
41621
+ "loss": 0.9087,
41622
+ "step": 5932
41623
+ },
41624
+ {
41625
+ "epoch": 0.0005933,
41626
+ "grad_norm": 1.5280019044876099,
41627
+ "learning_rate": 5.932e-05,
41628
+ "loss": 0.7646,
41629
+ "step": 5933
41630
+ },
41631
+ {
41632
+ "epoch": 0.0005934,
41633
+ "grad_norm": 1.504082202911377,
41634
+ "learning_rate": 5.933e-05,
41635
+ "loss": 0.7935,
41636
+ "step": 5934
41637
+ },
41638
+ {
41639
+ "epoch": 0.0005935,
41640
+ "grad_norm": 1.5964938402175903,
41641
+ "learning_rate": 5.934e-05,
41642
+ "loss": 0.8315,
41643
+ "step": 5935
41644
+ },
41645
+ {
41646
+ "epoch": 0.0005936,
41647
+ "grad_norm": 1.5614784955978394,
41648
+ "learning_rate": 5.935e-05,
41649
+ "loss": 0.8149,
41650
+ "step": 5936
41651
+ },
41652
+ {
41653
+ "epoch": 0.0005937,
41654
+ "grad_norm": 1.6300089359283447,
41655
+ "learning_rate": 5.936000000000001e-05,
41656
+ "loss": 0.7729,
41657
+ "step": 5937
41658
+ },
41659
+ {
41660
+ "epoch": 0.0005938,
41661
+ "grad_norm": 2.1969408988952637,
41662
+ "learning_rate": 5.9369999999999996e-05,
41663
+ "loss": 0.9463,
41664
+ "step": 5938
41665
+ },
41666
+ {
41667
+ "epoch": 0.0005939,
41668
+ "grad_norm": 1.637465476989746,
41669
+ "learning_rate": 5.938e-05,
41670
+ "loss": 0.7358,
41671
+ "step": 5939
41672
+ },
41673
+ {
41674
+ "epoch": 0.000594,
41675
+ "grad_norm": 2.084805488586426,
41676
+ "learning_rate": 5.9390000000000006e-05,
41677
+ "loss": 0.8447,
41678
+ "step": 5940
41679
+ },
41680
+ {
41681
+ "epoch": 0.0005941,
41682
+ "grad_norm": 1.8123067617416382,
41683
+ "learning_rate": 5.940000000000001e-05,
41684
+ "loss": 0.7827,
41685
+ "step": 5941
41686
+ },
41687
+ {
41688
+ "epoch": 0.0005942,
41689
+ "grad_norm": 2.333995819091797,
41690
+ "learning_rate": 5.941e-05,
41691
+ "loss": 0.8882,
41692
+ "step": 5942
41693
+ },
41694
+ {
41695
+ "epoch": 0.0005943,
41696
+ "grad_norm": 1.5387401580810547,
41697
+ "learning_rate": 5.9420000000000004e-05,
41698
+ "loss": 0.7437,
41699
+ "step": 5943
41700
+ },
41701
+ {
41702
+ "epoch": 0.0005944,
41703
+ "grad_norm": 1.5562912225723267,
41704
+ "learning_rate": 5.9430000000000005e-05,
41705
+ "loss": 0.7188,
41706
+ "step": 5944
41707
+ },
41708
+ {
41709
+ "epoch": 0.0005945,
41710
+ "grad_norm": 1.7533470392227173,
41711
+ "learning_rate": 5.944e-05,
41712
+ "loss": 0.8643,
41713
+ "step": 5945
41714
+ },
41715
+ {
41716
+ "epoch": 0.0005946,
41717
+ "grad_norm": 1.5385069847106934,
41718
+ "learning_rate": 5.945e-05,
41719
+ "loss": 0.7598,
41720
+ "step": 5946
41721
+ },
41722
+ {
41723
+ "epoch": 0.0005947,
41724
+ "grad_norm": 1.808990716934204,
41725
+ "learning_rate": 5.946e-05,
41726
+ "loss": 0.8101,
41727
+ "step": 5947
41728
+ },
41729
+ {
41730
+ "epoch": 0.0005948,
41731
+ "grad_norm": 1.5682218074798584,
41732
+ "learning_rate": 5.947e-05,
41733
+ "loss": 0.8096,
41734
+ "step": 5948
41735
+ },
41736
+ {
41737
+ "epoch": 0.0005949,
41738
+ "grad_norm": 1.9593143463134766,
41739
+ "learning_rate": 5.948e-05,
41740
+ "loss": 0.873,
41741
+ "step": 5949
41742
+ },
41743
+ {
41744
+ "epoch": 0.000595,
41745
+ "grad_norm": 2.1629326343536377,
41746
+ "learning_rate": 5.949e-05,
41747
+ "loss": 0.8833,
41748
+ "step": 5950
41749
+ },
41750
+ {
41751
+ "epoch": 0.0005951,
41752
+ "grad_norm": 2.159661293029785,
41753
+ "learning_rate": 5.95e-05,
41754
+ "loss": 0.8745,
41755
+ "step": 5951
41756
+ },
41757
+ {
41758
+ "epoch": 0.0005952,
41759
+ "grad_norm": 1.592955231666565,
41760
+ "learning_rate": 5.951e-05,
41761
+ "loss": 0.7007,
41762
+ "step": 5952
41763
+ },
41764
+ {
41765
+ "epoch": 0.0005953,
41766
+ "grad_norm": 1.721563696861267,
41767
+ "learning_rate": 5.952e-05,
41768
+ "loss": 0.793,
41769
+ "step": 5953
41770
+ },
41771
+ {
41772
+ "epoch": 0.0005954,
41773
+ "grad_norm": 1.742895245552063,
41774
+ "learning_rate": 5.953e-05,
41775
+ "loss": 0.7817,
41776
+ "step": 5954
41777
+ },
41778
+ {
41779
+ "epoch": 0.0005955,
41780
+ "grad_norm": 1.558279037475586,
41781
+ "learning_rate": 5.9539999999999996e-05,
41782
+ "loss": 0.8027,
41783
+ "step": 5955
41784
+ },
41785
+ {
41786
+ "epoch": 0.0005956,
41787
+ "grad_norm": 1.4903243780136108,
41788
+ "learning_rate": 5.955e-05,
41789
+ "loss": 0.7188,
41790
+ "step": 5956
41791
+ },
41792
+ {
41793
+ "epoch": 0.0005957,
41794
+ "grad_norm": 1.782842755317688,
41795
+ "learning_rate": 5.9560000000000006e-05,
41796
+ "loss": 0.7915,
41797
+ "step": 5957
41798
+ },
41799
+ {
41800
+ "epoch": 0.0005958,
41801
+ "grad_norm": 1.6400823593139648,
41802
+ "learning_rate": 5.957000000000001e-05,
41803
+ "loss": 0.7417,
41804
+ "step": 5958
41805
+ },
41806
+ {
41807
+ "epoch": 0.0005959,
41808
+ "grad_norm": 1.4021106958389282,
41809
+ "learning_rate": 5.958e-05,
41810
+ "loss": 0.6851,
41811
+ "step": 5959
41812
+ },
41813
+ {
41814
+ "epoch": 0.000596,
41815
+ "grad_norm": 1.7664570808410645,
41816
+ "learning_rate": 5.9590000000000004e-05,
41817
+ "loss": 0.8364,
41818
+ "step": 5960
41819
+ },
41820
+ {
41821
+ "epoch": 0.0005961,
41822
+ "grad_norm": 1.5353564023971558,
41823
+ "learning_rate": 5.9600000000000005e-05,
41824
+ "loss": 0.7529,
41825
+ "step": 5961
41826
+ },
41827
+ {
41828
+ "epoch": 0.0005962,
41829
+ "grad_norm": 1.3988913297653198,
41830
+ "learning_rate": 5.961e-05,
41831
+ "loss": 0.6948,
41832
+ "step": 5962
41833
+ },
41834
+ {
41835
+ "epoch": 0.0005963,
41836
+ "grad_norm": 3.4333651065826416,
41837
+ "learning_rate": 5.962e-05,
41838
+ "loss": 0.8628,
41839
+ "step": 5963
41840
+ },
41841
+ {
41842
+ "epoch": 0.0005964,
41843
+ "grad_norm": 1.5233893394470215,
41844
+ "learning_rate": 5.963e-05,
41845
+ "loss": 0.7407,
41846
+ "step": 5964
41847
+ },
41848
+ {
41849
+ "epoch": 0.0005965,
41850
+ "grad_norm": 1.7804893255233765,
41851
+ "learning_rate": 5.964e-05,
41852
+ "loss": 0.771,
41853
+ "step": 5965
41854
+ },
41855
+ {
41856
+ "epoch": 0.0005966,
41857
+ "grad_norm": 1.4234447479248047,
41858
+ "learning_rate": 5.965e-05,
41859
+ "loss": 0.6919,
41860
+ "step": 5966
41861
+ },
41862
+ {
41863
+ "epoch": 0.0005967,
41864
+ "grad_norm": 1.5506657361984253,
41865
+ "learning_rate": 5.966e-05,
41866
+ "loss": 0.7441,
41867
+ "step": 5967
41868
+ },
41869
+ {
41870
+ "epoch": 0.0005968,
41871
+ "grad_norm": 1.599525809288025,
41872
+ "learning_rate": 5.967e-05,
41873
+ "loss": 0.7598,
41874
+ "step": 5968
41875
+ },
41876
+ {
41877
+ "epoch": 0.0005969,
41878
+ "grad_norm": 2.79545259475708,
41879
+ "learning_rate": 5.968e-05,
41880
+ "loss": 0.8911,
41881
+ "step": 5969
41882
+ },
41883
+ {
41884
+ "epoch": 0.000597,
41885
+ "grad_norm": 1.5482027530670166,
41886
+ "learning_rate": 5.969e-05,
41887
+ "loss": 0.7261,
41888
+ "step": 5970
41889
+ },
41890
+ {
41891
+ "epoch": 0.0005971,
41892
+ "grad_norm": 1.4654983282089233,
41893
+ "learning_rate": 5.97e-05,
41894
+ "loss": 0.6968,
41895
+ "step": 5971
41896
+ },
41897
+ {
41898
+ "epoch": 0.0005972,
41899
+ "grad_norm": 1.8669058084487915,
41900
+ "learning_rate": 5.9709999999999996e-05,
41901
+ "loss": 0.7749,
41902
+ "step": 5972
41903
+ },
41904
+ {
41905
+ "epoch": 0.0005973,
41906
+ "grad_norm": 1.5295121669769287,
41907
+ "learning_rate": 5.972e-05,
41908
+ "loss": 0.7104,
41909
+ "step": 5973
41910
+ },
41911
+ {
41912
+ "epoch": 0.0005974,
41913
+ "grad_norm": 1.6404061317443848,
41914
+ "learning_rate": 5.9730000000000006e-05,
41915
+ "loss": 0.7319,
41916
+ "step": 5974
41917
+ },
41918
+ {
41919
+ "epoch": 0.0005975,
41920
+ "grad_norm": 1.3939292430877686,
41921
+ "learning_rate": 5.974000000000001e-05,
41922
+ "loss": 0.6626,
41923
+ "step": 5975
41924
+ },
41925
+ {
41926
+ "epoch": 0.0005976,
41927
+ "grad_norm": 1.4034295082092285,
41928
+ "learning_rate": 5.9749999999999995e-05,
41929
+ "loss": 0.6572,
41930
+ "step": 5976
41931
+ },
41932
+ {
41933
+ "epoch": 0.0005977,
41934
+ "grad_norm": 1.4362789392471313,
41935
+ "learning_rate": 5.9760000000000004e-05,
41936
+ "loss": 0.6987,
41937
+ "step": 5977
41938
+ },
41939
+ {
41940
+ "epoch": 0.0005978,
41941
+ "grad_norm": 1.4524978399276733,
41942
+ "learning_rate": 5.9770000000000005e-05,
41943
+ "loss": 0.71,
41944
+ "step": 5978
41945
+ },
41946
+ {
41947
+ "epoch": 0.0005979,
41948
+ "grad_norm": 2.0200347900390625,
41949
+ "learning_rate": 5.978e-05,
41950
+ "loss": 0.855,
41951
+ "step": 5979
41952
+ },
41953
+ {
41954
+ "epoch": 0.000598,
41955
+ "grad_norm": 1.6224732398986816,
41956
+ "learning_rate": 5.979e-05,
41957
+ "loss": 0.7871,
41958
+ "step": 5980
41959
+ },
41960
+ {
41961
+ "epoch": 0.0005981,
41962
+ "grad_norm": 1.3911391496658325,
41963
+ "learning_rate": 5.9800000000000003e-05,
41964
+ "loss": 0.6836,
41965
+ "step": 5981
41966
+ },
41967
+ {
41968
+ "epoch": 0.0005982,
41969
+ "grad_norm": 1.5443239212036133,
41970
+ "learning_rate": 5.981e-05,
41971
+ "loss": 0.7676,
41972
+ "step": 5982
41973
+ },
41974
+ {
41975
+ "epoch": 0.0005983,
41976
+ "grad_norm": 1.5114848613739014,
41977
+ "learning_rate": 5.982e-05,
41978
+ "loss": 0.73,
41979
+ "step": 5983
41980
+ },
41981
+ {
41982
+ "epoch": 0.0005984,
41983
+ "grad_norm": 1.7796193361282349,
41984
+ "learning_rate": 5.983e-05,
41985
+ "loss": 0.9609,
41986
+ "step": 5984
41987
+ },
41988
+ {
41989
+ "epoch": 0.0005985,
41990
+ "grad_norm": 1.8018146753311157,
41991
+ "learning_rate": 5.984e-05,
41992
+ "loss": 0.8877,
41993
+ "step": 5985
41994
+ },
41995
+ {
41996
+ "epoch": 0.0005986,
41997
+ "grad_norm": 1.4452275037765503,
41998
+ "learning_rate": 5.985e-05,
41999
+ "loss": 0.6768,
42000
+ "step": 5986
42001
+ },
42002
+ {
42003
+ "epoch": 0.0005987,
42004
+ "grad_norm": 1.846722960472107,
42005
+ "learning_rate": 5.986e-05,
42006
+ "loss": 0.8223,
42007
+ "step": 5987
42008
+ },
42009
+ {
42010
+ "epoch": 0.0005988,
42011
+ "grad_norm": 2.8641154766082764,
42012
+ "learning_rate": 5.987e-05,
42013
+ "loss": 0.9253,
42014
+ "step": 5988
42015
+ },
42016
+ {
42017
+ "epoch": 0.0005989,
42018
+ "grad_norm": 1.8777289390563965,
42019
+ "learning_rate": 5.9879999999999996e-05,
42020
+ "loss": 0.7803,
42021
+ "step": 5989
42022
+ },
42023
+ {
42024
+ "epoch": 0.000599,
42025
+ "grad_norm": 1.9890245199203491,
42026
+ "learning_rate": 5.989e-05,
42027
+ "loss": 0.8408,
42028
+ "step": 5990
42029
+ },
42030
+ {
42031
+ "epoch": 0.0005991,
42032
+ "grad_norm": 1.6677210330963135,
42033
+ "learning_rate": 5.9900000000000006e-05,
42034
+ "loss": 0.7793,
42035
+ "step": 5991
42036
+ },
42037
+ {
42038
+ "epoch": 0.0005992,
42039
+ "grad_norm": 1.460680365562439,
42040
+ "learning_rate": 5.991000000000001e-05,
42041
+ "loss": 0.7139,
42042
+ "step": 5992
42043
+ },
42044
+ {
42045
+ "epoch": 0.0005993,
42046
+ "grad_norm": 1.5622482299804688,
42047
+ "learning_rate": 5.9919999999999996e-05,
42048
+ "loss": 0.73,
42049
+ "step": 5993
42050
+ },
42051
+ {
42052
+ "epoch": 0.0005994,
42053
+ "grad_norm": 1.4825682640075684,
42054
+ "learning_rate": 5.9930000000000004e-05,
42055
+ "loss": 0.6875,
42056
+ "step": 5994
42057
+ },
42058
+ {
42059
+ "epoch": 0.0005995,
42060
+ "grad_norm": 1.6099538803100586,
42061
+ "learning_rate": 5.9940000000000005e-05,
42062
+ "loss": 0.792,
42063
+ "step": 5995
42064
+ },
42065
+ {
42066
+ "epoch": 0.0005996,
42067
+ "grad_norm": 1.5532127618789673,
42068
+ "learning_rate": 5.995e-05,
42069
+ "loss": 0.731,
42070
+ "step": 5996
42071
+ },
42072
+ {
42073
+ "epoch": 0.0005997,
42074
+ "grad_norm": 1.4975957870483398,
42075
+ "learning_rate": 5.996e-05,
42076
+ "loss": 0.7085,
42077
+ "step": 5997
42078
+ },
42079
+ {
42080
+ "epoch": 0.0005998,
42081
+ "grad_norm": 1.4132734537124634,
42082
+ "learning_rate": 5.9970000000000004e-05,
42083
+ "loss": 0.6748,
42084
+ "step": 5998
42085
+ },
42086
+ {
42087
+ "epoch": 0.0005999,
42088
+ "grad_norm": 1.314280390739441,
42089
+ "learning_rate": 5.998e-05,
42090
+ "loss": 0.6504,
42091
+ "step": 5999
42092
+ },
42093
+ {
42094
+ "epoch": 0.0006,
42095
+ "grad_norm": 1.5365554094314575,
42096
+ "learning_rate": 5.999e-05,
42097
+ "loss": 0.6851,
42098
+ "step": 6000
42099
+ },
42100
+ {
42101
+ "epoch": 0.0006,
42102
+ "eval_loss": 0.08720719069242477,
42103
+ "eval_runtime": 362.1442,
42104
+ "eval_samples_per_second": 27.613,
42105
+ "eval_steps_per_second": 1.726,
42106
+ "step": 6000
42107
  }
42108
  ],
42109
  "logging_steps": 1,
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b00a94ea1a4dcec603738ff8adef6d8bb8b7aa704248eec4a741c6d35c08c05
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae380d9a3464bac94b12b7910f8c9fe7f8da9b6797d02eff5df63fffc97e0f19
3
  size 5841