CocoRoF commited on
Commit
20a7457
·
verified ·
1 Parent(s): 4b1dcfe

Training in progress, step 17500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a186d2cbe6d7d5ac4c2cb2dffcb32f2c152be1b999f1d2203cb01e12498cf45
3
  size 737632172
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f780a9a78fdadce0c173bf611a5da60db156d63194a2e6a49f1f18c27d761ce
3
  size 737632172
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cd8d731853da7018e3f5c4a04692a24cce0335468b21c7f5e72c91a67c23f4c
3
  size 1475354682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15d1e92749084ac9dde10d7d65367e2e60f9c34a59a6069753dd8472f0fc8a13
3
  size 1475354682
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39d0a42a76c6856b42516358f397705cff8f5ae2210de23f6abc8fc7d370ce43
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1754ce1fea08e0a1abf50b88b05ad2235accf247d46d7ee2f8c08c6670f73f31
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c79d39762bf88c59ea58ab8c192f4d9721ab6eba78debc69a369654d4199af50
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae26017d4550577988f9e10089ab5b71db8da5c695439c0a0fea91d6a1fd0704
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1426a2458db12639f98377335a1109abade08a448981fb41c315ef1f9fd4191e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d1f128b23b661bf875e117cc47a5648d99e77550cfacf4588ce64a1dd7dbde3
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e598407075fbe89ea2094160f92052415ad3b0b80d125438201859b9875d537b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf41da8bd40bcccaff03238fa84745187c3a9d568a9b5f691e9996625af1de6
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:437e8f540f343ec9a874078e652ba30d02c2d12e3039d8092e96942ade74967b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a50ddb223b7bd2b99f1b2554cda38ae044aac0f187628b6ded5c4d407979e294
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23cfb7770b02ea62450b4853818ad587c09df566939a04e941091111cc9b7cf2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6bf0de30b7a6e43c74608e8f1fa3b7d38bb356d58e402c397bc6ad56aa95795
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54fe581e399def26af9ab0920fdea64c37d1eed5ad5a4b3fec55e45525aba99f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd016a3c3e3ba2a5ae38a6d0f24920c1961e6c3882d668aaebde5a2d6e1459fb
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:491793f3baa6e0a6171458158bd6f4cce55a8696d0c0e279c19b74fbf532973f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4e5c265f62dd45b87e17d9c102ed3afb1ecc9d2d1466b032139f4181be9bfb9
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c420d6b7ad7b972ca48fc034e3641c21f7aede383a84979b4bca5295d5ea7ac1
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4aa89b4c5d338501a2c77924372d3acbefc23cb2b700c704822eb4c4c76c5fb
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6657013996371928,
5
  "eval_steps": 1000,
6
- "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -10627,6 +10627,1772 @@
10627
  "eval_samples_per_second": 1807.337,
10628
  "eval_steps_per_second": 56.48,
10629
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10630
  }
10631
  ],
10632
  "logging_steps": 10,
@@ -10646,7 +12412,7 @@
10646
  "attributes": {}
10647
  }
10648
  },
10649
- "total_flos": 5.234584807538688e+18,
10650
  "train_batch_size": 4,
10651
  "trial_name": null,
10652
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7766516329100582,
5
  "eval_steps": 1000,
6
+ "global_step": 17500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
10627
  "eval_samples_per_second": 1807.337,
10628
  "eval_steps_per_second": 56.48,
10629
  "step": 15000
10630
+ },
10631
+ {
10632
+ "epoch": 0.6661452005702841,
10633
+ "grad_norm": 70.91802215576172,
10634
+ "learning_rate": 9.973978721146893e-06,
10635
+ "loss": 10.7977,
10636
+ "step": 15010
10637
+ },
10638
+ {
10639
+ "epoch": 0.6665890015033756,
10640
+ "grad_norm": 73.55622100830078,
10641
+ "learning_rate": 9.973961385184965e-06,
10642
+ "loss": 10.6874,
10643
+ "step": 15020
10644
+ },
10645
+ {
10646
+ "epoch": 0.6670328024364671,
10647
+ "grad_norm": 83.61444854736328,
10648
+ "learning_rate": 9.973944049223038e-06,
10649
+ "loss": 10.7632,
10650
+ "step": 15030
10651
+ },
10652
+ {
10653
+ "epoch": 0.6674766033695586,
10654
+ "grad_norm": 67.87864685058594,
10655
+ "learning_rate": 9.97392671326111e-06,
10656
+ "loss": 10.6371,
10657
+ "step": 15040
10658
+ },
10659
+ {
10660
+ "epoch": 0.6679204043026501,
10661
+ "grad_norm": 67.57317352294922,
10662
+ "learning_rate": 9.973909377299182e-06,
10663
+ "loss": 10.4995,
10664
+ "step": 15050
10665
+ },
10666
+ {
10667
+ "epoch": 0.6683642052357415,
10668
+ "grad_norm": 75.98468780517578,
10669
+ "learning_rate": 9.973892041337255e-06,
10670
+ "loss": 10.5515,
10671
+ "step": 15060
10672
+ },
10673
+ {
10674
+ "epoch": 0.668808006168833,
10675
+ "grad_norm": 70.60570526123047,
10676
+ "learning_rate": 9.973874705375328e-06,
10677
+ "loss": 10.7693,
10678
+ "step": 15070
10679
+ },
10680
+ {
10681
+ "epoch": 0.6692518071019244,
10682
+ "grad_norm": 64.22474670410156,
10683
+ "learning_rate": 9.9738573694134e-06,
10684
+ "loss": 10.5848,
10685
+ "step": 15080
10686
+ },
10687
+ {
10688
+ "epoch": 0.6696956080350159,
10689
+ "grad_norm": 70.5452880859375,
10690
+ "learning_rate": 9.973840033451473e-06,
10691
+ "loss": 10.1573,
10692
+ "step": 15090
10693
+ },
10694
+ {
10695
+ "epoch": 0.6701394089681073,
10696
+ "grad_norm": 65.60162353515625,
10697
+ "learning_rate": 9.973822697489546e-06,
10698
+ "loss": 10.7798,
10699
+ "step": 15100
10700
+ },
10701
+ {
10702
+ "epoch": 0.6705832099011988,
10703
+ "grad_norm": 66.75809478759766,
10704
+ "learning_rate": 9.973805361527617e-06,
10705
+ "loss": 10.576,
10706
+ "step": 15110
10707
+ },
10708
+ {
10709
+ "epoch": 0.6710270108342903,
10710
+ "grad_norm": 71.2154312133789,
10711
+ "learning_rate": 9.97378802556569e-06,
10712
+ "loss": 11.1499,
10713
+ "step": 15120
10714
+ },
10715
+ {
10716
+ "epoch": 0.6714708117673818,
10717
+ "grad_norm": 77.17507934570312,
10718
+ "learning_rate": 9.973770689603763e-06,
10719
+ "loss": 10.311,
10720
+ "step": 15130
10721
+ },
10722
+ {
10723
+ "epoch": 0.6719146127004733,
10724
+ "grad_norm": 76.38935852050781,
10725
+ "learning_rate": 9.973753353641835e-06,
10726
+ "loss": 10.6521,
10727
+ "step": 15140
10728
+ },
10729
+ {
10730
+ "epoch": 0.6723584136335646,
10731
+ "grad_norm": 65.29672241210938,
10732
+ "learning_rate": 9.973736017679908e-06,
10733
+ "loss": 10.6403,
10734
+ "step": 15150
10735
+ },
10736
+ {
10737
+ "epoch": 0.6728022145666561,
10738
+ "grad_norm": 63.178077697753906,
10739
+ "learning_rate": 9.973718681717981e-06,
10740
+ "loss": 10.3128,
10741
+ "step": 15160
10742
+ },
10743
+ {
10744
+ "epoch": 0.6732460154997476,
10745
+ "grad_norm": 65.84847259521484,
10746
+ "learning_rate": 9.973701345756054e-06,
10747
+ "loss": 10.3361,
10748
+ "step": 15170
10749
+ },
10750
+ {
10751
+ "epoch": 0.6736898164328391,
10752
+ "grad_norm": 72.8542251586914,
10753
+ "learning_rate": 9.973684009794125e-06,
10754
+ "loss": 10.3667,
10755
+ "step": 15180
10756
+ },
10757
+ {
10758
+ "epoch": 0.6741336173659305,
10759
+ "grad_norm": 72.04983520507812,
10760
+ "learning_rate": 9.973666673832198e-06,
10761
+ "loss": 11.1101,
10762
+ "step": 15190
10763
+ },
10764
+ {
10765
+ "epoch": 0.674577418299022,
10766
+ "grad_norm": 63.43279266357422,
10767
+ "learning_rate": 9.973649337870272e-06,
10768
+ "loss": 10.7974,
10769
+ "step": 15200
10770
+ },
10771
+ {
10772
+ "epoch": 0.6750212192321134,
10773
+ "grad_norm": 76.92269897460938,
10774
+ "learning_rate": 9.973632001908343e-06,
10775
+ "loss": 10.4673,
10776
+ "step": 15210
10777
+ },
10778
+ {
10779
+ "epoch": 0.6754650201652049,
10780
+ "grad_norm": 78.26722717285156,
10781
+ "learning_rate": 9.973614665946416e-06,
10782
+ "loss": 10.9101,
10783
+ "step": 15220
10784
+ },
10785
+ {
10786
+ "epoch": 0.6759088210982963,
10787
+ "grad_norm": 67.4601058959961,
10788
+ "learning_rate": 9.973597329984489e-06,
10789
+ "loss": 10.7099,
10790
+ "step": 15230
10791
+ },
10792
+ {
10793
+ "epoch": 0.6763526220313878,
10794
+ "grad_norm": 67.75270080566406,
10795
+ "learning_rate": 9.97357999402256e-06,
10796
+ "loss": 10.8756,
10797
+ "step": 15240
10798
+ },
10799
+ {
10800
+ "epoch": 0.6767964229644793,
10801
+ "grad_norm": 65.31672668457031,
10802
+ "learning_rate": 9.973562658060634e-06,
10803
+ "loss": 10.5466,
10804
+ "step": 15250
10805
+ },
10806
+ {
10807
+ "epoch": 0.6772402238975708,
10808
+ "grad_norm": 84.87113952636719,
10809
+ "learning_rate": 9.973545322098707e-06,
10810
+ "loss": 10.5983,
10811
+ "step": 15260
10812
+ },
10813
+ {
10814
+ "epoch": 0.6776840248306623,
10815
+ "grad_norm": 64.18128204345703,
10816
+ "learning_rate": 9.973527986136778e-06,
10817
+ "loss": 10.241,
10818
+ "step": 15270
10819
+ },
10820
+ {
10821
+ "epoch": 0.6781278257637536,
10822
+ "grad_norm": 86.96048736572266,
10823
+ "learning_rate": 9.973510650174851e-06,
10824
+ "loss": 10.4494,
10825
+ "step": 15280
10826
+ },
10827
+ {
10828
+ "epoch": 0.6785716266968451,
10829
+ "grad_norm": 77.51726531982422,
10830
+ "learning_rate": 9.973493314212924e-06,
10831
+ "loss": 10.4231,
10832
+ "step": 15290
10833
+ },
10834
+ {
10835
+ "epoch": 0.6790154276299366,
10836
+ "grad_norm": 74.92723083496094,
10837
+ "learning_rate": 9.973475978250996e-06,
10838
+ "loss": 10.1194,
10839
+ "step": 15300
10840
+ },
10841
+ {
10842
+ "epoch": 0.6794592285630281,
10843
+ "grad_norm": 66.97340393066406,
10844
+ "learning_rate": 9.973458642289069e-06,
10845
+ "loss": 10.7708,
10846
+ "step": 15310
10847
+ },
10848
+ {
10849
+ "epoch": 0.6799030294961195,
10850
+ "grad_norm": 79.47786712646484,
10851
+ "learning_rate": 9.973441306327142e-06,
10852
+ "loss": 10.2036,
10853
+ "step": 15320
10854
+ },
10855
+ {
10856
+ "epoch": 0.680346830429211,
10857
+ "grad_norm": 85.9738540649414,
10858
+ "learning_rate": 9.973423970365213e-06,
10859
+ "loss": 10.7741,
10860
+ "step": 15330
10861
+ },
10862
+ {
10863
+ "epoch": 0.6807906313623024,
10864
+ "grad_norm": 83.63733673095703,
10865
+ "learning_rate": 9.973406634403286e-06,
10866
+ "loss": 10.5102,
10867
+ "step": 15340
10868
+ },
10869
+ {
10870
+ "epoch": 0.6812344322953939,
10871
+ "grad_norm": 68.50708770751953,
10872
+ "learning_rate": 9.97338929844136e-06,
10873
+ "loss": 10.3726,
10874
+ "step": 15350
10875
+ },
10876
+ {
10877
+ "epoch": 0.6816782332284853,
10878
+ "grad_norm": 74.40569305419922,
10879
+ "learning_rate": 9.97337196247943e-06,
10880
+ "loss": 11.1048,
10881
+ "step": 15360
10882
+ },
10883
+ {
10884
+ "epoch": 0.6821220341615768,
10885
+ "grad_norm": 81.3375473022461,
10886
+ "learning_rate": 9.973354626517504e-06,
10887
+ "loss": 9.9319,
10888
+ "step": 15370
10889
+ },
10890
+ {
10891
+ "epoch": 0.6825658350946683,
10892
+ "grad_norm": 74.2603988647461,
10893
+ "learning_rate": 9.973337290555577e-06,
10894
+ "loss": 10.8757,
10895
+ "step": 15380
10896
+ },
10897
+ {
10898
+ "epoch": 0.6830096360277598,
10899
+ "grad_norm": 71.74883270263672,
10900
+ "learning_rate": 9.97331995459365e-06,
10901
+ "loss": 10.64,
10902
+ "step": 15390
10903
+ },
10904
+ {
10905
+ "epoch": 0.6834534369608513,
10906
+ "grad_norm": 68.61750030517578,
10907
+ "learning_rate": 9.973302618631721e-06,
10908
+ "loss": 10.7962,
10909
+ "step": 15400
10910
+ },
10911
+ {
10912
+ "epoch": 0.6838972378939426,
10913
+ "grad_norm": 59.030921936035156,
10914
+ "learning_rate": 9.973285282669794e-06,
10915
+ "loss": 10.5287,
10916
+ "step": 15410
10917
+ },
10918
+ {
10919
+ "epoch": 0.6843410388270341,
10920
+ "grad_norm": 76.87126922607422,
10921
+ "learning_rate": 9.973267946707867e-06,
10922
+ "loss": 10.624,
10923
+ "step": 15420
10924
+ },
10925
+ {
10926
+ "epoch": 0.6847848397601256,
10927
+ "grad_norm": 78.29729461669922,
10928
+ "learning_rate": 9.973250610745939e-06,
10929
+ "loss": 10.6998,
10930
+ "step": 15430
10931
+ },
10932
+ {
10933
+ "epoch": 0.6852286406932171,
10934
+ "grad_norm": 70.97583770751953,
10935
+ "learning_rate": 9.973233274784012e-06,
10936
+ "loss": 10.6444,
10937
+ "step": 15440
10938
+ },
10939
+ {
10940
+ "epoch": 0.6856724416263085,
10941
+ "grad_norm": 65.24356842041016,
10942
+ "learning_rate": 9.973215938822085e-06,
10943
+ "loss": 10.6682,
10944
+ "step": 15450
10945
+ },
10946
+ {
10947
+ "epoch": 0.6861162425594,
10948
+ "grad_norm": 71.17062377929688,
10949
+ "learning_rate": 9.973198602860156e-06,
10950
+ "loss": 11.0228,
10951
+ "step": 15460
10952
+ },
10953
+ {
10954
+ "epoch": 0.6865600434924914,
10955
+ "grad_norm": 87.5817642211914,
10956
+ "learning_rate": 9.97318126689823e-06,
10957
+ "loss": 10.3963,
10958
+ "step": 15470
10959
+ },
10960
+ {
10961
+ "epoch": 0.6870038444255829,
10962
+ "grad_norm": 62.55752182006836,
10963
+ "learning_rate": 9.973163930936302e-06,
10964
+ "loss": 10.5304,
10965
+ "step": 15480
10966
+ },
10967
+ {
10968
+ "epoch": 0.6874476453586744,
10969
+ "grad_norm": 72.95471954345703,
10970
+ "learning_rate": 9.973146594974374e-06,
10971
+ "loss": 10.7116,
10972
+ "step": 15490
10973
+ },
10974
+ {
10975
+ "epoch": 0.6878914462917658,
10976
+ "grad_norm": 63.889129638671875,
10977
+ "learning_rate": 9.973129259012447e-06,
10978
+ "loss": 10.2496,
10979
+ "step": 15500
10980
+ },
10981
+ {
10982
+ "epoch": 0.6883352472248573,
10983
+ "grad_norm": 70.15679168701172,
10984
+ "learning_rate": 9.97311192305052e-06,
10985
+ "loss": 10.0643,
10986
+ "step": 15510
10987
+ },
10988
+ {
10989
+ "epoch": 0.6887790481579488,
10990
+ "grad_norm": 69.72803497314453,
10991
+ "learning_rate": 9.973094587088591e-06,
10992
+ "loss": 10.5465,
10993
+ "step": 15520
10994
+ },
10995
+ {
10996
+ "epoch": 0.6892228490910403,
10997
+ "grad_norm": 64.5792236328125,
10998
+ "learning_rate": 9.973077251126664e-06,
10999
+ "loss": 10.7322,
11000
+ "step": 15530
11001
+ },
11002
+ {
11003
+ "epoch": 0.6896666500241316,
11004
+ "grad_norm": 82.74343872070312,
11005
+ "learning_rate": 9.973059915164738e-06,
11006
+ "loss": 10.3258,
11007
+ "step": 15540
11008
+ },
11009
+ {
11010
+ "epoch": 0.6901104509572231,
11011
+ "grad_norm": 73.86137390136719,
11012
+ "learning_rate": 9.973042579202809e-06,
11013
+ "loss": 10.4541,
11014
+ "step": 15550
11015
+ },
11016
+ {
11017
+ "epoch": 0.6905542518903146,
11018
+ "grad_norm": 81.49348449707031,
11019
+ "learning_rate": 9.973025243240882e-06,
11020
+ "loss": 10.4592,
11021
+ "step": 15560
11022
+ },
11023
+ {
11024
+ "epoch": 0.6909980528234061,
11025
+ "grad_norm": 70.07816314697266,
11026
+ "learning_rate": 9.973007907278955e-06,
11027
+ "loss": 10.6212,
11028
+ "step": 15570
11029
+ },
11030
+ {
11031
+ "epoch": 0.6914418537564975,
11032
+ "grad_norm": 75.75015258789062,
11033
+ "learning_rate": 9.972990571317026e-06,
11034
+ "loss": 10.459,
11035
+ "step": 15580
11036
+ },
11037
+ {
11038
+ "epoch": 0.691885654689589,
11039
+ "grad_norm": 69.51868438720703,
11040
+ "learning_rate": 9.9729732353551e-06,
11041
+ "loss": 10.6566,
11042
+ "step": 15590
11043
+ },
11044
+ {
11045
+ "epoch": 0.6923294556226804,
11046
+ "grad_norm": 72.89574432373047,
11047
+ "learning_rate": 9.972955899393173e-06,
11048
+ "loss": 10.8038,
11049
+ "step": 15600
11050
+ },
11051
+ {
11052
+ "epoch": 0.6927732565557719,
11053
+ "grad_norm": 70.88813018798828,
11054
+ "learning_rate": 9.972938563431246e-06,
11055
+ "loss": 10.7221,
11056
+ "step": 15610
11057
+ },
11058
+ {
11059
+ "epoch": 0.6932170574888634,
11060
+ "grad_norm": 84.63404083251953,
11061
+ "learning_rate": 9.972921227469317e-06,
11062
+ "loss": 10.8532,
11063
+ "step": 15620
11064
+ },
11065
+ {
11066
+ "epoch": 0.6936608584219548,
11067
+ "grad_norm": 68.11902618408203,
11068
+ "learning_rate": 9.97290389150739e-06,
11069
+ "loss": 10.7919,
11070
+ "step": 15630
11071
+ },
11072
+ {
11073
+ "epoch": 0.6941046593550463,
11074
+ "grad_norm": 88.98064422607422,
11075
+ "learning_rate": 9.972886555545463e-06,
11076
+ "loss": 10.5843,
11077
+ "step": 15640
11078
+ },
11079
+ {
11080
+ "epoch": 0.6945484602881378,
11081
+ "grad_norm": 72.20332336425781,
11082
+ "learning_rate": 9.972869219583535e-06,
11083
+ "loss": 10.7185,
11084
+ "step": 15650
11085
+ },
11086
+ {
11087
+ "epoch": 0.6949922612212293,
11088
+ "grad_norm": 61.422576904296875,
11089
+ "learning_rate": 9.972851883621608e-06,
11090
+ "loss": 10.5118,
11091
+ "step": 15660
11092
+ },
11093
+ {
11094
+ "epoch": 0.6954360621543206,
11095
+ "grad_norm": 62.006317138671875,
11096
+ "learning_rate": 9.97283454765968e-06,
11097
+ "loss": 10.2841,
11098
+ "step": 15670
11099
+ },
11100
+ {
11101
+ "epoch": 0.6958798630874121,
11102
+ "grad_norm": 69.10589599609375,
11103
+ "learning_rate": 9.972817211697752e-06,
11104
+ "loss": 10.6618,
11105
+ "step": 15680
11106
+ },
11107
+ {
11108
+ "epoch": 0.6963236640205036,
11109
+ "grad_norm": 79.93278503417969,
11110
+ "learning_rate": 9.972799875735825e-06,
11111
+ "loss": 10.4638,
11112
+ "step": 15690
11113
+ },
11114
+ {
11115
+ "epoch": 0.6967674649535951,
11116
+ "grad_norm": 71.16869354248047,
11117
+ "learning_rate": 9.972782539773898e-06,
11118
+ "loss": 10.4969,
11119
+ "step": 15700
11120
+ },
11121
+ {
11122
+ "epoch": 0.6972112658866866,
11123
+ "grad_norm": 76.26361846923828,
11124
+ "learning_rate": 9.97276520381197e-06,
11125
+ "loss": 10.5915,
11126
+ "step": 15710
11127
+ },
11128
+ {
11129
+ "epoch": 0.697655066819778,
11130
+ "grad_norm": 80.85043334960938,
11131
+ "learning_rate": 9.972747867850043e-06,
11132
+ "loss": 11.0115,
11133
+ "step": 15720
11134
+ },
11135
+ {
11136
+ "epoch": 0.6980988677528694,
11137
+ "grad_norm": 65.5965576171875,
11138
+ "learning_rate": 9.972730531888116e-06,
11139
+ "loss": 10.756,
11140
+ "step": 15730
11141
+ },
11142
+ {
11143
+ "epoch": 0.6985426686859609,
11144
+ "grad_norm": 67.6102066040039,
11145
+ "learning_rate": 9.972713195926187e-06,
11146
+ "loss": 10.5331,
11147
+ "step": 15740
11148
+ },
11149
+ {
11150
+ "epoch": 0.6989864696190524,
11151
+ "grad_norm": 69.96884155273438,
11152
+ "learning_rate": 9.97269585996426e-06,
11153
+ "loss": 10.6435,
11154
+ "step": 15750
11155
+ },
11156
+ {
11157
+ "epoch": 0.6994302705521438,
11158
+ "grad_norm": 58.43931198120117,
11159
+ "learning_rate": 9.972678524002333e-06,
11160
+ "loss": 10.2629,
11161
+ "step": 15760
11162
+ },
11163
+ {
11164
+ "epoch": 0.6998740714852353,
11165
+ "grad_norm": 89.6247329711914,
11166
+ "learning_rate": 9.972661188040405e-06,
11167
+ "loss": 11.0823,
11168
+ "step": 15770
11169
+ },
11170
+ {
11171
+ "epoch": 0.7003178724183268,
11172
+ "grad_norm": 86.3462142944336,
11173
+ "learning_rate": 9.972643852078478e-06,
11174
+ "loss": 11.0013,
11175
+ "step": 15780
11176
+ },
11177
+ {
11178
+ "epoch": 0.7007616733514183,
11179
+ "grad_norm": 67.15304565429688,
11180
+ "learning_rate": 9.972626516116551e-06,
11181
+ "loss": 10.5305,
11182
+ "step": 15790
11183
+ },
11184
+ {
11185
+ "epoch": 0.7012054742845096,
11186
+ "grad_norm": 70.22341918945312,
11187
+ "learning_rate": 9.972609180154624e-06,
11188
+ "loss": 10.2173,
11189
+ "step": 15800
11190
+ },
11191
+ {
11192
+ "epoch": 0.7016492752176011,
11193
+ "grad_norm": 67.83938598632812,
11194
+ "learning_rate": 9.972591844192695e-06,
11195
+ "loss": 10.1845,
11196
+ "step": 15810
11197
+ },
11198
+ {
11199
+ "epoch": 0.7020930761506926,
11200
+ "grad_norm": 73.8240966796875,
11201
+ "learning_rate": 9.972574508230769e-06,
11202
+ "loss": 10.6668,
11203
+ "step": 15820
11204
+ },
11205
+ {
11206
+ "epoch": 0.7025368770837841,
11207
+ "grad_norm": 74.54997253417969,
11208
+ "learning_rate": 9.972557172268842e-06,
11209
+ "loss": 10.4764,
11210
+ "step": 15830
11211
+ },
11212
+ {
11213
+ "epoch": 0.7029806780168756,
11214
+ "grad_norm": 77.97396087646484,
11215
+ "learning_rate": 9.972539836306913e-06,
11216
+ "loss": 10.8094,
11217
+ "step": 15840
11218
+ },
11219
+ {
11220
+ "epoch": 0.703424478949967,
11221
+ "grad_norm": 69.54945373535156,
11222
+ "learning_rate": 9.972522500344986e-06,
11223
+ "loss": 10.1263,
11224
+ "step": 15850
11225
+ },
11226
+ {
11227
+ "epoch": 0.7038682798830584,
11228
+ "grad_norm": 74.77880859375,
11229
+ "learning_rate": 9.972505164383059e-06,
11230
+ "loss": 10.8418,
11231
+ "step": 15860
11232
+ },
11233
+ {
11234
+ "epoch": 0.7043120808161499,
11235
+ "grad_norm": 73.15492248535156,
11236
+ "learning_rate": 9.97248782842113e-06,
11237
+ "loss": 10.6459,
11238
+ "step": 15870
11239
+ },
11240
+ {
11241
+ "epoch": 0.7047558817492414,
11242
+ "grad_norm": 75.93856048583984,
11243
+ "learning_rate": 9.972470492459204e-06,
11244
+ "loss": 10.5095,
11245
+ "step": 15880
11246
+ },
11247
+ {
11248
+ "epoch": 0.7051996826823328,
11249
+ "grad_norm": 71.69056701660156,
11250
+ "learning_rate": 9.972453156497277e-06,
11251
+ "loss": 10.5275,
11252
+ "step": 15890
11253
+ },
11254
+ {
11255
+ "epoch": 0.7056434836154243,
11256
+ "grad_norm": 70.82780456542969,
11257
+ "learning_rate": 9.972435820535348e-06,
11258
+ "loss": 10.1364,
11259
+ "step": 15900
11260
+ },
11261
+ {
11262
+ "epoch": 0.7060872845485158,
11263
+ "grad_norm": 75.50528717041016,
11264
+ "learning_rate": 9.972418484573421e-06,
11265
+ "loss": 10.4329,
11266
+ "step": 15910
11267
+ },
11268
+ {
11269
+ "epoch": 0.7065310854816073,
11270
+ "grad_norm": 78.77973937988281,
11271
+ "learning_rate": 9.972401148611494e-06,
11272
+ "loss": 10.6305,
11273
+ "step": 15920
11274
+ },
11275
+ {
11276
+ "epoch": 0.7069748864146986,
11277
+ "grad_norm": 70.21820068359375,
11278
+ "learning_rate": 9.972383812649567e-06,
11279
+ "loss": 10.6905,
11280
+ "step": 15930
11281
+ },
11282
+ {
11283
+ "epoch": 0.7074186873477901,
11284
+ "grad_norm": 88.8916015625,
11285
+ "learning_rate": 9.972366476687639e-06,
11286
+ "loss": 10.3912,
11287
+ "step": 15940
11288
+ },
11289
+ {
11290
+ "epoch": 0.7078624882808816,
11291
+ "grad_norm": 84.31858825683594,
11292
+ "learning_rate": 9.972349140725712e-06,
11293
+ "loss": 10.5527,
11294
+ "step": 15950
11295
+ },
11296
+ {
11297
+ "epoch": 0.7083062892139731,
11298
+ "grad_norm": 71.62870788574219,
11299
+ "learning_rate": 9.972331804763785e-06,
11300
+ "loss": 10.6158,
11301
+ "step": 15960
11302
+ },
11303
+ {
11304
+ "epoch": 0.7087500901470646,
11305
+ "grad_norm": 64.43877410888672,
11306
+ "learning_rate": 9.972314468801856e-06,
11307
+ "loss": 10.8082,
11308
+ "step": 15970
11309
+ },
11310
+ {
11311
+ "epoch": 0.709193891080156,
11312
+ "grad_norm": 63.440391540527344,
11313
+ "learning_rate": 9.97229713283993e-06,
11314
+ "loss": 10.5228,
11315
+ "step": 15980
11316
+ },
11317
+ {
11318
+ "epoch": 0.7096376920132474,
11319
+ "grad_norm": 68.9405746459961,
11320
+ "learning_rate": 9.972279796878002e-06,
11321
+ "loss": 10.631,
11322
+ "step": 15990
11323
+ },
11324
+ {
11325
+ "epoch": 0.7100814929463389,
11326
+ "grad_norm": 78.99846649169922,
11327
+ "learning_rate": 9.972262460916074e-06,
11328
+ "loss": 10.7683,
11329
+ "step": 16000
11330
+ },
11331
+ {
11332
+ "epoch": 0.7100814929463389,
11333
+ "eval_loss": 0.3311347961425781,
11334
+ "eval_runtime": 678.7995,
11335
+ "eval_samples_per_second": 1789.028,
11336
+ "eval_steps_per_second": 55.908,
11337
+ "step": 16000
11338
+ },
11339
+ {
11340
+ "epoch": 0.7105252938794304,
11341
+ "grad_norm": 69.47785949707031,
11342
+ "learning_rate": 9.972245124954147e-06,
11343
+ "loss": 10.1387,
11344
+ "step": 16010
11345
+ },
11346
+ {
11347
+ "epoch": 0.7109690948125218,
11348
+ "grad_norm": 69.83346557617188,
11349
+ "learning_rate": 9.97222778899222e-06,
11350
+ "loss": 10.7619,
11351
+ "step": 16020
11352
+ },
11353
+ {
11354
+ "epoch": 0.7114128957456133,
11355
+ "grad_norm": 84.6226577758789,
11356
+ "learning_rate": 9.972210453030291e-06,
11357
+ "loss": 10.7781,
11358
+ "step": 16030
11359
+ },
11360
+ {
11361
+ "epoch": 0.7118566966787048,
11362
+ "grad_norm": 68.47029876708984,
11363
+ "learning_rate": 9.972193117068364e-06,
11364
+ "loss": 10.684,
11365
+ "step": 16040
11366
+ },
11367
+ {
11368
+ "epoch": 0.7123004976117963,
11369
+ "grad_norm": 78.78176879882812,
11370
+ "learning_rate": 9.972175781106437e-06,
11371
+ "loss": 10.7629,
11372
+ "step": 16050
11373
+ },
11374
+ {
11375
+ "epoch": 0.7127442985448877,
11376
+ "grad_norm": 70.59607696533203,
11377
+ "learning_rate": 9.97215844514451e-06,
11378
+ "loss": 10.7124,
11379
+ "step": 16060
11380
+ },
11381
+ {
11382
+ "epoch": 0.7131880994779791,
11383
+ "grad_norm": 69.93449401855469,
11384
+ "learning_rate": 9.972141109182582e-06,
11385
+ "loss": 11.4954,
11386
+ "step": 16070
11387
+ },
11388
+ {
11389
+ "epoch": 0.7136319004110706,
11390
+ "grad_norm": 67.49927520751953,
11391
+ "learning_rate": 9.972123773220655e-06,
11392
+ "loss": 10.9881,
11393
+ "step": 16080
11394
+ },
11395
+ {
11396
+ "epoch": 0.7140757013441621,
11397
+ "grad_norm": 66.40914916992188,
11398
+ "learning_rate": 9.972106437258728e-06,
11399
+ "loss": 10.8346,
11400
+ "step": 16090
11401
+ },
11402
+ {
11403
+ "epoch": 0.7145195022772536,
11404
+ "grad_norm": 66.59365844726562,
11405
+ "learning_rate": 9.9720891012968e-06,
11406
+ "loss": 10.703,
11407
+ "step": 16100
11408
+ },
11409
+ {
11410
+ "epoch": 0.714963303210345,
11411
+ "grad_norm": 61.00757598876953,
11412
+ "learning_rate": 9.972071765334873e-06,
11413
+ "loss": 10.3785,
11414
+ "step": 16110
11415
+ },
11416
+ {
11417
+ "epoch": 0.7154071041434364,
11418
+ "grad_norm": 78.33125305175781,
11419
+ "learning_rate": 9.972054429372946e-06,
11420
+ "loss": 10.8235,
11421
+ "step": 16120
11422
+ },
11423
+ {
11424
+ "epoch": 0.7158509050765279,
11425
+ "grad_norm": 63.80758285522461,
11426
+ "learning_rate": 9.972037093411017e-06,
11427
+ "loss": 10.8951,
11428
+ "step": 16130
11429
+ },
11430
+ {
11431
+ "epoch": 0.7162947060096194,
11432
+ "grad_norm": 76.70967864990234,
11433
+ "learning_rate": 9.97201975744909e-06,
11434
+ "loss": 10.9641,
11435
+ "step": 16140
11436
+ },
11437
+ {
11438
+ "epoch": 0.7167385069427108,
11439
+ "grad_norm": 71.60503387451172,
11440
+ "learning_rate": 9.972002421487163e-06,
11441
+ "loss": 10.5517,
11442
+ "step": 16150
11443
+ },
11444
+ {
11445
+ "epoch": 0.7171823078758023,
11446
+ "grad_norm": 74.2575912475586,
11447
+ "learning_rate": 9.971985085525235e-06,
11448
+ "loss": 10.161,
11449
+ "step": 16160
11450
+ },
11451
+ {
11452
+ "epoch": 0.7176261088088938,
11453
+ "grad_norm": 71.10789489746094,
11454
+ "learning_rate": 9.971967749563308e-06,
11455
+ "loss": 10.6625,
11456
+ "step": 16170
11457
+ },
11458
+ {
11459
+ "epoch": 0.7180699097419853,
11460
+ "grad_norm": 71.27813720703125,
11461
+ "learning_rate": 9.97195041360138e-06,
11462
+ "loss": 10.9964,
11463
+ "step": 16180
11464
+ },
11465
+ {
11466
+ "epoch": 0.7185137106750767,
11467
+ "grad_norm": 63.57979965209961,
11468
+ "learning_rate": 9.971933077639454e-06,
11469
+ "loss": 10.1936,
11470
+ "step": 16190
11471
+ },
11472
+ {
11473
+ "epoch": 0.7189575116081681,
11474
+ "grad_norm": 74.10035705566406,
11475
+ "learning_rate": 9.971915741677525e-06,
11476
+ "loss": 10.8882,
11477
+ "step": 16200
11478
+ },
11479
+ {
11480
+ "epoch": 0.7194013125412596,
11481
+ "grad_norm": 73.26065826416016,
11482
+ "learning_rate": 9.971898405715598e-06,
11483
+ "loss": 10.5628,
11484
+ "step": 16210
11485
+ },
11486
+ {
11487
+ "epoch": 0.7198451134743511,
11488
+ "grad_norm": 68.37783813476562,
11489
+ "learning_rate": 9.971881069753671e-06,
11490
+ "loss": 10.6096,
11491
+ "step": 16220
11492
+ },
11493
+ {
11494
+ "epoch": 0.7202889144074426,
11495
+ "grad_norm": 84.5615234375,
11496
+ "learning_rate": 9.971863733791743e-06,
11497
+ "loss": 10.8151,
11498
+ "step": 16230
11499
+ },
11500
+ {
11501
+ "epoch": 0.720732715340534,
11502
+ "grad_norm": 63.7740478515625,
11503
+ "learning_rate": 9.971846397829816e-06,
11504
+ "loss": 10.403,
11505
+ "step": 16240
11506
+ },
11507
+ {
11508
+ "epoch": 0.7211765162736254,
11509
+ "grad_norm": 70.54612731933594,
11510
+ "learning_rate": 9.971829061867889e-06,
11511
+ "loss": 10.0854,
11512
+ "step": 16250
11513
+ },
11514
+ {
11515
+ "epoch": 0.7216203172067169,
11516
+ "grad_norm": 63.2757682800293,
11517
+ "learning_rate": 9.97181172590596e-06,
11518
+ "loss": 10.6528,
11519
+ "step": 16260
11520
+ },
11521
+ {
11522
+ "epoch": 0.7220641181398084,
11523
+ "grad_norm": 71.35702514648438,
11524
+ "learning_rate": 9.971794389944033e-06,
11525
+ "loss": 10.4698,
11526
+ "step": 16270
11527
+ },
11528
+ {
11529
+ "epoch": 0.7225079190728998,
11530
+ "grad_norm": 66.68602752685547,
11531
+ "learning_rate": 9.971777053982106e-06,
11532
+ "loss": 10.593,
11533
+ "step": 16280
11534
+ },
11535
+ {
11536
+ "epoch": 0.7229517200059913,
11537
+ "grad_norm": 67.9871597290039,
11538
+ "learning_rate": 9.971759718020178e-06,
11539
+ "loss": 10.6789,
11540
+ "step": 16290
11541
+ },
11542
+ {
11543
+ "epoch": 0.7233955209390828,
11544
+ "grad_norm": 78.72682189941406,
11545
+ "learning_rate": 9.97174238205825e-06,
11546
+ "loss": 10.5447,
11547
+ "step": 16300
11548
+ },
11549
+ {
11550
+ "epoch": 0.7238393218721743,
11551
+ "grad_norm": 74.193359375,
11552
+ "learning_rate": 9.971725046096324e-06,
11553
+ "loss": 10.7683,
11554
+ "step": 16310
11555
+ },
11556
+ {
11557
+ "epoch": 0.7242831228052657,
11558
+ "grad_norm": 73.49239349365234,
11559
+ "learning_rate": 9.971707710134397e-06,
11560
+ "loss": 10.0631,
11561
+ "step": 16320
11562
+ },
11563
+ {
11564
+ "epoch": 0.7247269237383571,
11565
+ "grad_norm": 85.48190307617188,
11566
+ "learning_rate": 9.971690374172468e-06,
11567
+ "loss": 10.7823,
11568
+ "step": 16330
11569
+ },
11570
+ {
11571
+ "epoch": 0.7251707246714486,
11572
+ "grad_norm": 62.24036407470703,
11573
+ "learning_rate": 9.971673038210541e-06,
11574
+ "loss": 9.9273,
11575
+ "step": 16340
11576
+ },
11577
+ {
11578
+ "epoch": 0.7256145256045401,
11579
+ "grad_norm": 64.34358978271484,
11580
+ "learning_rate": 9.971655702248615e-06,
11581
+ "loss": 10.3995,
11582
+ "step": 16350
11583
+ },
11584
+ {
11585
+ "epoch": 0.7260583265376316,
11586
+ "grad_norm": 66.04435729980469,
11587
+ "learning_rate": 9.971638366286686e-06,
11588
+ "loss": 10.2451,
11589
+ "step": 16360
11590
+ },
11591
+ {
11592
+ "epoch": 0.726502127470723,
11593
+ "grad_norm": 64.41586303710938,
11594
+ "learning_rate": 9.971621030324759e-06,
11595
+ "loss": 10.8199,
11596
+ "step": 16370
11597
+ },
11598
+ {
11599
+ "epoch": 0.7269459284038144,
11600
+ "grad_norm": 69.1751937866211,
11601
+ "learning_rate": 9.971603694362832e-06,
11602
+ "loss": 10.4755,
11603
+ "step": 16380
11604
+ },
11605
+ {
11606
+ "epoch": 0.7273897293369059,
11607
+ "grad_norm": 66.9188003540039,
11608
+ "learning_rate": 9.971586358400903e-06,
11609
+ "loss": 10.3615,
11610
+ "step": 16390
11611
+ },
11612
+ {
11613
+ "epoch": 0.7278335302699974,
11614
+ "grad_norm": 75.37189483642578,
11615
+ "learning_rate": 9.971569022438977e-06,
11616
+ "loss": 11.1675,
11617
+ "step": 16400
11618
+ },
11619
+ {
11620
+ "epoch": 0.7282773312030889,
11621
+ "grad_norm": 64.27860260009766,
11622
+ "learning_rate": 9.97155168647705e-06,
11623
+ "loss": 10.897,
11624
+ "step": 16410
11625
+ },
11626
+ {
11627
+ "epoch": 0.7287211321361803,
11628
+ "grad_norm": 71.226806640625,
11629
+ "learning_rate": 9.971534350515123e-06,
11630
+ "loss": 10.5914,
11631
+ "step": 16420
11632
+ },
11633
+ {
11634
+ "epoch": 0.7291649330692718,
11635
+ "grad_norm": 65.22527313232422,
11636
+ "learning_rate": 9.971517014553194e-06,
11637
+ "loss": 10.7522,
11638
+ "step": 16430
11639
+ },
11640
+ {
11641
+ "epoch": 0.7296087340023633,
11642
+ "grad_norm": 70.96646118164062,
11643
+ "learning_rate": 9.971499678591267e-06,
11644
+ "loss": 10.519,
11645
+ "step": 16440
11646
+ },
11647
+ {
11648
+ "epoch": 0.7300525349354547,
11649
+ "grad_norm": 78.15453338623047,
11650
+ "learning_rate": 9.97148234262934e-06,
11651
+ "loss": 10.4566,
11652
+ "step": 16450
11653
+ },
11654
+ {
11655
+ "epoch": 0.7304963358685461,
11656
+ "grad_norm": 66.62492370605469,
11657
+ "learning_rate": 9.971465006667412e-06,
11658
+ "loss": 11.0907,
11659
+ "step": 16460
11660
+ },
11661
+ {
11662
+ "epoch": 0.7309401368016376,
11663
+ "grad_norm": 76.4091796875,
11664
+ "learning_rate": 9.971447670705485e-06,
11665
+ "loss": 10.673,
11666
+ "step": 16470
11667
+ },
11668
+ {
11669
+ "epoch": 0.7313839377347291,
11670
+ "grad_norm": 68.68971252441406,
11671
+ "learning_rate": 9.971430334743558e-06,
11672
+ "loss": 10.1921,
11673
+ "step": 16480
11674
+ },
11675
+ {
11676
+ "epoch": 0.7318277386678206,
11677
+ "grad_norm": 76.54867553710938,
11678
+ "learning_rate": 9.97141299878163e-06,
11679
+ "loss": 10.1928,
11680
+ "step": 16490
11681
+ },
11682
+ {
11683
+ "epoch": 0.732271539600912,
11684
+ "grad_norm": 72.19425964355469,
11685
+ "learning_rate": 9.971395662819702e-06,
11686
+ "loss": 10.8393,
11687
+ "step": 16500
11688
+ },
11689
+ {
11690
+ "epoch": 0.7327153405340034,
11691
+ "grad_norm": 68.68523406982422,
11692
+ "learning_rate": 9.971378326857775e-06,
11693
+ "loss": 10.8051,
11694
+ "step": 16510
11695
+ },
11696
+ {
11697
+ "epoch": 0.7331591414670949,
11698
+ "grad_norm": 73.1769027709961,
11699
+ "learning_rate": 9.971360990895847e-06,
11700
+ "loss": 10.6388,
11701
+ "step": 16520
11702
+ },
11703
+ {
11704
+ "epoch": 0.7336029424001864,
11705
+ "grad_norm": 61.5041618347168,
11706
+ "learning_rate": 9.97134365493392e-06,
11707
+ "loss": 10.3324,
11708
+ "step": 16530
11709
+ },
11710
+ {
11711
+ "epoch": 0.7340467433332779,
11712
+ "grad_norm": 64.92015838623047,
11713
+ "learning_rate": 9.971326318971993e-06,
11714
+ "loss": 10.3545,
11715
+ "step": 16540
11716
+ },
11717
+ {
11718
+ "epoch": 0.7344905442663693,
11719
+ "grad_norm": 70.95281982421875,
11720
+ "learning_rate": 9.971308983010066e-06,
11721
+ "loss": 10.4945,
11722
+ "step": 16550
11723
+ },
11724
+ {
11725
+ "epoch": 0.7349343451994608,
11726
+ "grad_norm": 62.154808044433594,
11727
+ "learning_rate": 9.971291647048137e-06,
11728
+ "loss": 10.4194,
11729
+ "step": 16560
11730
+ },
11731
+ {
11732
+ "epoch": 0.7353781461325523,
11733
+ "grad_norm": 63.85173416137695,
11734
+ "learning_rate": 9.97127431108621e-06,
11735
+ "loss": 10.6319,
11736
+ "step": 16570
11737
+ },
11738
+ {
11739
+ "epoch": 0.7358219470656437,
11740
+ "grad_norm": 87.90074157714844,
11741
+ "learning_rate": 9.971256975124283e-06,
11742
+ "loss": 10.2764,
11743
+ "step": 16580
11744
+ },
11745
+ {
11746
+ "epoch": 0.7362657479987351,
11747
+ "grad_norm": 68.94241333007812,
11748
+ "learning_rate": 9.971239639162355e-06,
11749
+ "loss": 10.6806,
11750
+ "step": 16590
11751
+ },
11752
+ {
11753
+ "epoch": 0.7367095489318266,
11754
+ "grad_norm": 74.24497985839844,
11755
+ "learning_rate": 9.971222303200428e-06,
11756
+ "loss": 10.0719,
11757
+ "step": 16600
11758
+ },
11759
+ {
11760
+ "epoch": 0.7371533498649181,
11761
+ "grad_norm": 70.3670654296875,
11762
+ "learning_rate": 9.971204967238501e-06,
11763
+ "loss": 10.5849,
11764
+ "step": 16610
11765
+ },
11766
+ {
11767
+ "epoch": 0.7375971507980096,
11768
+ "grad_norm": 69.45691680908203,
11769
+ "learning_rate": 9.971187631276572e-06,
11770
+ "loss": 10.4697,
11771
+ "step": 16620
11772
+ },
11773
+ {
11774
+ "epoch": 0.7380409517311011,
11775
+ "grad_norm": 65.4955062866211,
11776
+ "learning_rate": 9.971170295314645e-06,
11777
+ "loss": 10.3592,
11778
+ "step": 16630
11779
+ },
11780
+ {
11781
+ "epoch": 0.7384847526641924,
11782
+ "grad_norm": 79.28197479248047,
11783
+ "learning_rate": 9.971152959352719e-06,
11784
+ "loss": 10.2718,
11785
+ "step": 16640
11786
+ },
11787
+ {
11788
+ "epoch": 0.7389285535972839,
11789
+ "grad_norm": 57.633644104003906,
11790
+ "learning_rate": 9.97113562339079e-06,
11791
+ "loss": 10.4478,
11792
+ "step": 16650
11793
+ },
11794
+ {
11795
+ "epoch": 0.7393723545303754,
11796
+ "grad_norm": 58.49510955810547,
11797
+ "learning_rate": 9.971118287428863e-06,
11798
+ "loss": 10.6408,
11799
+ "step": 16660
11800
+ },
11801
+ {
11802
+ "epoch": 0.7398161554634669,
11803
+ "grad_norm": 68.87074279785156,
11804
+ "learning_rate": 9.971100951466936e-06,
11805
+ "loss": 10.4892,
11806
+ "step": 16670
11807
+ },
11808
+ {
11809
+ "epoch": 0.7402599563965583,
11810
+ "grad_norm": 83.9663314819336,
11811
+ "learning_rate": 9.971083615505007e-06,
11812
+ "loss": 10.6967,
11813
+ "step": 16680
11814
+ },
11815
+ {
11816
+ "epoch": 0.7407037573296498,
11817
+ "grad_norm": 66.5399398803711,
11818
+ "learning_rate": 9.97106627954308e-06,
11819
+ "loss": 10.2476,
11820
+ "step": 16690
11821
+ },
11822
+ {
11823
+ "epoch": 0.7411475582627413,
11824
+ "grad_norm": 71.39287567138672,
11825
+ "learning_rate": 9.971048943581154e-06,
11826
+ "loss": 10.4425,
11827
+ "step": 16700
11828
+ },
11829
+ {
11830
+ "epoch": 0.7415913591958327,
11831
+ "grad_norm": 67.80374145507812,
11832
+ "learning_rate": 9.971031607619227e-06,
11833
+ "loss": 10.8299,
11834
+ "step": 16710
11835
+ },
11836
+ {
11837
+ "epoch": 0.7420351601289241,
11838
+ "grad_norm": 63.548011779785156,
11839
+ "learning_rate": 9.971014271657298e-06,
11840
+ "loss": 10.5723,
11841
+ "step": 16720
11842
+ },
11843
+ {
11844
+ "epoch": 0.7424789610620156,
11845
+ "grad_norm": 66.04682922363281,
11846
+ "learning_rate": 9.970996935695371e-06,
11847
+ "loss": 10.3872,
11848
+ "step": 16730
11849
+ },
11850
+ {
11851
+ "epoch": 0.7429227619951071,
11852
+ "grad_norm": 69.4638900756836,
11853
+ "learning_rate": 9.970979599733444e-06,
11854
+ "loss": 11.023,
11855
+ "step": 16740
11856
+ },
11857
+ {
11858
+ "epoch": 0.7433665629281986,
11859
+ "grad_norm": 61.98347473144531,
11860
+ "learning_rate": 9.970962263771516e-06,
11861
+ "loss": 10.1672,
11862
+ "step": 16750
11863
+ },
11864
+ {
11865
+ "epoch": 0.7438103638612901,
11866
+ "grad_norm": 75.08468627929688,
11867
+ "learning_rate": 9.970944927809589e-06,
11868
+ "loss": 10.4586,
11869
+ "step": 16760
11870
+ },
11871
+ {
11872
+ "epoch": 0.7442541647943814,
11873
+ "grad_norm": 58.552120208740234,
11874
+ "learning_rate": 9.970927591847662e-06,
11875
+ "loss": 10.6722,
11876
+ "step": 16770
11877
+ },
11878
+ {
11879
+ "epoch": 0.7446979657274729,
11880
+ "grad_norm": 70.47040557861328,
11881
+ "learning_rate": 9.970910255885733e-06,
11882
+ "loss": 10.1118,
11883
+ "step": 16780
11884
+ },
11885
+ {
11886
+ "epoch": 0.7451417666605644,
11887
+ "grad_norm": 75.66903686523438,
11888
+ "learning_rate": 9.970892919923806e-06,
11889
+ "loss": 10.5813,
11890
+ "step": 16790
11891
+ },
11892
+ {
11893
+ "epoch": 0.7455855675936559,
11894
+ "grad_norm": 79.39729309082031,
11895
+ "learning_rate": 9.97087558396188e-06,
11896
+ "loss": 10.3935,
11897
+ "step": 16800
11898
+ },
11899
+ {
11900
+ "epoch": 0.7460293685267473,
11901
+ "grad_norm": 69.61038970947266,
11902
+ "learning_rate": 9.97085824799995e-06,
11903
+ "loss": 10.3304,
11904
+ "step": 16810
11905
+ },
11906
+ {
11907
+ "epoch": 0.7464731694598388,
11908
+ "grad_norm": 70.38590240478516,
11909
+ "learning_rate": 9.970840912038024e-06,
11910
+ "loss": 10.5146,
11911
+ "step": 16820
11912
+ },
11913
+ {
11914
+ "epoch": 0.7469169703929303,
11915
+ "grad_norm": 83.62046813964844,
11916
+ "learning_rate": 9.970823576076097e-06,
11917
+ "loss": 10.7194,
11918
+ "step": 16830
11919
+ },
11920
+ {
11921
+ "epoch": 0.7473607713260217,
11922
+ "grad_norm": 69.001708984375,
11923
+ "learning_rate": 9.970806240114168e-06,
11924
+ "loss": 10.6674,
11925
+ "step": 16840
11926
+ },
11927
+ {
11928
+ "epoch": 0.7478045722591131,
11929
+ "grad_norm": 65.15716552734375,
11930
+ "learning_rate": 9.970788904152241e-06,
11931
+ "loss": 10.6923,
11932
+ "step": 16850
11933
+ },
11934
+ {
11935
+ "epoch": 0.7482483731922046,
11936
+ "grad_norm": 74.37089538574219,
11937
+ "learning_rate": 9.970771568190314e-06,
11938
+ "loss": 10.7676,
11939
+ "step": 16860
11940
+ },
11941
+ {
11942
+ "epoch": 0.7486921741252961,
11943
+ "grad_norm": 64.5125732421875,
11944
+ "learning_rate": 9.970754232228386e-06,
11945
+ "loss": 10.2827,
11946
+ "step": 16870
11947
+ },
11948
+ {
11949
+ "epoch": 0.7491359750583876,
11950
+ "grad_norm": 69.91007232666016,
11951
+ "learning_rate": 9.970736896266459e-06,
11952
+ "loss": 10.6307,
11953
+ "step": 16880
11954
+ },
11955
+ {
11956
+ "epoch": 0.7495797759914791,
11957
+ "grad_norm": 59.29727554321289,
11958
+ "learning_rate": 9.970719560304532e-06,
11959
+ "loss": 10.4378,
11960
+ "step": 16890
11961
+ },
11962
+ {
11963
+ "epoch": 0.7500235769245704,
11964
+ "grad_norm": 90.56709289550781,
11965
+ "learning_rate": 9.970702224342603e-06,
11966
+ "loss": 10.3348,
11967
+ "step": 16900
11968
+ },
11969
+ {
11970
+ "epoch": 0.7504673778576619,
11971
+ "grad_norm": 70.33431243896484,
11972
+ "learning_rate": 9.970684888380676e-06,
11973
+ "loss": 10.3737,
11974
+ "step": 16910
11975
+ },
11976
+ {
11977
+ "epoch": 0.7509111787907534,
11978
+ "grad_norm": 62.954490661621094,
11979
+ "learning_rate": 9.97066755241875e-06,
11980
+ "loss": 10.6825,
11981
+ "step": 16920
11982
+ },
11983
+ {
11984
+ "epoch": 0.7513549797238449,
11985
+ "grad_norm": 64.03510284423828,
11986
+ "learning_rate": 9.970650216456823e-06,
11987
+ "loss": 10.4315,
11988
+ "step": 16930
11989
+ },
11990
+ {
11991
+ "epoch": 0.7517987806569363,
11992
+ "grad_norm": 61.26763153076172,
11993
+ "learning_rate": 9.970632880494894e-06,
11994
+ "loss": 9.9209,
11995
+ "step": 16940
11996
+ },
11997
+ {
11998
+ "epoch": 0.7522425815900278,
11999
+ "grad_norm": 64.45995330810547,
12000
+ "learning_rate": 9.970615544532967e-06,
12001
+ "loss": 10.4647,
12002
+ "step": 16950
12003
+ },
12004
+ {
12005
+ "epoch": 0.7526863825231193,
12006
+ "grad_norm": 71.07040405273438,
12007
+ "learning_rate": 9.97059820857104e-06,
12008
+ "loss": 10.4505,
12009
+ "step": 16960
12010
+ },
12011
+ {
12012
+ "epoch": 0.7531301834562107,
12013
+ "grad_norm": 61.676551818847656,
12014
+ "learning_rate": 9.970580872609112e-06,
12015
+ "loss": 10.1759,
12016
+ "step": 16970
12017
+ },
12018
+ {
12019
+ "epoch": 0.7535739843893022,
12020
+ "grad_norm": 76.77957916259766,
12021
+ "learning_rate": 9.970563536647185e-06,
12022
+ "loss": 10.5416,
12023
+ "step": 16980
12024
+ },
12025
+ {
12026
+ "epoch": 0.7540177853223936,
12027
+ "grad_norm": 83.22810363769531,
12028
+ "learning_rate": 9.970546200685258e-06,
12029
+ "loss": 10.2076,
12030
+ "step": 16990
12031
+ },
12032
+ {
12033
+ "epoch": 0.7544615862554851,
12034
+ "grad_norm": 68.98297882080078,
12035
+ "learning_rate": 9.970528864723329e-06,
12036
+ "loss": 10.4059,
12037
+ "step": 17000
12038
+ },
12039
+ {
12040
+ "epoch": 0.7544615862554851,
12041
+ "eval_loss": 0.3286471664905548,
12042
+ "eval_runtime": 678.3037,
12043
+ "eval_samples_per_second": 1790.335,
12044
+ "eval_steps_per_second": 55.948,
12045
+ "step": 17000
12046
+ },
12047
+ {
12048
+ "epoch": 0.7549053871885766,
12049
+ "grad_norm": 61.05122756958008,
12050
+ "learning_rate": 9.970511528761402e-06,
12051
+ "loss": 10.329,
12052
+ "step": 17010
12053
+ },
12054
+ {
12055
+ "epoch": 0.7553491881216681,
12056
+ "grad_norm": 69.60665893554688,
12057
+ "learning_rate": 9.970494192799475e-06,
12058
+ "loss": 10.3436,
12059
+ "step": 17020
12060
+ },
12061
+ {
12062
+ "epoch": 0.7557929890547594,
12063
+ "grad_norm": 70.77852630615234,
12064
+ "learning_rate": 9.970476856837547e-06,
12065
+ "loss": 10.8488,
12066
+ "step": 17030
12067
+ },
12068
+ {
12069
+ "epoch": 0.7562367899878509,
12070
+ "grad_norm": 71.26077270507812,
12071
+ "learning_rate": 9.97045952087562e-06,
12072
+ "loss": 10.4241,
12073
+ "step": 17040
12074
+ },
12075
+ {
12076
+ "epoch": 0.7566805909209424,
12077
+ "grad_norm": 95.98668670654297,
12078
+ "learning_rate": 9.970442184913693e-06,
12079
+ "loss": 10.1583,
12080
+ "step": 17050
12081
+ },
12082
+ {
12083
+ "epoch": 0.7571243918540339,
12084
+ "grad_norm": 77.86578369140625,
12085
+ "learning_rate": 9.970424848951764e-06,
12086
+ "loss": 10.8454,
12087
+ "step": 17060
12088
+ },
12089
+ {
12090
+ "epoch": 0.7575681927871253,
12091
+ "grad_norm": 81.84298706054688,
12092
+ "learning_rate": 9.970407512989837e-06,
12093
+ "loss": 10.523,
12094
+ "step": 17070
12095
+ },
12096
+ {
12097
+ "epoch": 0.7580119937202168,
12098
+ "grad_norm": 71.15061950683594,
12099
+ "learning_rate": 9.97039017702791e-06,
12100
+ "loss": 10.206,
12101
+ "step": 17080
12102
+ },
12103
+ {
12104
+ "epoch": 0.7584557946533083,
12105
+ "grad_norm": 65.26417541503906,
12106
+ "learning_rate": 9.970372841065982e-06,
12107
+ "loss": 10.8191,
12108
+ "step": 17090
12109
+ },
12110
+ {
12111
+ "epoch": 0.7588995955863997,
12112
+ "grad_norm": 76.11380767822266,
12113
+ "learning_rate": 9.970355505104055e-06,
12114
+ "loss": 10.6828,
12115
+ "step": 17100
12116
+ },
12117
+ {
12118
+ "epoch": 0.7593433965194912,
12119
+ "grad_norm": 82.5249252319336,
12120
+ "learning_rate": 9.970338169142128e-06,
12121
+ "loss": 10.5496,
12122
+ "step": 17110
12123
+ },
12124
+ {
12125
+ "epoch": 0.7597871974525826,
12126
+ "grad_norm": 66.36536407470703,
12127
+ "learning_rate": 9.9703208331802e-06,
12128
+ "loss": 10.0697,
12129
+ "step": 17120
12130
+ },
12131
+ {
12132
+ "epoch": 0.7602309983856741,
12133
+ "grad_norm": 61.46723556518555,
12134
+ "learning_rate": 9.970303497218272e-06,
12135
+ "loss": 10.5154,
12136
+ "step": 17130
12137
+ },
12138
+ {
12139
+ "epoch": 0.7606747993187656,
12140
+ "grad_norm": 65.16950225830078,
12141
+ "learning_rate": 9.970286161256345e-06,
12142
+ "loss": 10.5957,
12143
+ "step": 17140
12144
+ },
12145
+ {
12146
+ "epoch": 0.7611186002518571,
12147
+ "grad_norm": 74.29840850830078,
12148
+ "learning_rate": 9.970268825294418e-06,
12149
+ "loss": 10.9534,
12150
+ "step": 17150
12151
+ },
12152
+ {
12153
+ "epoch": 0.7615624011849484,
12154
+ "grad_norm": 63.95246887207031,
12155
+ "learning_rate": 9.97025148933249e-06,
12156
+ "loss": 10.559,
12157
+ "step": 17160
12158
+ },
12159
+ {
12160
+ "epoch": 0.7620062021180399,
12161
+ "grad_norm": 70.78131866455078,
12162
+ "learning_rate": 9.970234153370563e-06,
12163
+ "loss": 10.4259,
12164
+ "step": 17170
12165
+ },
12166
+ {
12167
+ "epoch": 0.7624500030511314,
12168
+ "grad_norm": 68.86581420898438,
12169
+ "learning_rate": 9.970216817408636e-06,
12170
+ "loss": 10.5729,
12171
+ "step": 17180
12172
+ },
12173
+ {
12174
+ "epoch": 0.7628938039842229,
12175
+ "grad_norm": 69.18133544921875,
12176
+ "learning_rate": 9.970199481446707e-06,
12177
+ "loss": 10.4368,
12178
+ "step": 17190
12179
+ },
12180
+ {
12181
+ "epoch": 0.7633376049173144,
12182
+ "grad_norm": 63.52793502807617,
12183
+ "learning_rate": 9.97018214548478e-06,
12184
+ "loss": 10.4153,
12185
+ "step": 17200
12186
+ },
12187
+ {
12188
+ "epoch": 0.7637814058504058,
12189
+ "grad_norm": 72.20518493652344,
12190
+ "learning_rate": 9.970164809522854e-06,
12191
+ "loss": 10.7712,
12192
+ "step": 17210
12193
+ },
12194
+ {
12195
+ "epoch": 0.7642252067834973,
12196
+ "grad_norm": 71.09992218017578,
12197
+ "learning_rate": 9.970147473560925e-06,
12198
+ "loss": 10.4743,
12199
+ "step": 17220
12200
+ },
12201
+ {
12202
+ "epoch": 0.7646690077165887,
12203
+ "grad_norm": 73.4151611328125,
12204
+ "learning_rate": 9.970130137598998e-06,
12205
+ "loss": 10.7,
12206
+ "step": 17230
12207
+ },
12208
+ {
12209
+ "epoch": 0.7651128086496802,
12210
+ "grad_norm": 59.74842834472656,
12211
+ "learning_rate": 9.970112801637071e-06,
12212
+ "loss": 10.5612,
12213
+ "step": 17240
12214
+ },
12215
+ {
12216
+ "epoch": 0.7655566095827716,
12217
+ "grad_norm": 67.19086456298828,
12218
+ "learning_rate": 9.970095465675142e-06,
12219
+ "loss": 10.3241,
12220
+ "step": 17250
12221
+ },
12222
+ {
12223
+ "epoch": 0.7660004105158631,
12224
+ "grad_norm": 80.37249755859375,
12225
+ "learning_rate": 9.970078129713216e-06,
12226
+ "loss": 10.8193,
12227
+ "step": 17260
12228
+ },
12229
+ {
12230
+ "epoch": 0.7664442114489546,
12231
+ "grad_norm": 64.05519104003906,
12232
+ "learning_rate": 9.970060793751289e-06,
12233
+ "loss": 10.5312,
12234
+ "step": 17270
12235
+ },
12236
+ {
12237
+ "epoch": 0.7668880123820461,
12238
+ "grad_norm": 63.82759475708008,
12239
+ "learning_rate": 9.97004345778936e-06,
12240
+ "loss": 10.2693,
12241
+ "step": 17280
12242
+ },
12243
+ {
12244
+ "epoch": 0.7673318133151374,
12245
+ "grad_norm": 74.00315856933594,
12246
+ "learning_rate": 9.970026121827433e-06,
12247
+ "loss": 10.5823,
12248
+ "step": 17290
12249
+ },
12250
+ {
12251
+ "epoch": 0.7677756142482289,
12252
+ "grad_norm": 72.47602844238281,
12253
+ "learning_rate": 9.970008785865506e-06,
12254
+ "loss": 10.3111,
12255
+ "step": 17300
12256
+ },
12257
+ {
12258
+ "epoch": 0.7682194151813204,
12259
+ "grad_norm": 59.534305572509766,
12260
+ "learning_rate": 9.969991449903578e-06,
12261
+ "loss": 10.7712,
12262
+ "step": 17310
12263
+ },
12264
+ {
12265
+ "epoch": 0.7686632161144119,
12266
+ "grad_norm": 70.15426635742188,
12267
+ "learning_rate": 9.96997411394165e-06,
12268
+ "loss": 10.3361,
12269
+ "step": 17320
12270
+ },
12271
+ {
12272
+ "epoch": 0.7691070170475034,
12273
+ "grad_norm": 60.14602279663086,
12274
+ "learning_rate": 9.969956777979724e-06,
12275
+ "loss": 10.8061,
12276
+ "step": 17330
12277
+ },
12278
+ {
12279
+ "epoch": 0.7695508179805948,
12280
+ "grad_norm": 64.0710678100586,
12281
+ "learning_rate": 9.969939442017795e-06,
12282
+ "loss": 9.9819,
12283
+ "step": 17340
12284
+ },
12285
+ {
12286
+ "epoch": 0.7699946189136863,
12287
+ "grad_norm": 76.11770629882812,
12288
+ "learning_rate": 9.969922106055868e-06,
12289
+ "loss": 10.2555,
12290
+ "step": 17350
12291
+ },
12292
+ {
12293
+ "epoch": 0.7704384198467777,
12294
+ "grad_norm": 68.8619613647461,
12295
+ "learning_rate": 9.969904770093941e-06,
12296
+ "loss": 10.5506,
12297
+ "step": 17360
12298
+ },
12299
+ {
12300
+ "epoch": 0.7708822207798692,
12301
+ "grad_norm": 66.60945129394531,
12302
+ "learning_rate": 9.969887434132014e-06,
12303
+ "loss": 10.3023,
12304
+ "step": 17370
12305
+ },
12306
+ {
12307
+ "epoch": 0.7713260217129606,
12308
+ "grad_norm": 66.75738525390625,
12309
+ "learning_rate": 9.969870098170086e-06,
12310
+ "loss": 10.126,
12311
+ "step": 17380
12312
+ },
12313
+ {
12314
+ "epoch": 0.7717698226460521,
12315
+ "grad_norm": 65.49826049804688,
12316
+ "learning_rate": 9.969852762208159e-06,
12317
+ "loss": 10.6585,
12318
+ "step": 17390
12319
+ },
12320
+ {
12321
+ "epoch": 0.7722136235791436,
12322
+ "grad_norm": 65.2136001586914,
12323
+ "learning_rate": 9.969835426246232e-06,
12324
+ "loss": 10.4105,
12325
+ "step": 17400
12326
+ },
12327
+ {
12328
+ "epoch": 0.7726574245122351,
12329
+ "grad_norm": 62.805213928222656,
12330
+ "learning_rate": 9.969818090284303e-06,
12331
+ "loss": 10.1974,
12332
+ "step": 17410
12333
+ },
12334
+ {
12335
+ "epoch": 0.7731012254453264,
12336
+ "grad_norm": 66.39070892333984,
12337
+ "learning_rate": 9.969800754322376e-06,
12338
+ "loss": 10.7106,
12339
+ "step": 17420
12340
+ },
12341
+ {
12342
+ "epoch": 0.7735450263784179,
12343
+ "grad_norm": 70.36665344238281,
12344
+ "learning_rate": 9.96978341836045e-06,
12345
+ "loss": 10.2655,
12346
+ "step": 17430
12347
+ },
12348
+ {
12349
+ "epoch": 0.7739888273115094,
12350
+ "grad_norm": 62.32572937011719,
12351
+ "learning_rate": 9.96976608239852e-06,
12352
+ "loss": 10.2899,
12353
+ "step": 17440
12354
+ },
12355
+ {
12356
+ "epoch": 0.7744326282446009,
12357
+ "grad_norm": 64.87157440185547,
12358
+ "learning_rate": 9.969748746436594e-06,
12359
+ "loss": 10.2878,
12360
+ "step": 17450
12361
+ },
12362
+ {
12363
+ "epoch": 0.7748764291776924,
12364
+ "grad_norm": 65.008544921875,
12365
+ "learning_rate": 9.969731410474667e-06,
12366
+ "loss": 10.5488,
12367
+ "step": 17460
12368
+ },
12369
+ {
12370
+ "epoch": 0.7753202301107838,
12371
+ "grad_norm": 62.856143951416016,
12372
+ "learning_rate": 9.969714074512738e-06,
12373
+ "loss": 10.3436,
12374
+ "step": 17470
12375
+ },
12376
+ {
12377
+ "epoch": 0.7757640310438753,
12378
+ "grad_norm": 77.94728088378906,
12379
+ "learning_rate": 9.969696738550811e-06,
12380
+ "loss": 10.7276,
12381
+ "step": 17480
12382
+ },
12383
+ {
12384
+ "epoch": 0.7762078319769667,
12385
+ "grad_norm": 59.522884368896484,
12386
+ "learning_rate": 9.969679402588884e-06,
12387
+ "loss": 10.267,
12388
+ "step": 17490
12389
+ },
12390
+ {
12391
+ "epoch": 0.7766516329100582,
12392
+ "grad_norm": 65.48174285888672,
12393
+ "learning_rate": 9.969662066626956e-06,
12394
+ "loss": 10.1914,
12395
+ "step": 17500
12396
  }
12397
  ],
12398
  "logging_steps": 10,
 
12412
  "attributes": {}
12413
  }
12414
  },
12415
+ "total_flos": 6.107015608795136e+18,
12416
  "train_batch_size": 4,
12417
  "trial_name": null,
12418
  "trial_params": null