{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1806,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0005537098560354374,
"grad_norm": 5.9498114585876465,
"learning_rate": 5.524861878453039e-08,
"loss": 0.862,
"step": 1
},
{
"epoch": 0.0011074197120708748,
"grad_norm": 5.8652262687683105,
"learning_rate": 1.1049723756906078e-07,
"loss": 0.8523,
"step": 2
},
{
"epoch": 0.0016611295681063123,
"grad_norm": 5.964611053466797,
"learning_rate": 1.6574585635359117e-07,
"loss": 0.8463,
"step": 3
},
{
"epoch": 0.0022148394241417496,
"grad_norm": 6.093450546264648,
"learning_rate": 2.2099447513812156e-07,
"loss": 0.8815,
"step": 4
},
{
"epoch": 0.0027685492801771874,
"grad_norm": 5.786026477813721,
"learning_rate": 2.7624309392265196e-07,
"loss": 0.8466,
"step": 5
},
{
"epoch": 0.0033222591362126247,
"grad_norm": 6.09655237197876,
"learning_rate": 3.3149171270718233e-07,
"loss": 0.8834,
"step": 6
},
{
"epoch": 0.003875968992248062,
"grad_norm": 6.1001691818237305,
"learning_rate": 3.867403314917127e-07,
"loss": 0.8818,
"step": 7
},
{
"epoch": 0.004429678848283499,
"grad_norm": 5.940769672393799,
"learning_rate": 4.419889502762431e-07,
"loss": 0.8718,
"step": 8
},
{
"epoch": 0.0049833887043189366,
"grad_norm": 6.211287021636963,
"learning_rate": 4.972375690607735e-07,
"loss": 0.9087,
"step": 9
},
{
"epoch": 0.005537098560354375,
"grad_norm": 5.576071262359619,
"learning_rate": 5.524861878453039e-07,
"loss": 0.8313,
"step": 10
},
{
"epoch": 0.006090808416389812,
"grad_norm": 5.358067512512207,
"learning_rate": 6.077348066298343e-07,
"loss": 0.8397,
"step": 11
},
{
"epoch": 0.006644518272425249,
"grad_norm": 5.2589569091796875,
"learning_rate": 6.629834254143647e-07,
"loss": 0.8471,
"step": 12
},
{
"epoch": 0.007198228128460687,
"grad_norm": 5.519755840301514,
"learning_rate": 7.18232044198895e-07,
"loss": 0.8814,
"step": 13
},
{
"epoch": 0.007751937984496124,
"grad_norm": 4.506252765655518,
"learning_rate": 7.734806629834254e-07,
"loss": 0.8166,
"step": 14
},
{
"epoch": 0.008305647840531562,
"grad_norm": 4.339894771575928,
"learning_rate": 8.287292817679559e-07,
"loss": 0.8434,
"step": 15
},
{
"epoch": 0.008859357696566999,
"grad_norm": 4.21922492980957,
"learning_rate": 8.839779005524863e-07,
"loss": 0.8234,
"step": 16
},
{
"epoch": 0.009413067552602437,
"grad_norm": 4.036139011383057,
"learning_rate": 9.392265193370166e-07,
"loss": 0.8213,
"step": 17
},
{
"epoch": 0.009966777408637873,
"grad_norm": 3.8818323612213135,
"learning_rate": 9.94475138121547e-07,
"loss": 0.8183,
"step": 18
},
{
"epoch": 0.010520487264673311,
"grad_norm": 2.2787609100341797,
"learning_rate": 1.0497237569060774e-06,
"loss": 0.8084,
"step": 19
},
{
"epoch": 0.01107419712070875,
"grad_norm": 2.223111867904663,
"learning_rate": 1.1049723756906078e-06,
"loss": 0.7873,
"step": 20
},
{
"epoch": 0.011627906976744186,
"grad_norm": 2.168884515762329,
"learning_rate": 1.160220994475138e-06,
"loss": 0.7801,
"step": 21
},
{
"epoch": 0.012181616832779624,
"grad_norm": 1.910096287727356,
"learning_rate": 1.2154696132596686e-06,
"loss": 0.7352,
"step": 22
},
{
"epoch": 0.01273532668881506,
"grad_norm": 1.8659913539886475,
"learning_rate": 1.270718232044199e-06,
"loss": 0.7379,
"step": 23
},
{
"epoch": 0.013289036544850499,
"grad_norm": 1.8087621927261353,
"learning_rate": 1.3259668508287293e-06,
"loss": 0.752,
"step": 24
},
{
"epoch": 0.013842746400885935,
"grad_norm": 1.5981807708740234,
"learning_rate": 1.3812154696132598e-06,
"loss": 0.731,
"step": 25
},
{
"epoch": 0.014396456256921373,
"grad_norm": 2.118313789367676,
"learning_rate": 1.43646408839779e-06,
"loss": 0.69,
"step": 26
},
{
"epoch": 0.014950166112956811,
"grad_norm": 3.023369789123535,
"learning_rate": 1.4917127071823205e-06,
"loss": 0.7747,
"step": 27
},
{
"epoch": 0.015503875968992248,
"grad_norm": 2.6562323570251465,
"learning_rate": 1.5469613259668508e-06,
"loss": 0.7159,
"step": 28
},
{
"epoch": 0.016057585825027684,
"grad_norm": 2.647136926651001,
"learning_rate": 1.6022099447513815e-06,
"loss": 0.7343,
"step": 29
},
{
"epoch": 0.016611295681063124,
"grad_norm": 2.517387866973877,
"learning_rate": 1.6574585635359118e-06,
"loss": 0.7349,
"step": 30
},
{
"epoch": 0.01716500553709856,
"grad_norm": 2.214327335357666,
"learning_rate": 1.7127071823204422e-06,
"loss": 0.6861,
"step": 31
},
{
"epoch": 0.017718715393133997,
"grad_norm": 2.009155511856079,
"learning_rate": 1.7679558011049725e-06,
"loss": 0.7274,
"step": 32
},
{
"epoch": 0.018272425249169437,
"grad_norm": 1.5480848550796509,
"learning_rate": 1.823204419889503e-06,
"loss": 0.7111,
"step": 33
},
{
"epoch": 0.018826135105204873,
"grad_norm": 1.0842509269714355,
"learning_rate": 1.8784530386740332e-06,
"loss": 0.7029,
"step": 34
},
{
"epoch": 0.01937984496124031,
"grad_norm": 0.9915339350700378,
"learning_rate": 1.933701657458564e-06,
"loss": 0.6761,
"step": 35
},
{
"epoch": 0.019933554817275746,
"grad_norm": 1.0703707933425903,
"learning_rate": 1.988950276243094e-06,
"loss": 0.6833,
"step": 36
},
{
"epoch": 0.020487264673311186,
"grad_norm": 1.0729620456695557,
"learning_rate": 2.0441988950276245e-06,
"loss": 0.6164,
"step": 37
},
{
"epoch": 0.021040974529346623,
"grad_norm": 1.251114010810852,
"learning_rate": 2.0994475138121547e-06,
"loss": 0.6802,
"step": 38
},
{
"epoch": 0.02159468438538206,
"grad_norm": 0.9531381726264954,
"learning_rate": 2.1546961325966854e-06,
"loss": 0.6638,
"step": 39
},
{
"epoch": 0.0221483942414175,
"grad_norm": 0.8726658225059509,
"learning_rate": 2.2099447513812157e-06,
"loss": 0.641,
"step": 40
},
{
"epoch": 0.022702104097452935,
"grad_norm": 0.8520472049713135,
"learning_rate": 2.265193370165746e-06,
"loss": 0.663,
"step": 41
},
{
"epoch": 0.023255813953488372,
"grad_norm": 0.9407281279563904,
"learning_rate": 2.320441988950276e-06,
"loss": 0.6627,
"step": 42
},
{
"epoch": 0.023809523809523808,
"grad_norm": 0.7615970373153687,
"learning_rate": 2.375690607734807e-06,
"loss": 0.6401,
"step": 43
},
{
"epoch": 0.024363233665559248,
"grad_norm": 0.7133028507232666,
"learning_rate": 2.430939226519337e-06,
"loss": 0.6575,
"step": 44
},
{
"epoch": 0.024916943521594685,
"grad_norm": 0.7690572142601013,
"learning_rate": 2.486187845303868e-06,
"loss": 0.6733,
"step": 45
},
{
"epoch": 0.02547065337763012,
"grad_norm": 0.705028235912323,
"learning_rate": 2.541436464088398e-06,
"loss": 0.6162,
"step": 46
},
{
"epoch": 0.02602436323366556,
"grad_norm": 0.7950240969657898,
"learning_rate": 2.5966850828729284e-06,
"loss": 0.656,
"step": 47
},
{
"epoch": 0.026578073089700997,
"grad_norm": 0.7641728520393372,
"learning_rate": 2.6519337016574586e-06,
"loss": 0.6353,
"step": 48
},
{
"epoch": 0.027131782945736434,
"grad_norm": 0.7284824252128601,
"learning_rate": 2.707182320441989e-06,
"loss": 0.6285,
"step": 49
},
{
"epoch": 0.02768549280177187,
"grad_norm": 0.5608909726142883,
"learning_rate": 2.7624309392265196e-06,
"loss": 0.6057,
"step": 50
},
{
"epoch": 0.02823920265780731,
"grad_norm": 0.5988432168960571,
"learning_rate": 2.81767955801105e-06,
"loss": 0.6032,
"step": 51
},
{
"epoch": 0.028792912513842746,
"grad_norm": 0.7558192610740662,
"learning_rate": 2.87292817679558e-06,
"loss": 0.5942,
"step": 52
},
{
"epoch": 0.029346622369878183,
"grad_norm": 0.7168074250221252,
"learning_rate": 2.9281767955801104e-06,
"loss": 0.5914,
"step": 53
},
{
"epoch": 0.029900332225913623,
"grad_norm": 0.7384585738182068,
"learning_rate": 2.983425414364641e-06,
"loss": 0.6502,
"step": 54
},
{
"epoch": 0.03045404208194906,
"grad_norm": 0.5286744832992554,
"learning_rate": 3.0386740331491713e-06,
"loss": 0.5973,
"step": 55
},
{
"epoch": 0.031007751937984496,
"grad_norm": 0.6040074229240417,
"learning_rate": 3.0939226519337016e-06,
"loss": 0.6212,
"step": 56
},
{
"epoch": 0.03156146179401993,
"grad_norm": 0.6607239842414856,
"learning_rate": 3.149171270718232e-06,
"loss": 0.5962,
"step": 57
},
{
"epoch": 0.03211517165005537,
"grad_norm": 0.5292708873748779,
"learning_rate": 3.204419889502763e-06,
"loss": 0.5785,
"step": 58
},
{
"epoch": 0.032668881506090805,
"grad_norm": 0.5248478055000305,
"learning_rate": 3.2596685082872933e-06,
"loss": 0.643,
"step": 59
},
{
"epoch": 0.03322259136212625,
"grad_norm": 0.5045427680015564,
"learning_rate": 3.3149171270718235e-06,
"loss": 0.5906,
"step": 60
},
{
"epoch": 0.033776301218161685,
"grad_norm": 0.5321183800697327,
"learning_rate": 3.370165745856354e-06,
"loss": 0.5901,
"step": 61
},
{
"epoch": 0.03433001107419712,
"grad_norm": 0.5732381939888,
"learning_rate": 3.4254143646408845e-06,
"loss": 0.6214,
"step": 62
},
{
"epoch": 0.03488372093023256,
"grad_norm": 0.4849403202533722,
"learning_rate": 3.4806629834254147e-06,
"loss": 0.5658,
"step": 63
},
{
"epoch": 0.035437430786267994,
"grad_norm": 0.5135958790779114,
"learning_rate": 3.535911602209945e-06,
"loss": 0.5828,
"step": 64
},
{
"epoch": 0.03599114064230343,
"grad_norm": 0.5140406489372253,
"learning_rate": 3.5911602209944757e-06,
"loss": 0.5816,
"step": 65
},
{
"epoch": 0.036544850498338874,
"grad_norm": 0.47636738419532776,
"learning_rate": 3.646408839779006e-06,
"loss": 0.5968,
"step": 66
},
{
"epoch": 0.03709856035437431,
"grad_norm": 0.44486942887306213,
"learning_rate": 3.7016574585635362e-06,
"loss": 0.573,
"step": 67
},
{
"epoch": 0.03765227021040975,
"grad_norm": 0.45608875155448914,
"learning_rate": 3.7569060773480665e-06,
"loss": 0.5612,
"step": 68
},
{
"epoch": 0.03820598006644518,
"grad_norm": 0.4425041377544403,
"learning_rate": 3.812154696132597e-06,
"loss": 0.5806,
"step": 69
},
{
"epoch": 0.03875968992248062,
"grad_norm": 0.5609722137451172,
"learning_rate": 3.867403314917128e-06,
"loss": 0.5642,
"step": 70
},
{
"epoch": 0.039313399778516056,
"grad_norm": 0.4668285548686981,
"learning_rate": 3.922651933701658e-06,
"loss": 0.5704,
"step": 71
},
{
"epoch": 0.03986710963455149,
"grad_norm": 0.4177052676677704,
"learning_rate": 3.977900552486188e-06,
"loss": 0.5787,
"step": 72
},
{
"epoch": 0.040420819490586936,
"grad_norm": 0.4882914423942566,
"learning_rate": 4.033149171270719e-06,
"loss": 0.5835,
"step": 73
},
{
"epoch": 0.04097452934662237,
"grad_norm": 0.44587841629981995,
"learning_rate": 4.088397790055249e-06,
"loss": 0.561,
"step": 74
},
{
"epoch": 0.04152823920265781,
"grad_norm": 0.46285197138786316,
"learning_rate": 4.143646408839779e-06,
"loss": 0.5867,
"step": 75
},
{
"epoch": 0.042081949058693245,
"grad_norm": 0.44081801176071167,
"learning_rate": 4.1988950276243095e-06,
"loss": 0.5788,
"step": 76
},
{
"epoch": 0.04263565891472868,
"grad_norm": 0.4478652775287628,
"learning_rate": 4.2541436464088406e-06,
"loss": 0.5631,
"step": 77
},
{
"epoch": 0.04318936877076412,
"grad_norm": 0.4420766830444336,
"learning_rate": 4.309392265193371e-06,
"loss": 0.5746,
"step": 78
},
{
"epoch": 0.043743078626799554,
"grad_norm": 0.440927654504776,
"learning_rate": 4.364640883977901e-06,
"loss": 0.5907,
"step": 79
},
{
"epoch": 0.044296788482835,
"grad_norm": 0.4461442828178406,
"learning_rate": 4.419889502762431e-06,
"loss": 0.5495,
"step": 80
},
{
"epoch": 0.044850498338870434,
"grad_norm": 0.477222740650177,
"learning_rate": 4.475138121546962e-06,
"loss": 0.6103,
"step": 81
},
{
"epoch": 0.04540420819490587,
"grad_norm": 0.4078069031238556,
"learning_rate": 4.530386740331492e-06,
"loss": 0.5715,
"step": 82
},
{
"epoch": 0.04595791805094131,
"grad_norm": 0.4012727737426758,
"learning_rate": 4.585635359116022e-06,
"loss": 0.5388,
"step": 83
},
{
"epoch": 0.046511627906976744,
"grad_norm": 0.41609224677085876,
"learning_rate": 4.640883977900552e-06,
"loss": 0.5713,
"step": 84
},
{
"epoch": 0.04706533776301218,
"grad_norm": 0.45814239978790283,
"learning_rate": 4.6961325966850835e-06,
"loss": 0.6032,
"step": 85
},
{
"epoch": 0.047619047619047616,
"grad_norm": 0.4698704183101654,
"learning_rate": 4.751381215469614e-06,
"loss": 0.5671,
"step": 86
},
{
"epoch": 0.04817275747508306,
"grad_norm": 0.4096158742904663,
"learning_rate": 4.806629834254144e-06,
"loss": 0.5474,
"step": 87
},
{
"epoch": 0.048726467331118496,
"grad_norm": 0.4519617557525635,
"learning_rate": 4.861878453038674e-06,
"loss": 0.5641,
"step": 88
},
{
"epoch": 0.04928017718715393,
"grad_norm": 0.3691461980342865,
"learning_rate": 4.9171270718232054e-06,
"loss": 0.5661,
"step": 89
},
{
"epoch": 0.04983388704318937,
"grad_norm": 0.417376309633255,
"learning_rate": 4.972375690607736e-06,
"loss": 0.5554,
"step": 90
},
{
"epoch": 0.050387596899224806,
"grad_norm": 0.42060375213623047,
"learning_rate": 5.027624309392266e-06,
"loss": 0.5667,
"step": 91
},
{
"epoch": 0.05094130675526024,
"grad_norm": 0.41582152247428894,
"learning_rate": 5.082872928176796e-06,
"loss": 0.5524,
"step": 92
},
{
"epoch": 0.05149501661129568,
"grad_norm": 0.39075276255607605,
"learning_rate": 5.1381215469613265e-06,
"loss": 0.5698,
"step": 93
},
{
"epoch": 0.05204872646733112,
"grad_norm": 0.42373329401016235,
"learning_rate": 5.193370165745857e-06,
"loss": 0.564,
"step": 94
},
{
"epoch": 0.05260243632336656,
"grad_norm": 0.433353066444397,
"learning_rate": 5.248618784530387e-06,
"loss": 0.5763,
"step": 95
},
{
"epoch": 0.053156146179401995,
"grad_norm": 0.4021036922931671,
"learning_rate": 5.303867403314917e-06,
"loss": 0.5425,
"step": 96
},
{
"epoch": 0.05370985603543743,
"grad_norm": 0.3890472948551178,
"learning_rate": 5.3591160220994476e-06,
"loss": 0.5486,
"step": 97
},
{
"epoch": 0.05426356589147287,
"grad_norm": 0.3637540638446808,
"learning_rate": 5.414364640883978e-06,
"loss": 0.5249,
"step": 98
},
{
"epoch": 0.054817275747508304,
"grad_norm": 0.40060943365097046,
"learning_rate": 5.469613259668509e-06,
"loss": 0.582,
"step": 99
},
{
"epoch": 0.05537098560354374,
"grad_norm": 0.41719919443130493,
"learning_rate": 5.524861878453039e-06,
"loss": 0.5838,
"step": 100
},
{
"epoch": 0.055924695459579184,
"grad_norm": 0.46253830194473267,
"learning_rate": 5.5801104972375695e-06,
"loss": 0.5752,
"step": 101
},
{
"epoch": 0.05647840531561462,
"grad_norm": 0.4139224588871002,
"learning_rate": 5.6353591160221e-06,
"loss": 0.5517,
"step": 102
},
{
"epoch": 0.05703211517165006,
"grad_norm": 0.5012995004653931,
"learning_rate": 5.69060773480663e-06,
"loss": 0.5253,
"step": 103
},
{
"epoch": 0.05758582502768549,
"grad_norm": 0.4264964163303375,
"learning_rate": 5.74585635359116e-06,
"loss": 0.5499,
"step": 104
},
{
"epoch": 0.05813953488372093,
"grad_norm": 0.4827825725078583,
"learning_rate": 5.8011049723756905e-06,
"loss": 0.5679,
"step": 105
},
{
"epoch": 0.058693244739756366,
"grad_norm": 0.4288899600505829,
"learning_rate": 5.856353591160221e-06,
"loss": 0.5534,
"step": 106
},
{
"epoch": 0.0592469545957918,
"grad_norm": 0.5517690181732178,
"learning_rate": 5.911602209944752e-06,
"loss": 0.5809,
"step": 107
},
{
"epoch": 0.059800664451827246,
"grad_norm": 0.41885989904403687,
"learning_rate": 5.966850828729282e-06,
"loss": 0.513,
"step": 108
},
{
"epoch": 0.06035437430786268,
"grad_norm": 0.4665631055831909,
"learning_rate": 6.0220994475138124e-06,
"loss": 0.5555,
"step": 109
},
{
"epoch": 0.06090808416389812,
"grad_norm": 0.46911975741386414,
"learning_rate": 6.077348066298343e-06,
"loss": 0.5769,
"step": 110
},
{
"epoch": 0.061461794019933555,
"grad_norm": 0.5048715472221375,
"learning_rate": 6.132596685082873e-06,
"loss": 0.5394,
"step": 111
},
{
"epoch": 0.06201550387596899,
"grad_norm": 0.49173814058303833,
"learning_rate": 6.187845303867403e-06,
"loss": 0.5256,
"step": 112
},
{
"epoch": 0.06256921373200443,
"grad_norm": 0.43289491534233093,
"learning_rate": 6.2430939226519335e-06,
"loss": 0.5481,
"step": 113
},
{
"epoch": 0.06312292358803986,
"grad_norm": 0.5766280889511108,
"learning_rate": 6.298342541436464e-06,
"loss": 0.5448,
"step": 114
},
{
"epoch": 0.0636766334440753,
"grad_norm": 0.4479210674762726,
"learning_rate": 6.353591160220996e-06,
"loss": 0.5419,
"step": 115
},
{
"epoch": 0.06423034330011074,
"grad_norm": 0.46152201294898987,
"learning_rate": 6.408839779005526e-06,
"loss": 0.5534,
"step": 116
},
{
"epoch": 0.06478405315614617,
"grad_norm": 0.36882197856903076,
"learning_rate": 6.464088397790056e-06,
"loss": 0.527,
"step": 117
},
{
"epoch": 0.06533776301218161,
"grad_norm": 0.46747303009033203,
"learning_rate": 6.5193370165745865e-06,
"loss": 0.5454,
"step": 118
},
{
"epoch": 0.06589147286821706,
"grad_norm": 0.44827598333358765,
"learning_rate": 6.574585635359117e-06,
"loss": 0.5337,
"step": 119
},
{
"epoch": 0.0664451827242525,
"grad_norm": 0.468741238117218,
"learning_rate": 6.629834254143647e-06,
"loss": 0.5405,
"step": 120
},
{
"epoch": 0.06699889258028793,
"grad_norm": 0.41223788261413574,
"learning_rate": 6.685082872928177e-06,
"loss": 0.5283,
"step": 121
},
{
"epoch": 0.06755260243632337,
"grad_norm": 0.49077704548835754,
"learning_rate": 6.740331491712708e-06,
"loss": 0.5487,
"step": 122
},
{
"epoch": 0.0681063122923588,
"grad_norm": 0.4232911765575409,
"learning_rate": 6.795580110497239e-06,
"loss": 0.5511,
"step": 123
},
{
"epoch": 0.06866002214839424,
"grad_norm": 0.4715983271598816,
"learning_rate": 6.850828729281769e-06,
"loss": 0.5556,
"step": 124
},
{
"epoch": 0.06921373200442968,
"grad_norm": 0.4605570435523987,
"learning_rate": 6.906077348066299e-06,
"loss": 0.5431,
"step": 125
},
{
"epoch": 0.06976744186046512,
"grad_norm": 0.44163256883621216,
"learning_rate": 6.9613259668508295e-06,
"loss": 0.5496,
"step": 126
},
{
"epoch": 0.07032115171650055,
"grad_norm": 0.4515061676502228,
"learning_rate": 7.01657458563536e-06,
"loss": 0.5589,
"step": 127
},
{
"epoch": 0.07087486157253599,
"grad_norm": 0.42739376425743103,
"learning_rate": 7.07182320441989e-06,
"loss": 0.4992,
"step": 128
},
{
"epoch": 0.07142857142857142,
"grad_norm": 0.4851279854774475,
"learning_rate": 7.12707182320442e-06,
"loss": 0.5672,
"step": 129
},
{
"epoch": 0.07198228128460686,
"grad_norm": 0.4500119984149933,
"learning_rate": 7.182320441988951e-06,
"loss": 0.5416,
"step": 130
},
{
"epoch": 0.0725359911406423,
"grad_norm": 0.4973263144493103,
"learning_rate": 7.237569060773482e-06,
"loss": 0.5557,
"step": 131
},
{
"epoch": 0.07308970099667775,
"grad_norm": 0.44461292028427124,
"learning_rate": 7.292817679558012e-06,
"loss": 0.5227,
"step": 132
},
{
"epoch": 0.07364341085271318,
"grad_norm": 0.47895634174346924,
"learning_rate": 7.348066298342542e-06,
"loss": 0.5235,
"step": 133
},
{
"epoch": 0.07419712070874862,
"grad_norm": 0.4273644685745239,
"learning_rate": 7.4033149171270724e-06,
"loss": 0.5454,
"step": 134
},
{
"epoch": 0.07475083056478406,
"grad_norm": 0.4457617998123169,
"learning_rate": 7.458563535911603e-06,
"loss": 0.5361,
"step": 135
},
{
"epoch": 0.0753045404208195,
"grad_norm": 0.48914194107055664,
"learning_rate": 7.513812154696133e-06,
"loss": 0.563,
"step": 136
},
{
"epoch": 0.07585825027685493,
"grad_norm": 0.4490724503993988,
"learning_rate": 7.569060773480663e-06,
"loss": 0.532,
"step": 137
},
{
"epoch": 0.07641196013289037,
"grad_norm": 0.5025115013122559,
"learning_rate": 7.624309392265194e-06,
"loss": 0.546,
"step": 138
},
{
"epoch": 0.0769656699889258,
"grad_norm": 0.5027139186859131,
"learning_rate": 7.679558011049725e-06,
"loss": 0.5729,
"step": 139
},
{
"epoch": 0.07751937984496124,
"grad_norm": 0.46010562777519226,
"learning_rate": 7.734806629834256e-06,
"loss": 0.5299,
"step": 140
},
{
"epoch": 0.07807308970099668,
"grad_norm": 0.46475037932395935,
"learning_rate": 7.790055248618785e-06,
"loss": 0.5403,
"step": 141
},
{
"epoch": 0.07862679955703211,
"grad_norm": 0.48647457361221313,
"learning_rate": 7.845303867403316e-06,
"loss": 0.5281,
"step": 142
},
{
"epoch": 0.07918050941306755,
"grad_norm": 0.5236656069755554,
"learning_rate": 7.900552486187846e-06,
"loss": 0.5555,
"step": 143
},
{
"epoch": 0.07973421926910298,
"grad_norm": 0.5494616031646729,
"learning_rate": 7.955801104972377e-06,
"loss": 0.5621,
"step": 144
},
{
"epoch": 0.08028792912513842,
"grad_norm": 0.4531148076057434,
"learning_rate": 8.011049723756906e-06,
"loss": 0.5576,
"step": 145
},
{
"epoch": 0.08084163898117387,
"grad_norm": 0.45845550298690796,
"learning_rate": 8.066298342541437e-06,
"loss": 0.554,
"step": 146
},
{
"epoch": 0.08139534883720931,
"grad_norm": 0.6133643388748169,
"learning_rate": 8.121546961325968e-06,
"loss": 0.5496,
"step": 147
},
{
"epoch": 0.08194905869324474,
"grad_norm": 0.43506351113319397,
"learning_rate": 8.176795580110498e-06,
"loss": 0.548,
"step": 148
},
{
"epoch": 0.08250276854928018,
"grad_norm": 0.4651344120502472,
"learning_rate": 8.232044198895029e-06,
"loss": 0.527,
"step": 149
},
{
"epoch": 0.08305647840531562,
"grad_norm": 0.43981248140335083,
"learning_rate": 8.287292817679558e-06,
"loss": 0.5379,
"step": 150
},
{
"epoch": 0.08361018826135105,
"grad_norm": 0.5043104290962219,
"learning_rate": 8.34254143646409e-06,
"loss": 0.5298,
"step": 151
},
{
"epoch": 0.08416389811738649,
"grad_norm": 0.3996557891368866,
"learning_rate": 8.397790055248619e-06,
"loss": 0.5308,
"step": 152
},
{
"epoch": 0.08471760797342193,
"grad_norm": 0.5180063843727112,
"learning_rate": 8.45303867403315e-06,
"loss": 0.5278,
"step": 153
},
{
"epoch": 0.08527131782945736,
"grad_norm": 0.48788321018218994,
"learning_rate": 8.508287292817681e-06,
"loss": 0.5215,
"step": 154
},
{
"epoch": 0.0858250276854928,
"grad_norm": 0.4473685026168823,
"learning_rate": 8.56353591160221e-06,
"loss": 0.5346,
"step": 155
},
{
"epoch": 0.08637873754152824,
"grad_norm": 0.5907010436058044,
"learning_rate": 8.618784530386742e-06,
"loss": 0.5278,
"step": 156
},
{
"epoch": 0.08693244739756367,
"grad_norm": 0.495604932308197,
"learning_rate": 8.674033149171271e-06,
"loss": 0.5076,
"step": 157
},
{
"epoch": 0.08748615725359911,
"grad_norm": 0.5206358432769775,
"learning_rate": 8.729281767955802e-06,
"loss": 0.5517,
"step": 158
},
{
"epoch": 0.08803986710963455,
"grad_norm": 0.5317306518554688,
"learning_rate": 8.784530386740332e-06,
"loss": 0.5221,
"step": 159
},
{
"epoch": 0.08859357696567,
"grad_norm": 0.5278891921043396,
"learning_rate": 8.839779005524863e-06,
"loss": 0.5531,
"step": 160
},
{
"epoch": 0.08914728682170543,
"grad_norm": 0.4803391695022583,
"learning_rate": 8.895027624309392e-06,
"loss": 0.5359,
"step": 161
},
{
"epoch": 0.08970099667774087,
"grad_norm": 0.6480128169059753,
"learning_rate": 8.950276243093923e-06,
"loss": 0.5393,
"step": 162
},
{
"epoch": 0.0902547065337763,
"grad_norm": 0.4067532420158386,
"learning_rate": 9.005524861878454e-06,
"loss": 0.5344,
"step": 163
},
{
"epoch": 0.09080841638981174,
"grad_norm": 0.4894218444824219,
"learning_rate": 9.060773480662984e-06,
"loss": 0.5316,
"step": 164
},
{
"epoch": 0.09136212624584718,
"grad_norm": 0.6006916761398315,
"learning_rate": 9.116022099447515e-06,
"loss": 0.5531,
"step": 165
},
{
"epoch": 0.09191583610188261,
"grad_norm": 0.5255333185195923,
"learning_rate": 9.171270718232044e-06,
"loss": 0.5253,
"step": 166
},
{
"epoch": 0.09246954595791805,
"grad_norm": 0.5868107676506042,
"learning_rate": 9.226519337016575e-06,
"loss": 0.5451,
"step": 167
},
{
"epoch": 0.09302325581395349,
"grad_norm": 0.5775595307350159,
"learning_rate": 9.281767955801105e-06,
"loss": 0.5496,
"step": 168
},
{
"epoch": 0.09357696566998892,
"grad_norm": 0.4332346022129059,
"learning_rate": 9.337016574585636e-06,
"loss": 0.4993,
"step": 169
},
{
"epoch": 0.09413067552602436,
"grad_norm": 0.6276872158050537,
"learning_rate": 9.392265193370167e-06,
"loss": 0.5209,
"step": 170
},
{
"epoch": 0.0946843853820598,
"grad_norm": 0.4875026047229767,
"learning_rate": 9.447513812154696e-06,
"loss": 0.5293,
"step": 171
},
{
"epoch": 0.09523809523809523,
"grad_norm": 0.4791780412197113,
"learning_rate": 9.502762430939228e-06,
"loss": 0.528,
"step": 172
},
{
"epoch": 0.09579180509413067,
"grad_norm": 0.4861852526664734,
"learning_rate": 9.558011049723757e-06,
"loss": 0.498,
"step": 173
},
{
"epoch": 0.09634551495016612,
"grad_norm": 0.44678786396980286,
"learning_rate": 9.613259668508288e-06,
"loss": 0.52,
"step": 174
},
{
"epoch": 0.09689922480620156,
"grad_norm": 0.5340399146080017,
"learning_rate": 9.668508287292818e-06,
"loss": 0.5291,
"step": 175
},
{
"epoch": 0.09745293466223699,
"grad_norm": 0.5051288604736328,
"learning_rate": 9.723756906077349e-06,
"loss": 0.5231,
"step": 176
},
{
"epoch": 0.09800664451827243,
"grad_norm": 0.5003914833068848,
"learning_rate": 9.779005524861878e-06,
"loss": 0.4915,
"step": 177
},
{
"epoch": 0.09856035437430787,
"grad_norm": 0.5183685421943665,
"learning_rate": 9.834254143646411e-06,
"loss": 0.5206,
"step": 178
},
{
"epoch": 0.0991140642303433,
"grad_norm": 0.5782157778739929,
"learning_rate": 9.88950276243094e-06,
"loss": 0.5309,
"step": 179
},
{
"epoch": 0.09966777408637874,
"grad_norm": 0.47796350717544556,
"learning_rate": 9.944751381215471e-06,
"loss": 0.5286,
"step": 180
},
{
"epoch": 0.10022148394241417,
"grad_norm": 0.5073690414428711,
"learning_rate": 1e-05,
"loss": 0.5304,
"step": 181
},
{
"epoch": 0.10077519379844961,
"grad_norm": 0.49675506353378296,
"learning_rate": 9.999990655998744e-06,
"loss": 0.5272,
"step": 182
},
{
"epoch": 0.10132890365448505,
"grad_norm": 0.4897266924381256,
"learning_rate": 9.9999626240299e-06,
"loss": 0.5136,
"step": 183
},
{
"epoch": 0.10188261351052048,
"grad_norm": 0.4757635295391083,
"learning_rate": 9.999915904198239e-06,
"loss": 0.5275,
"step": 184
},
{
"epoch": 0.10243632336655592,
"grad_norm": 0.5029352307319641,
"learning_rate": 9.99985049667838e-06,
"loss": 0.4964,
"step": 185
},
{
"epoch": 0.10299003322259136,
"grad_norm": 0.45012396574020386,
"learning_rate": 9.999766401714795e-06,
"loss": 0.515,
"step": 186
},
{
"epoch": 0.1035437430786268,
"grad_norm": 0.500487744808197,
"learning_rate": 9.999663619621793e-06,
"loss": 0.5304,
"step": 187
},
{
"epoch": 0.10409745293466224,
"grad_norm": 0.4716833233833313,
"learning_rate": 9.999542150783533e-06,
"loss": 0.504,
"step": 188
},
{
"epoch": 0.10465116279069768,
"grad_norm": 0.48771926760673523,
"learning_rate": 9.999401995654018e-06,
"loss": 0.5434,
"step": 189
},
{
"epoch": 0.10520487264673312,
"grad_norm": 0.49197521805763245,
"learning_rate": 9.999243154757092e-06,
"loss": 0.5254,
"step": 190
},
{
"epoch": 0.10575858250276855,
"grad_norm": 0.443828284740448,
"learning_rate": 9.999065628686439e-06,
"loss": 0.5076,
"step": 191
},
{
"epoch": 0.10631229235880399,
"grad_norm": 0.44356590509414673,
"learning_rate": 9.998869418105578e-06,
"loss": 0.5017,
"step": 192
},
{
"epoch": 0.10686600221483943,
"grad_norm": 0.4854886829853058,
"learning_rate": 9.99865452374787e-06,
"loss": 0.5067,
"step": 193
},
{
"epoch": 0.10741971207087486,
"grad_norm": 0.5027791857719421,
"learning_rate": 9.9984209464165e-06,
"loss": 0.5234,
"step": 194
},
{
"epoch": 0.1079734219269103,
"grad_norm": 0.49939441680908203,
"learning_rate": 9.99816868698449e-06,
"loss": 0.5005,
"step": 195
},
{
"epoch": 0.10852713178294573,
"grad_norm": 0.5125188231468201,
"learning_rate": 9.997897746394684e-06,
"loss": 0.5197,
"step": 196
},
{
"epoch": 0.10908084163898117,
"grad_norm": 0.508100152015686,
"learning_rate": 9.99760812565975e-06,
"loss": 0.5081,
"step": 197
},
{
"epoch": 0.10963455149501661,
"grad_norm": 0.5313785076141357,
"learning_rate": 9.997299825862172e-06,
"loss": 0.4863,
"step": 198
},
{
"epoch": 0.11018826135105204,
"grad_norm": 0.47651734948158264,
"learning_rate": 9.996972848154254e-06,
"loss": 0.504,
"step": 199
},
{
"epoch": 0.11074197120708748,
"grad_norm": 0.5211071968078613,
"learning_rate": 9.996627193758108e-06,
"loss": 0.5289,
"step": 200
},
{
"epoch": 0.11129568106312292,
"grad_norm": 0.5157010555267334,
"learning_rate": 9.996262863965651e-06,
"loss": 0.5057,
"step": 201
},
{
"epoch": 0.11184939091915837,
"grad_norm": 0.5023174285888672,
"learning_rate": 9.995879860138605e-06,
"loss": 0.5255,
"step": 202
},
{
"epoch": 0.1124031007751938,
"grad_norm": 0.5226082801818848,
"learning_rate": 9.99547818370848e-06,
"loss": 0.5076,
"step": 203
},
{
"epoch": 0.11295681063122924,
"grad_norm": 0.5416733622550964,
"learning_rate": 9.995057836176588e-06,
"loss": 0.5243,
"step": 204
},
{
"epoch": 0.11351052048726468,
"grad_norm": 0.5339394807815552,
"learning_rate": 9.994618819114015e-06,
"loss": 0.5288,
"step": 205
},
{
"epoch": 0.11406423034330011,
"grad_norm": 0.5571205615997314,
"learning_rate": 9.994161134161635e-06,
"loss": 0.5187,
"step": 206
},
{
"epoch": 0.11461794019933555,
"grad_norm": 0.48999401926994324,
"learning_rate": 9.99368478303009e-06,
"loss": 0.5229,
"step": 207
},
{
"epoch": 0.11517165005537099,
"grad_norm": 0.5341947078704834,
"learning_rate": 9.993189767499789e-06,
"loss": 0.5136,
"step": 208
},
{
"epoch": 0.11572535991140642,
"grad_norm": 0.43289002776145935,
"learning_rate": 9.992676089420903e-06,
"loss": 0.5227,
"step": 209
},
{
"epoch": 0.11627906976744186,
"grad_norm": 0.5622230172157288,
"learning_rate": 9.99214375071336e-06,
"loss": 0.5189,
"step": 210
},
{
"epoch": 0.1168327796234773,
"grad_norm": 0.43735480308532715,
"learning_rate": 9.991592753366822e-06,
"loss": 0.5103,
"step": 211
},
{
"epoch": 0.11738648947951273,
"grad_norm": 0.49578508734703064,
"learning_rate": 9.991023099440702e-06,
"loss": 0.5234,
"step": 212
},
{
"epoch": 0.11794019933554817,
"grad_norm": 0.5016103982925415,
"learning_rate": 9.990434791064137e-06,
"loss": 0.5143,
"step": 213
},
{
"epoch": 0.1184939091915836,
"grad_norm": 0.46542438864707947,
"learning_rate": 9.98982783043599e-06,
"loss": 0.5073,
"step": 214
},
{
"epoch": 0.11904761904761904,
"grad_norm": 0.47478312253952026,
"learning_rate": 9.989202219824834e-06,
"loss": 0.5102,
"step": 215
},
{
"epoch": 0.11960132890365449,
"grad_norm": 0.48327192664146423,
"learning_rate": 9.988557961568956e-06,
"loss": 0.4817,
"step": 216
},
{
"epoch": 0.12015503875968993,
"grad_norm": 0.4620007574558258,
"learning_rate": 9.987895058076334e-06,
"loss": 0.5077,
"step": 217
},
{
"epoch": 0.12070874861572536,
"grad_norm": 0.4638729393482208,
"learning_rate": 9.987213511824634e-06,
"loss": 0.5415,
"step": 218
},
{
"epoch": 0.1212624584717608,
"grad_norm": 0.4861536920070648,
"learning_rate": 9.986513325361209e-06,
"loss": 0.4944,
"step": 219
},
{
"epoch": 0.12181616832779624,
"grad_norm": 0.4344837963581085,
"learning_rate": 9.98579450130307e-06,
"loss": 0.4911,
"step": 220
},
{
"epoch": 0.12236987818383167,
"grad_norm": 0.46759846806526184,
"learning_rate": 9.985057042336898e-06,
"loss": 0.5114,
"step": 221
},
{
"epoch": 0.12292358803986711,
"grad_norm": 0.5901568531990051,
"learning_rate": 9.984300951219022e-06,
"loss": 0.5624,
"step": 222
},
{
"epoch": 0.12347729789590255,
"grad_norm": 0.45916667580604553,
"learning_rate": 9.983526230775405e-06,
"loss": 0.508,
"step": 223
},
{
"epoch": 0.12403100775193798,
"grad_norm": 0.45065367221832275,
"learning_rate": 9.982732883901641e-06,
"loss": 0.5452,
"step": 224
},
{
"epoch": 0.12458471760797342,
"grad_norm": 0.4651075601577759,
"learning_rate": 9.981920913562948e-06,
"loss": 0.5167,
"step": 225
},
{
"epoch": 0.12513842746400886,
"grad_norm": 0.4300501346588135,
"learning_rate": 9.981090322794145e-06,
"loss": 0.5086,
"step": 226
},
{
"epoch": 0.1256921373200443,
"grad_norm": 0.5401392579078674,
"learning_rate": 9.980241114699647e-06,
"loss": 0.5161,
"step": 227
},
{
"epoch": 0.12624584717607973,
"grad_norm": 0.5935031771659851,
"learning_rate": 9.979373292453457e-06,
"loss": 0.5069,
"step": 228
},
{
"epoch": 0.12679955703211518,
"grad_norm": 0.45174965262413025,
"learning_rate": 9.978486859299146e-06,
"loss": 0.4954,
"step": 229
},
{
"epoch": 0.1273532668881506,
"grad_norm": 0.535507082939148,
"learning_rate": 9.977581818549849e-06,
"loss": 0.5104,
"step": 230
},
{
"epoch": 0.12790697674418605,
"grad_norm": 0.5454712510108948,
"learning_rate": 9.976658173588244e-06,
"loss": 0.5423,
"step": 231
},
{
"epoch": 0.12846068660022147,
"grad_norm": 0.5051414370536804,
"learning_rate": 9.97571592786655e-06,
"loss": 0.5128,
"step": 232
},
{
"epoch": 0.12901439645625692,
"grad_norm": 0.449792742729187,
"learning_rate": 9.974755084906503e-06,
"loss": 0.5228,
"step": 233
},
{
"epoch": 0.12956810631229235,
"grad_norm": 0.44292494654655457,
"learning_rate": 9.973775648299349e-06,
"loss": 0.5058,
"step": 234
},
{
"epoch": 0.1301218161683278,
"grad_norm": 0.4995569884777069,
"learning_rate": 9.972777621705833e-06,
"loss": 0.5044,
"step": 235
},
{
"epoch": 0.13067552602436322,
"grad_norm": 0.46285441517829895,
"learning_rate": 9.97176100885618e-06,
"loss": 0.5111,
"step": 236
},
{
"epoch": 0.13122923588039867,
"grad_norm": 0.5752739906311035,
"learning_rate": 9.970725813550081e-06,
"loss": 0.5184,
"step": 237
},
{
"epoch": 0.13178294573643412,
"grad_norm": 0.5005012154579163,
"learning_rate": 9.969672039656684e-06,
"loss": 0.5095,
"step": 238
},
{
"epoch": 0.13233665559246954,
"grad_norm": 0.5429736375808716,
"learning_rate": 9.968599691114573e-06,
"loss": 0.4957,
"step": 239
},
{
"epoch": 0.132890365448505,
"grad_norm": 0.4731188118457794,
"learning_rate": 9.967508771931761e-06,
"loss": 0.5231,
"step": 240
},
{
"epoch": 0.13344407530454042,
"grad_norm": 0.4413149356842041,
"learning_rate": 9.966399286185666e-06,
"loss": 0.4852,
"step": 241
},
{
"epoch": 0.13399778516057587,
"grad_norm": 0.5604010224342346,
"learning_rate": 9.965271238023102e-06,
"loss": 0.4953,
"step": 242
},
{
"epoch": 0.1345514950166113,
"grad_norm": 0.4779647886753082,
"learning_rate": 9.964124631660266e-06,
"loss": 0.4806,
"step": 243
},
{
"epoch": 0.13510520487264674,
"grad_norm": 0.4577624201774597,
"learning_rate": 9.96295947138271e-06,
"loss": 0.5214,
"step": 244
},
{
"epoch": 0.13565891472868216,
"grad_norm": 0.5134978890419006,
"learning_rate": 9.96177576154534e-06,
"loss": 0.5123,
"step": 245
},
{
"epoch": 0.1362126245847176,
"grad_norm": 0.4479624032974243,
"learning_rate": 9.960573506572391e-06,
"loss": 0.4983,
"step": 246
},
{
"epoch": 0.13676633444075303,
"grad_norm": 0.4445071518421173,
"learning_rate": 9.959352710957408e-06,
"loss": 0.4886,
"step": 247
},
{
"epoch": 0.13732004429678848,
"grad_norm": 0.4781419634819031,
"learning_rate": 9.958113379263243e-06,
"loss": 0.5194,
"step": 248
},
{
"epoch": 0.1378737541528239,
"grad_norm": 0.46459653973579407,
"learning_rate": 9.95685551612202e-06,
"loss": 0.5139,
"step": 249
},
{
"epoch": 0.13842746400885936,
"grad_norm": 0.491256445646286,
"learning_rate": 9.955579126235129e-06,
"loss": 0.518,
"step": 250
},
{
"epoch": 0.1389811738648948,
"grad_norm": 0.5470618009567261,
"learning_rate": 9.954284214373204e-06,
"loss": 0.5461,
"step": 251
},
{
"epoch": 0.13953488372093023,
"grad_norm": 0.5898417830467224,
"learning_rate": 9.95297078537611e-06,
"loss": 0.523,
"step": 252
},
{
"epoch": 0.14008859357696568,
"grad_norm": 0.48347923159599304,
"learning_rate": 9.95163884415292e-06,
"loss": 0.51,
"step": 253
},
{
"epoch": 0.1406423034330011,
"grad_norm": 0.5616808533668518,
"learning_rate": 9.950288395681898e-06,
"loss": 0.5021,
"step": 254
},
{
"epoch": 0.14119601328903655,
"grad_norm": 0.6873836517333984,
"learning_rate": 9.94891944501048e-06,
"loss": 0.5247,
"step": 255
},
{
"epoch": 0.14174972314507198,
"grad_norm": 0.6375631093978882,
"learning_rate": 9.947531997255256e-06,
"loss": 0.5126,
"step": 256
},
{
"epoch": 0.14230343300110743,
"grad_norm": 0.6284964084625244,
"learning_rate": 9.946126057601954e-06,
"loss": 0.514,
"step": 257
},
{
"epoch": 0.14285714285714285,
"grad_norm": 0.6849797368049622,
"learning_rate": 9.944701631305413e-06,
"loss": 0.5186,
"step": 258
},
{
"epoch": 0.1434108527131783,
"grad_norm": 0.5404208898544312,
"learning_rate": 9.94325872368957e-06,
"loss": 0.5004,
"step": 259
},
{
"epoch": 0.14396456256921372,
"grad_norm": 0.6033118367195129,
"learning_rate": 9.941797340147439e-06,
"loss": 0.5213,
"step": 260
},
{
"epoch": 0.14451827242524917,
"grad_norm": 0.5408035516738892,
"learning_rate": 9.940317486141084e-06,
"loss": 0.545,
"step": 261
},
{
"epoch": 0.1450719822812846,
"grad_norm": 0.5264489650726318,
"learning_rate": 9.93881916720161e-06,
"loss": 0.5101,
"step": 262
},
{
"epoch": 0.14562569213732005,
"grad_norm": 0.5625776648521423,
"learning_rate": 9.937302388929136e-06,
"loss": 0.5039,
"step": 263
},
{
"epoch": 0.1461794019933555,
"grad_norm": 0.5865488052368164,
"learning_rate": 9.93576715699277e-06,
"loss": 0.5297,
"step": 264
},
{
"epoch": 0.14673311184939092,
"grad_norm": 0.5614110231399536,
"learning_rate": 9.934213477130599e-06,
"loss": 0.4956,
"step": 265
},
{
"epoch": 0.14728682170542637,
"grad_norm": 0.5261831879615784,
"learning_rate": 9.932641355149655e-06,
"loss": 0.5429,
"step": 266
},
{
"epoch": 0.1478405315614618,
"grad_norm": 0.7056304812431335,
"learning_rate": 9.931050796925906e-06,
"loss": 0.5356,
"step": 267
},
{
"epoch": 0.14839424141749724,
"grad_norm": 0.5166594386100769,
"learning_rate": 9.929441808404217e-06,
"loss": 0.5076,
"step": 268
},
{
"epoch": 0.14894795127353266,
"grad_norm": 0.5735101103782654,
"learning_rate": 9.927814395598349e-06,
"loss": 0.5305,
"step": 269
},
{
"epoch": 0.14950166112956811,
"grad_norm": 0.5775477886199951,
"learning_rate": 9.92616856459092e-06,
"loss": 0.5089,
"step": 270
},
{
"epoch": 0.15005537098560354,
"grad_norm": 0.48364558815956116,
"learning_rate": 9.924504321533387e-06,
"loss": 0.484,
"step": 271
},
{
"epoch": 0.150609080841639,
"grad_norm": 0.5991694331169128,
"learning_rate": 9.922821672646028e-06,
"loss": 0.5232,
"step": 272
},
{
"epoch": 0.1511627906976744,
"grad_norm": 0.5030311942100525,
"learning_rate": 9.92112062421791e-06,
"loss": 0.5018,
"step": 273
},
{
"epoch": 0.15171650055370986,
"grad_norm": 0.5942660570144653,
"learning_rate": 9.919401182606876e-06,
"loss": 0.501,
"step": 274
},
{
"epoch": 0.15227021040974528,
"grad_norm": 0.4828101396560669,
"learning_rate": 9.917663354239508e-06,
"loss": 0.501,
"step": 275
},
{
"epoch": 0.15282392026578073,
"grad_norm": 0.6254435181617737,
"learning_rate": 9.915907145611117e-06,
"loss": 0.5147,
"step": 276
},
{
"epoch": 0.15337763012181616,
"grad_norm": 0.510230541229248,
"learning_rate": 9.914132563285707e-06,
"loss": 0.5085,
"step": 277
},
{
"epoch": 0.1539313399778516,
"grad_norm": 0.5653350949287415,
"learning_rate": 9.91233961389596e-06,
"loss": 0.5024,
"step": 278
},
{
"epoch": 0.15448504983388706,
"grad_norm": 0.5247920751571655,
"learning_rate": 9.910528304143203e-06,
"loss": 0.4716,
"step": 279
},
{
"epoch": 0.15503875968992248,
"grad_norm": 0.624584972858429,
"learning_rate": 9.908698640797389e-06,
"loss": 0.5145,
"step": 280
},
{
"epoch": 0.15559246954595793,
"grad_norm": 0.521739661693573,
"learning_rate": 9.906850630697068e-06,
"loss": 0.5145,
"step": 281
},
{
"epoch": 0.15614617940199335,
"grad_norm": 0.5611006617546082,
"learning_rate": 9.904984280749364e-06,
"loss": 0.4909,
"step": 282
},
{
"epoch": 0.1566998892580288,
"grad_norm": 0.5463786721229553,
"learning_rate": 9.903099597929947e-06,
"loss": 0.5127,
"step": 283
},
{
"epoch": 0.15725359911406422,
"grad_norm": 0.5769833326339722,
"learning_rate": 9.901196589283009e-06,
"loss": 0.4759,
"step": 284
},
{
"epoch": 0.15780730897009967,
"grad_norm": 0.5909711122512817,
"learning_rate": 9.899275261921236e-06,
"loss": 0.4908,
"step": 285
},
{
"epoch": 0.1583610188261351,
"grad_norm": 0.47673356533050537,
"learning_rate": 9.89733562302578e-06,
"loss": 0.494,
"step": 286
},
{
"epoch": 0.15891472868217055,
"grad_norm": 0.5346972346305847,
"learning_rate": 9.89537767984624e-06,
"loss": 0.5071,
"step": 287
},
{
"epoch": 0.15946843853820597,
"grad_norm": 0.5338662266731262,
"learning_rate": 9.893401439700624e-06,
"loss": 0.491,
"step": 288
},
{
"epoch": 0.16002214839424142,
"grad_norm": 0.4213932156562805,
"learning_rate": 9.891406909975328e-06,
"loss": 0.5105,
"step": 289
},
{
"epoch": 0.16057585825027684,
"grad_norm": 0.4888423979282379,
"learning_rate": 9.889394098125105e-06,
"loss": 0.4887,
"step": 290
},
{
"epoch": 0.1611295681063123,
"grad_norm": 0.4586566686630249,
"learning_rate": 9.887363011673046e-06,
"loss": 0.5191,
"step": 291
},
{
"epoch": 0.16168327796234774,
"grad_norm": 0.41070297360420227,
"learning_rate": 9.885313658210537e-06,
"loss": 0.5102,
"step": 292
},
{
"epoch": 0.16223698781838317,
"grad_norm": 0.46849679946899414,
"learning_rate": 9.883246045397244e-06,
"loss": 0.522,
"step": 293
},
{
"epoch": 0.16279069767441862,
"grad_norm": 0.4664832353591919,
"learning_rate": 9.881160180961078e-06,
"loss": 0.4865,
"step": 294
},
{
"epoch": 0.16334440753045404,
"grad_norm": 0.49740853905677795,
"learning_rate": 9.879056072698168e-06,
"loss": 0.4989,
"step": 295
},
{
"epoch": 0.1638981173864895,
"grad_norm": 0.5010776519775391,
"learning_rate": 9.876933728472826e-06,
"loss": 0.4947,
"step": 296
},
{
"epoch": 0.1644518272425249,
"grad_norm": 0.6259229183197021,
"learning_rate": 9.874793156217533e-06,
"loss": 0.5134,
"step": 297
},
{
"epoch": 0.16500553709856036,
"grad_norm": 0.5744408965110779,
"learning_rate": 9.872634363932887e-06,
"loss": 0.5049,
"step": 298
},
{
"epoch": 0.16555924695459578,
"grad_norm": 0.5046638250350952,
"learning_rate": 9.870457359687593e-06,
"loss": 0.4824,
"step": 299
},
{
"epoch": 0.16611295681063123,
"grad_norm": 0.4570577144622803,
"learning_rate": 9.868262151618426e-06,
"loss": 0.5228,
"step": 300
},
{
"epoch": 0.16666666666666666,
"grad_norm": 0.45581790804862976,
"learning_rate": 9.866048747930194e-06,
"loss": 0.5099,
"step": 301
},
{
"epoch": 0.1672203765227021,
"grad_norm": 0.4104360044002533,
"learning_rate": 9.863817156895715e-06,
"loss": 0.4927,
"step": 302
},
{
"epoch": 0.16777408637873753,
"grad_norm": 0.44734492897987366,
"learning_rate": 9.861567386855789e-06,
"loss": 0.4926,
"step": 303
},
{
"epoch": 0.16832779623477298,
"grad_norm": 0.49278324842453003,
"learning_rate": 9.85929944621915e-06,
"loss": 0.5049,
"step": 304
},
{
"epoch": 0.1688815060908084,
"grad_norm": 0.48833972215652466,
"learning_rate": 9.857013343462463e-06,
"loss": 0.5319,
"step": 305
},
{
"epoch": 0.16943521594684385,
"grad_norm": 0.41666170954704285,
"learning_rate": 9.854709087130261e-06,
"loss": 0.51,
"step": 306
},
{
"epoch": 0.1699889258028793,
"grad_norm": 0.4672697186470032,
"learning_rate": 9.852386685834936e-06,
"loss": 0.5044,
"step": 307
},
{
"epoch": 0.17054263565891473,
"grad_norm": 0.5205575823783875,
"learning_rate": 9.850046148256693e-06,
"loss": 0.5341,
"step": 308
},
{
"epoch": 0.17109634551495018,
"grad_norm": 0.4313163161277771,
"learning_rate": 9.847687483143532e-06,
"loss": 0.4979,
"step": 309
},
{
"epoch": 0.1716500553709856,
"grad_norm": 0.5207853317260742,
"learning_rate": 9.845310699311197e-06,
"loss": 0.5417,
"step": 310
},
{
"epoch": 0.17220376522702105,
"grad_norm": 0.4820159077644348,
"learning_rate": 9.842915805643156e-06,
"loss": 0.529,
"step": 311
},
{
"epoch": 0.17275747508305647,
"grad_norm": 0.5075066685676575,
"learning_rate": 9.840502811090568e-06,
"loss": 0.4788,
"step": 312
},
{
"epoch": 0.17331118493909192,
"grad_norm": 0.5771706104278564,
"learning_rate": 9.838071724672239e-06,
"loss": 0.4903,
"step": 313
},
{
"epoch": 0.17386489479512734,
"grad_norm": 0.5288019776344299,
"learning_rate": 9.835622555474601e-06,
"loss": 0.4909,
"step": 314
},
{
"epoch": 0.1744186046511628,
"grad_norm": 0.5946136116981506,
"learning_rate": 9.83315531265167e-06,
"loss": 0.5153,
"step": 315
},
{
"epoch": 0.17497231450719822,
"grad_norm": 0.5433559417724609,
"learning_rate": 9.830670005425012e-06,
"loss": 0.5281,
"step": 316
},
{
"epoch": 0.17552602436323367,
"grad_norm": 0.419915109872818,
"learning_rate": 9.828166643083716e-06,
"loss": 0.4612,
"step": 317
},
{
"epoch": 0.1760797342192691,
"grad_norm": 0.6224991679191589,
"learning_rate": 9.825645234984347e-06,
"loss": 0.5013,
"step": 318
},
{
"epoch": 0.17663344407530454,
"grad_norm": 0.5434386134147644,
"learning_rate": 9.823105790550925e-06,
"loss": 0.4927,
"step": 319
},
{
"epoch": 0.17718715393134,
"grad_norm": 0.5309631824493408,
"learning_rate": 9.820548319274875e-06,
"loss": 0.522,
"step": 320
},
{
"epoch": 0.1777408637873754,
"grad_norm": 0.6311752200126648,
"learning_rate": 9.817972830715003e-06,
"loss": 0.5171,
"step": 321
},
{
"epoch": 0.17829457364341086,
"grad_norm": 0.5074132084846497,
"learning_rate": 9.81537933449746e-06,
"loss": 0.4971,
"step": 322
},
{
"epoch": 0.1788482834994463,
"grad_norm": 0.5580747723579407,
"learning_rate": 9.812767840315693e-06,
"loss": 0.4926,
"step": 323
},
{
"epoch": 0.17940199335548174,
"grad_norm": 0.6106091737747192,
"learning_rate": 9.81013835793043e-06,
"loss": 0.5026,
"step": 324
},
{
"epoch": 0.17995570321151716,
"grad_norm": 0.5311607122421265,
"learning_rate": 9.807490897169622e-06,
"loss": 0.4811,
"step": 325
},
{
"epoch": 0.1805094130675526,
"grad_norm": 0.5105768442153931,
"learning_rate": 9.804825467928423e-06,
"loss": 0.4779,
"step": 326
},
{
"epoch": 0.18106312292358803,
"grad_norm": 0.558866024017334,
"learning_rate": 9.802142080169138e-06,
"loss": 0.5181,
"step": 327
},
{
"epoch": 0.18161683277962348,
"grad_norm": 0.5689678192138672,
"learning_rate": 9.799440743921203e-06,
"loss": 0.4731,
"step": 328
},
{
"epoch": 0.1821705426356589,
"grad_norm": 0.4810315668582916,
"learning_rate": 9.79672146928113e-06,
"loss": 0.4948,
"step": 329
},
{
"epoch": 0.18272425249169436,
"grad_norm": 0.5226458311080933,
"learning_rate": 9.793984266412481e-06,
"loss": 0.4862,
"step": 330
},
{
"epoch": 0.18327796234772978,
"grad_norm": 0.4900760054588318,
"learning_rate": 9.791229145545832e-06,
"loss": 0.4869,
"step": 331
},
{
"epoch": 0.18383167220376523,
"grad_norm": 0.5214505195617676,
"learning_rate": 9.78845611697872e-06,
"loss": 0.4987,
"step": 332
},
{
"epoch": 0.18438538205980065,
"grad_norm": 0.46621766686439514,
"learning_rate": 9.785665191075618e-06,
"loss": 0.4897,
"step": 333
},
{
"epoch": 0.1849390919158361,
"grad_norm": 0.45763033628463745,
"learning_rate": 9.782856378267893e-06,
"loss": 0.5123,
"step": 334
},
{
"epoch": 0.18549280177187155,
"grad_norm": 0.4653575122356415,
"learning_rate": 9.780029689053765e-06,
"loss": 0.5129,
"step": 335
},
{
"epoch": 0.18604651162790697,
"grad_norm": 0.43498504161834717,
"learning_rate": 9.777185133998268e-06,
"loss": 0.4994,
"step": 336
},
{
"epoch": 0.18660022148394242,
"grad_norm": 0.5097606182098389,
"learning_rate": 9.774322723733216e-06,
"loss": 0.4974,
"step": 337
},
{
"epoch": 0.18715393133997785,
"grad_norm": 0.4725209176540375,
"learning_rate": 9.771442468957151e-06,
"loss": 0.473,
"step": 338
},
{
"epoch": 0.1877076411960133,
"grad_norm": 0.4831293821334839,
"learning_rate": 9.768544380435316e-06,
"loss": 0.5038,
"step": 339
},
{
"epoch": 0.18826135105204872,
"grad_norm": 0.5691421031951904,
"learning_rate": 9.765628468999609e-06,
"loss": 0.493,
"step": 340
},
{
"epoch": 0.18881506090808417,
"grad_norm": 0.4951483905315399,
"learning_rate": 9.76269474554854e-06,
"loss": 0.5177,
"step": 341
},
{
"epoch": 0.1893687707641196,
"grad_norm": 0.5355369448661804,
"learning_rate": 9.759743221047198e-06,
"loss": 0.4999,
"step": 342
},
{
"epoch": 0.18992248062015504,
"grad_norm": 0.5432518124580383,
"learning_rate": 9.756773906527199e-06,
"loss": 0.4963,
"step": 343
},
{
"epoch": 0.19047619047619047,
"grad_norm": 0.5756740570068359,
"learning_rate": 9.753786813086658e-06,
"loss": 0.5119,
"step": 344
},
{
"epoch": 0.19102990033222592,
"grad_norm": 0.6138593554496765,
"learning_rate": 9.750781951890135e-06,
"loss": 0.5123,
"step": 345
},
{
"epoch": 0.19158361018826134,
"grad_norm": 0.4841378331184387,
"learning_rate": 9.747759334168602e-06,
"loss": 0.5032,
"step": 346
},
{
"epoch": 0.1921373200442968,
"grad_norm": 0.5776627063751221,
"learning_rate": 9.744718971219395e-06,
"loss": 0.5259,
"step": 347
},
{
"epoch": 0.19269102990033224,
"grad_norm": 0.4886176884174347,
"learning_rate": 9.741660874406176e-06,
"loss": 0.4962,
"step": 348
},
{
"epoch": 0.19324473975636766,
"grad_norm": 0.6149806380271912,
"learning_rate": 9.738585055158891e-06,
"loss": 0.498,
"step": 349
},
{
"epoch": 0.1937984496124031,
"grad_norm": 0.43883392214775085,
"learning_rate": 9.735491524973723e-06,
"loss": 0.4827,
"step": 350
},
{
"epoch": 0.19435215946843853,
"grad_norm": 0.5252010226249695,
"learning_rate": 9.73238029541305e-06,
"loss": 0.4993,
"step": 351
},
{
"epoch": 0.19490586932447398,
"grad_norm": 0.5176405906677246,
"learning_rate": 9.729251378105407e-06,
"loss": 0.4976,
"step": 352
},
{
"epoch": 0.1954595791805094,
"grad_norm": 0.5629587769508362,
"learning_rate": 9.726104784745437e-06,
"loss": 0.5145,
"step": 353
},
{
"epoch": 0.19601328903654486,
"grad_norm": 0.5694234371185303,
"learning_rate": 9.722940527093847e-06,
"loss": 0.516,
"step": 354
},
{
"epoch": 0.19656699889258028,
"grad_norm": 0.48244956135749817,
"learning_rate": 9.719758616977372e-06,
"loss": 0.4862,
"step": 355
},
{
"epoch": 0.19712070874861573,
"grad_norm": 0.5657898187637329,
"learning_rate": 9.716559066288716e-06,
"loss": 0.4904,
"step": 356
},
{
"epoch": 0.19767441860465115,
"grad_norm": 0.5076687335968018,
"learning_rate": 9.713341886986525e-06,
"loss": 0.5049,
"step": 357
},
{
"epoch": 0.1982281284606866,
"grad_norm": 0.5623184442520142,
"learning_rate": 9.710107091095328e-06,
"loss": 0.4778,
"step": 358
},
{
"epoch": 0.19878183831672203,
"grad_norm": 0.5495237708091736,
"learning_rate": 9.706854690705501e-06,
"loss": 0.5049,
"step": 359
},
{
"epoch": 0.19933554817275748,
"grad_norm": 0.5105648040771484,
"learning_rate": 9.703584697973213e-06,
"loss": 0.4879,
"step": 360
},
{
"epoch": 0.1998892580287929,
"grad_norm": 0.46286460757255554,
"learning_rate": 9.7002971251204e-06,
"loss": 0.4927,
"step": 361
},
{
"epoch": 0.20044296788482835,
"grad_norm": 0.48685401678085327,
"learning_rate": 9.696991984434686e-06,
"loss": 0.51,
"step": 362
},
{
"epoch": 0.2009966777408638,
"grad_norm": 0.4311748743057251,
"learning_rate": 9.693669288269371e-06,
"loss": 0.5145,
"step": 363
},
{
"epoch": 0.20155038759689922,
"grad_norm": 0.4911429286003113,
"learning_rate": 9.690329049043366e-06,
"loss": 0.514,
"step": 364
},
{
"epoch": 0.20210409745293467,
"grad_norm": 0.4419792890548706,
"learning_rate": 9.686971279241151e-06,
"loss": 0.5039,
"step": 365
},
{
"epoch": 0.2026578073089701,
"grad_norm": 0.5263059735298157,
"learning_rate": 9.683595991412725e-06,
"loss": 0.4971,
"step": 366
},
{
"epoch": 0.20321151716500555,
"grad_norm": 0.40632978081703186,
"learning_rate": 9.68020319817357e-06,
"loss": 0.4705,
"step": 367
},
{
"epoch": 0.20376522702104097,
"grad_norm": 0.4889412820339203,
"learning_rate": 9.676792912204589e-06,
"loss": 0.4831,
"step": 368
},
{
"epoch": 0.20431893687707642,
"grad_norm": 0.44732460379600525,
"learning_rate": 9.673365146252067e-06,
"loss": 0.5058,
"step": 369
},
{
"epoch": 0.20487264673311184,
"grad_norm": 0.43679168820381165,
"learning_rate": 9.669919913127628e-06,
"loss": 0.4924,
"step": 370
},
{
"epoch": 0.2054263565891473,
"grad_norm": 0.4535861909389496,
"learning_rate": 9.666457225708175e-06,
"loss": 0.4978,
"step": 371
},
{
"epoch": 0.2059800664451827,
"grad_norm": 0.4649087190628052,
"learning_rate": 9.662977096935849e-06,
"loss": 0.5243,
"step": 372
},
{
"epoch": 0.20653377630121816,
"grad_norm": 0.4369707405567169,
"learning_rate": 9.659479539817982e-06,
"loss": 0.4962,
"step": 373
},
{
"epoch": 0.2070874861572536,
"grad_norm": 0.45305919647216797,
"learning_rate": 9.655964567427046e-06,
"loss": 0.4802,
"step": 374
},
{
"epoch": 0.20764119601328904,
"grad_norm": 0.47477301955223083,
"learning_rate": 9.652432192900602e-06,
"loss": 0.5024,
"step": 375
},
{
"epoch": 0.2081949058693245,
"grad_norm": 0.420511394739151,
"learning_rate": 9.648882429441258e-06,
"loss": 0.4961,
"step": 376
},
{
"epoch": 0.2087486157253599,
"grad_norm": 0.46836888790130615,
"learning_rate": 9.645315290316607e-06,
"loss": 0.4913,
"step": 377
},
{
"epoch": 0.20930232558139536,
"grad_norm": 0.48647841811180115,
"learning_rate": 9.641730788859194e-06,
"loss": 0.4992,
"step": 378
},
{
"epoch": 0.20985603543743078,
"grad_norm": 0.48303425312042236,
"learning_rate": 9.63812893846645e-06,
"loss": 0.4811,
"step": 379
},
{
"epoch": 0.21040974529346623,
"grad_norm": 0.5188234448432922,
"learning_rate": 9.634509752600658e-06,
"loss": 0.4814,
"step": 380
},
{
"epoch": 0.21096345514950166,
"grad_norm": 0.5916765928268433,
"learning_rate": 9.630873244788884e-06,
"loss": 0.4971,
"step": 381
},
{
"epoch": 0.2115171650055371,
"grad_norm": 0.47264301776885986,
"learning_rate": 9.627219428622942e-06,
"loss": 0.4978,
"step": 382
},
{
"epoch": 0.21207087486157253,
"grad_norm": 0.5251865386962891,
"learning_rate": 9.62354831775934e-06,
"loss": 0.5099,
"step": 383
},
{
"epoch": 0.21262458471760798,
"grad_norm": 0.5563480854034424,
"learning_rate": 9.61985992591922e-06,
"loss": 0.5136,
"step": 384
},
{
"epoch": 0.2131782945736434,
"grad_norm": 0.490176796913147,
"learning_rate": 9.61615426688832e-06,
"loss": 0.4963,
"step": 385
},
{
"epoch": 0.21373200442967885,
"grad_norm": 0.46513262391090393,
"learning_rate": 9.612431354516912e-06,
"loss": 0.4945,
"step": 386
},
{
"epoch": 0.21428571428571427,
"grad_norm": 0.4832211136817932,
"learning_rate": 9.608691202719755e-06,
"loss": 0.4794,
"step": 387
},
{
"epoch": 0.21483942414174972,
"grad_norm": 0.4905065894126892,
"learning_rate": 9.604933825476044e-06,
"loss": 0.5094,
"step": 388
},
{
"epoch": 0.21539313399778517,
"grad_norm": 0.45242419838905334,
"learning_rate": 9.601159236829353e-06,
"loss": 0.4969,
"step": 389
},
{
"epoch": 0.2159468438538206,
"grad_norm": 0.5123769640922546,
"learning_rate": 9.597367450887585e-06,
"loss": 0.489,
"step": 390
},
{
"epoch": 0.21650055370985605,
"grad_norm": 0.43457096815109253,
"learning_rate": 9.593558481822923e-06,
"loss": 0.5024,
"step": 391
},
{
"epoch": 0.21705426356589147,
"grad_norm": 0.4841693937778473,
"learning_rate": 9.58973234387177e-06,
"loss": 0.5089,
"step": 392
},
{
"epoch": 0.21760797342192692,
"grad_norm": 0.4351038336753845,
"learning_rate": 9.585889051334702e-06,
"loss": 0.5071,
"step": 393
},
{
"epoch": 0.21816168327796234,
"grad_norm": 0.46998244524002075,
"learning_rate": 9.58202861857641e-06,
"loss": 0.513,
"step": 394
},
{
"epoch": 0.2187153931339978,
"grad_norm": 0.46563470363616943,
"learning_rate": 9.578151060025654e-06,
"loss": 0.4747,
"step": 395
},
{
"epoch": 0.21926910299003322,
"grad_norm": 0.475676566362381,
"learning_rate": 9.574256390175192e-06,
"loss": 0.498,
"step": 396
},
{
"epoch": 0.21982281284606867,
"grad_norm": 0.49711453914642334,
"learning_rate": 9.570344623581748e-06,
"loss": 0.4831,
"step": 397
},
{
"epoch": 0.2203765227021041,
"grad_norm": 0.48777928948402405,
"learning_rate": 9.566415774865943e-06,
"loss": 0.4876,
"step": 398
},
{
"epoch": 0.22093023255813954,
"grad_norm": 0.4752987325191498,
"learning_rate": 9.562469858712243e-06,
"loss": 0.4664,
"step": 399
},
{
"epoch": 0.22148394241417496,
"grad_norm": 0.5489518046379089,
"learning_rate": 9.558506889868906e-06,
"loss": 0.4946,
"step": 400
},
{
"epoch": 0.2220376522702104,
"grad_norm": 0.5207077264785767,
"learning_rate": 9.554526883147926e-06,
"loss": 0.4835,
"step": 401
},
{
"epoch": 0.22259136212624583,
"grad_norm": 0.6723493933677673,
"learning_rate": 9.550529853424979e-06,
"loss": 0.4743,
"step": 402
},
{
"epoch": 0.22314507198228128,
"grad_norm": 0.44945645332336426,
"learning_rate": 9.546515815639365e-06,
"loss": 0.4774,
"step": 403
},
{
"epoch": 0.22369878183831673,
"grad_norm": 0.49513328075408936,
"learning_rate": 9.542484784793954e-06,
"loss": 0.5039,
"step": 404
},
{
"epoch": 0.22425249169435216,
"grad_norm": 0.5728231072425842,
"learning_rate": 9.538436775955128e-06,
"loss": 0.5207,
"step": 405
},
{
"epoch": 0.2248062015503876,
"grad_norm": 0.48615574836730957,
"learning_rate": 9.534371804252727e-06,
"loss": 0.4949,
"step": 406
},
{
"epoch": 0.22535991140642303,
"grad_norm": 0.4927036166191101,
"learning_rate": 9.530289884879993e-06,
"loss": 0.4657,
"step": 407
},
{
"epoch": 0.22591362126245848,
"grad_norm": 0.5298251509666443,
"learning_rate": 9.526191033093509e-06,
"loss": 0.4885,
"step": 408
},
{
"epoch": 0.2264673311184939,
"grad_norm": 0.49086251854896545,
"learning_rate": 9.522075264213144e-06,
"loss": 0.4696,
"step": 409
},
{
"epoch": 0.22702104097452935,
"grad_norm": 0.5596606135368347,
"learning_rate": 9.517942593621998e-06,
"loss": 0.534,
"step": 410
},
{
"epoch": 0.22757475083056478,
"grad_norm": 0.5208204388618469,
"learning_rate": 9.513793036766345e-06,
"loss": 0.4666,
"step": 411
},
{
"epoch": 0.22812846068660023,
"grad_norm": 0.49649903178215027,
"learning_rate": 9.50962660915557e-06,
"loss": 0.4396,
"step": 412
},
{
"epoch": 0.22868217054263565,
"grad_norm": 0.6174575090408325,
"learning_rate": 9.505443326362113e-06,
"loss": 0.5177,
"step": 413
},
{
"epoch": 0.2292358803986711,
"grad_norm": 0.4835375249385834,
"learning_rate": 9.501243204021415e-06,
"loss": 0.4791,
"step": 414
},
{
"epoch": 0.22978959025470652,
"grad_norm": 0.509118378162384,
"learning_rate": 9.497026257831856e-06,
"loss": 0.4943,
"step": 415
},
{
"epoch": 0.23034330011074197,
"grad_norm": 0.5107400417327881,
"learning_rate": 9.492792503554695e-06,
"loss": 0.4875,
"step": 416
},
{
"epoch": 0.23089700996677742,
"grad_norm": 0.45292192697525024,
"learning_rate": 9.488541957014017e-06,
"loss": 0.5137,
"step": 417
},
{
"epoch": 0.23145071982281284,
"grad_norm": 0.5446093678474426,
"learning_rate": 9.484274634096663e-06,
"loss": 0.4871,
"step": 418
},
{
"epoch": 0.2320044296788483,
"grad_norm": 0.4633888304233551,
"learning_rate": 9.479990550752184e-06,
"loss": 0.4926,
"step": 419
},
{
"epoch": 0.23255813953488372,
"grad_norm": 0.455936461687088,
"learning_rate": 9.47568972299277e-06,
"loss": 0.4705,
"step": 420
},
{
"epoch": 0.23311184939091917,
"grad_norm": 0.4749588072299957,
"learning_rate": 9.4713721668932e-06,
"loss": 0.4988,
"step": 421
},
{
"epoch": 0.2336655592469546,
"grad_norm": 0.4403315782546997,
"learning_rate": 9.46703789859077e-06,
"loss": 0.4785,
"step": 422
},
{
"epoch": 0.23421926910299004,
"grad_norm": 0.4634638726711273,
"learning_rate": 9.462686934285245e-06,
"loss": 0.5134,
"step": 423
},
{
"epoch": 0.23477297895902546,
"grad_norm": 0.47634756565093994,
"learning_rate": 9.458319290238793e-06,
"loss": 0.4953,
"step": 424
},
{
"epoch": 0.2353266888150609,
"grad_norm": 0.4883749186992645,
"learning_rate": 9.45393498277592e-06,
"loss": 0.5086,
"step": 425
},
{
"epoch": 0.23588039867109634,
"grad_norm": 0.4272972345352173,
"learning_rate": 9.44953402828342e-06,
"loss": 0.4852,
"step": 426
},
{
"epoch": 0.2364341085271318,
"grad_norm": 0.45259276032447815,
"learning_rate": 9.445116443210299e-06,
"loss": 0.4689,
"step": 427
},
{
"epoch": 0.2369878183831672,
"grad_norm": 0.48061129450798035,
"learning_rate": 9.440682244067724e-06,
"loss": 0.5048,
"step": 428
},
{
"epoch": 0.23754152823920266,
"grad_norm": 0.4182446300983429,
"learning_rate": 9.436231447428964e-06,
"loss": 0.5004,
"step": 429
},
{
"epoch": 0.23809523809523808,
"grad_norm": 0.4484826624393463,
"learning_rate": 9.431764069929314e-06,
"loss": 0.4719,
"step": 430
},
{
"epoch": 0.23864894795127353,
"grad_norm": 0.4638371467590332,
"learning_rate": 9.427280128266049e-06,
"loss": 0.5041,
"step": 431
},
{
"epoch": 0.23920265780730898,
"grad_norm": 0.4376615881919861,
"learning_rate": 9.422779639198353e-06,
"loss": 0.4749,
"step": 432
},
{
"epoch": 0.2397563676633444,
"grad_norm": 0.426476389169693,
"learning_rate": 9.418262619547255e-06,
"loss": 0.4676,
"step": 433
},
{
"epoch": 0.24031007751937986,
"grad_norm": 0.46875908970832825,
"learning_rate": 9.41372908619557e-06,
"loss": 0.4663,
"step": 434
},
{
"epoch": 0.24086378737541528,
"grad_norm": 0.40445950627326965,
"learning_rate": 9.409179056087836e-06,
"loss": 0.5113,
"step": 435
},
{
"epoch": 0.24141749723145073,
"grad_norm": 0.44816190004348755,
"learning_rate": 9.404612546230244e-06,
"loss": 0.4909,
"step": 436
},
{
"epoch": 0.24197120708748615,
"grad_norm": 0.4637437164783478,
"learning_rate": 9.400029573690586e-06,
"loss": 0.5023,
"step": 437
},
{
"epoch": 0.2425249169435216,
"grad_norm": 0.43382760882377625,
"learning_rate": 9.395430155598182e-06,
"loss": 0.4956,
"step": 438
},
{
"epoch": 0.24307862679955702,
"grad_norm": 0.4382372796535492,
"learning_rate": 9.39081430914382e-06,
"loss": 0.4634,
"step": 439
},
{
"epoch": 0.24363233665559247,
"grad_norm": 0.40292054414749146,
"learning_rate": 9.38618205157969e-06,
"loss": 0.48,
"step": 440
},
{
"epoch": 0.2441860465116279,
"grad_norm": 0.5044105648994446,
"learning_rate": 9.381533400219319e-06,
"loss": 0.4981,
"step": 441
},
{
"epoch": 0.24473975636766335,
"grad_norm": 0.4372206926345825,
"learning_rate": 9.37686837243751e-06,
"loss": 0.4665,
"step": 442
},
{
"epoch": 0.24529346622369877,
"grad_norm": 0.5021594166755676,
"learning_rate": 9.37218698567027e-06,
"loss": 0.5061,
"step": 443
},
{
"epoch": 0.24584717607973422,
"grad_norm": 0.45043492317199707,
"learning_rate": 9.367489257414759e-06,
"loss": 0.4939,
"step": 444
},
{
"epoch": 0.24640088593576967,
"grad_norm": 0.4383167028427124,
"learning_rate": 9.362775205229201e-06,
"loss": 0.458,
"step": 445
},
{
"epoch": 0.2469545957918051,
"grad_norm": 0.4411497712135315,
"learning_rate": 9.358044846732848e-06,
"loss": 0.4899,
"step": 446
},
{
"epoch": 0.24750830564784054,
"grad_norm": 0.5189668536186218,
"learning_rate": 9.353298199605882e-06,
"loss": 0.4853,
"step": 447
},
{
"epoch": 0.24806201550387597,
"grad_norm": 0.530316174030304,
"learning_rate": 9.348535281589379e-06,
"loss": 0.4822,
"step": 448
},
{
"epoch": 0.24861572535991142,
"grad_norm": 0.48705223202705383,
"learning_rate": 9.34375611048522e-06,
"loss": 0.4794,
"step": 449
},
{
"epoch": 0.24916943521594684,
"grad_norm": 0.5219366550445557,
"learning_rate": 9.338960704156042e-06,
"loss": 0.4532,
"step": 450
},
{
"epoch": 0.2497231450719823,
"grad_norm": 0.4505665898323059,
"learning_rate": 9.334149080525154e-06,
"loss": 0.5145,
"step": 451
},
{
"epoch": 0.2502768549280177,
"grad_norm": 0.4620222747325897,
"learning_rate": 9.329321257576487e-06,
"loss": 0.4703,
"step": 452
},
{
"epoch": 0.25083056478405313,
"grad_norm": 0.44555017352104187,
"learning_rate": 9.32447725335451e-06,
"loss": 0.5018,
"step": 453
},
{
"epoch": 0.2513842746400886,
"grad_norm": 0.4511682689189911,
"learning_rate": 9.319617085964177e-06,
"loss": 0.4998,
"step": 454
},
{
"epoch": 0.25193798449612403,
"grad_norm": 0.4824512004852295,
"learning_rate": 9.314740773570854e-06,
"loss": 0.4958,
"step": 455
},
{
"epoch": 0.25249169435215946,
"grad_norm": 0.42782482504844666,
"learning_rate": 9.309848334400247e-06,
"loss": 0.4762,
"step": 456
},
{
"epoch": 0.2530454042081949,
"grad_norm": 0.5117877125740051,
"learning_rate": 9.30493978673834e-06,
"loss": 0.4906,
"step": 457
},
{
"epoch": 0.25359911406423036,
"grad_norm": 0.48038557171821594,
"learning_rate": 9.300015148931321e-06,
"loss": 0.4771,
"step": 458
},
{
"epoch": 0.2541528239202658,
"grad_norm": 0.4039292335510254,
"learning_rate": 9.295074439385521e-06,
"loss": 0.4614,
"step": 459
},
{
"epoch": 0.2547065337763012,
"grad_norm": 0.44665515422821045,
"learning_rate": 9.290117676567339e-06,
"loss": 0.4749,
"step": 460
},
{
"epoch": 0.2552602436323367,
"grad_norm": 0.44022291898727417,
"learning_rate": 9.285144879003173e-06,
"loss": 0.4935,
"step": 461
},
{
"epoch": 0.2558139534883721,
"grad_norm": 0.4289686381816864,
"learning_rate": 9.280156065279353e-06,
"loss": 0.4812,
"step": 462
},
{
"epoch": 0.2563676633444075,
"grad_norm": 0.5331130027770996,
"learning_rate": 9.275151254042072e-06,
"loss": 0.4951,
"step": 463
},
{
"epoch": 0.25692137320044295,
"grad_norm": 0.43817734718322754,
"learning_rate": 9.270130463997317e-06,
"loss": 0.4943,
"step": 464
},
{
"epoch": 0.2574750830564784,
"grad_norm": 0.5689101815223694,
"learning_rate": 9.265093713910792e-06,
"loss": 0.4549,
"step": 465
},
{
"epoch": 0.25802879291251385,
"grad_norm": 0.48502880334854126,
"learning_rate": 9.26004102260786e-06,
"loss": 0.4844,
"step": 466
},
{
"epoch": 0.25858250276854927,
"grad_norm": 0.5620638728141785,
"learning_rate": 9.25497240897346e-06,
"loss": 0.4903,
"step": 467
},
{
"epoch": 0.2591362126245847,
"grad_norm": 0.5323494076728821,
"learning_rate": 9.249887891952047e-06,
"loss": 0.4995,
"step": 468
},
{
"epoch": 0.2596899224806202,
"grad_norm": 0.4310426414012909,
"learning_rate": 9.244787490547513e-06,
"loss": 0.4763,
"step": 469
},
{
"epoch": 0.2602436323366556,
"grad_norm": 0.5547283291816711,
"learning_rate": 9.23967122382312e-06,
"loss": 0.4784,
"step": 470
},
{
"epoch": 0.260797342192691,
"grad_norm": 0.6222092509269714,
"learning_rate": 9.23453911090143e-06,
"loss": 0.4986,
"step": 471
},
{
"epoch": 0.26135105204872644,
"grad_norm": 0.495560884475708,
"learning_rate": 9.229391170964233e-06,
"loss": 0.4729,
"step": 472
},
{
"epoch": 0.2619047619047619,
"grad_norm": 0.5985239744186401,
"learning_rate": 9.224227423252468e-06,
"loss": 0.4979,
"step": 473
},
{
"epoch": 0.26245847176079734,
"grad_norm": 0.4933107793331146,
"learning_rate": 9.219047887066163e-06,
"loss": 0.4934,
"step": 474
},
{
"epoch": 0.26301218161683276,
"grad_norm": 0.46847131848335266,
"learning_rate": 9.213852581764358e-06,
"loss": 0.4931,
"step": 475
},
{
"epoch": 0.26356589147286824,
"grad_norm": 0.584668755531311,
"learning_rate": 9.208641526765024e-06,
"loss": 0.5173,
"step": 476
},
{
"epoch": 0.26411960132890366,
"grad_norm": 0.48668819665908813,
"learning_rate": 9.203414741545003e-06,
"loss": 0.4867,
"step": 477
},
{
"epoch": 0.2646733111849391,
"grad_norm": 0.47595974802970886,
"learning_rate": 9.198172245639932e-06,
"loss": 0.4921,
"step": 478
},
{
"epoch": 0.2652270210409745,
"grad_norm": 0.4644550681114197,
"learning_rate": 9.192914058644167e-06,
"loss": 0.4832,
"step": 479
},
{
"epoch": 0.26578073089701,
"grad_norm": 0.46028774976730347,
"learning_rate": 9.18764020021071e-06,
"loss": 0.4826,
"step": 480
},
{
"epoch": 0.2663344407530454,
"grad_norm": 0.4817364811897278,
"learning_rate": 9.182350690051134e-06,
"loss": 0.4847,
"step": 481
},
{
"epoch": 0.26688815060908083,
"grad_norm": 0.4588821232318878,
"learning_rate": 9.177045547935519e-06,
"loss": 0.4507,
"step": 482
},
{
"epoch": 0.26744186046511625,
"grad_norm": 0.49955952167510986,
"learning_rate": 9.171724793692363e-06,
"loss": 0.4958,
"step": 483
},
{
"epoch": 0.26799557032115173,
"grad_norm": 0.46924176812171936,
"learning_rate": 9.166388447208524e-06,
"loss": 0.4977,
"step": 484
},
{
"epoch": 0.26854928017718716,
"grad_norm": 0.44414854049682617,
"learning_rate": 9.161036528429128e-06,
"loss": 0.4989,
"step": 485
},
{
"epoch": 0.2691029900332226,
"grad_norm": 0.4286213219165802,
"learning_rate": 9.155669057357515e-06,
"loss": 0.4721,
"step": 486
},
{
"epoch": 0.26965669988925806,
"grad_norm": 0.47305360436439514,
"learning_rate": 9.150286054055143e-06,
"loss": 0.4812,
"step": 487
},
{
"epoch": 0.2702104097452935,
"grad_norm": 0.49933749437332153,
"learning_rate": 9.144887538641532e-06,
"loss": 0.4762,
"step": 488
},
{
"epoch": 0.2707641196013289,
"grad_norm": 0.4837988018989563,
"learning_rate": 9.139473531294171e-06,
"loss": 0.4857,
"step": 489
},
{
"epoch": 0.2713178294573643,
"grad_norm": 0.5129116177558899,
"learning_rate": 9.13404405224846e-06,
"loss": 0.4954,
"step": 490
},
{
"epoch": 0.2718715393133998,
"grad_norm": 0.46823567152023315,
"learning_rate": 9.12859912179762e-06,
"loss": 0.4532,
"step": 491
},
{
"epoch": 0.2724252491694352,
"grad_norm": 0.5194388628005981,
"learning_rate": 9.123138760292631e-06,
"loss": 0.4809,
"step": 492
},
{
"epoch": 0.27297895902547065,
"grad_norm": 0.52385413646698,
"learning_rate": 9.117662988142138e-06,
"loss": 0.476,
"step": 493
},
{
"epoch": 0.27353266888150607,
"grad_norm": 0.47071465849876404,
"learning_rate": 9.11217182581239e-06,
"loss": 0.498,
"step": 494
},
{
"epoch": 0.27408637873754155,
"grad_norm": 0.44149866700172424,
"learning_rate": 9.106665293827162e-06,
"loss": 0.489,
"step": 495
},
{
"epoch": 0.27464008859357697,
"grad_norm": 0.526095986366272,
"learning_rate": 9.101143412767665e-06,
"loss": 0.498,
"step": 496
},
{
"epoch": 0.2751937984496124,
"grad_norm": 0.5003870725631714,
"learning_rate": 9.09560620327249e-06,
"loss": 0.5074,
"step": 497
},
{
"epoch": 0.2757475083056478,
"grad_norm": 0.4868812561035156,
"learning_rate": 9.09005368603751e-06,
"loss": 0.518,
"step": 498
},
{
"epoch": 0.2763012181616833,
"grad_norm": 0.46059319376945496,
"learning_rate": 9.084485881815818e-06,
"loss": 0.489,
"step": 499
},
{
"epoch": 0.2768549280177187,
"grad_norm": 0.4523199200630188,
"learning_rate": 9.07890281141764e-06,
"loss": 0.4737,
"step": 500
},
{
"epoch": 0.27740863787375414,
"grad_norm": 0.5640616416931152,
"learning_rate": 9.073304495710267e-06,
"loss": 0.4811,
"step": 501
},
{
"epoch": 0.2779623477297896,
"grad_norm": 0.4549531042575836,
"learning_rate": 9.067690955617962e-06,
"loss": 0.4714,
"step": 502
},
{
"epoch": 0.27851605758582504,
"grad_norm": 0.49987301230430603,
"learning_rate": 9.062062212121897e-06,
"loss": 0.4923,
"step": 503
},
{
"epoch": 0.27906976744186046,
"grad_norm": 0.5010316967964172,
"learning_rate": 9.056418286260066e-06,
"loss": 0.4672,
"step": 504
},
{
"epoch": 0.2796234772978959,
"grad_norm": 0.47485628724098206,
"learning_rate": 9.050759199127211e-06,
"loss": 0.4889,
"step": 505
},
{
"epoch": 0.28017718715393136,
"grad_norm": 0.4733990430831909,
"learning_rate": 9.045084971874738e-06,
"loss": 0.5024,
"step": 506
},
{
"epoch": 0.2807308970099668,
"grad_norm": 0.4807707965373993,
"learning_rate": 9.039395625710641e-06,
"loss": 0.4899,
"step": 507
},
{
"epoch": 0.2812846068660022,
"grad_norm": 0.49026361107826233,
"learning_rate": 9.033691181899422e-06,
"loss": 0.4824,
"step": 508
},
{
"epoch": 0.28183831672203763,
"grad_norm": 0.46360817551612854,
"learning_rate": 9.027971661762016e-06,
"loss": 0.478,
"step": 509
},
{
"epoch": 0.2823920265780731,
"grad_norm": 0.47777828574180603,
"learning_rate": 9.022237086675702e-06,
"loss": 0.5157,
"step": 510
},
{
"epoch": 0.28294573643410853,
"grad_norm": 0.4797463119029999,
"learning_rate": 9.016487478074032e-06,
"loss": 0.4767,
"step": 511
},
{
"epoch": 0.28349944629014395,
"grad_norm": 0.5193693041801453,
"learning_rate": 9.010722857446745e-06,
"loss": 0.4857,
"step": 512
},
{
"epoch": 0.2840531561461794,
"grad_norm": 0.454267293214798,
"learning_rate": 9.00494324633969e-06,
"loss": 0.5032,
"step": 513
},
{
"epoch": 0.28460686600221485,
"grad_norm": 0.4507046341896057,
"learning_rate": 8.999148666354746e-06,
"loss": 0.4872,
"step": 514
},
{
"epoch": 0.2851605758582503,
"grad_norm": 0.5065588355064392,
"learning_rate": 8.993339139149737e-06,
"loss": 0.4834,
"step": 515
},
{
"epoch": 0.2857142857142857,
"grad_norm": 0.5654738545417786,
"learning_rate": 8.987514686438353e-06,
"loss": 0.496,
"step": 516
},
{
"epoch": 0.2862679955703212,
"grad_norm": 0.4761645197868347,
"learning_rate": 8.981675329990074e-06,
"loss": 0.4953,
"step": 517
},
{
"epoch": 0.2868217054263566,
"grad_norm": 0.5840123295783997,
"learning_rate": 8.975821091630082e-06,
"loss": 0.5041,
"step": 518
},
{
"epoch": 0.287375415282392,
"grad_norm": 0.47698304057121277,
"learning_rate": 8.969951993239177e-06,
"loss": 0.4965,
"step": 519
},
{
"epoch": 0.28792912513842744,
"grad_norm": 0.5020713806152344,
"learning_rate": 8.964068056753708e-06,
"loss": 0.5008,
"step": 520
},
{
"epoch": 0.2884828349944629,
"grad_norm": 0.5571221113204956,
"learning_rate": 8.95816930416548e-06,
"loss": 0.4879,
"step": 521
},
{
"epoch": 0.28903654485049834,
"grad_norm": 0.4395380914211273,
"learning_rate": 8.952255757521669e-06,
"loss": 0.5181,
"step": 522
},
{
"epoch": 0.28959025470653377,
"grad_norm": 0.5196503400802612,
"learning_rate": 8.946327438924755e-06,
"loss": 0.4755,
"step": 523
},
{
"epoch": 0.2901439645625692,
"grad_norm": 0.46694231033325195,
"learning_rate": 8.94038437053242e-06,
"loss": 0.4723,
"step": 524
},
{
"epoch": 0.29069767441860467,
"grad_norm": 0.47375500202178955,
"learning_rate": 8.934426574557483e-06,
"loss": 0.4697,
"step": 525
},
{
"epoch": 0.2912513842746401,
"grad_norm": 0.40896084904670715,
"learning_rate": 8.928454073267801e-06,
"loss": 0.4844,
"step": 526
},
{
"epoch": 0.2918050941306755,
"grad_norm": 0.5264450907707214,
"learning_rate": 8.922466888986203e-06,
"loss": 0.4682,
"step": 527
},
{
"epoch": 0.292358803986711,
"grad_norm": 0.5325335264205933,
"learning_rate": 8.916465044090389e-06,
"loss": 0.4813,
"step": 528
},
{
"epoch": 0.2929125138427464,
"grad_norm": 0.4511902630329132,
"learning_rate": 8.910448561012859e-06,
"loss": 0.489,
"step": 529
},
{
"epoch": 0.29346622369878184,
"grad_norm": 0.5324682593345642,
"learning_rate": 8.90441746224082e-06,
"loss": 0.5032,
"step": 530
},
{
"epoch": 0.29401993355481726,
"grad_norm": 0.4793945848941803,
"learning_rate": 8.898371770316113e-06,
"loss": 0.4649,
"step": 531
},
{
"epoch": 0.29457364341085274,
"grad_norm": 0.419558048248291,
"learning_rate": 8.892311507835118e-06,
"loss": 0.5066,
"step": 532
},
{
"epoch": 0.29512735326688816,
"grad_norm": 0.443803995847702,
"learning_rate": 8.886236697448675e-06,
"loss": 0.4939,
"step": 533
},
{
"epoch": 0.2956810631229236,
"grad_norm": 0.4384360909461975,
"learning_rate": 8.880147361862e-06,
"loss": 0.4853,
"step": 534
},
{
"epoch": 0.296234772978959,
"grad_norm": 0.4051086902618408,
"learning_rate": 8.874043523834593e-06,
"loss": 0.4765,
"step": 535
},
{
"epoch": 0.2967884828349945,
"grad_norm": 0.4231143295764923,
"learning_rate": 8.867925206180166e-06,
"loss": 0.4914,
"step": 536
},
{
"epoch": 0.2973421926910299,
"grad_norm": 0.4417089521884918,
"learning_rate": 8.861792431766544e-06,
"loss": 0.5051,
"step": 537
},
{
"epoch": 0.2978959025470653,
"grad_norm": 0.4100184440612793,
"learning_rate": 8.85564522351559e-06,
"loss": 0.4933,
"step": 538
},
{
"epoch": 0.29844961240310075,
"grad_norm": 0.4534027576446533,
"learning_rate": 8.849483604403108e-06,
"loss": 0.4824,
"step": 539
},
{
"epoch": 0.29900332225913623,
"grad_norm": 0.47366371750831604,
"learning_rate": 8.843307597458775e-06,
"loss": 0.4855,
"step": 540
},
{
"epoch": 0.29955703211517165,
"grad_norm": 0.5023186206817627,
"learning_rate": 8.837117225766033e-06,
"loss": 0.5054,
"step": 541
},
{
"epoch": 0.3001107419712071,
"grad_norm": 0.42479321360588074,
"learning_rate": 8.830912512462018e-06,
"loss": 0.4959,
"step": 542
},
{
"epoch": 0.30066445182724255,
"grad_norm": 0.5450031161308289,
"learning_rate": 8.824693480737472e-06,
"loss": 0.4906,
"step": 543
},
{
"epoch": 0.301218161683278,
"grad_norm": 0.416326642036438,
"learning_rate": 8.818460153836653e-06,
"loss": 0.504,
"step": 544
},
{
"epoch": 0.3017718715393134,
"grad_norm": 0.4415566623210907,
"learning_rate": 8.81221255505724e-06,
"loss": 0.5013,
"step": 545
},
{
"epoch": 0.3023255813953488,
"grad_norm": 0.405342698097229,
"learning_rate": 8.805950707750268e-06,
"loss": 0.4886,
"step": 546
},
{
"epoch": 0.3028792912513843,
"grad_norm": 0.43602555990219116,
"learning_rate": 8.799674635320017e-06,
"loss": 0.4953,
"step": 547
},
{
"epoch": 0.3034330011074197,
"grad_norm": 0.4456259608268738,
"learning_rate": 8.79338436122394e-06,
"loss": 0.4985,
"step": 548
},
{
"epoch": 0.30398671096345514,
"grad_norm": 0.40702182054519653,
"learning_rate": 8.787079908972567e-06,
"loss": 0.4674,
"step": 549
},
{
"epoch": 0.30454042081949056,
"grad_norm": 0.5078481435775757,
"learning_rate": 8.780761302129424e-06,
"loss": 0.506,
"step": 550
},
{
"epoch": 0.30509413067552604,
"grad_norm": 0.4316215217113495,
"learning_rate": 8.774428564310939e-06,
"loss": 0.4771,
"step": 551
},
{
"epoch": 0.30564784053156147,
"grad_norm": 0.40345603227615356,
"learning_rate": 8.768081719186354e-06,
"loss": 0.5055,
"step": 552
},
{
"epoch": 0.3062015503875969,
"grad_norm": 0.5201309323310852,
"learning_rate": 8.76172079047764e-06,
"loss": 0.5051,
"step": 553
},
{
"epoch": 0.3067552602436323,
"grad_norm": 0.4317896366119385,
"learning_rate": 8.755345801959412e-06,
"loss": 0.4869,
"step": 554
},
{
"epoch": 0.3073089700996678,
"grad_norm": 0.41571563482284546,
"learning_rate": 8.748956777458828e-06,
"loss": 0.4908,
"step": 555
},
{
"epoch": 0.3078626799557032,
"grad_norm": 0.46132898330688477,
"learning_rate": 8.742553740855507e-06,
"loss": 0.4869,
"step": 556
},
{
"epoch": 0.30841638981173863,
"grad_norm": 0.4685657322406769,
"learning_rate": 8.736136716081443e-06,
"loss": 0.4896,
"step": 557
},
{
"epoch": 0.3089700996677741,
"grad_norm": 0.43309783935546875,
"learning_rate": 8.729705727120911e-06,
"loss": 0.4873,
"step": 558
},
{
"epoch": 0.30952380952380953,
"grad_norm": 0.4360080063343048,
"learning_rate": 8.72326079801038e-06,
"loss": 0.4486,
"step": 559
},
{
"epoch": 0.31007751937984496,
"grad_norm": 0.4630446434020996,
"learning_rate": 8.71680195283842e-06,
"loss": 0.4756,
"step": 560
},
{
"epoch": 0.3106312292358804,
"grad_norm": 0.441049724817276,
"learning_rate": 8.710329215745612e-06,
"loss": 0.4804,
"step": 561
},
{
"epoch": 0.31118493909191586,
"grad_norm": 0.4433397650718689,
"learning_rate": 8.703842610924463e-06,
"loss": 0.4681,
"step": 562
},
{
"epoch": 0.3117386489479513,
"grad_norm": 0.4608594477176666,
"learning_rate": 8.697342162619308e-06,
"loss": 0.5161,
"step": 563
},
{
"epoch": 0.3122923588039867,
"grad_norm": 0.4613628089427948,
"learning_rate": 8.69082789512623e-06,
"loss": 0.4891,
"step": 564
},
{
"epoch": 0.3128460686600221,
"grad_norm": 0.41381844878196716,
"learning_rate": 8.684299832792958e-06,
"loss": 0.4929,
"step": 565
},
{
"epoch": 0.3133997785160576,
"grad_norm": 0.46331459283828735,
"learning_rate": 8.677758000018777e-06,
"loss": 0.4786,
"step": 566
},
{
"epoch": 0.313953488372093,
"grad_norm": 0.44575268030166626,
"learning_rate": 8.671202421254448e-06,
"loss": 0.5057,
"step": 567
},
{
"epoch": 0.31450719822812845,
"grad_norm": 0.4823036193847656,
"learning_rate": 8.664633121002103e-06,
"loss": 0.4785,
"step": 568
},
{
"epoch": 0.31506090808416387,
"grad_norm": 0.43519729375839233,
"learning_rate": 8.658050123815166e-06,
"loss": 0.4987,
"step": 569
},
{
"epoch": 0.31561461794019935,
"grad_norm": 0.47774675488471985,
"learning_rate": 8.651453454298244e-06,
"loss": 0.4667,
"step": 570
},
{
"epoch": 0.31616832779623477,
"grad_norm": 0.5016129612922668,
"learning_rate": 8.644843137107058e-06,
"loss": 0.4776,
"step": 571
},
{
"epoch": 0.3167220376522702,
"grad_norm": 0.40757277607917786,
"learning_rate": 8.638219196948332e-06,
"loss": 0.4707,
"step": 572
},
{
"epoch": 0.31727574750830567,
"grad_norm": 0.4288807213306427,
"learning_rate": 8.631581658579706e-06,
"loss": 0.4859,
"step": 573
},
{
"epoch": 0.3178294573643411,
"grad_norm": 0.4194124639034271,
"learning_rate": 8.624930546809649e-06,
"loss": 0.4499,
"step": 574
},
{
"epoch": 0.3183831672203765,
"grad_norm": 0.45525485277175903,
"learning_rate": 8.618265886497357e-06,
"loss": 0.4783,
"step": 575
},
{
"epoch": 0.31893687707641194,
"grad_norm": 0.435594379901886,
"learning_rate": 8.61158770255267e-06,
"loss": 0.481,
"step": 576
},
{
"epoch": 0.3194905869324474,
"grad_norm": 0.4703901410102844,
"learning_rate": 8.604896019935971e-06,
"loss": 0.4903,
"step": 577
},
{
"epoch": 0.32004429678848284,
"grad_norm": 0.40026766061782837,
"learning_rate": 8.598190863658096e-06,
"loss": 0.4995,
"step": 578
},
{
"epoch": 0.32059800664451826,
"grad_norm": 0.4388001561164856,
"learning_rate": 8.591472258780242e-06,
"loss": 0.4662,
"step": 579
},
{
"epoch": 0.3211517165005537,
"grad_norm": 0.49517446756362915,
"learning_rate": 8.584740230413867e-06,
"loss": 0.4639,
"step": 580
},
{
"epoch": 0.32170542635658916,
"grad_norm": 0.3800186216831207,
"learning_rate": 8.577994803720605e-06,
"loss": 0.4756,
"step": 581
},
{
"epoch": 0.3222591362126246,
"grad_norm": 0.5055624842643738,
"learning_rate": 8.57123600391217e-06,
"loss": 0.4823,
"step": 582
},
{
"epoch": 0.32281284606866,
"grad_norm": 0.4609836935997009,
"learning_rate": 8.56446385625025e-06,
"loss": 0.4909,
"step": 583
},
{
"epoch": 0.3233665559246955,
"grad_norm": 0.49489548802375793,
"learning_rate": 8.557678386046429e-06,
"loss": 0.4568,
"step": 584
},
{
"epoch": 0.3239202657807309,
"grad_norm": 0.47447633743286133,
"learning_rate": 8.550879618662083e-06,
"loss": 0.488,
"step": 585
},
{
"epoch": 0.32447397563676633,
"grad_norm": 0.44722503423690796,
"learning_rate": 8.544067579508292e-06,
"loss": 0.4867,
"step": 586
},
{
"epoch": 0.32502768549280175,
"grad_norm": 0.5135213136672974,
"learning_rate": 8.537242294045733e-06,
"loss": 0.4976,
"step": 587
},
{
"epoch": 0.32558139534883723,
"grad_norm": 0.4393743574619293,
"learning_rate": 8.5304037877846e-06,
"loss": 0.472,
"step": 588
},
{
"epoch": 0.32613510520487266,
"grad_norm": 0.47811412811279297,
"learning_rate": 8.523552086284495e-06,
"loss": 0.486,
"step": 589
},
{
"epoch": 0.3266888150609081,
"grad_norm": 0.4685250520706177,
"learning_rate": 8.516687215154341e-06,
"loss": 0.4721,
"step": 590
},
{
"epoch": 0.3272425249169435,
"grad_norm": 0.4332244098186493,
"learning_rate": 8.509809200052286e-06,
"loss": 0.4712,
"step": 591
},
{
"epoch": 0.327796234772979,
"grad_norm": 0.5710234045982361,
"learning_rate": 8.5029180666856e-06,
"loss": 0.5037,
"step": 592
},
{
"epoch": 0.3283499446290144,
"grad_norm": 0.48631003499031067,
"learning_rate": 8.496013840810586e-06,
"loss": 0.4998,
"step": 593
},
{
"epoch": 0.3289036544850498,
"grad_norm": 0.5860962271690369,
"learning_rate": 8.489096548232485e-06,
"loss": 0.5009,
"step": 594
},
{
"epoch": 0.32945736434108525,
"grad_norm": 0.4546487033367157,
"learning_rate": 8.482166214805374e-06,
"loss": 0.4571,
"step": 595
},
{
"epoch": 0.3300110741971207,
"grad_norm": 0.5256265997886658,
"learning_rate": 8.475222866432065e-06,
"loss": 0.4758,
"step": 596
},
{
"epoch": 0.33056478405315615,
"grad_norm": 0.4427199959754944,
"learning_rate": 8.468266529064025e-06,
"loss": 0.4751,
"step": 597
},
{
"epoch": 0.33111849390919157,
"grad_norm": 0.51978600025177,
"learning_rate": 8.461297228701264e-06,
"loss": 0.4886,
"step": 598
},
{
"epoch": 0.33167220376522705,
"grad_norm": 0.438180148601532,
"learning_rate": 8.45431499139224e-06,
"loss": 0.4514,
"step": 599
},
{
"epoch": 0.33222591362126247,
"grad_norm": 0.38737887144088745,
"learning_rate": 8.44731984323377e-06,
"loss": 0.4647,
"step": 600
},
{
"epoch": 0.3327796234772979,
"grad_norm": 0.42485183477401733,
"learning_rate": 8.440311810370921e-06,
"loss": 0.4648,
"step": 601
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.44904154539108276,
"learning_rate": 8.433290918996921e-06,
"loss": 0.5129,
"step": 602
},
{
"epoch": 0.3338870431893688,
"grad_norm": 0.43713030219078064,
"learning_rate": 8.426257195353055e-06,
"loss": 0.4506,
"step": 603
},
{
"epoch": 0.3344407530454042,
"grad_norm": 0.40078917145729065,
"learning_rate": 8.419210665728577e-06,
"loss": 0.4887,
"step": 604
},
{
"epoch": 0.33499446290143964,
"grad_norm": 0.45686566829681396,
"learning_rate": 8.412151356460593e-06,
"loss": 0.46,
"step": 605
},
{
"epoch": 0.33554817275747506,
"grad_norm": 0.4962942600250244,
"learning_rate": 8.405079293933986e-06,
"loss": 0.4685,
"step": 606
},
{
"epoch": 0.33610188261351054,
"grad_norm": 0.38770344853401184,
"learning_rate": 8.3979945045813e-06,
"loss": 0.4919,
"step": 607
},
{
"epoch": 0.33665559246954596,
"grad_norm": 0.5221530795097351,
"learning_rate": 8.390897014882645e-06,
"loss": 0.4757,
"step": 608
},
{
"epoch": 0.3372093023255814,
"grad_norm": 0.44620612263679504,
"learning_rate": 8.383786851365601e-06,
"loss": 0.489,
"step": 609
},
{
"epoch": 0.3377630121816168,
"grad_norm": 0.47988009452819824,
"learning_rate": 8.376664040605122e-06,
"loss": 0.474,
"step": 610
},
{
"epoch": 0.3383167220376523,
"grad_norm": 0.42253127694129944,
"learning_rate": 8.36952860922343e-06,
"loss": 0.4723,
"step": 611
},
{
"epoch": 0.3388704318936877,
"grad_norm": 0.48353639245033264,
"learning_rate": 8.362380583889912e-06,
"loss": 0.4513,
"step": 612
},
{
"epoch": 0.33942414174972313,
"grad_norm": 0.46105682849884033,
"learning_rate": 8.355219991321035e-06,
"loss": 0.5074,
"step": 613
},
{
"epoch": 0.3399778516057586,
"grad_norm": 0.4596821367740631,
"learning_rate": 8.348046858280233e-06,
"loss": 0.505,
"step": 614
},
{
"epoch": 0.34053156146179403,
"grad_norm": 0.3729395270347595,
"learning_rate": 8.34086121157781e-06,
"loss": 0.4834,
"step": 615
},
{
"epoch": 0.34108527131782945,
"grad_norm": 0.511202871799469,
"learning_rate": 8.333663078070845e-06,
"loss": 0.4521,
"step": 616
},
{
"epoch": 0.3416389811738649,
"grad_norm": 0.4841696321964264,
"learning_rate": 8.326452484663083e-06,
"loss": 0.4761,
"step": 617
},
{
"epoch": 0.34219269102990035,
"grad_norm": 0.3986373543739319,
"learning_rate": 8.319229458304843e-06,
"loss": 0.4655,
"step": 618
},
{
"epoch": 0.3427464008859358,
"grad_norm": 0.5737797021865845,
"learning_rate": 8.311994025992912e-06,
"loss": 0.5066,
"step": 619
},
{
"epoch": 0.3433001107419712,
"grad_norm": 0.44409626722335815,
"learning_rate": 8.304746214770445e-06,
"loss": 0.4903,
"step": 620
},
{
"epoch": 0.3438538205980066,
"grad_norm": 0.49516579508781433,
"learning_rate": 8.297486051726864e-06,
"loss": 0.4537,
"step": 621
},
{
"epoch": 0.3444075304540421,
"grad_norm": 0.498826801776886,
"learning_rate": 8.290213563997758e-06,
"loss": 0.4901,
"step": 622
},
{
"epoch": 0.3449612403100775,
"grad_norm": 0.5155895948410034,
"learning_rate": 8.282928778764783e-06,
"loss": 0.456,
"step": 623
},
{
"epoch": 0.34551495016611294,
"grad_norm": 0.4448562264442444,
"learning_rate": 8.275631723255556e-06,
"loss": 0.4912,
"step": 624
},
{
"epoch": 0.34606866002214837,
"grad_norm": 0.4876919984817505,
"learning_rate": 8.268322424743552e-06,
"loss": 0.4935,
"step": 625
},
{
"epoch": 0.34662236987818384,
"grad_norm": 0.46334221959114075,
"learning_rate": 8.26100091054801e-06,
"loss": 0.4767,
"step": 626
},
{
"epoch": 0.34717607973421927,
"grad_norm": 0.4294886291027069,
"learning_rate": 8.253667208033828e-06,
"loss": 0.4636,
"step": 627
},
{
"epoch": 0.3477297895902547,
"grad_norm": 0.5191714763641357,
"learning_rate": 8.246321344611455e-06,
"loss": 0.482,
"step": 628
},
{
"epoch": 0.34828349944629017,
"grad_norm": 0.5100964903831482,
"learning_rate": 8.23896334773679e-06,
"loss": 0.4838,
"step": 629
},
{
"epoch": 0.3488372093023256,
"grad_norm": 0.4342910349369049,
"learning_rate": 8.23159324491109e-06,
"loss": 0.5089,
"step": 630
},
{
"epoch": 0.349390919158361,
"grad_norm": 0.44776540994644165,
"learning_rate": 8.224211063680854e-06,
"loss": 0.479,
"step": 631
},
{
"epoch": 0.34994462901439644,
"grad_norm": 0.47771504521369934,
"learning_rate": 8.216816831637726e-06,
"loss": 0.4875,
"step": 632
},
{
"epoch": 0.3504983388704319,
"grad_norm": 0.44938650727272034,
"learning_rate": 8.209410576418391e-06,
"loss": 0.4789,
"step": 633
},
{
"epoch": 0.35105204872646734,
"grad_norm": 0.4639085531234741,
"learning_rate": 8.201992325704473e-06,
"loss": 0.4756,
"step": 634
},
{
"epoch": 0.35160575858250276,
"grad_norm": 0.47028908133506775,
"learning_rate": 8.19456210722243e-06,
"loss": 0.4708,
"step": 635
},
{
"epoch": 0.3521594684385382,
"grad_norm": 0.44380685687065125,
"learning_rate": 8.18711994874345e-06,
"loss": 0.4676,
"step": 636
},
{
"epoch": 0.35271317829457366,
"grad_norm": 0.4652746021747589,
"learning_rate": 8.179665878083347e-06,
"loss": 0.4828,
"step": 637
},
{
"epoch": 0.3532668881506091,
"grad_norm": 0.4208925664424896,
"learning_rate": 8.172199923102459e-06,
"loss": 0.4877,
"step": 638
},
{
"epoch": 0.3538205980066445,
"grad_norm": 0.5734950304031372,
"learning_rate": 8.164722111705545e-06,
"loss": 0.5077,
"step": 639
},
{
"epoch": 0.35437430786268,
"grad_norm": 0.42535200715065,
"learning_rate": 8.157232471841676e-06,
"loss": 0.4602,
"step": 640
},
{
"epoch": 0.3549280177187154,
"grad_norm": 0.4676467180252075,
"learning_rate": 8.149731031504136e-06,
"loss": 0.4651,
"step": 641
},
{
"epoch": 0.3554817275747508,
"grad_norm": 0.4505887031555176,
"learning_rate": 8.142217818730307e-06,
"loss": 0.4801,
"step": 642
},
{
"epoch": 0.35603543743078625,
"grad_norm": 0.4688456952571869,
"learning_rate": 8.13469286160158e-06,
"loss": 0.4717,
"step": 643
},
{
"epoch": 0.35658914728682173,
"grad_norm": 0.4681268334388733,
"learning_rate": 8.127156188243239e-06,
"loss": 0.4702,
"step": 644
},
{
"epoch": 0.35714285714285715,
"grad_norm": 0.3870069086551666,
"learning_rate": 8.119607826824356e-06,
"loss": 0.4632,
"step": 645
},
{
"epoch": 0.3576965669988926,
"grad_norm": 0.5866634249687195,
"learning_rate": 8.112047805557693e-06,
"loss": 0.4879,
"step": 646
},
{
"epoch": 0.358250276854928,
"grad_norm": 0.4573242664337158,
"learning_rate": 8.104476152699587e-06,
"loss": 0.5011,
"step": 647
},
{
"epoch": 0.3588039867109635,
"grad_norm": 0.4478485584259033,
"learning_rate": 8.096892896549853e-06,
"loss": 0.4865,
"step": 648
},
{
"epoch": 0.3593576965669989,
"grad_norm": 0.4456096291542053,
"learning_rate": 8.089298065451673e-06,
"loss": 0.4943,
"step": 649
},
{
"epoch": 0.3599114064230343,
"grad_norm": 0.4899655878543854,
"learning_rate": 8.081691687791491e-06,
"loss": 0.4712,
"step": 650
},
{
"epoch": 0.36046511627906974,
"grad_norm": 0.4818911850452423,
"learning_rate": 8.074073791998907e-06,
"loss": 0.4808,
"step": 651
},
{
"epoch": 0.3610188261351052,
"grad_norm": 0.4100266695022583,
"learning_rate": 8.066444406546573e-06,
"loss": 0.4944,
"step": 652
},
{
"epoch": 0.36157253599114064,
"grad_norm": 0.43672144412994385,
"learning_rate": 8.058803559950086e-06,
"loss": 0.4906,
"step": 653
},
{
"epoch": 0.36212624584717606,
"grad_norm": 0.45784133672714233,
"learning_rate": 8.051151280767874e-06,
"loss": 0.467,
"step": 654
},
{
"epoch": 0.36267995570321154,
"grad_norm": 0.5059024691581726,
"learning_rate": 8.043487597601104e-06,
"loss": 0.482,
"step": 655
},
{
"epoch": 0.36323366555924697,
"grad_norm": 0.391807496547699,
"learning_rate": 8.035812539093557e-06,
"loss": 0.4749,
"step": 656
},
{
"epoch": 0.3637873754152824,
"grad_norm": 0.49046629667282104,
"learning_rate": 8.02812613393154e-06,
"loss": 0.5014,
"step": 657
},
{
"epoch": 0.3643410852713178,
"grad_norm": 0.509397029876709,
"learning_rate": 8.020428410843762e-06,
"loss": 0.4889,
"step": 658
},
{
"epoch": 0.3648947951273533,
"grad_norm": 0.46450507640838623,
"learning_rate": 8.012719398601239e-06,
"loss": 0.49,
"step": 659
},
{
"epoch": 0.3654485049833887,
"grad_norm": 0.46102917194366455,
"learning_rate": 8.004999126017177e-06,
"loss": 0.4549,
"step": 660
},
{
"epoch": 0.36600221483942413,
"grad_norm": 0.5289508104324341,
"learning_rate": 7.997267621946871e-06,
"loss": 0.4809,
"step": 661
},
{
"epoch": 0.36655592469545956,
"grad_norm": 0.4355222284793854,
"learning_rate": 7.989524915287595e-06,
"loss": 0.4654,
"step": 662
},
{
"epoch": 0.36710963455149503,
"grad_norm": 0.44085797667503357,
"learning_rate": 7.981771034978494e-06,
"loss": 0.5027,
"step": 663
},
{
"epoch": 0.36766334440753046,
"grad_norm": 0.41785871982574463,
"learning_rate": 7.974006010000474e-06,
"loss": 0.4424,
"step": 664
},
{
"epoch": 0.3682170542635659,
"grad_norm": 0.46425795555114746,
"learning_rate": 7.966229869376097e-06,
"loss": 0.4901,
"step": 665
},
{
"epoch": 0.3687707641196013,
"grad_norm": 0.40710482001304626,
"learning_rate": 7.958442642169469e-06,
"loss": 0.4632,
"step": 666
},
{
"epoch": 0.3693244739756368,
"grad_norm": 0.39024198055267334,
"learning_rate": 7.950644357486134e-06,
"loss": 0.4792,
"step": 667
},
{
"epoch": 0.3698781838316722,
"grad_norm": 0.40468570590019226,
"learning_rate": 7.942835044472965e-06,
"loss": 0.4895,
"step": 668
},
{
"epoch": 0.3704318936877076,
"grad_norm": 0.41393882036209106,
"learning_rate": 7.935014732318057e-06,
"loss": 0.4763,
"step": 669
},
{
"epoch": 0.3709856035437431,
"grad_norm": 0.44807004928588867,
"learning_rate": 7.92718345025061e-06,
"loss": 0.4789,
"step": 670
},
{
"epoch": 0.3715393133997785,
"grad_norm": 0.4278004467487335,
"learning_rate": 7.919341227540828e-06,
"loss": 0.4627,
"step": 671
},
{
"epoch": 0.37209302325581395,
"grad_norm": 0.40687111020088196,
"learning_rate": 7.911488093499806e-06,
"loss": 0.4686,
"step": 672
},
{
"epoch": 0.37264673311184937,
"grad_norm": 0.4299582540988922,
"learning_rate": 7.903624077479424e-06,
"loss": 0.4911,
"step": 673
},
{
"epoch": 0.37320044296788485,
"grad_norm": 0.40537703037261963,
"learning_rate": 7.895749208872232e-06,
"loss": 0.4676,
"step": 674
},
{
"epoch": 0.37375415282392027,
"grad_norm": 0.3911261260509491,
"learning_rate": 7.887863517111337e-06,
"loss": 0.4721,
"step": 675
},
{
"epoch": 0.3743078626799557,
"grad_norm": 0.4901961088180542,
"learning_rate": 7.879967031670313e-06,
"loss": 0.5181,
"step": 676
},
{
"epoch": 0.3748615725359911,
"grad_norm": 0.4302717447280884,
"learning_rate": 7.872059782063064e-06,
"loss": 0.4837,
"step": 677
},
{
"epoch": 0.3754152823920266,
"grad_norm": 0.3713219165802002,
"learning_rate": 7.86414179784373e-06,
"loss": 0.4703,
"step": 678
},
{
"epoch": 0.375968992248062,
"grad_norm": 0.47798439860343933,
"learning_rate": 7.856213108606571e-06,
"loss": 0.459,
"step": 679
},
{
"epoch": 0.37652270210409744,
"grad_norm": 0.45034360885620117,
"learning_rate": 7.848273743985863e-06,
"loss": 0.4803,
"step": 680
},
{
"epoch": 0.3770764119601329,
"grad_norm": 0.45023995637893677,
"learning_rate": 7.84032373365578e-06,
"loss": 0.4545,
"step": 681
},
{
"epoch": 0.37763012181616834,
"grad_norm": 0.4966435432434082,
"learning_rate": 7.832363107330281e-06,
"loss": 0.5031,
"step": 682
},
{
"epoch": 0.37818383167220376,
"grad_norm": 0.6042452454566956,
"learning_rate": 7.824391894763008e-06,
"loss": 0.4875,
"step": 683
},
{
"epoch": 0.3787375415282392,
"grad_norm": 0.4374452531337738,
"learning_rate": 7.816410125747172e-06,
"loss": 0.4841,
"step": 684
},
{
"epoch": 0.37929125138427466,
"grad_norm": 0.47047990560531616,
"learning_rate": 7.808417830115432e-06,
"loss": 0.4634,
"step": 685
},
{
"epoch": 0.3798449612403101,
"grad_norm": 0.5158135890960693,
"learning_rate": 7.800415037739802e-06,
"loss": 0.4796,
"step": 686
},
{
"epoch": 0.3803986710963455,
"grad_norm": 0.48473745584487915,
"learning_rate": 7.792401778531517e-06,
"loss": 0.4689,
"step": 687
},
{
"epoch": 0.38095238095238093,
"grad_norm": 0.4797394871711731,
"learning_rate": 7.78437808244094e-06,
"loss": 0.4863,
"step": 688
},
{
"epoch": 0.3815060908084164,
"grad_norm": 0.4698885977268219,
"learning_rate": 7.776343979457446e-06,
"loss": 0.4736,
"step": 689
},
{
"epoch": 0.38205980066445183,
"grad_norm": 0.5596975088119507,
"learning_rate": 7.768299499609296e-06,
"loss": 0.4818,
"step": 690
},
{
"epoch": 0.38261351052048725,
"grad_norm": 0.4723421037197113,
"learning_rate": 7.760244672963548e-06,
"loss": 0.4832,
"step": 691
},
{
"epoch": 0.3831672203765227,
"grad_norm": 0.4778919816017151,
"learning_rate": 7.752179529625922e-06,
"loss": 0.487,
"step": 692
},
{
"epoch": 0.38372093023255816,
"grad_norm": 0.46932482719421387,
"learning_rate": 7.744104099740703e-06,
"loss": 0.4793,
"step": 693
},
{
"epoch": 0.3842746400885936,
"grad_norm": 0.4695429801940918,
"learning_rate": 7.736018413490622e-06,
"loss": 0.4824,
"step": 694
},
{
"epoch": 0.384828349944629,
"grad_norm": 0.5957847833633423,
"learning_rate": 7.727922501096743e-06,
"loss": 0.4762,
"step": 695
},
{
"epoch": 0.3853820598006645,
"grad_norm": 0.4242111146450043,
"learning_rate": 7.719816392818354e-06,
"loss": 0.476,
"step": 696
},
{
"epoch": 0.3859357696566999,
"grad_norm": 0.5669819116592407,
"learning_rate": 7.711700118952848e-06,
"loss": 0.4537,
"step": 697
},
{
"epoch": 0.3864894795127353,
"grad_norm": 0.4524780809879303,
"learning_rate": 7.703573709835614e-06,
"loss": 0.4592,
"step": 698
},
{
"epoch": 0.38704318936877075,
"grad_norm": 0.44671958684921265,
"learning_rate": 7.695437195839925e-06,
"loss": 0.472,
"step": 699
},
{
"epoch": 0.3875968992248062,
"grad_norm": 0.5542345643043518,
"learning_rate": 7.687290607376816e-06,
"loss": 0.4881,
"step": 700
},
{
"epoch": 0.38815060908084165,
"grad_norm": 0.4053798317909241,
"learning_rate": 7.679133974894984e-06,
"loss": 0.4556,
"step": 701
},
{
"epoch": 0.38870431893687707,
"grad_norm": 0.4634324014186859,
"learning_rate": 7.67096732888066e-06,
"loss": 0.4926,
"step": 702
},
{
"epoch": 0.3892580287929125,
"grad_norm": 0.40314167737960815,
"learning_rate": 7.662790699857506e-06,
"loss": 0.4779,
"step": 703
},
{
"epoch": 0.38981173864894797,
"grad_norm": 0.40341395139694214,
"learning_rate": 7.654604118386494e-06,
"loss": 0.4719,
"step": 704
},
{
"epoch": 0.3903654485049834,
"grad_norm": 0.49669671058654785,
"learning_rate": 7.646407615065796e-06,
"loss": 0.4764,
"step": 705
},
{
"epoch": 0.3909191583610188,
"grad_norm": 0.38740184903144836,
"learning_rate": 7.638201220530664e-06,
"loss": 0.4801,
"step": 706
},
{
"epoch": 0.39147286821705424,
"grad_norm": 0.4930890202522278,
"learning_rate": 7.629984965453326e-06,
"loss": 0.4877,
"step": 707
},
{
"epoch": 0.3920265780730897,
"grad_norm": 0.39426112174987793,
"learning_rate": 7.621758880542859e-06,
"loss": 0.4567,
"step": 708
},
{
"epoch": 0.39258028792912514,
"grad_norm": 0.502477765083313,
"learning_rate": 7.613522996545082e-06,
"loss": 0.4924,
"step": 709
},
{
"epoch": 0.39313399778516056,
"grad_norm": 0.4422747492790222,
"learning_rate": 7.60527734424244e-06,
"loss": 0.4679,
"step": 710
},
{
"epoch": 0.39368770764119604,
"grad_norm": 0.3748304843902588,
"learning_rate": 7.597021954453887e-06,
"loss": 0.5187,
"step": 711
},
{
"epoch": 0.39424141749723146,
"grad_norm": 0.43841761350631714,
"learning_rate": 7.588756858034772e-06,
"loss": 0.4762,
"step": 712
},
{
"epoch": 0.3947951273532669,
"grad_norm": 0.4414544105529785,
"learning_rate": 7.580482085876722e-06,
"loss": 0.4796,
"step": 713
},
{
"epoch": 0.3953488372093023,
"grad_norm": 0.42060527205467224,
"learning_rate": 7.572197668907533e-06,
"loss": 0.5126,
"step": 714
},
{
"epoch": 0.3959025470653378,
"grad_norm": 0.5003881454467773,
"learning_rate": 7.563903638091042e-06,
"loss": 0.4744,
"step": 715
},
{
"epoch": 0.3964562569213732,
"grad_norm": 0.4138297736644745,
"learning_rate": 7.555600024427028e-06,
"loss": 0.4629,
"step": 716
},
{
"epoch": 0.39700996677740863,
"grad_norm": 0.5040468573570251,
"learning_rate": 7.547286858951075e-06,
"loss": 0.4491,
"step": 717
},
{
"epoch": 0.39756367663344405,
"grad_norm": 0.5510542392730713,
"learning_rate": 7.538964172734479e-06,
"loss": 0.4804,
"step": 718
},
{
"epoch": 0.39811738648947953,
"grad_norm": 0.39894410967826843,
"learning_rate": 7.530631996884117e-06,
"loss": 0.4842,
"step": 719
},
{
"epoch": 0.39867109634551495,
"grad_norm": 0.5509627461433411,
"learning_rate": 7.522290362542329e-06,
"loss": 0.4684,
"step": 720
},
{
"epoch": 0.3992248062015504,
"grad_norm": 0.4482865035533905,
"learning_rate": 7.513939300886816e-06,
"loss": 0.4619,
"step": 721
},
{
"epoch": 0.3997785160575858,
"grad_norm": 0.442939817905426,
"learning_rate": 7.505578843130508e-06,
"loss": 0.4642,
"step": 722
},
{
"epoch": 0.4003322259136213,
"grad_norm": 0.4382091760635376,
"learning_rate": 7.4972090205214564e-06,
"loss": 0.4803,
"step": 723
},
{
"epoch": 0.4008859357696567,
"grad_norm": 0.44710400700569153,
"learning_rate": 7.488829864342717e-06,
"loss": 0.4651,
"step": 724
},
{
"epoch": 0.4014396456256921,
"grad_norm": 0.4985845386981964,
"learning_rate": 7.480441405912223e-06,
"loss": 0.477,
"step": 725
},
{
"epoch": 0.4019933554817276,
"grad_norm": 0.44586989283561707,
"learning_rate": 7.472043676582685e-06,
"loss": 0.4749,
"step": 726
},
{
"epoch": 0.402547065337763,
"grad_norm": 0.5637676119804382,
"learning_rate": 7.463636707741458e-06,
"loss": 0.4686,
"step": 727
},
{
"epoch": 0.40310077519379844,
"grad_norm": 0.5102462768554688,
"learning_rate": 7.455220530810436e-06,
"loss": 0.4895,
"step": 728
},
{
"epoch": 0.40365448504983387,
"grad_norm": 0.4997349977493286,
"learning_rate": 7.446795177245923e-06,
"loss": 0.486,
"step": 729
},
{
"epoch": 0.40420819490586934,
"grad_norm": 0.5639171600341797,
"learning_rate": 7.4383606785385254e-06,
"loss": 0.4731,
"step": 730
},
{
"epoch": 0.40476190476190477,
"grad_norm": 0.448219895362854,
"learning_rate": 7.42991706621303e-06,
"loss": 0.4297,
"step": 731
},
{
"epoch": 0.4053156146179402,
"grad_norm": 0.6095700860023499,
"learning_rate": 7.4214643718282886e-06,
"loss": 0.4727,
"step": 732
},
{
"epoch": 0.4058693244739756,
"grad_norm": 0.44827568531036377,
"learning_rate": 7.413002626977092e-06,
"loss": 0.4684,
"step": 733
},
{
"epoch": 0.4064230343300111,
"grad_norm": 0.44243839383125305,
"learning_rate": 7.404531863286066e-06,
"loss": 0.4696,
"step": 734
},
{
"epoch": 0.4069767441860465,
"grad_norm": 0.53190016746521,
"learning_rate": 7.396052112415539e-06,
"loss": 0.5035,
"step": 735
},
{
"epoch": 0.40753045404208194,
"grad_norm": 0.4007076025009155,
"learning_rate": 7.387563406059433e-06,
"loss": 0.4649,
"step": 736
},
{
"epoch": 0.4080841638981174,
"grad_norm": 0.4566161036491394,
"learning_rate": 7.37906577594514e-06,
"loss": 0.4703,
"step": 737
},
{
"epoch": 0.40863787375415284,
"grad_norm": 0.4270188510417938,
"learning_rate": 7.370559253833407e-06,
"loss": 0.468,
"step": 738
},
{
"epoch": 0.40919158361018826,
"grad_norm": 0.4487841725349426,
"learning_rate": 7.362043871518216e-06,
"loss": 0.4454,
"step": 739
},
{
"epoch": 0.4097452934662237,
"grad_norm": 0.4769379496574402,
"learning_rate": 7.353519660826665e-06,
"loss": 0.4815,
"step": 740
},
{
"epoch": 0.41029900332225916,
"grad_norm": 0.4311200976371765,
"learning_rate": 7.344986653618844e-06,
"loss": 0.4951,
"step": 741
},
{
"epoch": 0.4108527131782946,
"grad_norm": 0.4666946828365326,
"learning_rate": 7.33644488178773e-06,
"loss": 0.4806,
"step": 742
},
{
"epoch": 0.41140642303433,
"grad_norm": 0.4465944170951843,
"learning_rate": 7.327894377259051e-06,
"loss": 0.4892,
"step": 743
},
{
"epoch": 0.4119601328903654,
"grad_norm": 0.3847818076610565,
"learning_rate": 7.319335171991178e-06,
"loss": 0.4801,
"step": 744
},
{
"epoch": 0.4125138427464009,
"grad_norm": 0.3697161078453064,
"learning_rate": 7.310767297975e-06,
"loss": 0.4735,
"step": 745
},
{
"epoch": 0.4130675526024363,
"grad_norm": 0.44218286871910095,
"learning_rate": 7.302190787233808e-06,
"loss": 0.4794,
"step": 746
},
{
"epoch": 0.41362126245847175,
"grad_norm": 0.40929174423217773,
"learning_rate": 7.293605671823173e-06,
"loss": 0.4502,
"step": 747
},
{
"epoch": 0.4141749723145072,
"grad_norm": 0.41390007734298706,
"learning_rate": 7.2850119838308255e-06,
"loss": 0.4673,
"step": 748
},
{
"epoch": 0.41472868217054265,
"grad_norm": 0.3917429447174072,
"learning_rate": 7.27640975537654e-06,
"loss": 0.4625,
"step": 749
},
{
"epoch": 0.4152823920265781,
"grad_norm": 0.44105830788612366,
"learning_rate": 7.267799018612008e-06,
"loss": 0.4546,
"step": 750
},
{
"epoch": 0.4158361018826135,
"grad_norm": 0.4174969494342804,
"learning_rate": 7.259179805720726e-06,
"loss": 0.4889,
"step": 751
},
{
"epoch": 0.416389811738649,
"grad_norm": 0.452042818069458,
"learning_rate": 7.250552148917865e-06,
"loss": 0.4856,
"step": 752
},
{
"epoch": 0.4169435215946844,
"grad_norm": 0.38706308603286743,
"learning_rate": 7.241916080450163e-06,
"loss": 0.5123,
"step": 753
},
{
"epoch": 0.4174972314507198,
"grad_norm": 0.39623257517814636,
"learning_rate": 7.2332716325957905e-06,
"loss": 0.4785,
"step": 754
},
{
"epoch": 0.41805094130675524,
"grad_norm": 0.4757280945777893,
"learning_rate": 7.224618837664241e-06,
"loss": 0.4895,
"step": 755
},
{
"epoch": 0.4186046511627907,
"grad_norm": 0.39697709679603577,
"learning_rate": 7.215957727996208e-06,
"loss": 0.4812,
"step": 756
},
{
"epoch": 0.41915836101882614,
"grad_norm": 0.4762042760848999,
"learning_rate": 7.207288335963456e-06,
"loss": 0.4858,
"step": 757
},
{
"epoch": 0.41971207087486156,
"grad_norm": 0.41127634048461914,
"learning_rate": 7.198610693968711e-06,
"loss": 0.4963,
"step": 758
},
{
"epoch": 0.420265780730897,
"grad_norm": 0.4493284523487091,
"learning_rate": 7.18992483444553e-06,
"loss": 0.4685,
"step": 759
},
{
"epoch": 0.42081949058693247,
"grad_norm": 0.3796096444129944,
"learning_rate": 7.181230789858186e-06,
"loss": 0.4608,
"step": 760
},
{
"epoch": 0.4213732004429679,
"grad_norm": 0.40398499369621277,
"learning_rate": 7.17252859270155e-06,
"loss": 0.4976,
"step": 761
},
{
"epoch": 0.4219269102990033,
"grad_norm": 0.4124141335487366,
"learning_rate": 7.163818275500951e-06,
"loss": 0.4507,
"step": 762
},
{
"epoch": 0.42248062015503873,
"grad_norm": 0.4746420979499817,
"learning_rate": 7.1550998708120785e-06,
"loss": 0.4811,
"step": 763
},
{
"epoch": 0.4230343300110742,
"grad_norm": 0.41351497173309326,
"learning_rate": 7.146373411220846e-06,
"loss": 0.4819,
"step": 764
},
{
"epoch": 0.42358803986710963,
"grad_norm": 0.5182345509529114,
"learning_rate": 7.137638929343274e-06,
"loss": 0.4929,
"step": 765
},
{
"epoch": 0.42414174972314506,
"grad_norm": 0.3741333782672882,
"learning_rate": 7.128896457825364e-06,
"loss": 0.4508,
"step": 766
},
{
"epoch": 0.42469545957918053,
"grad_norm": 0.48205241560935974,
"learning_rate": 7.120146029342985e-06,
"loss": 0.469,
"step": 767
},
{
"epoch": 0.42524916943521596,
"grad_norm": 0.4315168857574463,
"learning_rate": 7.11138767660174e-06,
"loss": 0.4873,
"step": 768
},
{
"epoch": 0.4258028792912514,
"grad_norm": 0.4207547903060913,
"learning_rate": 7.102621432336853e-06,
"loss": 0.4851,
"step": 769
},
{
"epoch": 0.4263565891472868,
"grad_norm": 0.4015323221683502,
"learning_rate": 7.093847329313046e-06,
"loss": 0.4694,
"step": 770
},
{
"epoch": 0.4269102990033223,
"grad_norm": 0.5271881818771362,
"learning_rate": 7.085065400324407e-06,
"loss": 0.5133,
"step": 771
},
{
"epoch": 0.4274640088593577,
"grad_norm": 0.46586957573890686,
"learning_rate": 7.07627567819428e-06,
"loss": 0.4873,
"step": 772
},
{
"epoch": 0.4280177187153931,
"grad_norm": 0.5215753316879272,
"learning_rate": 7.0674781957751346e-06,
"loss": 0.4536,
"step": 773
},
{
"epoch": 0.42857142857142855,
"grad_norm": 0.4311859905719757,
"learning_rate": 7.058672985948447e-06,
"loss": 0.4794,
"step": 774
},
{
"epoch": 0.429125138427464,
"grad_norm": 0.45404115319252014,
"learning_rate": 7.049860081624572e-06,
"loss": 0.4752,
"step": 775
},
{
"epoch": 0.42967884828349945,
"grad_norm": 0.3787217140197754,
"learning_rate": 7.041039515742626e-06,
"loss": 0.4666,
"step": 776
},
{
"epoch": 0.43023255813953487,
"grad_norm": 0.4381999373435974,
"learning_rate": 7.0322113212703594e-06,
"loss": 0.4722,
"step": 777
},
{
"epoch": 0.43078626799557035,
"grad_norm": 0.46380746364593506,
"learning_rate": 7.023375531204038e-06,
"loss": 0.4953,
"step": 778
},
{
"epoch": 0.43133997785160577,
"grad_norm": 0.42247340083122253,
"learning_rate": 7.014532178568314e-06,
"loss": 0.4864,
"step": 779
},
{
"epoch": 0.4318936877076412,
"grad_norm": 0.4111511707305908,
"learning_rate": 7.005681296416107e-06,
"loss": 0.4859,
"step": 780
},
{
"epoch": 0.4324473975636766,
"grad_norm": 0.45308950543403625,
"learning_rate": 6.9968229178284775e-06,
"loss": 0.484,
"step": 781
},
{
"epoch": 0.4330011074197121,
"grad_norm": 0.40989553928375244,
"learning_rate": 6.9879570759145085e-06,
"loss": 0.4985,
"step": 782
},
{
"epoch": 0.4335548172757475,
"grad_norm": 0.4102574288845062,
"learning_rate": 6.979083803811173e-06,
"loss": 0.4838,
"step": 783
},
{
"epoch": 0.43410852713178294,
"grad_norm": 0.41136178374290466,
"learning_rate": 6.970203134683218e-06,
"loss": 0.4483,
"step": 784
},
{
"epoch": 0.43466223698781836,
"grad_norm": 0.4319741129875183,
"learning_rate": 6.961315101723036e-06,
"loss": 0.4738,
"step": 785
},
{
"epoch": 0.43521594684385384,
"grad_norm": 0.3880046010017395,
"learning_rate": 6.952419738150546e-06,
"loss": 0.4729,
"step": 786
},
{
"epoch": 0.43576965669988926,
"grad_norm": 0.5079280734062195,
"learning_rate": 6.94351707721306e-06,
"loss": 0.4801,
"step": 787
},
{
"epoch": 0.4363233665559247,
"grad_norm": 0.44152313470840454,
"learning_rate": 6.934607152185169e-06,
"loss": 0.4843,
"step": 788
},
{
"epoch": 0.4368770764119601,
"grad_norm": 0.40796777606010437,
"learning_rate": 6.9256899963686145e-06,
"loss": 0.4654,
"step": 789
},
{
"epoch": 0.4374307862679956,
"grad_norm": 0.5313437581062317,
"learning_rate": 6.916765643092162e-06,
"loss": 0.4652,
"step": 790
},
{
"epoch": 0.437984496124031,
"grad_norm": 0.508833110332489,
"learning_rate": 6.9078341257114765e-06,
"loss": 0.4676,
"step": 791
},
{
"epoch": 0.43853820598006643,
"grad_norm": 0.41130679845809937,
"learning_rate": 6.898895477609007e-06,
"loss": 0.4833,
"step": 792
},
{
"epoch": 0.4390919158361019,
"grad_norm": 0.4427821636199951,
"learning_rate": 6.889949732193844e-06,
"loss": 0.4807,
"step": 793
},
{
"epoch": 0.43964562569213733,
"grad_norm": 0.46690595149993896,
"learning_rate": 6.880996922901613e-06,
"loss": 0.5057,
"step": 794
},
{
"epoch": 0.44019933554817275,
"grad_norm": 0.4449859857559204,
"learning_rate": 6.8720370831943385e-06,
"loss": 0.4806,
"step": 795
},
{
"epoch": 0.4407530454042082,
"grad_norm": 0.3911839425563812,
"learning_rate": 6.863070246560319e-06,
"loss": 0.4865,
"step": 796
},
{
"epoch": 0.44130675526024365,
"grad_norm": 0.39620286226272583,
"learning_rate": 6.85409644651401e-06,
"loss": 0.4897,
"step": 797
},
{
"epoch": 0.4418604651162791,
"grad_norm": 0.45482540130615234,
"learning_rate": 6.845115716595893e-06,
"loss": 0.455,
"step": 798
},
{
"epoch": 0.4424141749723145,
"grad_norm": 0.4602965712547302,
"learning_rate": 6.836128090372345e-06,
"loss": 0.4692,
"step": 799
},
{
"epoch": 0.4429678848283499,
"grad_norm": 0.3748089075088501,
"learning_rate": 6.827133601435524e-06,
"loss": 0.4638,
"step": 800
},
{
"epoch": 0.4435215946843854,
"grad_norm": 0.3914950489997864,
"learning_rate": 6.818132283403236e-06,
"loss": 0.4794,
"step": 801
},
{
"epoch": 0.4440753045404208,
"grad_norm": 0.4420660436153412,
"learning_rate": 6.80912416991881e-06,
"loss": 0.4861,
"step": 802
},
{
"epoch": 0.44462901439645625,
"grad_norm": 0.47777289152145386,
"learning_rate": 6.800109294650981e-06,
"loss": 0.5003,
"step": 803
},
{
"epoch": 0.44518272425249167,
"grad_norm": 0.49404582381248474,
"learning_rate": 6.7910876912937455e-06,
"loss": 0.4506,
"step": 804
},
{
"epoch": 0.44573643410852715,
"grad_norm": 0.40919193625450134,
"learning_rate": 6.782059393566254e-06,
"loss": 0.4529,
"step": 805
},
{
"epoch": 0.44629014396456257,
"grad_norm": 0.46079081296920776,
"learning_rate": 6.773024435212678e-06,
"loss": 0.4674,
"step": 806
},
{
"epoch": 0.446843853820598,
"grad_norm": 0.43327951431274414,
"learning_rate": 6.763982850002084e-06,
"loss": 0.4565,
"step": 807
},
{
"epoch": 0.44739756367663347,
"grad_norm": 0.4099007546901703,
"learning_rate": 6.754934671728301e-06,
"loss": 0.4714,
"step": 808
},
{
"epoch": 0.4479512735326689,
"grad_norm": 0.45909908413887024,
"learning_rate": 6.745879934209808e-06,
"loss": 0.4851,
"step": 809
},
{
"epoch": 0.4485049833887043,
"grad_norm": 0.42515650391578674,
"learning_rate": 6.736818671289596e-06,
"loss": 0.4425,
"step": 810
},
{
"epoch": 0.44905869324473974,
"grad_norm": 0.4645708501338959,
"learning_rate": 6.7277509168350445e-06,
"loss": 0.4846,
"step": 811
},
{
"epoch": 0.4496124031007752,
"grad_norm": 0.44562819600105286,
"learning_rate": 6.718676704737798e-06,
"loss": 0.4713,
"step": 812
},
{
"epoch": 0.45016611295681064,
"grad_norm": 0.4501514434814453,
"learning_rate": 6.709596068913635e-06,
"loss": 0.4569,
"step": 813
},
{
"epoch": 0.45071982281284606,
"grad_norm": 0.38734912872314453,
"learning_rate": 6.700509043302349e-06,
"loss": 0.4444,
"step": 814
},
{
"epoch": 0.4512735326688815,
"grad_norm": 0.44585832953453064,
"learning_rate": 6.6914156618676065e-06,
"loss": 0.4747,
"step": 815
},
{
"epoch": 0.45182724252491696,
"grad_norm": 0.473246306180954,
"learning_rate": 6.6823159585968355e-06,
"loss": 0.4614,
"step": 816
},
{
"epoch": 0.4523809523809524,
"grad_norm": 0.44483834505081177,
"learning_rate": 6.673209967501093e-06,
"loss": 0.4819,
"step": 817
},
{
"epoch": 0.4529346622369878,
"grad_norm": 0.4596951901912689,
"learning_rate": 6.664097722614934e-06,
"loss": 0.4717,
"step": 818
},
{
"epoch": 0.45348837209302323,
"grad_norm": 0.4508548080921173,
"learning_rate": 6.654979257996292e-06,
"loss": 0.5065,
"step": 819
},
{
"epoch": 0.4540420819490587,
"grad_norm": 0.41932225227355957,
"learning_rate": 6.645854607726343e-06,
"loss": 0.4982,
"step": 820
},
{
"epoch": 0.45459579180509413,
"grad_norm": 0.4616346061229706,
"learning_rate": 6.636723805909384e-06,
"loss": 0.4782,
"step": 821
},
{
"epoch": 0.45514950166112955,
"grad_norm": 0.41363874077796936,
"learning_rate": 6.627586886672707e-06,
"loss": 0.4667,
"step": 822
},
{
"epoch": 0.45570321151716503,
"grad_norm": 0.4332622289657593,
"learning_rate": 6.6184438841664635e-06,
"loss": 0.4681,
"step": 823
},
{
"epoch": 0.45625692137320045,
"grad_norm": 0.4488495886325836,
"learning_rate": 6.6092948325635466e-06,
"loss": 0.4591,
"step": 824
},
{
"epoch": 0.4568106312292359,
"grad_norm": 0.39039409160614014,
"learning_rate": 6.600139766059453e-06,
"loss": 0.4544,
"step": 825
},
{
"epoch": 0.4573643410852713,
"grad_norm": 0.4736301898956299,
"learning_rate": 6.590978718872166e-06,
"loss": 0.469,
"step": 826
},
{
"epoch": 0.4579180509413068,
"grad_norm": 0.4278981685638428,
"learning_rate": 6.58181172524202e-06,
"loss": 0.5094,
"step": 827
},
{
"epoch": 0.4584717607973422,
"grad_norm": 0.4077831506729126,
"learning_rate": 6.572638819431576e-06,
"loss": 0.4631,
"step": 828
},
{
"epoch": 0.4590254706533776,
"grad_norm": 0.37462881207466125,
"learning_rate": 6.563460035725489e-06,
"loss": 0.4709,
"step": 829
},
{
"epoch": 0.45957918050941304,
"grad_norm": 0.3800598978996277,
"learning_rate": 6.554275408430388e-06,
"loss": 0.4896,
"step": 830
},
{
"epoch": 0.4601328903654485,
"grad_norm": 0.4581829011440277,
"learning_rate": 6.545084971874738e-06,
"loss": 0.4839,
"step": 831
},
{
"epoch": 0.46068660022148394,
"grad_norm": 0.3906550705432892,
"learning_rate": 6.535888760408722e-06,
"loss": 0.4923,
"step": 832
},
{
"epoch": 0.46124031007751937,
"grad_norm": 0.3892533481121063,
"learning_rate": 6.526686808404101e-06,
"loss": 0.476,
"step": 833
},
{
"epoch": 0.46179401993355484,
"grad_norm": 0.38538169860839844,
"learning_rate": 6.517479150254099e-06,
"loss": 0.4492,
"step": 834
},
{
"epoch": 0.46234772978959027,
"grad_norm": 0.4162067770957947,
"learning_rate": 6.508265820373262e-06,
"loss": 0.4496,
"step": 835
},
{
"epoch": 0.4629014396456257,
"grad_norm": 0.4131470322608948,
"learning_rate": 6.499046853197338e-06,
"loss": 0.4703,
"step": 836
},
{
"epoch": 0.4634551495016611,
"grad_norm": 0.425884485244751,
"learning_rate": 6.489822283183142e-06,
"loss": 0.4682,
"step": 837
},
{
"epoch": 0.4640088593576966,
"grad_norm": 0.40070706605911255,
"learning_rate": 6.48059214480843e-06,
"loss": 0.4764,
"step": 838
},
{
"epoch": 0.464562569213732,
"grad_norm": 0.43763288855552673,
"learning_rate": 6.4713564725717736e-06,
"loss": 0.4711,
"step": 839
},
{
"epoch": 0.46511627906976744,
"grad_norm": 0.4114384651184082,
"learning_rate": 6.462115300992427e-06,
"loss": 0.4701,
"step": 840
},
{
"epoch": 0.46566998892580286,
"grad_norm": 0.44266989827156067,
"learning_rate": 6.452868664610197e-06,
"loss": 0.4989,
"step": 841
},
{
"epoch": 0.46622369878183834,
"grad_norm": 0.4306333065032959,
"learning_rate": 6.443616597985315e-06,
"loss": 0.4924,
"step": 842
},
{
"epoch": 0.46677740863787376,
"grad_norm": 0.4200533926486969,
"learning_rate": 6.434359135698311e-06,
"loss": 0.4782,
"step": 843
},
{
"epoch": 0.4673311184939092,
"grad_norm": 0.4041113257408142,
"learning_rate": 6.425096312349881e-06,
"loss": 0.4602,
"step": 844
},
{
"epoch": 0.4678848283499446,
"grad_norm": 0.46134626865386963,
"learning_rate": 6.415828162560758e-06,
"loss": 0.452,
"step": 845
},
{
"epoch": 0.4684385382059801,
"grad_norm": 0.4863552451133728,
"learning_rate": 6.406554720971583e-06,
"loss": 0.486,
"step": 846
},
{
"epoch": 0.4689922480620155,
"grad_norm": 0.4371113181114197,
"learning_rate": 6.397276022242775e-06,
"loss": 0.4846,
"step": 847
},
{
"epoch": 0.4695459579180509,
"grad_norm": 0.42974036931991577,
"learning_rate": 6.3879921010544055e-06,
"loss": 0.4689,
"step": 848
},
{
"epoch": 0.4700996677740864,
"grad_norm": 0.44053035974502563,
"learning_rate": 6.3787029921060615e-06,
"loss": 0.4669,
"step": 849
},
{
"epoch": 0.4706533776301218,
"grad_norm": 0.40121155977249146,
"learning_rate": 6.369408730116721e-06,
"loss": 0.4883,
"step": 850
},
{
"epoch": 0.47120708748615725,
"grad_norm": 0.42475467920303345,
"learning_rate": 6.3601093498246215e-06,
"loss": 0.4653,
"step": 851
},
{
"epoch": 0.4717607973421927,
"grad_norm": 0.42221271991729736,
"learning_rate": 6.350804885987133e-06,
"loss": 0.4889,
"step": 852
},
{
"epoch": 0.47231450719822815,
"grad_norm": 0.4220812916755676,
"learning_rate": 6.341495373380625e-06,
"loss": 0.4519,
"step": 853
},
{
"epoch": 0.4728682170542636,
"grad_norm": 0.42883118987083435,
"learning_rate": 6.332180846800335e-06,
"loss": 0.4625,
"step": 854
},
{
"epoch": 0.473421926910299,
"grad_norm": 0.40138551592826843,
"learning_rate": 6.322861341060241e-06,
"loss": 0.4538,
"step": 855
},
{
"epoch": 0.4739756367663344,
"grad_norm": 0.4246816635131836,
"learning_rate": 6.313536890992935e-06,
"loss": 0.4815,
"step": 856
},
{
"epoch": 0.4745293466223699,
"grad_norm": 0.39402779936790466,
"learning_rate": 6.304207531449486e-06,
"loss": 0.4705,
"step": 857
},
{
"epoch": 0.4750830564784053,
"grad_norm": 0.40957632660865784,
"learning_rate": 6.29487329729931e-06,
"loss": 0.478,
"step": 858
},
{
"epoch": 0.47563676633444074,
"grad_norm": 0.39527779817581177,
"learning_rate": 6.2855342234300475e-06,
"loss": 0.4632,
"step": 859
},
{
"epoch": 0.47619047619047616,
"grad_norm": 0.40325671434402466,
"learning_rate": 6.2761903447474285e-06,
"loss": 0.4812,
"step": 860
},
{
"epoch": 0.47674418604651164,
"grad_norm": 0.3848477900028229,
"learning_rate": 6.266841696175132e-06,
"loss": 0.4764,
"step": 861
},
{
"epoch": 0.47729789590254706,
"grad_norm": 0.36117005348205566,
"learning_rate": 6.257488312654678e-06,
"loss": 0.4682,
"step": 862
},
{
"epoch": 0.4778516057585825,
"grad_norm": 0.4402804970741272,
"learning_rate": 6.248130229145273e-06,
"loss": 0.4768,
"step": 863
},
{
"epoch": 0.47840531561461797,
"grad_norm": 0.415265291929245,
"learning_rate": 6.238767480623697e-06,
"loss": 0.4896,
"step": 864
},
{
"epoch": 0.4789590254706534,
"grad_norm": 0.35998308658599854,
"learning_rate": 6.229400102084162e-06,
"loss": 0.4617,
"step": 865
},
{
"epoch": 0.4795127353266888,
"grad_norm": 0.5202546119689941,
"learning_rate": 6.220028128538188e-06,
"loss": 0.4704,
"step": 866
},
{
"epoch": 0.48006644518272423,
"grad_norm": 0.3583022654056549,
"learning_rate": 6.210651595014468e-06,
"loss": 0.4531,
"step": 867
},
{
"epoch": 0.4806201550387597,
"grad_norm": 0.40789127349853516,
"learning_rate": 6.201270536558738e-06,
"loss": 0.471,
"step": 868
},
{
"epoch": 0.48117386489479513,
"grad_norm": 0.4366607367992401,
"learning_rate": 6.191884988233647e-06,
"loss": 0.478,
"step": 869
},
{
"epoch": 0.48172757475083056,
"grad_norm": 0.36813703179359436,
"learning_rate": 6.182494985118625e-06,
"loss": 0.459,
"step": 870
},
{
"epoch": 0.482281284606866,
"grad_norm": 0.3980870842933655,
"learning_rate": 6.173100562309751e-06,
"loss": 0.4494,
"step": 871
},
{
"epoch": 0.48283499446290146,
"grad_norm": 0.3553970456123352,
"learning_rate": 6.163701754919626e-06,
"loss": 0.4728,
"step": 872
},
{
"epoch": 0.4833887043189369,
"grad_norm": 0.38380372524261475,
"learning_rate": 6.15429859807724e-06,
"loss": 0.4675,
"step": 873
},
{
"epoch": 0.4839424141749723,
"grad_norm": 0.4252444803714752,
"learning_rate": 6.14489112692783e-06,
"loss": 0.4878,
"step": 874
},
{
"epoch": 0.4844961240310077,
"grad_norm": 0.39901813864707947,
"learning_rate": 6.1354793766327706e-06,
"loss": 0.4662,
"step": 875
},
{
"epoch": 0.4850498338870432,
"grad_norm": 0.398833304643631,
"learning_rate": 6.1260633823694224e-06,
"loss": 0.4907,
"step": 876
},
{
"epoch": 0.4856035437430786,
"grad_norm": 0.39341622591018677,
"learning_rate": 6.1166431793310095e-06,
"loss": 0.491,
"step": 877
},
{
"epoch": 0.48615725359911405,
"grad_norm": 0.3746095299720764,
"learning_rate": 6.10721880272649e-06,
"loss": 0.4732,
"step": 878
},
{
"epoch": 0.4867109634551495,
"grad_norm": 0.384216845035553,
"learning_rate": 6.097790287780417e-06,
"loss": 0.4574,
"step": 879
},
{
"epoch": 0.48726467331118495,
"grad_norm": 0.39541691541671753,
"learning_rate": 6.08835766973281e-06,
"loss": 0.4686,
"step": 880
},
{
"epoch": 0.48781838316722037,
"grad_norm": 0.4029048681259155,
"learning_rate": 6.078920983839032e-06,
"loss": 0.4843,
"step": 881
},
{
"epoch": 0.4883720930232558,
"grad_norm": 0.37557727098464966,
"learning_rate": 6.069480265369642e-06,
"loss": 0.4515,
"step": 882
},
{
"epoch": 0.48892580287929127,
"grad_norm": 0.469387948513031,
"learning_rate": 6.060035549610275e-06,
"loss": 0.4749,
"step": 883
},
{
"epoch": 0.4894795127353267,
"grad_norm": 0.4006125032901764,
"learning_rate": 6.050586871861503e-06,
"loss": 0.462,
"step": 884
},
{
"epoch": 0.4900332225913621,
"grad_norm": 0.36115720868110657,
"learning_rate": 6.041134267438713e-06,
"loss": 0.4589,
"step": 885
},
{
"epoch": 0.49058693244739754,
"grad_norm": 0.39920949935913086,
"learning_rate": 6.031677771671962e-06,
"loss": 0.4508,
"step": 886
},
{
"epoch": 0.491140642303433,
"grad_norm": 0.36050114035606384,
"learning_rate": 6.022217419905851e-06,
"loss": 0.4697,
"step": 887
},
{
"epoch": 0.49169435215946844,
"grad_norm": 0.36328694224357605,
"learning_rate": 6.0127532474993985e-06,
"loss": 0.475,
"step": 888
},
{
"epoch": 0.49224806201550386,
"grad_norm": 0.3833206593990326,
"learning_rate": 6.0032852898258996e-06,
"loss": 0.475,
"step": 889
},
{
"epoch": 0.49280177187153934,
"grad_norm": 0.41371577978134155,
"learning_rate": 5.9938135822727984e-06,
"loss": 0.4687,
"step": 890
},
{
"epoch": 0.49335548172757476,
"grad_norm": 0.3939271867275238,
"learning_rate": 5.984338160241552e-06,
"loss": 0.4724,
"step": 891
},
{
"epoch": 0.4939091915836102,
"grad_norm": 0.37482693791389465,
"learning_rate": 5.974859059147503e-06,
"loss": 0.486,
"step": 892
},
{
"epoch": 0.4944629014396456,
"grad_norm": 0.3931341767311096,
"learning_rate": 5.965376314419744e-06,
"loss": 0.4768,
"step": 893
},
{
"epoch": 0.4950166112956811,
"grad_norm": 0.40611469745635986,
"learning_rate": 5.955889961500988e-06,
"loss": 0.4655,
"step": 894
},
{
"epoch": 0.4955703211517165,
"grad_norm": 0.39005982875823975,
"learning_rate": 5.946400035847431e-06,
"loss": 0.4787,
"step": 895
},
{
"epoch": 0.49612403100775193,
"grad_norm": 0.43622326850891113,
"learning_rate": 5.936906572928625e-06,
"loss": 0.4943,
"step": 896
},
{
"epoch": 0.49667774086378735,
"grad_norm": 0.3975144624710083,
"learning_rate": 5.927409608227339e-06,
"loss": 0.4595,
"step": 897
},
{
"epoch": 0.49723145071982283,
"grad_norm": 0.3896026909351349,
"learning_rate": 5.917909177239438e-06,
"loss": 0.5006,
"step": 898
},
{
"epoch": 0.49778516057585825,
"grad_norm": 0.4361266791820526,
"learning_rate": 5.908405315473733e-06,
"loss": 0.4685,
"step": 899
},
{
"epoch": 0.4983388704318937,
"grad_norm": 0.4157734513282776,
"learning_rate": 5.898898058451865e-06,
"loss": 0.479,
"step": 900
},
{
"epoch": 0.4988925802879291,
"grad_norm": 0.4577910602092743,
"learning_rate": 5.889387441708162e-06,
"loss": 0.4809,
"step": 901
},
{
"epoch": 0.4994462901439646,
"grad_norm": 0.458420991897583,
"learning_rate": 5.8798735007895095e-06,
"loss": 0.4635,
"step": 902
},
{
"epoch": 0.5,
"grad_norm": 0.3840048015117645,
"learning_rate": 5.8703562712552195e-06,
"loss": 0.4587,
"step": 903
},
{
"epoch": 0.5005537098560354,
"grad_norm": 0.4343316853046417,
"learning_rate": 5.860835788676892e-06,
"loss": 0.5033,
"step": 904
},
{
"epoch": 0.5011074197120708,
"grad_norm": 0.4067619740962982,
"learning_rate": 5.851312088638287e-06,
"loss": 0.4762,
"step": 905
},
{
"epoch": 0.5016611295681063,
"grad_norm": 0.4486081302165985,
"learning_rate": 5.841785206735192e-06,
"loss": 0.4712,
"step": 906
},
{
"epoch": 0.5022148394241418,
"grad_norm": 0.5145954489707947,
"learning_rate": 5.832255178575288e-06,
"loss": 0.4599,
"step": 907
},
{
"epoch": 0.5027685492801772,
"grad_norm": 0.4490329921245575,
"learning_rate": 5.822722039778008e-06,
"loss": 0.4578,
"step": 908
},
{
"epoch": 0.5033222591362126,
"grad_norm": 0.39363399147987366,
"learning_rate": 5.813185825974419e-06,
"loss": 0.478,
"step": 909
},
{
"epoch": 0.5038759689922481,
"grad_norm": 0.4732396602630615,
"learning_rate": 5.803646572807078e-06,
"loss": 0.4729,
"step": 910
},
{
"epoch": 0.5044296788482835,
"grad_norm": 0.43937402963638306,
"learning_rate": 5.794104315929904e-06,
"loss": 0.4867,
"step": 911
},
{
"epoch": 0.5049833887043189,
"grad_norm": 0.38331788778305054,
"learning_rate": 5.784559091008037e-06,
"loss": 0.4994,
"step": 912
},
{
"epoch": 0.5055370985603543,
"grad_norm": 0.36274176836013794,
"learning_rate": 5.7750109337177185e-06,
"loss": 0.4517,
"step": 913
},
{
"epoch": 0.5060908084163898,
"grad_norm": 0.4266386330127716,
"learning_rate": 5.7654598797461445e-06,
"loss": 0.491,
"step": 914
},
{
"epoch": 0.5066445182724253,
"grad_norm": 0.42704376578330994,
"learning_rate": 5.755905964791341e-06,
"loss": 0.4807,
"step": 915
},
{
"epoch": 0.5071982281284607,
"grad_norm": 0.4034692943096161,
"learning_rate": 5.746349224562021e-06,
"loss": 0.4639,
"step": 916
},
{
"epoch": 0.5077519379844961,
"grad_norm": 0.39037638902664185,
"learning_rate": 5.736789694777465e-06,
"loss": 0.4515,
"step": 917
},
{
"epoch": 0.5083056478405316,
"grad_norm": 0.45550546050071716,
"learning_rate": 5.727227411167377e-06,
"loss": 0.471,
"step": 918
},
{
"epoch": 0.508859357696567,
"grad_norm": 0.4011116325855255,
"learning_rate": 5.717662409471751e-06,
"loss": 0.4617,
"step": 919
},
{
"epoch": 0.5094130675526024,
"grad_norm": 0.4205820560455322,
"learning_rate": 5.708094725440742e-06,
"loss": 0.4531,
"step": 920
},
{
"epoch": 0.5099667774086378,
"grad_norm": 0.5211153626441956,
"learning_rate": 5.698524394834531e-06,
"loss": 0.5083,
"step": 921
},
{
"epoch": 0.5105204872646734,
"grad_norm": 0.40547680854797363,
"learning_rate": 5.68895145342319e-06,
"loss": 0.4626,
"step": 922
},
{
"epoch": 0.5110741971207088,
"grad_norm": 0.4124176502227783,
"learning_rate": 5.679375936986553e-06,
"loss": 0.4612,
"step": 923
},
{
"epoch": 0.5116279069767442,
"grad_norm": 0.413004606962204,
"learning_rate": 5.669797881314072e-06,
"loss": 0.4644,
"step": 924
},
{
"epoch": 0.5121816168327796,
"grad_norm": 0.42170557379722595,
"learning_rate": 5.660217322204692e-06,
"loss": 0.4743,
"step": 925
},
{
"epoch": 0.512735326688815,
"grad_norm": 0.4040520191192627,
"learning_rate": 5.650634295466717e-06,
"loss": 0.4898,
"step": 926
},
{
"epoch": 0.5132890365448505,
"grad_norm": 0.3825586140155792,
"learning_rate": 5.641048836917672e-06,
"loss": 0.4778,
"step": 927
},
{
"epoch": 0.5138427464008859,
"grad_norm": 0.3947638273239136,
"learning_rate": 5.631460982384174e-06,
"loss": 0.4488,
"step": 928
},
{
"epoch": 0.5143964562569213,
"grad_norm": 0.4007805287837982,
"learning_rate": 5.621870767701788e-06,
"loss": 0.5048,
"step": 929
},
{
"epoch": 0.5149501661129569,
"grad_norm": 0.4218864142894745,
"learning_rate": 5.612278228714909e-06,
"loss": 0.4668,
"step": 930
},
{
"epoch": 0.5155038759689923,
"grad_norm": 0.3780379593372345,
"learning_rate": 5.6026834012766155e-06,
"loss": 0.433,
"step": 931
},
{
"epoch": 0.5160575858250277,
"grad_norm": 0.37311238050460815,
"learning_rate": 5.593086321248539e-06,
"loss": 0.4871,
"step": 932
},
{
"epoch": 0.5166112956810631,
"grad_norm": 0.4193789064884186,
"learning_rate": 5.583487024500729e-06,
"loss": 0.482,
"step": 933
},
{
"epoch": 0.5171650055370985,
"grad_norm": 0.43569833040237427,
"learning_rate": 5.573885546911523e-06,
"loss": 0.4499,
"step": 934
},
{
"epoch": 0.517718715393134,
"grad_norm": 0.3610110580921173,
"learning_rate": 5.5642819243674085e-06,
"loss": 0.4838,
"step": 935
},
{
"epoch": 0.5182724252491694,
"grad_norm": 0.4621051549911499,
"learning_rate": 5.554676192762891e-06,
"loss": 0.4646,
"step": 936
},
{
"epoch": 0.5188261351052049,
"grad_norm": 0.4081271290779114,
"learning_rate": 5.5450683880003555e-06,
"loss": 0.4675,
"step": 937
},
{
"epoch": 0.5193798449612403,
"grad_norm": 0.4200206696987152,
"learning_rate": 5.535458545989939e-06,
"loss": 0.4545,
"step": 938
},
{
"epoch": 0.5199335548172758,
"grad_norm": 0.44750723242759705,
"learning_rate": 5.525846702649394e-06,
"loss": 0.5031,
"step": 939
},
{
"epoch": 0.5204872646733112,
"grad_norm": 0.4381738305091858,
"learning_rate": 5.516232893903946e-06,
"loss": 0.4738,
"step": 940
},
{
"epoch": 0.5210409745293466,
"grad_norm": 0.39572015404701233,
"learning_rate": 5.506617155686177e-06,
"loss": 0.4856,
"step": 941
},
{
"epoch": 0.521594684385382,
"grad_norm": 0.38003742694854736,
"learning_rate": 5.49699952393587e-06,
"loss": 0.4857,
"step": 942
},
{
"epoch": 0.5221483942414175,
"grad_norm": 0.364629328250885,
"learning_rate": 5.487380034599893e-06,
"loss": 0.4649,
"step": 943
},
{
"epoch": 0.5227021040974529,
"grad_norm": 0.40337440371513367,
"learning_rate": 5.477758723632055e-06,
"loss": 0.5055,
"step": 944
},
{
"epoch": 0.5232558139534884,
"grad_norm": 0.40286746621131897,
"learning_rate": 5.4681356269929704e-06,
"loss": 0.4466,
"step": 945
},
{
"epoch": 0.5238095238095238,
"grad_norm": 0.368254154920578,
"learning_rate": 5.458510780649932e-06,
"loss": 0.4984,
"step": 946
},
{
"epoch": 0.5243632336655593,
"grad_norm": 0.38113436102867126,
"learning_rate": 5.448884220576768e-06,
"loss": 0.4902,
"step": 947
},
{
"epoch": 0.5249169435215947,
"grad_norm": 0.40411466360092163,
"learning_rate": 5.439255982753717e-06,
"loss": 0.473,
"step": 948
},
{
"epoch": 0.5254706533776301,
"grad_norm": 0.32906490564346313,
"learning_rate": 5.429626103167284e-06,
"loss": 0.4664,
"step": 949
},
{
"epoch": 0.5260243632336655,
"grad_norm": 0.3590736389160156,
"learning_rate": 5.41999461781011e-06,
"loss": 0.4544,
"step": 950
},
{
"epoch": 0.526578073089701,
"grad_norm": 0.3264465630054474,
"learning_rate": 5.4103615626808426e-06,
"loss": 0.4455,
"step": 951
},
{
"epoch": 0.5271317829457365,
"grad_norm": 0.39324450492858887,
"learning_rate": 5.400726973783993e-06,
"loss": 0.4935,
"step": 952
},
{
"epoch": 0.5276854928017719,
"grad_norm": 0.3779219388961792,
"learning_rate": 5.391090887129804e-06,
"loss": 0.4859,
"step": 953
},
{
"epoch": 0.5282392026578073,
"grad_norm": 0.37633198499679565,
"learning_rate": 5.381453338734119e-06,
"loss": 0.4607,
"step": 954
},
{
"epoch": 0.5287929125138427,
"grad_norm": 0.4018500745296478,
"learning_rate": 5.371814364618244e-06,
"loss": 0.4624,
"step": 955
},
{
"epoch": 0.5293466223698782,
"grad_norm": 0.40497103333473206,
"learning_rate": 5.362174000808813e-06,
"loss": 0.4885,
"step": 956
},
{
"epoch": 0.5299003322259136,
"grad_norm": 0.3769908547401428,
"learning_rate": 5.352532283337655e-06,
"loss": 0.4967,
"step": 957
},
{
"epoch": 0.530454042081949,
"grad_norm": 0.36260783672332764,
"learning_rate": 5.342889248241656e-06,
"loss": 0.5198,
"step": 958
},
{
"epoch": 0.5310077519379846,
"grad_norm": 0.44563496112823486,
"learning_rate": 5.33324493156263e-06,
"loss": 0.4935,
"step": 959
},
{
"epoch": 0.53156146179402,
"grad_norm": 0.36103156208992004,
"learning_rate": 5.323599369347181e-06,
"loss": 0.4701,
"step": 960
},
{
"epoch": 0.5321151716500554,
"grad_norm": 0.37980565428733826,
"learning_rate": 5.3139525976465675e-06,
"loss": 0.4718,
"step": 961
},
{
"epoch": 0.5326688815060908,
"grad_norm": 0.4384520351886749,
"learning_rate": 5.304304652516566e-06,
"loss": 0.4595,
"step": 962
},
{
"epoch": 0.5332225913621262,
"grad_norm": 0.3531224727630615,
"learning_rate": 5.294655570017344e-06,
"loss": 0.4469,
"step": 963
},
{
"epoch": 0.5337763012181617,
"grad_norm": 0.3683699071407318,
"learning_rate": 5.2850053862133135e-06,
"loss": 0.4648,
"step": 964
},
{
"epoch": 0.5343300110741971,
"grad_norm": 0.4316402077674866,
"learning_rate": 5.27535413717301e-06,
"loss": 0.4605,
"step": 965
},
{
"epoch": 0.5348837209302325,
"grad_norm": 0.4019063115119934,
"learning_rate": 5.265701858968944e-06,
"loss": 0.4468,
"step": 966
},
{
"epoch": 0.535437430786268,
"grad_norm": 0.4216720163822174,
"learning_rate": 5.256048587677476e-06,
"loss": 0.5113,
"step": 967
},
{
"epoch": 0.5359911406423035,
"grad_norm": 0.3609980046749115,
"learning_rate": 5.246394359378678e-06,
"loss": 0.4999,
"step": 968
},
{
"epoch": 0.5365448504983389,
"grad_norm": 0.3771976828575134,
"learning_rate": 5.236739210156201e-06,
"loss": 0.4787,
"step": 969
},
{
"epoch": 0.5370985603543743,
"grad_norm": 0.3443913757801056,
"learning_rate": 5.22708317609713e-06,
"loss": 0.4793,
"step": 970
},
{
"epoch": 0.5376522702104097,
"grad_norm": 0.3635353147983551,
"learning_rate": 5.217426293291869e-06,
"loss": 0.4803,
"step": 971
},
{
"epoch": 0.5382059800664452,
"grad_norm": 0.37993019819259644,
"learning_rate": 5.207768597833982e-06,
"loss": 0.4866,
"step": 972
},
{
"epoch": 0.5387596899224806,
"grad_norm": 0.32572728395462036,
"learning_rate": 5.198110125820082e-06,
"loss": 0.4793,
"step": 973
},
{
"epoch": 0.5393133997785161,
"grad_norm": 0.3797398805618286,
"learning_rate": 5.188450913349674e-06,
"loss": 0.456,
"step": 974
},
{
"epoch": 0.5398671096345515,
"grad_norm": 0.40099626779556274,
"learning_rate": 5.178790996525038e-06,
"loss": 0.4947,
"step": 975
},
{
"epoch": 0.540420819490587,
"grad_norm": 0.35796770453453064,
"learning_rate": 5.169130411451083e-06,
"loss": 0.4529,
"step": 976
},
{
"epoch": 0.5409745293466224,
"grad_norm": 0.39428651332855225,
"learning_rate": 5.1594691942352195e-06,
"loss": 0.4799,
"step": 977
},
{
"epoch": 0.5415282392026578,
"grad_norm": 0.4011034369468689,
"learning_rate": 5.149807380987213e-06,
"loss": 0.4938,
"step": 978
},
{
"epoch": 0.5420819490586932,
"grad_norm": 0.3786684572696686,
"learning_rate": 5.140145007819064e-06,
"loss": 0.4474,
"step": 979
},
{
"epoch": 0.5426356589147286,
"grad_norm": 0.3715454041957855,
"learning_rate": 5.1304821108448645e-06,
"loss": 0.4317,
"step": 980
},
{
"epoch": 0.5431893687707641,
"grad_norm": 0.4249853789806366,
"learning_rate": 5.120818726180662e-06,
"loss": 0.5195,
"step": 981
},
{
"epoch": 0.5437430786267996,
"grad_norm": 0.38960522413253784,
"learning_rate": 5.111154889944328e-06,
"loss": 0.4787,
"step": 982
},
{
"epoch": 0.544296788482835,
"grad_norm": 0.4187440574169159,
"learning_rate": 5.1014906382554206e-06,
"loss": 0.4718,
"step": 983
},
{
"epoch": 0.5448504983388704,
"grad_norm": 0.4285171627998352,
"learning_rate": 5.091826007235053e-06,
"loss": 0.48,
"step": 984
},
{
"epoch": 0.5454042081949059,
"grad_norm": 0.37511613965034485,
"learning_rate": 5.0821610330057545e-06,
"loss": 0.4623,
"step": 985
},
{
"epoch": 0.5459579180509413,
"grad_norm": 0.40002259612083435,
"learning_rate": 5.072495751691338e-06,
"loss": 0.4527,
"step": 986
},
{
"epoch": 0.5465116279069767,
"grad_norm": 0.3954589366912842,
"learning_rate": 5.062830199416764e-06,
"loss": 0.4515,
"step": 987
},
{
"epoch": 0.5470653377630121,
"grad_norm": 0.40525105595588684,
"learning_rate": 5.053164412308005e-06,
"loss": 0.4916,
"step": 988
},
{
"epoch": 0.5476190476190477,
"grad_norm": 0.3854787051677704,
"learning_rate": 5.043498426491911e-06,
"loss": 0.5072,
"step": 989
},
{
"epoch": 0.5481727574750831,
"grad_norm": 0.3894112706184387,
"learning_rate": 5.033832278096077e-06,
"loss": 0.4542,
"step": 990
},
{
"epoch": 0.5487264673311185,
"grad_norm": 0.4283125400543213,
"learning_rate": 5.024166003248703e-06,
"loss": 0.463,
"step": 991
},
{
"epoch": 0.5492801771871539,
"grad_norm": 0.39848053455352783,
"learning_rate": 5.014499638078463e-06,
"loss": 0.4835,
"step": 992
},
{
"epoch": 0.5498338870431894,
"grad_norm": 0.40187305212020874,
"learning_rate": 5.004833218714368e-06,
"loss": 0.4613,
"step": 993
},
{
"epoch": 0.5503875968992248,
"grad_norm": 0.38503843545913696,
"learning_rate": 4.995166781285633e-06,
"loss": 0.4754,
"step": 994
},
{
"epoch": 0.5509413067552602,
"grad_norm": 0.388287752866745,
"learning_rate": 4.985500361921539e-06,
"loss": 0.4695,
"step": 995
},
{
"epoch": 0.5514950166112956,
"grad_norm": 0.39484068751335144,
"learning_rate": 4.9758339967512995e-06,
"loss": 0.4676,
"step": 996
},
{
"epoch": 0.5520487264673312,
"grad_norm": 0.4063197076320648,
"learning_rate": 4.966167721903925e-06,
"loss": 0.4418,
"step": 997
},
{
"epoch": 0.5526024363233666,
"grad_norm": 0.3962213099002838,
"learning_rate": 4.956501573508091e-06,
"loss": 0.476,
"step": 998
},
{
"epoch": 0.553156146179402,
"grad_norm": 0.4217541515827179,
"learning_rate": 4.946835587691997e-06,
"loss": 0.4653,
"step": 999
},
{
"epoch": 0.5537098560354374,
"grad_norm": 0.3963378369808197,
"learning_rate": 4.937169800583237e-06,
"loss": 0.4785,
"step": 1000
},
{
"epoch": 0.5542635658914729,
"grad_norm": 0.40990614891052246,
"learning_rate": 4.927504248308663e-06,
"loss": 0.4887,
"step": 1001
},
{
"epoch": 0.5548172757475083,
"grad_norm": 0.41964200139045715,
"learning_rate": 4.917838966994246e-06,
"loss": 0.4649,
"step": 1002
},
{
"epoch": 0.5553709856035437,
"grad_norm": 0.4327523410320282,
"learning_rate": 4.908173992764949e-06,
"loss": 0.4834,
"step": 1003
},
{
"epoch": 0.5559246954595792,
"grad_norm": 0.39587798714637756,
"learning_rate": 4.898509361744581e-06,
"loss": 0.4874,
"step": 1004
},
{
"epoch": 0.5564784053156147,
"grad_norm": 0.4436945617198944,
"learning_rate": 4.888845110055674e-06,
"loss": 0.457,
"step": 1005
},
{
"epoch": 0.5570321151716501,
"grad_norm": 0.40062418580055237,
"learning_rate": 4.87918127381934e-06,
"loss": 0.4727,
"step": 1006
},
{
"epoch": 0.5575858250276855,
"grad_norm": 0.41501954197883606,
"learning_rate": 4.869517889155136e-06,
"loss": 0.4644,
"step": 1007
},
{
"epoch": 0.5581395348837209,
"grad_norm": 0.41733792424201965,
"learning_rate": 4.8598549921809364e-06,
"loss": 0.4644,
"step": 1008
},
{
"epoch": 0.5586932447397563,
"grad_norm": 0.3771803677082062,
"learning_rate": 4.8501926190127895e-06,
"loss": 0.4728,
"step": 1009
},
{
"epoch": 0.5592469545957918,
"grad_norm": 0.3525100648403168,
"learning_rate": 4.840530805764783e-06,
"loss": 0.4761,
"step": 1010
},
{
"epoch": 0.5598006644518272,
"grad_norm": 0.37297025322914124,
"learning_rate": 4.830869588548918e-06,
"loss": 0.4827,
"step": 1011
},
{
"epoch": 0.5603543743078627,
"grad_norm": 0.41562744975090027,
"learning_rate": 4.821209003474963e-06,
"loss": 0.4667,
"step": 1012
},
{
"epoch": 0.5609080841638981,
"grad_norm": 0.39811256527900696,
"learning_rate": 4.811549086650327e-06,
"loss": 0.459,
"step": 1013
},
{
"epoch": 0.5614617940199336,
"grad_norm": 0.3600512742996216,
"learning_rate": 4.801889874179921e-06,
"loss": 0.4695,
"step": 1014
},
{
"epoch": 0.562015503875969,
"grad_norm": 0.3680749833583832,
"learning_rate": 4.792231402166019e-06,
"loss": 0.4491,
"step": 1015
},
{
"epoch": 0.5625692137320044,
"grad_norm": 0.44280192255973816,
"learning_rate": 4.782573706708133e-06,
"loss": 0.4828,
"step": 1016
},
{
"epoch": 0.5631229235880398,
"grad_norm": 0.37224942445755005,
"learning_rate": 4.772916823902871e-06,
"loss": 0.45,
"step": 1017
},
{
"epoch": 0.5636766334440753,
"grad_norm": 0.3616679906845093,
"learning_rate": 4.763260789843801e-06,
"loss": 0.4882,
"step": 1018
},
{
"epoch": 0.5642303433001108,
"grad_norm": 0.47011885046958923,
"learning_rate": 4.753605640621323e-06,
"loss": 0.4584,
"step": 1019
},
{
"epoch": 0.5647840531561462,
"grad_norm": 0.4231053292751312,
"learning_rate": 4.743951412322524e-06,
"loss": 0.4806,
"step": 1020
},
{
"epoch": 0.5653377630121816,
"grad_norm": 0.38794267177581787,
"learning_rate": 4.734298141031057e-06,
"loss": 0.4364,
"step": 1021
},
{
"epoch": 0.5658914728682171,
"grad_norm": 0.44208985567092896,
"learning_rate": 4.724645862826992e-06,
"loss": 0.5135,
"step": 1022
},
{
"epoch": 0.5664451827242525,
"grad_norm": 0.36872124671936035,
"learning_rate": 4.7149946137866865e-06,
"loss": 0.456,
"step": 1023
},
{
"epoch": 0.5669988925802879,
"grad_norm": 0.47403469681739807,
"learning_rate": 4.705344429982658e-06,
"loss": 0.4622,
"step": 1024
},
{
"epoch": 0.5675526024363233,
"grad_norm": 0.39829424023628235,
"learning_rate": 4.6956953474834355e-06,
"loss": 0.4562,
"step": 1025
},
{
"epoch": 0.5681063122923588,
"grad_norm": 0.36361780762672424,
"learning_rate": 4.686047402353433e-06,
"loss": 0.4603,
"step": 1026
},
{
"epoch": 0.5686600221483943,
"grad_norm": 0.37249621748924255,
"learning_rate": 4.67640063065282e-06,
"loss": 0.4335,
"step": 1027
},
{
"epoch": 0.5692137320044297,
"grad_norm": 0.43237707018852234,
"learning_rate": 4.6667550684373705e-06,
"loss": 0.4742,
"step": 1028
},
{
"epoch": 0.5697674418604651,
"grad_norm": 0.4022100865840912,
"learning_rate": 4.657110751758346e-06,
"loss": 0.4659,
"step": 1029
},
{
"epoch": 0.5703211517165006,
"grad_norm": 0.3795289099216461,
"learning_rate": 4.647467716662349e-06,
"loss": 0.4469,
"step": 1030
},
{
"epoch": 0.570874861572536,
"grad_norm": 0.43125849962234497,
"learning_rate": 4.637825999191189e-06,
"loss": 0.4573,
"step": 1031
},
{
"epoch": 0.5714285714285714,
"grad_norm": 0.3857414126396179,
"learning_rate": 4.628185635381757e-06,
"loss": 0.46,
"step": 1032
},
{
"epoch": 0.5719822812846068,
"grad_norm": 0.3959072530269623,
"learning_rate": 4.6185466612658825e-06,
"loss": 0.4626,
"step": 1033
},
{
"epoch": 0.5725359911406424,
"grad_norm": 0.39055493474006653,
"learning_rate": 4.608909112870197e-06,
"loss": 0.4705,
"step": 1034
},
{
"epoch": 0.5730897009966778,
"grad_norm": 0.40661776065826416,
"learning_rate": 4.599273026216009e-06,
"loss": 0.4673,
"step": 1035
},
{
"epoch": 0.5736434108527132,
"grad_norm": 0.36414268612861633,
"learning_rate": 4.589638437319157e-06,
"loss": 0.4659,
"step": 1036
},
{
"epoch": 0.5741971207087486,
"grad_norm": 0.407930463552475,
"learning_rate": 4.580005382189891e-06,
"loss": 0.4689,
"step": 1037
},
{
"epoch": 0.574750830564784,
"grad_norm": 0.34159794449806213,
"learning_rate": 4.5703738968327194e-06,
"loss": 0.4716,
"step": 1038
},
{
"epoch": 0.5753045404208195,
"grad_norm": 0.3685348927974701,
"learning_rate": 4.560744017246284e-06,
"loss": 0.473,
"step": 1039
},
{
"epoch": 0.5758582502768549,
"grad_norm": 0.37837618589401245,
"learning_rate": 4.551115779423234e-06,
"loss": 0.4688,
"step": 1040
},
{
"epoch": 0.5764119601328903,
"grad_norm": 0.352651983499527,
"learning_rate": 4.541489219350069e-06,
"loss": 0.4649,
"step": 1041
},
{
"epoch": 0.5769656699889258,
"grad_norm": 0.33390361070632935,
"learning_rate": 4.53186437300703e-06,
"loss": 0.4572,
"step": 1042
},
{
"epoch": 0.5775193798449613,
"grad_norm": 0.3548870086669922,
"learning_rate": 4.522241276367948e-06,
"loss": 0.4641,
"step": 1043
},
{
"epoch": 0.5780730897009967,
"grad_norm": 0.33972635865211487,
"learning_rate": 4.512619965400107e-06,
"loss": 0.457,
"step": 1044
},
{
"epoch": 0.5786267995570321,
"grad_norm": 0.34692293405532837,
"learning_rate": 4.503000476064131e-06,
"loss": 0.4372,
"step": 1045
},
{
"epoch": 0.5791805094130675,
"grad_norm": 0.3577854335308075,
"learning_rate": 4.493382844313826e-06,
"loss": 0.467,
"step": 1046
},
{
"epoch": 0.579734219269103,
"grad_norm": 0.3625636696815491,
"learning_rate": 4.483767106096055e-06,
"loss": 0.46,
"step": 1047
},
{
"epoch": 0.5802879291251384,
"grad_norm": 0.3549644947052002,
"learning_rate": 4.474153297350608e-06,
"loss": 0.4898,
"step": 1048
},
{
"epoch": 0.5808416389811739,
"grad_norm": 0.35848698019981384,
"learning_rate": 4.464541454010061e-06,
"loss": 0.4918,
"step": 1049
},
{
"epoch": 0.5813953488372093,
"grad_norm": 0.3760617971420288,
"learning_rate": 4.454931611999646e-06,
"loss": 0.4626,
"step": 1050
},
{
"epoch": 0.5819490586932448,
"grad_norm": 0.3809838891029358,
"learning_rate": 4.445323807237112e-06,
"loss": 0.4471,
"step": 1051
},
{
"epoch": 0.5825027685492802,
"grad_norm": 0.3663243353366852,
"learning_rate": 4.4357180756325915e-06,
"loss": 0.4865,
"step": 1052
},
{
"epoch": 0.5830564784053156,
"grad_norm": 0.3606342077255249,
"learning_rate": 4.426114453088479e-06,
"loss": 0.4627,
"step": 1053
},
{
"epoch": 0.583610188261351,
"grad_norm": 0.36999428272247314,
"learning_rate": 4.4165129754992736e-06,
"loss": 0.4499,
"step": 1054
},
{
"epoch": 0.5841638981173864,
"grad_norm": 0.3812705874443054,
"learning_rate": 4.406913678751463e-06,
"loss": 0.486,
"step": 1055
},
{
"epoch": 0.584717607973422,
"grad_norm": 0.3783071041107178,
"learning_rate": 4.397316598723385e-06,
"loss": 0.4596,
"step": 1056
},
{
"epoch": 0.5852713178294574,
"grad_norm": 0.473897248506546,
"learning_rate": 4.387721771285091e-06,
"loss": 0.4448,
"step": 1057
},
{
"epoch": 0.5858250276854928,
"grad_norm": 0.390671968460083,
"learning_rate": 4.378129232298213e-06,
"loss": 0.4495,
"step": 1058
},
{
"epoch": 0.5863787375415282,
"grad_norm": 0.4029969871044159,
"learning_rate": 4.3685390176158295e-06,
"loss": 0.4644,
"step": 1059
},
{
"epoch": 0.5869324473975637,
"grad_norm": 0.4267359972000122,
"learning_rate": 4.358951163082328e-06,
"loss": 0.4639,
"step": 1060
},
{
"epoch": 0.5874861572535991,
"grad_norm": 0.40942493081092834,
"learning_rate": 4.349365704533285e-06,
"loss": 0.455,
"step": 1061
},
{
"epoch": 0.5880398671096345,
"grad_norm": 0.40282365679740906,
"learning_rate": 4.33978267779531e-06,
"loss": 0.4696,
"step": 1062
},
{
"epoch": 0.5885935769656699,
"grad_norm": 0.4315662086009979,
"learning_rate": 4.33020211868593e-06,
"loss": 0.4714,
"step": 1063
},
{
"epoch": 0.5891472868217055,
"grad_norm": 0.38608118891716003,
"learning_rate": 4.320624063013449e-06,
"loss": 0.4901,
"step": 1064
},
{
"epoch": 0.5897009966777409,
"grad_norm": 0.3713338375091553,
"learning_rate": 4.31104854657681e-06,
"loss": 0.4566,
"step": 1065
},
{
"epoch": 0.5902547065337763,
"grad_norm": 0.37049630284309387,
"learning_rate": 4.301475605165471e-06,
"loss": 0.4652,
"step": 1066
},
{
"epoch": 0.5908084163898117,
"grad_norm": 0.4406527578830719,
"learning_rate": 4.291905274559262e-06,
"loss": 0.4783,
"step": 1067
},
{
"epoch": 0.5913621262458472,
"grad_norm": 0.385221928358078,
"learning_rate": 4.282337590528251e-06,
"loss": 0.4625,
"step": 1068
},
{
"epoch": 0.5919158361018826,
"grad_norm": 0.3984861969947815,
"learning_rate": 4.272772588832626e-06,
"loss": 0.4652,
"step": 1069
},
{
"epoch": 0.592469545957918,
"grad_norm": 0.3912111222743988,
"learning_rate": 4.263210305222535e-06,
"loss": 0.4624,
"step": 1070
},
{
"epoch": 0.5930232558139535,
"grad_norm": 0.4461668133735657,
"learning_rate": 4.25365077543798e-06,
"loss": 0.4763,
"step": 1071
},
{
"epoch": 0.593576965669989,
"grad_norm": 0.37593454122543335,
"learning_rate": 4.244094035208662e-06,
"loss": 0.4716,
"step": 1072
},
{
"epoch": 0.5941306755260244,
"grad_norm": 0.39943602681159973,
"learning_rate": 4.2345401202538555e-06,
"loss": 0.4556,
"step": 1073
},
{
"epoch": 0.5946843853820598,
"grad_norm": 0.3936353027820587,
"learning_rate": 4.224989066282282e-06,
"loss": 0.4495,
"step": 1074
},
{
"epoch": 0.5952380952380952,
"grad_norm": 0.359379380941391,
"learning_rate": 4.2154409089919654e-06,
"loss": 0.4216,
"step": 1075
},
{
"epoch": 0.5957918050941307,
"grad_norm": 0.37251192331314087,
"learning_rate": 4.205895684070099e-06,
"loss": 0.4679,
"step": 1076
},
{
"epoch": 0.5963455149501661,
"grad_norm": 0.41129574179649353,
"learning_rate": 4.1963534271929235e-06,
"loss": 0.4697,
"step": 1077
},
{
"epoch": 0.5968992248062015,
"grad_norm": 0.39789658784866333,
"learning_rate": 4.186814174025582e-06,
"loss": 0.485,
"step": 1078
},
{
"epoch": 0.597452934662237,
"grad_norm": 0.40488293766975403,
"learning_rate": 4.177277960221993e-06,
"loss": 0.4886,
"step": 1079
},
{
"epoch": 0.5980066445182725,
"grad_norm": 0.4230530261993408,
"learning_rate": 4.167744821424714e-06,
"loss": 0.4796,
"step": 1080
},
{
"epoch": 0.5985603543743079,
"grad_norm": 0.3796553611755371,
"learning_rate": 4.158214793264808e-06,
"loss": 0.4483,
"step": 1081
},
{
"epoch": 0.5991140642303433,
"grad_norm": 0.36902010440826416,
"learning_rate": 4.148687911361714e-06,
"loss": 0.461,
"step": 1082
},
{
"epoch": 0.5996677740863787,
"grad_norm": 0.42175090312957764,
"learning_rate": 4.139164211323111e-06,
"loss": 0.4723,
"step": 1083
},
{
"epoch": 0.6002214839424141,
"grad_norm": 0.38418591022491455,
"learning_rate": 4.129643728744782e-06,
"loss": 0.492,
"step": 1084
},
{
"epoch": 0.6007751937984496,
"grad_norm": 0.39179977774620056,
"learning_rate": 4.120126499210491e-06,
"loss": 0.4718,
"step": 1085
},
{
"epoch": 0.6013289036544851,
"grad_norm": 0.3454154431819916,
"learning_rate": 4.1106125582918385e-06,
"loss": 0.4647,
"step": 1086
},
{
"epoch": 0.6018826135105205,
"grad_norm": 0.3575115203857422,
"learning_rate": 4.101101941548136e-06,
"loss": 0.4789,
"step": 1087
},
{
"epoch": 0.602436323366556,
"grad_norm": 0.43414831161499023,
"learning_rate": 4.091594684526269e-06,
"loss": 0.4646,
"step": 1088
},
{
"epoch": 0.6029900332225914,
"grad_norm": 0.37361279129981995,
"learning_rate": 4.082090822760563e-06,
"loss": 0.475,
"step": 1089
},
{
"epoch": 0.6035437430786268,
"grad_norm": 0.3831091821193695,
"learning_rate": 4.072590391772662e-06,
"loss": 0.4601,
"step": 1090
},
{
"epoch": 0.6040974529346622,
"grad_norm": 0.35764801502227783,
"learning_rate": 4.063093427071376e-06,
"loss": 0.4461,
"step": 1091
},
{
"epoch": 0.6046511627906976,
"grad_norm": 0.35605666041374207,
"learning_rate": 4.05359996415257e-06,
"loss": 0.4778,
"step": 1092
},
{
"epoch": 0.6052048726467331,
"grad_norm": 0.3591611981391907,
"learning_rate": 4.044110038499014e-06,
"loss": 0.4526,
"step": 1093
},
{
"epoch": 0.6057585825027686,
"grad_norm": 0.3578726649284363,
"learning_rate": 4.034623685580257e-06,
"loss": 0.4478,
"step": 1094
},
{
"epoch": 0.606312292358804,
"grad_norm": 0.35668474435806274,
"learning_rate": 4.0251409408524985e-06,
"loss": 0.4653,
"step": 1095
},
{
"epoch": 0.6068660022148394,
"grad_norm": 0.35329243540763855,
"learning_rate": 4.01566183975845e-06,
"loss": 0.4801,
"step": 1096
},
{
"epoch": 0.6074197120708749,
"grad_norm": 0.354142963886261,
"learning_rate": 4.006186417727203e-06,
"loss": 0.4598,
"step": 1097
},
{
"epoch": 0.6079734219269103,
"grad_norm": 0.3247867226600647,
"learning_rate": 3.996714710174101e-06,
"loss": 0.4464,
"step": 1098
},
{
"epoch": 0.6085271317829457,
"grad_norm": 0.3490568697452545,
"learning_rate": 3.987246752500601e-06,
"loss": 0.4573,
"step": 1099
},
{
"epoch": 0.6090808416389811,
"grad_norm": 0.38571274280548096,
"learning_rate": 3.97778258009415e-06,
"loss": 0.4771,
"step": 1100
},
{
"epoch": 0.6096345514950167,
"grad_norm": 0.39078643918037415,
"learning_rate": 3.968322228328041e-06,
"loss": 0.4766,
"step": 1101
},
{
"epoch": 0.6101882613510521,
"grad_norm": 0.351081520318985,
"learning_rate": 3.958865732561288e-06,
"loss": 0.4777,
"step": 1102
},
{
"epoch": 0.6107419712070875,
"grad_norm": 0.3306477665901184,
"learning_rate": 3.9494131281384975e-06,
"loss": 0.4573,
"step": 1103
},
{
"epoch": 0.6112956810631229,
"grad_norm": 0.3765094578266144,
"learning_rate": 3.939964450389728e-06,
"loss": 0.478,
"step": 1104
},
{
"epoch": 0.6118493909191584,
"grad_norm": 0.3690263032913208,
"learning_rate": 3.93051973463036e-06,
"loss": 0.4546,
"step": 1105
},
{
"epoch": 0.6124031007751938,
"grad_norm": 0.40970906615257263,
"learning_rate": 3.92107901616097e-06,
"loss": 0.5092,
"step": 1106
},
{
"epoch": 0.6129568106312292,
"grad_norm": 0.35802197456359863,
"learning_rate": 3.911642330267191e-06,
"loss": 0.4453,
"step": 1107
},
{
"epoch": 0.6135105204872646,
"grad_norm": 0.35536572337150574,
"learning_rate": 3.902209712219586e-06,
"loss": 0.4637,
"step": 1108
},
{
"epoch": 0.6140642303433002,
"grad_norm": 0.3452344834804535,
"learning_rate": 3.892781197273512e-06,
"loss": 0.4587,
"step": 1109
},
{
"epoch": 0.6146179401993356,
"grad_norm": 0.3386285901069641,
"learning_rate": 3.883356820668991e-06,
"loss": 0.4518,
"step": 1110
},
{
"epoch": 0.615171650055371,
"grad_norm": 0.3814370632171631,
"learning_rate": 3.873936617630578e-06,
"loss": 0.4759,
"step": 1111
},
{
"epoch": 0.6157253599114064,
"grad_norm": 0.40263649821281433,
"learning_rate": 3.864520623367231e-06,
"loss": 0.4768,
"step": 1112
},
{
"epoch": 0.6162790697674418,
"grad_norm": 0.3794068694114685,
"learning_rate": 3.855108873072171e-06,
"loss": 0.471,
"step": 1113
},
{
"epoch": 0.6168327796234773,
"grad_norm": 0.36389413475990295,
"learning_rate": 3.845701401922763e-06,
"loss": 0.4668,
"step": 1114
},
{
"epoch": 0.6173864894795127,
"grad_norm": 0.3553192913532257,
"learning_rate": 3.836298245080374e-06,
"loss": 0.4709,
"step": 1115
},
{
"epoch": 0.6179401993355482,
"grad_norm": 0.4235493242740631,
"learning_rate": 3.82689943769025e-06,
"loss": 0.4916,
"step": 1116
},
{
"epoch": 0.6184939091915836,
"grad_norm": 0.3584069311618805,
"learning_rate": 3.817505014881378e-06,
"loss": 0.4455,
"step": 1117
},
{
"epoch": 0.6190476190476191,
"grad_norm": 0.36167699098587036,
"learning_rate": 3.8081150117663547e-06,
"loss": 0.4902,
"step": 1118
},
{
"epoch": 0.6196013289036545,
"grad_norm": 0.38572049140930176,
"learning_rate": 3.7987294634412643e-06,
"loss": 0.4788,
"step": 1119
},
{
"epoch": 0.6201550387596899,
"grad_norm": 0.35073527693748474,
"learning_rate": 3.7893484049855323e-06,
"loss": 0.483,
"step": 1120
},
{
"epoch": 0.6207087486157253,
"grad_norm": 0.3707284927368164,
"learning_rate": 3.779971871461813e-06,
"loss": 0.4665,
"step": 1121
},
{
"epoch": 0.6212624584717608,
"grad_norm": 0.3623058497905731,
"learning_rate": 3.77059989791584e-06,
"loss": 0.4945,
"step": 1122
},
{
"epoch": 0.6218161683277962,
"grad_norm": 0.36620059609413147,
"learning_rate": 3.7612325193763045e-06,
"loss": 0.4728,
"step": 1123
},
{
"epoch": 0.6223698781838317,
"grad_norm": 0.3564867079257965,
"learning_rate": 3.7518697708547285e-06,
"loss": 0.498,
"step": 1124
},
{
"epoch": 0.6229235880398671,
"grad_norm": 0.3427715599536896,
"learning_rate": 3.742511687345325e-06,
"loss": 0.4391,
"step": 1125
},
{
"epoch": 0.6234772978959026,
"grad_norm": 0.34288641810417175,
"learning_rate": 3.7331583038248688e-06,
"loss": 0.4517,
"step": 1126
},
{
"epoch": 0.624031007751938,
"grad_norm": 0.32349908351898193,
"learning_rate": 3.7238096552525736e-06,
"loss": 0.4588,
"step": 1127
},
{
"epoch": 0.6245847176079734,
"grad_norm": 0.3646223247051239,
"learning_rate": 3.714465776569952e-06,
"loss": 0.4508,
"step": 1128
},
{
"epoch": 0.6251384274640088,
"grad_norm": 0.40397608280181885,
"learning_rate": 3.705126702700691e-06,
"loss": 0.4988,
"step": 1129
},
{
"epoch": 0.6256921373200443,
"grad_norm": 0.419245183467865,
"learning_rate": 3.695792468550517e-06,
"loss": 0.4731,
"step": 1130
},
{
"epoch": 0.6262458471760798,
"grad_norm": 0.3632652461528778,
"learning_rate": 3.6864631090070656e-06,
"loss": 0.482,
"step": 1131
},
{
"epoch": 0.6267995570321152,
"grad_norm": 0.38198965787887573,
"learning_rate": 3.6771386589397608e-06,
"loss": 0.4964,
"step": 1132
},
{
"epoch": 0.6273532668881506,
"grad_norm": 0.38686636090278625,
"learning_rate": 3.6678191531996683e-06,
"loss": 0.4447,
"step": 1133
},
{
"epoch": 0.627906976744186,
"grad_norm": 0.4080636501312256,
"learning_rate": 3.658504626619376e-06,
"loss": 0.4557,
"step": 1134
},
{
"epoch": 0.6284606866002215,
"grad_norm": 0.3649447560310364,
"learning_rate": 3.6491951140128685e-06,
"loss": 0.4828,
"step": 1135
},
{
"epoch": 0.6290143964562569,
"grad_norm": 0.45332175493240356,
"learning_rate": 3.639890650175379e-06,
"loss": 0.4802,
"step": 1136
},
{
"epoch": 0.6295681063122923,
"grad_norm": 0.361508846282959,
"learning_rate": 3.6305912698832813e-06,
"loss": 0.4757,
"step": 1137
},
{
"epoch": 0.6301218161683277,
"grad_norm": 0.3899231255054474,
"learning_rate": 3.6212970078939414e-06,
"loss": 0.4664,
"step": 1138
},
{
"epoch": 0.6306755260243633,
"grad_norm": 0.424633651971817,
"learning_rate": 3.6120078989455953e-06,
"loss": 0.4905,
"step": 1139
},
{
"epoch": 0.6312292358803987,
"grad_norm": 0.37771520018577576,
"learning_rate": 3.6027239777572253e-06,
"loss": 0.4464,
"step": 1140
},
{
"epoch": 0.6317829457364341,
"grad_norm": 0.3850044012069702,
"learning_rate": 3.593445279028418e-06,
"loss": 0.4665,
"step": 1141
},
{
"epoch": 0.6323366555924695,
"grad_norm": 0.44365182518959045,
"learning_rate": 3.5841718374392435e-06,
"loss": 0.4637,
"step": 1142
},
{
"epoch": 0.632890365448505,
"grad_norm": 0.4106205403804779,
"learning_rate": 3.5749036876501196e-06,
"loss": 0.4544,
"step": 1143
},
{
"epoch": 0.6334440753045404,
"grad_norm": 0.32994240522384644,
"learning_rate": 3.5656408643016892e-06,
"loss": 0.4729,
"step": 1144
},
{
"epoch": 0.6339977851605758,
"grad_norm": 0.38055476546287537,
"learning_rate": 3.5563834020146864e-06,
"loss": 0.4486,
"step": 1145
},
{
"epoch": 0.6345514950166113,
"grad_norm": 0.48720213770866394,
"learning_rate": 3.5471313353898056e-06,
"loss": 0.4491,
"step": 1146
},
{
"epoch": 0.6351052048726468,
"grad_norm": 0.43611204624176025,
"learning_rate": 3.5378846990075734e-06,
"loss": 0.5003,
"step": 1147
},
{
"epoch": 0.6356589147286822,
"grad_norm": 0.38427719473838806,
"learning_rate": 3.5286435274282277e-06,
"loss": 0.4511,
"step": 1148
},
{
"epoch": 0.6362126245847176,
"grad_norm": 0.39842531085014343,
"learning_rate": 3.5194078551915704e-06,
"loss": 0.4951,
"step": 1149
},
{
"epoch": 0.636766334440753,
"grad_norm": 0.4029242694377899,
"learning_rate": 3.5101777168168603e-06,
"loss": 0.482,
"step": 1150
},
{
"epoch": 0.6373200442967885,
"grad_norm": 0.35558003187179565,
"learning_rate": 3.5009531468026646e-06,
"loss": 0.4548,
"step": 1151
},
{
"epoch": 0.6378737541528239,
"grad_norm": 0.36913105845451355,
"learning_rate": 3.491734179626738e-06,
"loss": 0.4639,
"step": 1152
},
{
"epoch": 0.6384274640088593,
"grad_norm": 0.3489953279495239,
"learning_rate": 3.482520849745902e-06,
"loss": 0.4699,
"step": 1153
},
{
"epoch": 0.6389811738648948,
"grad_norm": 0.3615221679210663,
"learning_rate": 3.4733131915959008e-06,
"loss": 0.4629,
"step": 1154
},
{
"epoch": 0.6395348837209303,
"grad_norm": 0.4305569529533386,
"learning_rate": 3.46411123959128e-06,
"loss": 0.4781,
"step": 1155
},
{
"epoch": 0.6400885935769657,
"grad_norm": 0.3422296643257141,
"learning_rate": 3.4549150281252635e-06,
"loss": 0.453,
"step": 1156
},
{
"epoch": 0.6406423034330011,
"grad_norm": 0.4306909143924713,
"learning_rate": 3.4457245915696134e-06,
"loss": 0.4936,
"step": 1157
},
{
"epoch": 0.6411960132890365,
"grad_norm": 0.3581586480140686,
"learning_rate": 3.436539964274512e-06,
"loss": 0.4652,
"step": 1158
},
{
"epoch": 0.641749723145072,
"grad_norm": 0.37111344933509827,
"learning_rate": 3.4273611805684254e-06,
"loss": 0.4532,
"step": 1159
},
{
"epoch": 0.6423034330011074,
"grad_norm": 0.4637528955936432,
"learning_rate": 3.41818827475798e-06,
"loss": 0.4633,
"step": 1160
},
{
"epoch": 0.6428571428571429,
"grad_norm": 0.3899680972099304,
"learning_rate": 3.409021281127835e-06,
"loss": 0.448,
"step": 1161
},
{
"epoch": 0.6434108527131783,
"grad_norm": 0.3871779441833496,
"learning_rate": 3.3998602339405495e-06,
"loss": 0.4699,
"step": 1162
},
{
"epoch": 0.6439645625692137,
"grad_norm": 0.39854106307029724,
"learning_rate": 3.3907051674364555e-06,
"loss": 0.4591,
"step": 1163
},
{
"epoch": 0.6445182724252492,
"grad_norm": 0.41372933983802795,
"learning_rate": 3.381556115833538e-06,
"loss": 0.4825,
"step": 1164
},
{
"epoch": 0.6450719822812846,
"grad_norm": 0.35258594155311584,
"learning_rate": 3.3724131133272937e-06,
"loss": 0.4702,
"step": 1165
},
{
"epoch": 0.64562569213732,
"grad_norm": 0.3785751461982727,
"learning_rate": 3.3632761940906167e-06,
"loss": 0.4598,
"step": 1166
},
{
"epoch": 0.6461794019933554,
"grad_norm": 0.36929285526275635,
"learning_rate": 3.35414539227366e-06,
"loss": 0.4626,
"step": 1167
},
{
"epoch": 0.646733111849391,
"grad_norm": 0.3928925693035126,
"learning_rate": 3.3450207420037094e-06,
"loss": 0.4503,
"step": 1168
},
{
"epoch": 0.6472868217054264,
"grad_norm": 0.43884503841400146,
"learning_rate": 3.3359022773850673e-06,
"loss": 0.4991,
"step": 1169
},
{
"epoch": 0.6478405315614618,
"grad_norm": 0.40776365995407104,
"learning_rate": 3.3267900324989087e-06,
"loss": 0.4883,
"step": 1170
},
{
"epoch": 0.6483942414174972,
"grad_norm": 0.3640215992927551,
"learning_rate": 3.3176840414031653e-06,
"loss": 0.4856,
"step": 1171
},
{
"epoch": 0.6489479512735327,
"grad_norm": 0.3607640862464905,
"learning_rate": 3.3085843381323956e-06,
"loss": 0.4526,
"step": 1172
},
{
"epoch": 0.6495016611295681,
"grad_norm": 0.40391892194747925,
"learning_rate": 3.299490956697653e-06,
"loss": 0.4709,
"step": 1173
},
{
"epoch": 0.6500553709856035,
"grad_norm": 0.3893533945083618,
"learning_rate": 3.2904039310863654e-06,
"loss": 0.4507,
"step": 1174
},
{
"epoch": 0.6506090808416389,
"grad_norm": 0.34658515453338623,
"learning_rate": 3.281323295262203e-06,
"loss": 0.4437,
"step": 1175
},
{
"epoch": 0.6511627906976745,
"grad_norm": 0.38739728927612305,
"learning_rate": 3.2722490831649568e-06,
"loss": 0.4797,
"step": 1176
},
{
"epoch": 0.6517165005537099,
"grad_norm": 0.38036441802978516,
"learning_rate": 3.2631813287104065e-06,
"loss": 0.4714,
"step": 1177
},
{
"epoch": 0.6522702104097453,
"grad_norm": 0.3743520677089691,
"learning_rate": 3.254120065790193e-06,
"loss": 0.4658,
"step": 1178
},
{
"epoch": 0.6528239202657807,
"grad_norm": 0.346492737531662,
"learning_rate": 3.2450653282717003e-06,
"loss": 0.4731,
"step": 1179
},
{
"epoch": 0.6533776301218162,
"grad_norm": 0.3950633704662323,
"learning_rate": 3.2360171499979186e-06,
"loss": 0.4668,
"step": 1180
},
{
"epoch": 0.6539313399778516,
"grad_norm": 0.36540117859840393,
"learning_rate": 3.226975564787322e-06,
"loss": 0.4664,
"step": 1181
},
{
"epoch": 0.654485049833887,
"grad_norm": 0.3785662055015564,
"learning_rate": 3.217940606433747e-06,
"loss": 0.4797,
"step": 1182
},
{
"epoch": 0.6550387596899225,
"grad_norm": 0.36079704761505127,
"learning_rate": 3.2089123087062574e-06,
"loss": 0.4706,
"step": 1183
},
{
"epoch": 0.655592469545958,
"grad_norm": 0.4160771667957306,
"learning_rate": 3.199890705349021e-06,
"loss": 0.4633,
"step": 1184
},
{
"epoch": 0.6561461794019934,
"grad_norm": 0.3826664984226227,
"learning_rate": 3.1908758300811902e-06,
"loss": 0.4436,
"step": 1185
},
{
"epoch": 0.6566998892580288,
"grad_norm": 0.3960643410682678,
"learning_rate": 3.181867716596765e-06,
"loss": 0.47,
"step": 1186
},
{
"epoch": 0.6572535991140642,
"grad_norm": 0.37728485465049744,
"learning_rate": 3.172866398564477e-06,
"loss": 0.462,
"step": 1187
},
{
"epoch": 0.6578073089700996,
"grad_norm": 0.3493827283382416,
"learning_rate": 3.1638719096276565e-06,
"loss": 0.4714,
"step": 1188
},
{
"epoch": 0.6583610188261351,
"grad_norm": 0.4141739308834076,
"learning_rate": 3.1548842834041083e-06,
"loss": 0.4814,
"step": 1189
},
{
"epoch": 0.6589147286821705,
"grad_norm": 0.3749389052391052,
"learning_rate": 3.1459035534859906e-06,
"loss": 0.4969,
"step": 1190
},
{
"epoch": 0.659468438538206,
"grad_norm": 0.3280302882194519,
"learning_rate": 3.1369297534396823e-06,
"loss": 0.4745,
"step": 1191
},
{
"epoch": 0.6600221483942414,
"grad_norm": 0.368042916059494,
"learning_rate": 3.1279629168056635e-06,
"loss": 0.4589,
"step": 1192
},
{
"epoch": 0.6605758582502769,
"grad_norm": 0.3801417052745819,
"learning_rate": 3.1190030770983894e-06,
"loss": 0.4485,
"step": 1193
},
{
"epoch": 0.6611295681063123,
"grad_norm": 0.36523929238319397,
"learning_rate": 3.1100502678061566e-06,
"loss": 0.4749,
"step": 1194
},
{
"epoch": 0.6616832779623477,
"grad_norm": 0.3876314163208008,
"learning_rate": 3.1011045223909954e-06,
"loss": 0.4407,
"step": 1195
},
{
"epoch": 0.6622369878183831,
"grad_norm": 0.3626687526702881,
"learning_rate": 3.092165874288525e-06,
"loss": 0.4769,
"step": 1196
},
{
"epoch": 0.6627906976744186,
"grad_norm": 0.38137850165367126,
"learning_rate": 3.08323435690784e-06,
"loss": 0.4747,
"step": 1197
},
{
"epoch": 0.6633444075304541,
"grad_norm": 0.3258206844329834,
"learning_rate": 3.0743100036313876e-06,
"loss": 0.4757,
"step": 1198
},
{
"epoch": 0.6638981173864895,
"grad_norm": 0.3893488049507141,
"learning_rate": 3.065392847814832e-06,
"loss": 0.4631,
"step": 1199
},
{
"epoch": 0.6644518272425249,
"grad_norm": 0.4116152226924896,
"learning_rate": 3.056482922786942e-06,
"loss": 0.4965,
"step": 1200
},
{
"epoch": 0.6650055370985604,
"grad_norm": 0.3937761187553406,
"learning_rate": 3.0475802618494564e-06,
"loss": 0.4452,
"step": 1201
},
{
"epoch": 0.6655592469545958,
"grad_norm": 0.38089385628700256,
"learning_rate": 3.038684898276964e-06,
"loss": 0.4912,
"step": 1202
},
{
"epoch": 0.6661129568106312,
"grad_norm": 0.36229807138442993,
"learning_rate": 3.0297968653167833e-06,
"loss": 0.4743,
"step": 1203
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.374954491853714,
"learning_rate": 3.0209161961888283e-06,
"loss": 0.4553,
"step": 1204
},
{
"epoch": 0.667220376522702,
"grad_norm": 0.39966997504234314,
"learning_rate": 3.0120429240854927e-06,
"loss": 0.4504,
"step": 1205
},
{
"epoch": 0.6677740863787376,
"grad_norm": 0.3550257682800293,
"learning_rate": 3.0031770821715233e-06,
"loss": 0.4563,
"step": 1206
},
{
"epoch": 0.668327796234773,
"grad_norm": 0.38138848543167114,
"learning_rate": 2.9943187035838937e-06,
"loss": 0.4545,
"step": 1207
},
{
"epoch": 0.6688815060908084,
"grad_norm": 0.3662111759185791,
"learning_rate": 2.9854678214316875e-06,
"loss": 0.4671,
"step": 1208
},
{
"epoch": 0.6694352159468439,
"grad_norm": 0.3831300735473633,
"learning_rate": 2.9766244687959643e-06,
"loss": 0.4651,
"step": 1209
},
{
"epoch": 0.6699889258028793,
"grad_norm": 0.3436860144138336,
"learning_rate": 2.967788678729641e-06,
"loss": 0.4455,
"step": 1210
},
{
"epoch": 0.6705426356589147,
"grad_norm": 0.40770378708839417,
"learning_rate": 2.9589604842573762e-06,
"loss": 0.4374,
"step": 1211
},
{
"epoch": 0.6710963455149501,
"grad_norm": 0.3695485591888428,
"learning_rate": 2.9501399183754297e-06,
"loss": 0.4902,
"step": 1212
},
{
"epoch": 0.6716500553709857,
"grad_norm": 0.3176988363265991,
"learning_rate": 2.941327014051554e-06,
"loss": 0.464,
"step": 1213
},
{
"epoch": 0.6722037652270211,
"grad_norm": 0.3670944273471832,
"learning_rate": 2.932521804224866e-06,
"loss": 0.4565,
"step": 1214
},
{
"epoch": 0.6727574750830565,
"grad_norm": 0.3454347252845764,
"learning_rate": 2.92372432180572e-06,
"loss": 0.471,
"step": 1215
},
{
"epoch": 0.6733111849390919,
"grad_norm": 0.36517590284347534,
"learning_rate": 2.914934599675594e-06,
"loss": 0.4668,
"step": 1216
},
{
"epoch": 0.6738648947951273,
"grad_norm": 0.3948805034160614,
"learning_rate": 2.906152670686957e-06,
"loss": 0.4839,
"step": 1217
},
{
"epoch": 0.6744186046511628,
"grad_norm": 0.39370593428611755,
"learning_rate": 2.897378567663147e-06,
"loss": 0.4666,
"step": 1218
},
{
"epoch": 0.6749723145071982,
"grad_norm": 0.33612701296806335,
"learning_rate": 2.8886123233982623e-06,
"loss": 0.4575,
"step": 1219
},
{
"epoch": 0.6755260243632336,
"grad_norm": 0.3216228485107422,
"learning_rate": 2.879853970657016e-06,
"loss": 0.4569,
"step": 1220
},
{
"epoch": 0.6760797342192691,
"grad_norm": 0.35591426491737366,
"learning_rate": 2.871103542174637e-06,
"loss": 0.4555,
"step": 1221
},
{
"epoch": 0.6766334440753046,
"grad_norm": 0.3582440912723541,
"learning_rate": 2.862361070656728e-06,
"loss": 0.4594,
"step": 1222
},
{
"epoch": 0.67718715393134,
"grad_norm": 0.34880882501602173,
"learning_rate": 2.853626588779154e-06,
"loss": 0.4193,
"step": 1223
},
{
"epoch": 0.6777408637873754,
"grad_norm": 0.3358144462108612,
"learning_rate": 2.844900129187922e-06,
"loss": 0.4843,
"step": 1224
},
{
"epoch": 0.6782945736434108,
"grad_norm": 0.3304033875465393,
"learning_rate": 2.836181724499051e-06,
"loss": 0.4528,
"step": 1225
},
{
"epoch": 0.6788482834994463,
"grad_norm": 0.33431103825569153,
"learning_rate": 2.827471407298451e-06,
"loss": 0.4593,
"step": 1226
},
{
"epoch": 0.6794019933554817,
"grad_norm": 0.3479402959346771,
"learning_rate": 2.8187692101418127e-06,
"loss": 0.4689,
"step": 1227
},
{
"epoch": 0.6799557032115172,
"grad_norm": 0.3931315243244171,
"learning_rate": 2.8100751655544716e-06,
"loss": 0.4749,
"step": 1228
},
{
"epoch": 0.6805094130675526,
"grad_norm": 0.36747539043426514,
"learning_rate": 2.8013893060312923e-06,
"loss": 0.4823,
"step": 1229
},
{
"epoch": 0.6810631229235881,
"grad_norm": 0.3489404618740082,
"learning_rate": 2.792711664036547e-06,
"loss": 0.467,
"step": 1230
},
{
"epoch": 0.6816168327796235,
"grad_norm": 0.318730890750885,
"learning_rate": 2.7840422720037943e-06,
"loss": 0.4383,
"step": 1231
},
{
"epoch": 0.6821705426356589,
"grad_norm": 0.37444746494293213,
"learning_rate": 2.7753811623357607e-06,
"loss": 0.4608,
"step": 1232
},
{
"epoch": 0.6827242524916943,
"grad_norm": 0.3400449752807617,
"learning_rate": 2.7667283674042132e-06,
"loss": 0.4401,
"step": 1233
},
{
"epoch": 0.6832779623477298,
"grad_norm": 0.3871501684188843,
"learning_rate": 2.7580839195498397e-06,
"loss": 0.4544,
"step": 1234
},
{
"epoch": 0.6838316722037652,
"grad_norm": 0.34454110264778137,
"learning_rate": 2.749447851082137e-06,
"loss": 0.4701,
"step": 1235
},
{
"epoch": 0.6843853820598007,
"grad_norm": 0.3465222716331482,
"learning_rate": 2.7408201942792755e-06,
"loss": 0.4815,
"step": 1236
},
{
"epoch": 0.6849390919158361,
"grad_norm": 0.3567723333835602,
"learning_rate": 2.732200981387993e-06,
"loss": 0.4555,
"step": 1237
},
{
"epoch": 0.6854928017718716,
"grad_norm": 0.30830711126327515,
"learning_rate": 2.723590244623462e-06,
"loss": 0.4738,
"step": 1238
},
{
"epoch": 0.686046511627907,
"grad_norm": 0.3953273296356201,
"learning_rate": 2.714988016169175e-06,
"loss": 0.4785,
"step": 1239
},
{
"epoch": 0.6866002214839424,
"grad_norm": 0.36648789048194885,
"learning_rate": 2.706394328176829e-06,
"loss": 0.4632,
"step": 1240
},
{
"epoch": 0.6871539313399778,
"grad_norm": 0.3629242479801178,
"learning_rate": 2.697809212766195e-06,
"loss": 0.4333,
"step": 1241
},
{
"epoch": 0.6877076411960132,
"grad_norm": 0.3562483489513397,
"learning_rate": 2.6892327020250013e-06,
"loss": 0.4497,
"step": 1242
},
{
"epoch": 0.6882613510520488,
"grad_norm": 0.3277631103992462,
"learning_rate": 2.6806648280088243e-06,
"loss": 0.4761,
"step": 1243
},
{
"epoch": 0.6888150609080842,
"grad_norm": 0.3348289132118225,
"learning_rate": 2.67210562274095e-06,
"loss": 0.4475,
"step": 1244
},
{
"epoch": 0.6893687707641196,
"grad_norm": 0.34823429584503174,
"learning_rate": 2.663555118212272e-06,
"loss": 0.4468,
"step": 1245
},
{
"epoch": 0.689922480620155,
"grad_norm": 0.362206369638443,
"learning_rate": 2.655013346381158e-06,
"loss": 0.4937,
"step": 1246
},
{
"epoch": 0.6904761904761905,
"grad_norm": 0.36554214358329773,
"learning_rate": 2.646480339173337e-06,
"loss": 0.4469,
"step": 1247
},
{
"epoch": 0.6910299003322259,
"grad_norm": 0.35671401023864746,
"learning_rate": 2.6379561284817856e-06,
"loss": 0.4761,
"step": 1248
},
{
"epoch": 0.6915836101882613,
"grad_norm": 0.38154640793800354,
"learning_rate": 2.6294407461665927e-06,
"loss": 0.4695,
"step": 1249
},
{
"epoch": 0.6921373200442967,
"grad_norm": 0.3522584140300751,
"learning_rate": 2.620934224054861e-06,
"loss": 0.4794,
"step": 1250
},
{
"epoch": 0.6926910299003323,
"grad_norm": 0.3220556974411011,
"learning_rate": 2.612436593940568e-06,
"loss": 0.4845,
"step": 1251
},
{
"epoch": 0.6932447397563677,
"grad_norm": 0.3791353702545166,
"learning_rate": 2.6039478875844603e-06,
"loss": 0.4887,
"step": 1252
},
{
"epoch": 0.6937984496124031,
"grad_norm": 0.34861162304878235,
"learning_rate": 2.595468136713934e-06,
"loss": 0.4857,
"step": 1253
},
{
"epoch": 0.6943521594684385,
"grad_norm": 0.3978254199028015,
"learning_rate": 2.586997373022908e-06,
"loss": 0.4762,
"step": 1254
},
{
"epoch": 0.694905869324474,
"grad_norm": 0.375564843416214,
"learning_rate": 2.578535628171711e-06,
"loss": 0.4591,
"step": 1255
},
{
"epoch": 0.6954595791805094,
"grad_norm": 0.4125734567642212,
"learning_rate": 2.57008293378697e-06,
"loss": 0.453,
"step": 1256
},
{
"epoch": 0.6960132890365448,
"grad_norm": 0.3337445855140686,
"learning_rate": 2.561639321461476e-06,
"loss": 0.4481,
"step": 1257
},
{
"epoch": 0.6965669988925803,
"grad_norm": 0.38525938987731934,
"learning_rate": 2.5532048227540773e-06,
"loss": 0.4791,
"step": 1258
},
{
"epoch": 0.6971207087486158,
"grad_norm": 0.35480937361717224,
"learning_rate": 2.5447794691895657e-06,
"loss": 0.4436,
"step": 1259
},
{
"epoch": 0.6976744186046512,
"grad_norm": 0.34399178624153137,
"learning_rate": 2.536363292258543e-06,
"loss": 0.4605,
"step": 1260
},
{
"epoch": 0.6982281284606866,
"grad_norm": 0.3646879196166992,
"learning_rate": 2.5279563234173177e-06,
"loss": 0.4759,
"step": 1261
},
{
"epoch": 0.698781838316722,
"grad_norm": 0.36042407155036926,
"learning_rate": 2.519558594087778e-06,
"loss": 0.4642,
"step": 1262
},
{
"epoch": 0.6993355481727574,
"grad_norm": 0.35599207878112793,
"learning_rate": 2.511170135657286e-06,
"loss": 0.4768,
"step": 1263
},
{
"epoch": 0.6998892580287929,
"grad_norm": 0.3732999265193939,
"learning_rate": 2.5027909794785452e-06,
"loss": 0.4819,
"step": 1264
},
{
"epoch": 0.7004429678848284,
"grad_norm": 0.37701067328453064,
"learning_rate": 2.494421156869493e-06,
"loss": 0.4843,
"step": 1265
},
{
"epoch": 0.7009966777408638,
"grad_norm": 0.35381215810775757,
"learning_rate": 2.4860606991131857e-06,
"loss": 0.4837,
"step": 1266
},
{
"epoch": 0.7015503875968992,
"grad_norm": 0.3333852291107178,
"learning_rate": 2.4777096374576724e-06,
"loss": 0.4719,
"step": 1267
},
{
"epoch": 0.7021040974529347,
"grad_norm": 0.3859536647796631,
"learning_rate": 2.4693680031158844e-06,
"loss": 0.479,
"step": 1268
},
{
"epoch": 0.7026578073089701,
"grad_norm": 0.4252912402153015,
"learning_rate": 2.4610358272655214e-06,
"loss": 0.4704,
"step": 1269
},
{
"epoch": 0.7032115171650055,
"grad_norm": 0.37061989307403564,
"learning_rate": 2.4527131410489267e-06,
"loss": 0.4977,
"step": 1270
},
{
"epoch": 0.7037652270210409,
"grad_norm": 0.32284095883369446,
"learning_rate": 2.444399975572974e-06,
"loss": 0.4469,
"step": 1271
},
{
"epoch": 0.7043189368770764,
"grad_norm": 0.3401859700679779,
"learning_rate": 2.4360963619089584e-06,
"loss": 0.4563,
"step": 1272
},
{
"epoch": 0.7048726467331119,
"grad_norm": 0.3887861371040344,
"learning_rate": 2.4278023310924676e-06,
"loss": 0.4489,
"step": 1273
},
{
"epoch": 0.7054263565891473,
"grad_norm": 0.3525770902633667,
"learning_rate": 2.4195179141232787e-06,
"loss": 0.4727,
"step": 1274
},
{
"epoch": 0.7059800664451827,
"grad_norm": 0.3447239398956299,
"learning_rate": 2.4112431419652305e-06,
"loss": 0.4821,
"step": 1275
},
{
"epoch": 0.7065337763012182,
"grad_norm": 0.37898221611976624,
"learning_rate": 2.402978045546114e-06,
"loss": 0.4754,
"step": 1276
},
{
"epoch": 0.7070874861572536,
"grad_norm": 0.3542633056640625,
"learning_rate": 2.3947226557575615e-06,
"loss": 0.5011,
"step": 1277
},
{
"epoch": 0.707641196013289,
"grad_norm": 0.33939436078071594,
"learning_rate": 2.3864770034549186e-06,
"loss": 0.4461,
"step": 1278
},
{
"epoch": 0.7081949058693244,
"grad_norm": 0.34798309206962585,
"learning_rate": 2.3782411194571425e-06,
"loss": 0.4591,
"step": 1279
},
{
"epoch": 0.70874861572536,
"grad_norm": 0.3668760061264038,
"learning_rate": 2.3700150345466754e-06,
"loss": 0.4584,
"step": 1280
},
{
"epoch": 0.7093023255813954,
"grad_norm": 0.37262019515037537,
"learning_rate": 2.3617987794693358e-06,
"loss": 0.478,
"step": 1281
},
{
"epoch": 0.7098560354374308,
"grad_norm": 0.3444177508354187,
"learning_rate": 2.353592384934206e-06,
"loss": 0.488,
"step": 1282
},
{
"epoch": 0.7104097452934662,
"grad_norm": 0.3594864010810852,
"learning_rate": 2.345395881613507e-06,
"loss": 0.4681,
"step": 1283
},
{
"epoch": 0.7109634551495017,
"grad_norm": 0.3391314744949341,
"learning_rate": 2.337209300142494e-06,
"loss": 0.4594,
"step": 1284
},
{
"epoch": 0.7115171650055371,
"grad_norm": 0.35806921124458313,
"learning_rate": 2.3290326711193407e-06,
"loss": 0.4779,
"step": 1285
},
{
"epoch": 0.7120708748615725,
"grad_norm": 0.39370259642601013,
"learning_rate": 2.320866025105016e-06,
"loss": 0.4686,
"step": 1286
},
{
"epoch": 0.7126245847176079,
"grad_norm": 0.3590087592601776,
"learning_rate": 2.3127093926231842e-06,
"loss": 0.4743,
"step": 1287
},
{
"epoch": 0.7131782945736435,
"grad_norm": 0.3464341163635254,
"learning_rate": 2.304562804160077e-06,
"loss": 0.4739,
"step": 1288
},
{
"epoch": 0.7137320044296789,
"grad_norm": 0.31786057353019714,
"learning_rate": 2.2964262901643875e-06,
"loss": 0.4318,
"step": 1289
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.3480754792690277,
"learning_rate": 2.288299881047153e-06,
"loss": 0.4591,
"step": 1290
},
{
"epoch": 0.7148394241417497,
"grad_norm": 0.344590961933136,
"learning_rate": 2.2801836071816476e-06,
"loss": 0.458,
"step": 1291
},
{
"epoch": 0.7153931339977851,
"grad_norm": 0.3685104250907898,
"learning_rate": 2.2720774989032583e-06,
"loss": 0.4689,
"step": 1292
},
{
"epoch": 0.7159468438538206,
"grad_norm": 0.3524629473686218,
"learning_rate": 2.263981586509381e-06,
"loss": 0.4528,
"step": 1293
},
{
"epoch": 0.716500553709856,
"grad_norm": 0.31772321462631226,
"learning_rate": 2.255895900259298e-06,
"loss": 0.4476,
"step": 1294
},
{
"epoch": 0.7170542635658915,
"grad_norm": 0.3279765844345093,
"learning_rate": 2.2478204703740796e-06,
"loss": 0.4553,
"step": 1295
},
{
"epoch": 0.717607973421927,
"grad_norm": 0.3653477430343628,
"learning_rate": 2.2397553270364546e-06,
"loss": 0.4362,
"step": 1296
},
{
"epoch": 0.7181616832779624,
"grad_norm": 0.3631284534931183,
"learning_rate": 2.2317005003907044e-06,
"loss": 0.4776,
"step": 1297
},
{
"epoch": 0.7187153931339978,
"grad_norm": 0.33382081985473633,
"learning_rate": 2.2236560205425565e-06,
"loss": 0.4563,
"step": 1298
},
{
"epoch": 0.7192691029900332,
"grad_norm": 0.3235117793083191,
"learning_rate": 2.2156219175590623e-06,
"loss": 0.4498,
"step": 1299
},
{
"epoch": 0.7198228128460686,
"grad_norm": 0.3693845570087433,
"learning_rate": 2.2075982214684855e-06,
"loss": 0.4643,
"step": 1300
},
{
"epoch": 0.7203765227021041,
"grad_norm": 0.36044248938560486,
"learning_rate": 2.1995849622602017e-06,
"loss": 0.4705,
"step": 1301
},
{
"epoch": 0.7209302325581395,
"grad_norm": 0.3060952425003052,
"learning_rate": 2.1915821698845687e-06,
"loss": 0.443,
"step": 1302
},
{
"epoch": 0.721483942414175,
"grad_norm": 0.365112841129303,
"learning_rate": 2.18358987425283e-06,
"loss": 0.4414,
"step": 1303
},
{
"epoch": 0.7220376522702104,
"grad_norm": 0.35041436553001404,
"learning_rate": 2.175608105236993e-06,
"loss": 0.4599,
"step": 1304
},
{
"epoch": 0.7225913621262459,
"grad_norm": 0.3773949444293976,
"learning_rate": 2.16763689266972e-06,
"loss": 0.4568,
"step": 1305
},
{
"epoch": 0.7231450719822813,
"grad_norm": 0.3188624978065491,
"learning_rate": 2.159676266344222e-06,
"loss": 0.4283,
"step": 1306
},
{
"epoch": 0.7236987818383167,
"grad_norm": 0.35047447681427,
"learning_rate": 2.151726256014136e-06,
"loss": 0.4658,
"step": 1307
},
{
"epoch": 0.7242524916943521,
"grad_norm": 0.33076420426368713,
"learning_rate": 2.143786891393429e-06,
"loss": 0.457,
"step": 1308
},
{
"epoch": 0.7248062015503876,
"grad_norm": 0.32635653018951416,
"learning_rate": 2.1358582021562724e-06,
"loss": 0.4376,
"step": 1309
},
{
"epoch": 0.7253599114064231,
"grad_norm": 0.35420915484428406,
"learning_rate": 2.1279402179369363e-06,
"loss": 0.4753,
"step": 1310
},
{
"epoch": 0.7259136212624585,
"grad_norm": 0.330625981092453,
"learning_rate": 2.120032968329687e-06,
"loss": 0.4578,
"step": 1311
},
{
"epoch": 0.7264673311184939,
"grad_norm": 0.3413280248641968,
"learning_rate": 2.112136482888663e-06,
"loss": 0.483,
"step": 1312
},
{
"epoch": 0.7270210409745294,
"grad_norm": 0.32706838846206665,
"learning_rate": 2.10425079112777e-06,
"loss": 0.4465,
"step": 1313
},
{
"epoch": 0.7275747508305648,
"grad_norm": 0.300112247467041,
"learning_rate": 2.0963759225205764e-06,
"loss": 0.4377,
"step": 1314
},
{
"epoch": 0.7281284606866002,
"grad_norm": 0.3354182243347168,
"learning_rate": 2.088511906500193e-06,
"loss": 0.4747,
"step": 1315
},
{
"epoch": 0.7286821705426356,
"grad_norm": 0.3380527198314667,
"learning_rate": 2.0806587724591725e-06,
"loss": 0.4821,
"step": 1316
},
{
"epoch": 0.729235880398671,
"grad_norm": 0.35922887921333313,
"learning_rate": 2.0728165497493913e-06,
"loss": 0.4804,
"step": 1317
},
{
"epoch": 0.7297895902547066,
"grad_norm": 0.3212301433086395,
"learning_rate": 2.0649852676819426e-06,
"loss": 0.4503,
"step": 1318
},
{
"epoch": 0.730343300110742,
"grad_norm": 0.3472149074077606,
"learning_rate": 2.0571649555270345e-06,
"loss": 0.4654,
"step": 1319
},
{
"epoch": 0.7308970099667774,
"grad_norm": 0.3622226119041443,
"learning_rate": 2.049355642513868e-06,
"loss": 0.4337,
"step": 1320
},
{
"epoch": 0.7314507198228128,
"grad_norm": 0.353891521692276,
"learning_rate": 2.0415573578305343e-06,
"loss": 0.4485,
"step": 1321
},
{
"epoch": 0.7320044296788483,
"grad_norm": 0.3391493558883667,
"learning_rate": 2.0337701306239048e-06,
"loss": 0.4843,
"step": 1322
},
{
"epoch": 0.7325581395348837,
"grad_norm": 0.32824766635894775,
"learning_rate": 2.025993989999528e-06,
"loss": 0.4853,
"step": 1323
},
{
"epoch": 0.7331118493909191,
"grad_norm": 0.33276161551475525,
"learning_rate": 2.0182289650215082e-06,
"loss": 0.4489,
"step": 1324
},
{
"epoch": 0.7336655592469546,
"grad_norm": 0.3398074209690094,
"learning_rate": 2.0104750847124075e-06,
"loss": 0.4525,
"step": 1325
},
{
"epoch": 0.7342192691029901,
"grad_norm": 0.34588155150413513,
"learning_rate": 2.0027323780531312e-06,
"loss": 0.4582,
"step": 1326
},
{
"epoch": 0.7347729789590255,
"grad_norm": 0.333475798368454,
"learning_rate": 1.995000873982826e-06,
"loss": 0.4527,
"step": 1327
},
{
"epoch": 0.7353266888150609,
"grad_norm": 0.3277673125267029,
"learning_rate": 1.9872806013987626e-06,
"loss": 0.453,
"step": 1328
},
{
"epoch": 0.7358803986710963,
"grad_norm": 0.329764187335968,
"learning_rate": 1.9795715891562393e-06,
"loss": 0.4547,
"step": 1329
},
{
"epoch": 0.7364341085271318,
"grad_norm": 0.3777225911617279,
"learning_rate": 1.9718738660684627e-06,
"loss": 0.4953,
"step": 1330
},
{
"epoch": 0.7369878183831672,
"grad_norm": 0.35038667917251587,
"learning_rate": 1.9641874609064443e-06,
"loss": 0.4764,
"step": 1331
},
{
"epoch": 0.7375415282392026,
"grad_norm": 0.3255554437637329,
"learning_rate": 1.956512402398899e-06,
"loss": 0.4557,
"step": 1332
},
{
"epoch": 0.7380952380952381,
"grad_norm": 0.32248249650001526,
"learning_rate": 1.948848719232128e-06,
"loss": 0.4455,
"step": 1333
},
{
"epoch": 0.7386489479512736,
"grad_norm": 0.34824302792549133,
"learning_rate": 1.941196440049916e-06,
"loss": 0.4608,
"step": 1334
},
{
"epoch": 0.739202657807309,
"grad_norm": 0.36187997460365295,
"learning_rate": 1.9335555934534283e-06,
"loss": 0.4614,
"step": 1335
},
{
"epoch": 0.7397563676633444,
"grad_norm": 0.32338353991508484,
"learning_rate": 1.9259262080010938e-06,
"loss": 0.4761,
"step": 1336
},
{
"epoch": 0.7403100775193798,
"grad_norm": 0.32866108417510986,
"learning_rate": 1.918308312208511e-06,
"loss": 0.461,
"step": 1337
},
{
"epoch": 0.7408637873754153,
"grad_norm": 0.3138168454170227,
"learning_rate": 1.910701934548329e-06,
"loss": 0.4486,
"step": 1338
},
{
"epoch": 0.7414174972314507,
"grad_norm": 0.34086328744888306,
"learning_rate": 1.9031071034501475e-06,
"loss": 0.4873,
"step": 1339
},
{
"epoch": 0.7419712070874862,
"grad_norm": 0.34671589732170105,
"learning_rate": 1.895523847300414e-06,
"loss": 0.4529,
"step": 1340
},
{
"epoch": 0.7425249169435216,
"grad_norm": 0.34147655963897705,
"learning_rate": 1.887952194442309e-06,
"loss": 0.4652,
"step": 1341
},
{
"epoch": 0.743078626799557,
"grad_norm": 0.32080572843551636,
"learning_rate": 1.8803921731756447e-06,
"loss": 0.4613,
"step": 1342
},
{
"epoch": 0.7436323366555925,
"grad_norm": 0.3686572015285492,
"learning_rate": 1.8728438117567626e-06,
"loss": 0.48,
"step": 1343
},
{
"epoch": 0.7441860465116279,
"grad_norm": 0.35076168179512024,
"learning_rate": 1.86530713839842e-06,
"loss": 0.4854,
"step": 1344
},
{
"epoch": 0.7447397563676633,
"grad_norm": 0.3071606457233429,
"learning_rate": 1.8577821812696939e-06,
"loss": 0.4453,
"step": 1345
},
{
"epoch": 0.7452934662236987,
"grad_norm": 0.36736711859703064,
"learning_rate": 1.8502689684958664e-06,
"loss": 0.4646,
"step": 1346
},
{
"epoch": 0.7458471760797342,
"grad_norm": 0.3428582549095154,
"learning_rate": 1.8427675281583229e-06,
"loss": 0.4709,
"step": 1347
},
{
"epoch": 0.7464008859357697,
"grad_norm": 0.3570258617401123,
"learning_rate": 1.835277888294455e-06,
"loss": 0.4632,
"step": 1348
},
{
"epoch": 0.7469545957918051,
"grad_norm": 0.3332633078098297,
"learning_rate": 1.827800076897542e-06,
"loss": 0.4798,
"step": 1349
},
{
"epoch": 0.7475083056478405,
"grad_norm": 0.31923386454582214,
"learning_rate": 1.8203341219166537e-06,
"loss": 0.449,
"step": 1350
},
{
"epoch": 0.748062015503876,
"grad_norm": 0.3124280273914337,
"learning_rate": 1.8128800512565514e-06,
"loss": 0.4416,
"step": 1351
},
{
"epoch": 0.7486157253599114,
"grad_norm": 0.38136282563209534,
"learning_rate": 1.8054378927775713e-06,
"loss": 0.471,
"step": 1352
},
{
"epoch": 0.7491694352159468,
"grad_norm": 0.3354772925376892,
"learning_rate": 1.7980076742955282e-06,
"loss": 0.4869,
"step": 1353
},
{
"epoch": 0.7497231450719822,
"grad_norm": 0.35330212116241455,
"learning_rate": 1.7905894235816096e-06,
"loss": 0.4727,
"step": 1354
},
{
"epoch": 0.7502768549280178,
"grad_norm": 0.34706899523735046,
"learning_rate": 1.7831831683622758e-06,
"loss": 0.4707,
"step": 1355
},
{
"epoch": 0.7508305647840532,
"grad_norm": 0.3170265257358551,
"learning_rate": 1.7757889363191484e-06,
"loss": 0.4745,
"step": 1356
},
{
"epoch": 0.7513842746400886,
"grad_norm": 0.3163388669490814,
"learning_rate": 1.768406755088911e-06,
"loss": 0.4739,
"step": 1357
},
{
"epoch": 0.751937984496124,
"grad_norm": 0.39844390749931335,
"learning_rate": 1.7610366522632122e-06,
"loss": 0.4722,
"step": 1358
},
{
"epoch": 0.7524916943521595,
"grad_norm": 0.3498047888278961,
"learning_rate": 1.7536786553885488e-06,
"loss": 0.4423,
"step": 1359
},
{
"epoch": 0.7530454042081949,
"grad_norm": 0.36837103962898254,
"learning_rate": 1.7463327919661732e-06,
"loss": 0.48,
"step": 1360
},
{
"epoch": 0.7535991140642303,
"grad_norm": 0.3232395648956299,
"learning_rate": 1.738999089451991e-06,
"loss": 0.4858,
"step": 1361
},
{
"epoch": 0.7541528239202658,
"grad_norm": 0.37191465497016907,
"learning_rate": 1.7316775752564512e-06,
"loss": 0.4691,
"step": 1362
},
{
"epoch": 0.7547065337763013,
"grad_norm": 0.36526915431022644,
"learning_rate": 1.7243682767444463e-06,
"loss": 0.4562,
"step": 1363
},
{
"epoch": 0.7552602436323367,
"grad_norm": 0.3136215806007385,
"learning_rate": 1.7170712212352187e-06,
"loss": 0.4571,
"step": 1364
},
{
"epoch": 0.7558139534883721,
"grad_norm": 0.32871949672698975,
"learning_rate": 1.7097864360022426e-06,
"loss": 0.446,
"step": 1365
},
{
"epoch": 0.7563676633444075,
"grad_norm": 0.37790757417678833,
"learning_rate": 1.7025139482731385e-06,
"loss": 0.4768,
"step": 1366
},
{
"epoch": 0.756921373200443,
"grad_norm": 0.3020164370536804,
"learning_rate": 1.695253785229558e-06,
"loss": 0.45,
"step": 1367
},
{
"epoch": 0.7574750830564784,
"grad_norm": 0.3682115375995636,
"learning_rate": 1.6880059740070897e-06,
"loss": 0.4685,
"step": 1368
},
{
"epoch": 0.7580287929125138,
"grad_norm": 0.36990371346473694,
"learning_rate": 1.6807705416951587e-06,
"loss": 0.46,
"step": 1369
},
{
"epoch": 0.7585825027685493,
"grad_norm": 0.3583049774169922,
"learning_rate": 1.673547515336919e-06,
"loss": 0.4788,
"step": 1370
},
{
"epoch": 0.7591362126245847,
"grad_norm": 0.3231860399246216,
"learning_rate": 1.6663369219291558e-06,
"loss": 0.4708,
"step": 1371
},
{
"epoch": 0.7596899224806202,
"grad_norm": 0.32504814863204956,
"learning_rate": 1.6591387884221905e-06,
"loss": 0.4601,
"step": 1372
},
{
"epoch": 0.7602436323366556,
"grad_norm": 0.3197862207889557,
"learning_rate": 1.651953141719767e-06,
"loss": 0.4721,
"step": 1373
},
{
"epoch": 0.760797342192691,
"grad_norm": 0.3388617932796478,
"learning_rate": 1.6447800086789651e-06,
"loss": 0.4727,
"step": 1374
},
{
"epoch": 0.7613510520487264,
"grad_norm": 0.357910692691803,
"learning_rate": 1.637619416110089e-06,
"loss": 0.4763,
"step": 1375
},
{
"epoch": 0.7619047619047619,
"grad_norm": 0.32476869225502014,
"learning_rate": 1.6304713907765713e-06,
"loss": 0.4831,
"step": 1376
},
{
"epoch": 0.7624584717607974,
"grad_norm": 0.33310264348983765,
"learning_rate": 1.6233359593948777e-06,
"loss": 0.4373,
"step": 1377
},
{
"epoch": 0.7630121816168328,
"grad_norm": 0.32121679186820984,
"learning_rate": 1.6162131486344e-06,
"loss": 0.4924,
"step": 1378
},
{
"epoch": 0.7635658914728682,
"grad_norm": 0.38050976395606995,
"learning_rate": 1.6091029851173567e-06,
"loss": 0.4614,
"step": 1379
},
{
"epoch": 0.7641196013289037,
"grad_norm": 0.3336823880672455,
"learning_rate": 1.602005495418702e-06,
"loss": 0.4379,
"step": 1380
},
{
"epoch": 0.7646733111849391,
"grad_norm": 0.3727882206439972,
"learning_rate": 1.5949207060660138e-06,
"loss": 0.4645,
"step": 1381
},
{
"epoch": 0.7652270210409745,
"grad_norm": 0.3008807897567749,
"learning_rate": 1.587848643539407e-06,
"loss": 0.442,
"step": 1382
},
{
"epoch": 0.7657807308970099,
"grad_norm": 0.31580933928489685,
"learning_rate": 1.5807893342714247e-06,
"loss": 0.4631,
"step": 1383
},
{
"epoch": 0.7663344407530454,
"grad_norm": 0.32674816250801086,
"learning_rate": 1.5737428046469455e-06,
"loss": 0.4694,
"step": 1384
},
{
"epoch": 0.7668881506090809,
"grad_norm": 0.34736204147338867,
"learning_rate": 1.56670908100308e-06,
"loss": 0.4578,
"step": 1385
},
{
"epoch": 0.7674418604651163,
"grad_norm": 0.3090921938419342,
"learning_rate": 1.55968818962908e-06,
"loss": 0.4608,
"step": 1386
},
{
"epoch": 0.7679955703211517,
"grad_norm": 0.3129696249961853,
"learning_rate": 1.5526801567662315e-06,
"loss": 0.4572,
"step": 1387
},
{
"epoch": 0.7685492801771872,
"grad_norm": 0.3233101963996887,
"learning_rate": 1.5456850086077613e-06,
"loss": 0.4508,
"step": 1388
},
{
"epoch": 0.7691029900332226,
"grad_norm": 0.33598968386650085,
"learning_rate": 1.5387027712987368e-06,
"loss": 0.4512,
"step": 1389
},
{
"epoch": 0.769656699889258,
"grad_norm": 0.32115107774734497,
"learning_rate": 1.531733470935976e-06,
"loss": 0.4532,
"step": 1390
},
{
"epoch": 0.7702104097452934,
"grad_norm": 0.34383609890937805,
"learning_rate": 1.5247771335679372e-06,
"loss": 0.4579,
"step": 1391
},
{
"epoch": 0.770764119601329,
"grad_norm": 0.32946261763572693,
"learning_rate": 1.517833785194629e-06,
"loss": 0.4668,
"step": 1392
},
{
"epoch": 0.7713178294573644,
"grad_norm": 0.31901848316192627,
"learning_rate": 1.5109034517675164e-06,
"loss": 0.4625,
"step": 1393
},
{
"epoch": 0.7718715393133998,
"grad_norm": 0.33310940861701965,
"learning_rate": 1.5039861591894146e-06,
"loss": 0.4823,
"step": 1394
},
{
"epoch": 0.7724252491694352,
"grad_norm": 0.32345050573349,
"learning_rate": 1.4970819333144026e-06,
"loss": 0.4592,
"step": 1395
},
{
"epoch": 0.7729789590254706,
"grad_norm": 0.3341536819934845,
"learning_rate": 1.4901907999477167e-06,
"loss": 0.4517,
"step": 1396
},
{
"epoch": 0.7735326688815061,
"grad_norm": 0.3557744026184082,
"learning_rate": 1.4833127848456597e-06,
"loss": 0.4695,
"step": 1397
},
{
"epoch": 0.7740863787375415,
"grad_norm": 0.32580217719078064,
"learning_rate": 1.4764479137155063e-06,
"loss": 0.4751,
"step": 1398
},
{
"epoch": 0.7746400885935769,
"grad_norm": 0.31983494758605957,
"learning_rate": 1.4695962122154023e-06,
"loss": 0.4472,
"step": 1399
},
{
"epoch": 0.7751937984496124,
"grad_norm": 0.2909253239631653,
"learning_rate": 1.4627577059542675e-06,
"loss": 0.4353,
"step": 1400
},
{
"epoch": 0.7757475083056479,
"grad_norm": 0.3315528929233551,
"learning_rate": 1.4559324204917102e-06,
"loss": 0.463,
"step": 1401
},
{
"epoch": 0.7763012181616833,
"grad_norm": 0.31947755813598633,
"learning_rate": 1.4491203813379174e-06,
"loss": 0.4607,
"step": 1402
},
{
"epoch": 0.7768549280177187,
"grad_norm": 0.3263440430164337,
"learning_rate": 1.4423216139535735e-06,
"loss": 0.4631,
"step": 1403
},
{
"epoch": 0.7774086378737541,
"grad_norm": 0.29621192812919617,
"learning_rate": 1.4355361437497533e-06,
"loss": 0.4324,
"step": 1404
},
{
"epoch": 0.7779623477297896,
"grad_norm": 0.3122243583202362,
"learning_rate": 1.4287639960878318e-06,
"loss": 0.4558,
"step": 1405
},
{
"epoch": 0.778516057585825,
"grad_norm": 0.30342355370521545,
"learning_rate": 1.4220051962793952e-06,
"loss": 0.4657,
"step": 1406
},
{
"epoch": 0.7790697674418605,
"grad_norm": 0.36989858746528625,
"learning_rate": 1.4152597695861331e-06,
"loss": 0.4808,
"step": 1407
},
{
"epoch": 0.7796234772978959,
"grad_norm": 0.34097298979759216,
"learning_rate": 1.408527741219759e-06,
"loss": 0.4524,
"step": 1408
},
{
"epoch": 0.7801771871539314,
"grad_norm": 0.30833345651626587,
"learning_rate": 1.4018091363419046e-06,
"loss": 0.4455,
"step": 1409
},
{
"epoch": 0.7807308970099668,
"grad_norm": 0.32465872168540955,
"learning_rate": 1.3951039800640292e-06,
"loss": 0.4355,
"step": 1410
},
{
"epoch": 0.7812846068660022,
"grad_norm": 0.3296130299568176,
"learning_rate": 1.3884122974473307e-06,
"loss": 0.4696,
"step": 1411
},
{
"epoch": 0.7818383167220376,
"grad_norm": 0.3251802623271942,
"learning_rate": 1.381734113502644e-06,
"loss": 0.4431,
"step": 1412
},
{
"epoch": 0.782392026578073,
"grad_norm": 0.33919668197631836,
"learning_rate": 1.3750694531903518e-06,
"loss": 0.4759,
"step": 1413
},
{
"epoch": 0.7829457364341085,
"grad_norm": 0.3112054467201233,
"learning_rate": 1.3684183414202946e-06,
"loss": 0.4521,
"step": 1414
},
{
"epoch": 0.783499446290144,
"grad_norm": 0.32274097204208374,
"learning_rate": 1.3617808030516694e-06,
"loss": 0.4641,
"step": 1415
},
{
"epoch": 0.7840531561461794,
"grad_norm": 0.29862481355667114,
"learning_rate": 1.3551568628929434e-06,
"loss": 0.491,
"step": 1416
},
{
"epoch": 0.7846068660022149,
"grad_norm": 0.3179886043071747,
"learning_rate": 1.3485465457017567e-06,
"loss": 0.4605,
"step": 1417
},
{
"epoch": 0.7851605758582503,
"grad_norm": 0.3223581612110138,
"learning_rate": 1.341949876184837e-06,
"loss": 0.4829,
"step": 1418
},
{
"epoch": 0.7857142857142857,
"grad_norm": 0.34942424297332764,
"learning_rate": 1.3353668789978991e-06,
"loss": 0.4524,
"step": 1419
},
{
"epoch": 0.7862679955703211,
"grad_norm": 0.3048610985279083,
"learning_rate": 1.3287975787455554e-06,
"loss": 0.4788,
"step": 1420
},
{
"epoch": 0.7868217054263565,
"grad_norm": 0.32578060030937195,
"learning_rate": 1.3222419999812248e-06,
"loss": 0.492,
"step": 1421
},
{
"epoch": 0.7873754152823921,
"grad_norm": 0.29815563559532166,
"learning_rate": 1.3157001672070445e-06,
"loss": 0.4337,
"step": 1422
},
{
"epoch": 0.7879291251384275,
"grad_norm": 0.3292684853076935,
"learning_rate": 1.3091721048737699e-06,
"loss": 0.4438,
"step": 1423
},
{
"epoch": 0.7884828349944629,
"grad_norm": 0.3099350333213806,
"learning_rate": 1.3026578373806925e-06,
"loss": 0.4711,
"step": 1424
},
{
"epoch": 0.7890365448504983,
"grad_norm": 0.3220853805541992,
"learning_rate": 1.2961573890755398e-06,
"loss": 0.4717,
"step": 1425
},
{
"epoch": 0.7895902547065338,
"grad_norm": 0.31580111384391785,
"learning_rate": 1.2896707842543898e-06,
"loss": 0.4682,
"step": 1426
},
{
"epoch": 0.7901439645625692,
"grad_norm": 0.2921631932258606,
"learning_rate": 1.2831980471615824e-06,
"loss": 0.4445,
"step": 1427
},
{
"epoch": 0.7906976744186046,
"grad_norm": 0.3160112202167511,
"learning_rate": 1.2767392019896218e-06,
"loss": 0.4798,
"step": 1428
},
{
"epoch": 0.79125138427464,
"grad_norm": 0.3386807143688202,
"learning_rate": 1.2702942728790897e-06,
"loss": 0.4538,
"step": 1429
},
{
"epoch": 0.7918050941306756,
"grad_norm": 0.3259252905845642,
"learning_rate": 1.263863283918559e-06,
"loss": 0.4732,
"step": 1430
},
{
"epoch": 0.792358803986711,
"grad_norm": 0.34992191195487976,
"learning_rate": 1.257446259144494e-06,
"loss": 0.4719,
"step": 1431
},
{
"epoch": 0.7929125138427464,
"grad_norm": 0.3109996020793915,
"learning_rate": 1.2510432225411738e-06,
"loss": 0.4588,
"step": 1432
},
{
"epoch": 0.7934662236987818,
"grad_norm": 0.33270999789237976,
"learning_rate": 1.244654198040589e-06,
"loss": 0.4662,
"step": 1433
},
{
"epoch": 0.7940199335548173,
"grad_norm": 0.31173089146614075,
"learning_rate": 1.238279209522359e-06,
"loss": 0.4564,
"step": 1434
},
{
"epoch": 0.7945736434108527,
"grad_norm": 0.322552889585495,
"learning_rate": 1.2319182808136476e-06,
"loss": 0.4779,
"step": 1435
},
{
"epoch": 0.7951273532668881,
"grad_norm": 0.3441169261932373,
"learning_rate": 1.225571435689062e-06,
"loss": 0.4623,
"step": 1436
},
{
"epoch": 0.7956810631229236,
"grad_norm": 0.3150555491447449,
"learning_rate": 1.2192386978705766e-06,
"loss": 0.4659,
"step": 1437
},
{
"epoch": 0.7962347729789591,
"grad_norm": 0.33501121401786804,
"learning_rate": 1.2129200910274341e-06,
"loss": 0.4299,
"step": 1438
},
{
"epoch": 0.7967884828349945,
"grad_norm": 0.32314276695251465,
"learning_rate": 1.206615638776061e-06,
"loss": 0.4588,
"step": 1439
},
{
"epoch": 0.7973421926910299,
"grad_norm": 0.3319147825241089,
"learning_rate": 1.2003253646799846e-06,
"loss": 0.4456,
"step": 1440
},
{
"epoch": 0.7978959025470653,
"grad_norm": 0.3265640139579773,
"learning_rate": 1.1940492922497337e-06,
"loss": 0.4751,
"step": 1441
},
{
"epoch": 0.7984496124031008,
"grad_norm": 0.3368552029132843,
"learning_rate": 1.18778744494276e-06,
"loss": 0.4484,
"step": 1442
},
{
"epoch": 0.7990033222591362,
"grad_norm": 0.3024345636367798,
"learning_rate": 1.1815398461633498e-06,
"loss": 0.461,
"step": 1443
},
{
"epoch": 0.7995570321151716,
"grad_norm": 0.3282533288002014,
"learning_rate": 1.175306519262529e-06,
"loss": 0.4899,
"step": 1444
},
{
"epoch": 0.8001107419712071,
"grad_norm": 0.331606924533844,
"learning_rate": 1.1690874875379822e-06,
"loss": 0.4851,
"step": 1445
},
{
"epoch": 0.8006644518272426,
"grad_norm": 0.31335535645484924,
"learning_rate": 1.1628827742339688e-06,
"loss": 0.4681,
"step": 1446
},
{
"epoch": 0.801218161683278,
"grad_norm": 0.3058898150920868,
"learning_rate": 1.1566924025412268e-06,
"loss": 0.4361,
"step": 1447
},
{
"epoch": 0.8017718715393134,
"grad_norm": 0.3312506377696991,
"learning_rate": 1.1505163955968928e-06,
"loss": 0.4701,
"step": 1448
},
{
"epoch": 0.8023255813953488,
"grad_norm": 0.3389173448085785,
"learning_rate": 1.1443547764844114e-06,
"loss": 0.4992,
"step": 1449
},
{
"epoch": 0.8028792912513842,
"grad_norm": 0.3187425136566162,
"learning_rate": 1.1382075682334566e-06,
"loss": 0.4621,
"step": 1450
},
{
"epoch": 0.8034330011074197,
"grad_norm": 0.31678736209869385,
"learning_rate": 1.1320747938198356e-06,
"loss": 0.4828,
"step": 1451
},
{
"epoch": 0.8039867109634552,
"grad_norm": 0.3524612784385681,
"learning_rate": 1.1259564761654073e-06,
"loss": 0.4691,
"step": 1452
},
{
"epoch": 0.8045404208194906,
"grad_norm": 0.29647573828697205,
"learning_rate": 1.119852638138002e-06,
"loss": 0.4725,
"step": 1453
},
{
"epoch": 0.805094130675526,
"grad_norm": 0.30620694160461426,
"learning_rate": 1.1137633025513267e-06,
"loss": 0.4476,
"step": 1454
},
{
"epoch": 0.8056478405315615,
"grad_norm": 0.33965837955474854,
"learning_rate": 1.1076884921648834e-06,
"loss": 0.4479,
"step": 1455
},
{
"epoch": 0.8062015503875969,
"grad_norm": 0.31289494037628174,
"learning_rate": 1.1016282296838887e-06,
"loss": 0.4877,
"step": 1456
},
{
"epoch": 0.8067552602436323,
"grad_norm": 0.35544541478157043,
"learning_rate": 1.0955825377591823e-06,
"loss": 0.4788,
"step": 1457
},
{
"epoch": 0.8073089700996677,
"grad_norm": 0.3119059205055237,
"learning_rate": 1.0895514389871436e-06,
"loss": 0.4651,
"step": 1458
},
{
"epoch": 0.8078626799557033,
"grad_norm": 0.3125481903553009,
"learning_rate": 1.0835349559096125e-06,
"loss": 0.4609,
"step": 1459
},
{
"epoch": 0.8084163898117387,
"grad_norm": 0.3452129662036896,
"learning_rate": 1.0775331110137977e-06,
"loss": 0.4821,
"step": 1460
},
{
"epoch": 0.8089700996677741,
"grad_norm": 0.31825172901153564,
"learning_rate": 1.0715459267321998e-06,
"loss": 0.4664,
"step": 1461
},
{
"epoch": 0.8095238095238095,
"grad_norm": 0.3356800079345703,
"learning_rate": 1.06557342544252e-06,
"loss": 0.4714,
"step": 1462
},
{
"epoch": 0.810077519379845,
"grad_norm": 0.32622677087783813,
"learning_rate": 1.0596156294675813e-06,
"loss": 0.4664,
"step": 1463
},
{
"epoch": 0.8106312292358804,
"grad_norm": 0.28277918696403503,
"learning_rate": 1.0536725610752475e-06,
"loss": 0.4277,
"step": 1464
},
{
"epoch": 0.8111849390919158,
"grad_norm": 0.34184351563453674,
"learning_rate": 1.0477442424783306e-06,
"loss": 0.4587,
"step": 1465
},
{
"epoch": 0.8117386489479512,
"grad_norm": 0.33527636528015137,
"learning_rate": 1.0418306958345214e-06,
"loss": 0.4489,
"step": 1466
},
{
"epoch": 0.8122923588039868,
"grad_norm": 0.32856327295303345,
"learning_rate": 1.0359319432462922e-06,
"loss": 0.4883,
"step": 1467
},
{
"epoch": 0.8128460686600222,
"grad_norm": 0.32732442021369934,
"learning_rate": 1.0300480067608232e-06,
"loss": 0.4688,
"step": 1468
},
{
"epoch": 0.8133997785160576,
"grad_norm": 0.3294256925582886,
"learning_rate": 1.02417890836992e-06,
"loss": 0.4834,
"step": 1469
},
{
"epoch": 0.813953488372093,
"grad_norm": 0.2979672849178314,
"learning_rate": 1.018324670009927e-06,
"loss": 0.454,
"step": 1470
},
{
"epoch": 0.8145071982281284,
"grad_norm": 0.3292641341686249,
"learning_rate": 1.0124853135616475e-06,
"loss": 0.4384,
"step": 1471
},
{
"epoch": 0.8150609080841639,
"grad_norm": 0.33436697721481323,
"learning_rate": 1.0066608608502647e-06,
"loss": 0.4745,
"step": 1472
},
{
"epoch": 0.8156146179401993,
"grad_norm": 0.30958297848701477,
"learning_rate": 1.000851333645254e-06,
"loss": 0.4507,
"step": 1473
},
{
"epoch": 0.8161683277962348,
"grad_norm": 0.3246972858905792,
"learning_rate": 9.9505675366031e-07,
"loss": 0.4799,
"step": 1474
},
{
"epoch": 0.8167220376522702,
"grad_norm": 0.3213115334510803,
"learning_rate": 9.89277142553256e-07,
"loss": 0.4831,
"step": 1475
},
{
"epoch": 0.8172757475083057,
"grad_norm": 0.29282888770103455,
"learning_rate": 9.835125219259694e-07,
"loss": 0.4532,
"step": 1476
},
{
"epoch": 0.8178294573643411,
"grad_norm": 0.327378511428833,
"learning_rate": 9.777629133242982e-07,
"loss": 0.4562,
"step": 1477
},
{
"epoch": 0.8183831672203765,
"grad_norm": 0.3104061484336853,
"learning_rate": 9.720283382379852e-07,
"loss": 0.4666,
"step": 1478
},
{
"epoch": 0.8189368770764119,
"grad_norm": 0.3109104037284851,
"learning_rate": 9.663088181005792e-07,
"loss": 0.4735,
"step": 1479
},
{
"epoch": 0.8194905869324474,
"grad_norm": 0.34149253368377686,
"learning_rate": 9.606043742893616e-07,
"loss": 0.4735,
"step": 1480
},
{
"epoch": 0.8200442967884828,
"grad_norm": 0.29113996028900146,
"learning_rate": 9.549150281252633e-07,
"loss": 0.4864,
"step": 1481
},
{
"epoch": 0.8205980066445183,
"grad_norm": 0.3134409785270691,
"learning_rate": 9.492408008727899e-07,
"loss": 0.4717,
"step": 1482
},
{
"epoch": 0.8211517165005537,
"grad_norm": 0.35266929864883423,
"learning_rate": 9.435817137399351e-07,
"loss": 0.4873,
"step": 1483
},
{
"epoch": 0.8217054263565892,
"grad_norm": 0.341102659702301,
"learning_rate": 9.379377878781044e-07,
"loss": 0.4468,
"step": 1484
},
{
"epoch": 0.8222591362126246,
"grad_norm": 0.3171820640563965,
"learning_rate": 9.323090443820404e-07,
"loss": 0.4445,
"step": 1485
},
{
"epoch": 0.82281284606866,
"grad_norm": 0.3287108540534973,
"learning_rate": 9.266955042897357e-07,
"loss": 0.4614,
"step": 1486
},
{
"epoch": 0.8233665559246954,
"grad_norm": 0.2895143926143646,
"learning_rate": 9.210971885823605e-07,
"loss": 0.4456,
"step": 1487
},
{
"epoch": 0.8239202657807309,
"grad_norm": 0.27934545278549194,
"learning_rate": 9.155141181841843e-07,
"loss": 0.4541,
"step": 1488
},
{
"epoch": 0.8244739756367664,
"grad_norm": 0.322287917137146,
"learning_rate": 9.099463139624914e-07,
"loss": 0.4707,
"step": 1489
},
{
"epoch": 0.8250276854928018,
"grad_norm": 0.31403809785842896,
"learning_rate": 9.043937967275119e-07,
"loss": 0.4629,
"step": 1490
},
{
"epoch": 0.8255813953488372,
"grad_norm": 0.34129196405410767,
"learning_rate": 8.988565872323362e-07,
"loss": 0.4812,
"step": 1491
},
{
"epoch": 0.8261351052048727,
"grad_norm": 0.3059549927711487,
"learning_rate": 8.933347061728398e-07,
"loss": 0.4743,
"step": 1492
},
{
"epoch": 0.8266888150609081,
"grad_norm": 0.3288021981716156,
"learning_rate": 8.878281741876105e-07,
"loss": 0.4661,
"step": 1493
},
{
"epoch": 0.8272425249169435,
"grad_norm": 0.31026124954223633,
"learning_rate": 8.823370118578628e-07,
"loss": 0.4679,
"step": 1494
},
{
"epoch": 0.8277962347729789,
"grad_norm": 0.3222355544567108,
"learning_rate": 8.7686123970737e-07,
"loss": 0.4873,
"step": 1495
},
{
"epoch": 0.8283499446290143,
"grad_norm": 0.29187437891960144,
"learning_rate": 8.714008782023797e-07,
"loss": 0.4762,
"step": 1496
},
{
"epoch": 0.8289036544850499,
"grad_norm": 0.30391842126846313,
"learning_rate": 8.659559477515406e-07,
"loss": 0.4432,
"step": 1497
},
{
"epoch": 0.8294573643410853,
"grad_norm": 0.33190590143203735,
"learning_rate": 8.605264687058302e-07,
"loss": 0.4779,
"step": 1498
},
{
"epoch": 0.8300110741971207,
"grad_norm": 0.29238536953926086,
"learning_rate": 8.551124613584705e-07,
"loss": 0.4592,
"step": 1499
},
{
"epoch": 0.8305647840531561,
"grad_norm": 0.2889476418495178,
"learning_rate": 8.497139459448573e-07,
"loss": 0.4639,
"step": 1500
},
{
"epoch": 0.8311184939091916,
"grad_norm": 0.28619977831840515,
"learning_rate": 8.443309426424862e-07,
"loss": 0.4684,
"step": 1501
},
{
"epoch": 0.831672203765227,
"grad_norm": 0.324690580368042,
"learning_rate": 8.389634715708711e-07,
"loss": 0.492,
"step": 1502
},
{
"epoch": 0.8322259136212624,
"grad_norm": 0.30507704615592957,
"learning_rate": 8.336115527914774e-07,
"loss": 0.4645,
"step": 1503
},
{
"epoch": 0.832779623477298,
"grad_norm": 0.307434618473053,
"learning_rate": 8.282752063076371e-07,
"loss": 0.4735,
"step": 1504
},
{
"epoch": 0.8333333333333334,
"grad_norm": 0.3095044791698456,
"learning_rate": 8.229544520644817e-07,
"loss": 0.4732,
"step": 1505
},
{
"epoch": 0.8338870431893688,
"grad_norm": 0.3193853199481964,
"learning_rate": 8.176493099488664e-07,
"loss": 0.4541,
"step": 1506
},
{
"epoch": 0.8344407530454042,
"grad_norm": 0.3113101124763489,
"learning_rate": 8.123597997892918e-07,
"loss": 0.4514,
"step": 1507
},
{
"epoch": 0.8349944629014396,
"grad_norm": 0.3148142695426941,
"learning_rate": 8.070859413558329e-07,
"loss": 0.4545,
"step": 1508
},
{
"epoch": 0.8355481727574751,
"grad_norm": 0.32536906003952026,
"learning_rate": 8.018277543600683e-07,
"loss": 0.4549,
"step": 1509
},
{
"epoch": 0.8361018826135105,
"grad_norm": 0.31320637464523315,
"learning_rate": 7.965852584549983e-07,
"loss": 0.4666,
"step": 1510
},
{
"epoch": 0.8366555924695459,
"grad_norm": 0.3198712170124054,
"learning_rate": 7.913584732349788e-07,
"loss": 0.4612,
"step": 1511
},
{
"epoch": 0.8372093023255814,
"grad_norm": 0.32573768496513367,
"learning_rate": 7.861474182356449e-07,
"loss": 0.4404,
"step": 1512
},
{
"epoch": 0.8377630121816169,
"grad_norm": 0.3041425943374634,
"learning_rate": 7.809521129338371e-07,
"loss": 0.4841,
"step": 1513
},
{
"epoch": 0.8383167220376523,
"grad_norm": 0.30132317543029785,
"learning_rate": 7.757725767475332e-07,
"loss": 0.4708,
"step": 1514
},
{
"epoch": 0.8388704318936877,
"grad_norm": 0.32302555441856384,
"learning_rate": 7.706088290357683e-07,
"loss": 0.4544,
"step": 1515
},
{
"epoch": 0.8394241417497231,
"grad_norm": 0.2973669469356537,
"learning_rate": 7.654608890985709e-07,
"loss": 0.4586,
"step": 1516
},
{
"epoch": 0.8399778516057586,
"grad_norm": 0.30986711382865906,
"learning_rate": 7.603287761768824e-07,
"loss": 0.4888,
"step": 1517
},
{
"epoch": 0.840531561461794,
"grad_norm": 0.31494471430778503,
"learning_rate": 7.552125094524893e-07,
"loss": 0.4699,
"step": 1518
},
{
"epoch": 0.8410852713178295,
"grad_norm": 0.3044757843017578,
"learning_rate": 7.501121080479551e-07,
"loss": 0.446,
"step": 1519
},
{
"epoch": 0.8416389811738649,
"grad_norm": 0.2932833731174469,
"learning_rate": 7.450275910265415e-07,
"loss": 0.473,
"step": 1520
},
{
"epoch": 0.8421926910299004,
"grad_norm": 0.294283002614975,
"learning_rate": 7.399589773921412e-07,
"loss": 0.4609,
"step": 1521
},
{
"epoch": 0.8427464008859358,
"grad_norm": 0.2805401384830475,
"learning_rate": 7.349062860892092e-07,
"loss": 0.4383,
"step": 1522
},
{
"epoch": 0.8433001107419712,
"grad_norm": 0.33512675762176514,
"learning_rate": 7.298695360026842e-07,
"loss": 0.4602,
"step": 1523
},
{
"epoch": 0.8438538205980066,
"grad_norm": 0.29946646094322205,
"learning_rate": 7.248487459579284e-07,
"loss": 0.4289,
"step": 1524
},
{
"epoch": 0.844407530454042,
"grad_norm": 0.30807411670684814,
"learning_rate": 7.198439347206487e-07,
"loss": 0.446,
"step": 1525
},
{
"epoch": 0.8449612403100775,
"grad_norm": 0.31810346245765686,
"learning_rate": 7.148551209968279e-07,
"loss": 0.477,
"step": 1526
},
{
"epoch": 0.845514950166113,
"grad_norm": 0.34772250056266785,
"learning_rate": 7.098823234326618e-07,
"loss": 0.4578,
"step": 1527
},
{
"epoch": 0.8460686600221484,
"grad_norm": 0.29736942052841187,
"learning_rate": 7.049255606144795e-07,
"loss": 0.4601,
"step": 1528
},
{
"epoch": 0.8466223698781838,
"grad_norm": 0.3065546452999115,
"learning_rate": 6.999848510686791e-07,
"loss": 0.4552,
"step": 1529
},
{
"epoch": 0.8471760797342193,
"grad_norm": 0.34162911772727966,
"learning_rate": 6.950602132616618e-07,
"loss": 0.4564,
"step": 1530
},
{
"epoch": 0.8477297895902547,
"grad_norm": 0.3589327931404114,
"learning_rate": 6.901516655997536e-07,
"loss": 0.4817,
"step": 1531
},
{
"epoch": 0.8482834994462901,
"grad_norm": 0.310115784406662,
"learning_rate": 6.852592264291468e-07,
"loss": 0.4635,
"step": 1532
},
{
"epoch": 0.8488372093023255,
"grad_norm": 0.2983943223953247,
"learning_rate": 6.803829140358237e-07,
"loss": 0.4392,
"step": 1533
},
{
"epoch": 0.8493909191583611,
"grad_norm": 0.3104465901851654,
"learning_rate": 6.755227466454912e-07,
"loss": 0.4707,
"step": 1534
},
{
"epoch": 0.8499446290143965,
"grad_norm": 0.32061025500297546,
"learning_rate": 6.706787424235145e-07,
"loss": 0.4636,
"step": 1535
},
{
"epoch": 0.8504983388704319,
"grad_norm": 0.31521013379096985,
"learning_rate": 6.658509194748463e-07,
"loss": 0.4529,
"step": 1536
},
{
"epoch": 0.8510520487264673,
"grad_norm": 0.3287814259529114,
"learning_rate": 6.610392958439582e-07,
"loss": 0.4682,
"step": 1537
},
{
"epoch": 0.8516057585825028,
"grad_norm": 0.3258051872253418,
"learning_rate": 6.562438895147799e-07,
"loss": 0.4673,
"step": 1538
},
{
"epoch": 0.8521594684385382,
"grad_norm": 0.2964293360710144,
"learning_rate": 6.514647184106232e-07,
"loss": 0.466,
"step": 1539
},
{
"epoch": 0.8527131782945736,
"grad_norm": 0.28835752606391907,
"learning_rate": 6.467018003941189e-07,
"loss": 0.4636,
"step": 1540
},
{
"epoch": 0.853266888150609,
"grad_norm": 0.3069021701812744,
"learning_rate": 6.419551532671542e-07,
"loss": 0.4921,
"step": 1541
},
{
"epoch": 0.8538205980066446,
"grad_norm": 0.303408682346344,
"learning_rate": 6.372247947707988e-07,
"loss": 0.4482,
"step": 1542
},
{
"epoch": 0.85437430786268,
"grad_norm": 0.3295760452747345,
"learning_rate": 6.325107425852433e-07,
"loss": 0.4899,
"step": 1543
},
{
"epoch": 0.8549280177187154,
"grad_norm": 0.30452761054039,
"learning_rate": 6.278130143297295e-07,
"loss": 0.455,
"step": 1544
},
{
"epoch": 0.8554817275747508,
"grad_norm": 0.33882924914360046,
"learning_rate": 6.231316275624921e-07,
"loss": 0.4454,
"step": 1545
},
{
"epoch": 0.8560354374307863,
"grad_norm": 0.3470396101474762,
"learning_rate": 6.184665997806832e-07,
"loss": 0.4663,
"step": 1546
},
{
"epoch": 0.8565891472868217,
"grad_norm": 0.33550286293029785,
"learning_rate": 6.138179484203117e-07,
"loss": 0.4469,
"step": 1547
},
{
"epoch": 0.8571428571428571,
"grad_norm": 0.29510611295700073,
"learning_rate": 6.091856908561811e-07,
"loss": 0.4452,
"step": 1548
},
{
"epoch": 0.8576965669988926,
"grad_norm": 0.30699166655540466,
"learning_rate": 6.045698444018194e-07,
"loss": 0.4325,
"step": 1549
},
{
"epoch": 0.858250276854928,
"grad_norm": 0.3179960548877716,
"learning_rate": 5.999704263094147e-07,
"loss": 0.4515,
"step": 1550
},
{
"epoch": 0.8588039867109635,
"grad_norm": 0.37318259477615356,
"learning_rate": 5.953874537697573e-07,
"loss": 0.4755,
"step": 1551
},
{
"epoch": 0.8593576965669989,
"grad_norm": 0.29407113790512085,
"learning_rate": 5.908209439121648e-07,
"loss": 0.4582,
"step": 1552
},
{
"epoch": 0.8599114064230343,
"grad_norm": 0.30949074029922485,
"learning_rate": 5.862709138044298e-07,
"loss": 0.4515,
"step": 1553
},
{
"epoch": 0.8604651162790697,
"grad_norm": 0.3142622709274292,
"learning_rate": 5.817373804527449e-07,
"loss": 0.4545,
"step": 1554
},
{
"epoch": 0.8610188261351052,
"grad_norm": 0.28854456543922424,
"learning_rate": 5.772203608016464e-07,
"loss": 0.4563,
"step": 1555
},
{
"epoch": 0.8615725359911407,
"grad_norm": 0.3129185140132904,
"learning_rate": 5.727198717339511e-07,
"loss": 0.4407,
"step": 1556
},
{
"epoch": 0.8621262458471761,
"grad_norm": 0.29644128680229187,
"learning_rate": 5.68235930070688e-07,
"loss": 0.4401,
"step": 1557
},
{
"epoch": 0.8626799557032115,
"grad_norm": 0.3216518461704254,
"learning_rate": 5.637685525710384e-07,
"loss": 0.4764,
"step": 1558
},
{
"epoch": 0.863233665559247,
"grad_norm": 0.29683998227119446,
"learning_rate": 5.593177559322776e-07,
"loss": 0.4586,
"step": 1559
},
{
"epoch": 0.8637873754152824,
"grad_norm": 0.31808000802993774,
"learning_rate": 5.548835567897031e-07,
"loss": 0.4541,
"step": 1560
},
{
"epoch": 0.8643410852713178,
"grad_norm": 0.2945192754268646,
"learning_rate": 5.504659717165812e-07,
"loss": 0.4668,
"step": 1561
},
{
"epoch": 0.8648947951273532,
"grad_norm": 0.30420994758605957,
"learning_rate": 5.460650172240795e-07,
"loss": 0.4588,
"step": 1562
},
{
"epoch": 0.8654485049833887,
"grad_norm": 0.3046761751174927,
"learning_rate": 5.416807097612071e-07,
"loss": 0.447,
"step": 1563
},
{
"epoch": 0.8660022148394242,
"grad_norm": 0.2961207628250122,
"learning_rate": 5.373130657147552e-07,
"loss": 0.4357,
"step": 1564
},
{
"epoch": 0.8665559246954596,
"grad_norm": 0.3244534730911255,
"learning_rate": 5.329621014092318e-07,
"loss": 0.4773,
"step": 1565
},
{
"epoch": 0.867109634551495,
"grad_norm": 0.28872519731521606,
"learning_rate": 5.286278331068018e-07,
"loss": 0.4718,
"step": 1566
},
{
"epoch": 0.8676633444075305,
"grad_norm": 0.2985605299472809,
"learning_rate": 5.243102770072306e-07,
"loss": 0.4594,
"step": 1567
},
{
"epoch": 0.8682170542635659,
"grad_norm": 0.30403968691825867,
"learning_rate": 5.200094492478169e-07,
"loss": 0.4869,
"step": 1568
},
{
"epoch": 0.8687707641196013,
"grad_norm": 0.2938711941242218,
"learning_rate": 5.157253659033379e-07,
"loss": 0.4498,
"step": 1569
},
{
"epoch": 0.8693244739756367,
"grad_norm": 0.32247763872146606,
"learning_rate": 5.114580429859844e-07,
"loss": 0.4642,
"step": 1570
},
{
"epoch": 0.8698781838316723,
"grad_norm": 0.3126090466976166,
"learning_rate": 5.072074964453055e-07,
"loss": 0.4743,
"step": 1571
},
{
"epoch": 0.8704318936877077,
"grad_norm": 0.29365554451942444,
"learning_rate": 5.029737421681446e-07,
"loss": 0.4649,
"step": 1572
},
{
"epoch": 0.8709856035437431,
"grad_norm": 0.32420745491981506,
"learning_rate": 4.98756795978586e-07,
"loss": 0.4653,
"step": 1573
},
{
"epoch": 0.8715393133997785,
"grad_norm": 0.3036859631538391,
"learning_rate": 4.945566736378887e-07,
"loss": 0.465,
"step": 1574
},
{
"epoch": 0.872093023255814,
"grad_norm": 0.2864364683628082,
"learning_rate": 4.903733908444325e-07,
"loss": 0.4465,
"step": 1575
},
{
"epoch": 0.8726467331118494,
"grad_norm": 0.30921804904937744,
"learning_rate": 4.862069632336558e-07,
"loss": 0.4459,
"step": 1576
},
{
"epoch": 0.8732004429678848,
"grad_norm": 0.3261505365371704,
"learning_rate": 4.820574063780031e-07,
"loss": 0.4849,
"step": 1577
},
{
"epoch": 0.8737541528239202,
"grad_norm": 0.3188358545303345,
"learning_rate": 4.779247357868583e-07,
"loss": 0.475,
"step": 1578
},
{
"epoch": 0.8743078626799557,
"grad_norm": 0.3161148726940155,
"learning_rate": 4.738089669064927e-07,
"loss": 0.4494,
"step": 1579
},
{
"epoch": 0.8748615725359912,
"grad_norm": 0.3190706670284271,
"learning_rate": 4.697101151200079e-07,
"loss": 0.459,
"step": 1580
},
{
"epoch": 0.8754152823920266,
"grad_norm": 0.30358344316482544,
"learning_rate": 4.6562819574727304e-07,
"loss": 0.482,
"step": 1581
},
{
"epoch": 0.875968992248062,
"grad_norm": 0.29602691531181335,
"learning_rate": 4.6156322404487306e-07,
"loss": 0.4779,
"step": 1582
},
{
"epoch": 0.8765227021040974,
"grad_norm": 0.31430983543395996,
"learning_rate": 4.575152152060475e-07,
"loss": 0.4677,
"step": 1583
},
{
"epoch": 0.8770764119601329,
"grad_norm": 0.30270159244537354,
"learning_rate": 4.534841843606358e-07,
"loss": 0.4335,
"step": 1584
},
{
"epoch": 0.8776301218161683,
"grad_norm": 0.3131905794143677,
"learning_rate": 4.494701465750217e-07,
"loss": 0.4775,
"step": 1585
},
{
"epoch": 0.8781838316722038,
"grad_norm": 0.2916598916053772,
"learning_rate": 4.454731168520754e-07,
"loss": 0.4704,
"step": 1586
},
{
"epoch": 0.8787375415282392,
"grad_norm": 0.31324756145477295,
"learning_rate": 4.414931101310954e-07,
"loss": 0.4474,
"step": 1587
},
{
"epoch": 0.8792912513842747,
"grad_norm": 0.2947539687156677,
"learning_rate": 4.375301412877586e-07,
"loss": 0.4763,
"step": 1588
},
{
"epoch": 0.8798449612403101,
"grad_norm": 0.3165675401687622,
"learning_rate": 4.3358422513405776e-07,
"loss": 0.4715,
"step": 1589
},
{
"epoch": 0.8803986710963455,
"grad_norm": 0.2980518043041229,
"learning_rate": 4.296553764182526e-07,
"loss": 0.4567,
"step": 1590
},
{
"epoch": 0.8809523809523809,
"grad_norm": 0.31142324209213257,
"learning_rate": 4.257436098248091e-07,
"loss": 0.4584,
"step": 1591
},
{
"epoch": 0.8815060908084164,
"grad_norm": 0.31078335642814636,
"learning_rate": 4.218489399743481e-07,
"loss": 0.4914,
"step": 1592
},
{
"epoch": 0.8820598006644518,
"grad_norm": 0.30437183380126953,
"learning_rate": 4.179713814235903e-07,
"loss": 0.4673,
"step": 1593
},
{
"epoch": 0.8826135105204873,
"grad_norm": 0.3099023997783661,
"learning_rate": 4.141109486652989e-07,
"loss": 0.4551,
"step": 1594
},
{
"epoch": 0.8831672203765227,
"grad_norm": 0.3001941740512848,
"learning_rate": 4.1026765612823147e-07,
"loss": 0.448,
"step": 1595
},
{
"epoch": 0.8837209302325582,
"grad_norm": 0.29348331689834595,
"learning_rate": 4.064415181770787e-07,
"loss": 0.4466,
"step": 1596
},
{
"epoch": 0.8842746400885936,
"grad_norm": 0.3348081707954407,
"learning_rate": 4.0263254911241555e-07,
"loss": 0.4672,
"step": 1597
},
{
"epoch": 0.884828349944629,
"grad_norm": 0.31024014949798584,
"learning_rate": 3.9884076317064813e-07,
"loss": 0.4681,
"step": 1598
},
{
"epoch": 0.8853820598006644,
"grad_norm": 0.29897528886795044,
"learning_rate": 3.9506617452395647e-07,
"loss": 0.4818,
"step": 1599
},
{
"epoch": 0.8859357696566998,
"grad_norm": 0.27957743406295776,
"learning_rate": 3.913087972802443e-07,
"loss": 0.4321,
"step": 1600
},
{
"epoch": 0.8864894795127354,
"grad_norm": 0.3344254195690155,
"learning_rate": 3.875686454830885e-07,
"loss": 0.4694,
"step": 1601
},
{
"epoch": 0.8870431893687708,
"grad_norm": 0.2915976345539093,
"learning_rate": 3.8384573311168126e-07,
"loss": 0.4728,
"step": 1602
},
{
"epoch": 0.8875968992248062,
"grad_norm": 0.32636356353759766,
"learning_rate": 3.8014007408078144e-07,
"loss": 0.4639,
"step": 1603
},
{
"epoch": 0.8881506090808416,
"grad_norm": 0.33974215388298035,
"learning_rate": 3.764516822406616e-07,
"loss": 0.4676,
"step": 1604
},
{
"epoch": 0.8887043189368771,
"grad_norm": 0.312438040971756,
"learning_rate": 3.727805713770588e-07,
"loss": 0.4828,
"step": 1605
},
{
"epoch": 0.8892580287929125,
"grad_norm": 0.30062320828437805,
"learning_rate": 3.691267552111183e-07,
"loss": 0.4655,
"step": 1606
},
{
"epoch": 0.8898117386489479,
"grad_norm": 0.2907237410545349,
"learning_rate": 3.654902473993438e-07,
"loss": 0.4711,
"step": 1607
},
{
"epoch": 0.8903654485049833,
"grad_norm": 0.701089084148407,
"learning_rate": 3.618710615335497e-07,
"loss": 0.4406,
"step": 1608
},
{
"epoch": 0.8909191583610189,
"grad_norm": 0.29503801465034485,
"learning_rate": 3.5826921114080704e-07,
"loss": 0.426,
"step": 1609
},
{
"epoch": 0.8914728682170543,
"grad_norm": 0.31222304701805115,
"learning_rate": 3.546847096833933e-07,
"loss": 0.4556,
"step": 1610
},
{
"epoch": 0.8920265780730897,
"grad_norm": 0.31220102310180664,
"learning_rate": 3.511175705587433e-07,
"loss": 0.4523,
"step": 1611
},
{
"epoch": 0.8925802879291251,
"grad_norm": 0.3270728588104248,
"learning_rate": 3.4756780709939817e-07,
"loss": 0.4634,
"step": 1612
},
{
"epoch": 0.8931339977851606,
"grad_norm": 0.3142661154270172,
"learning_rate": 3.440354325729545e-07,
"loss": 0.4797,
"step": 1613
},
{
"epoch": 0.893687707641196,
"grad_norm": 0.3256533145904541,
"learning_rate": 3.405204601820189e-07,
"loss": 0.5019,
"step": 1614
},
{
"epoch": 0.8942414174972314,
"grad_norm": 0.3360338509082794,
"learning_rate": 3.370229030641525e-07,
"loss": 0.4744,
"step": 1615
},
{
"epoch": 0.8947951273532669,
"grad_norm": 0.2963426113128662,
"learning_rate": 3.3354277429182626e-07,
"loss": 0.4325,
"step": 1616
},
{
"epoch": 0.8953488372093024,
"grad_norm": 0.2993135154247284,
"learning_rate": 3.3008008687237305e-07,
"loss": 0.45,
"step": 1617
},
{
"epoch": 0.8959025470653378,
"grad_norm": 0.3325082063674927,
"learning_rate": 3.2663485374793304e-07,
"loss": 0.4673,
"step": 1618
},
{
"epoch": 0.8964562569213732,
"grad_norm": 0.3224189281463623,
"learning_rate": 3.23207087795413e-07,
"loss": 0.4478,
"step": 1619
},
{
"epoch": 0.8970099667774086,
"grad_norm": 0.30114316940307617,
"learning_rate": 3.1979680182643134e-07,
"loss": 0.4535,
"step": 1620
},
{
"epoch": 0.897563676633444,
"grad_norm": 0.2878979742527008,
"learning_rate": 3.164040085872755e-07,
"loss": 0.4835,
"step": 1621
},
{
"epoch": 0.8981173864894795,
"grad_norm": 0.30875077843666077,
"learning_rate": 3.130287207588506e-07,
"loss": 0.4636,
"step": 1622
},
{
"epoch": 0.8986710963455149,
"grad_norm": 0.29143431782722473,
"learning_rate": 3.0967095095663424e-07,
"loss": 0.4674,
"step": 1623
},
{
"epoch": 0.8992248062015504,
"grad_norm": 0.278022825717926,
"learning_rate": 3.0633071173062966e-07,
"loss": 0.4511,
"step": 1624
},
{
"epoch": 0.8997785160575859,
"grad_norm": 0.3196825385093689,
"learning_rate": 3.0300801556531536e-07,
"loss": 0.4487,
"step": 1625
},
{
"epoch": 0.9003322259136213,
"grad_norm": 0.2895258069038391,
"learning_rate": 2.997028748796016e-07,
"loss": 0.4693,
"step": 1626
},
{
"epoch": 0.9008859357696567,
"grad_norm": 0.2995036542415619,
"learning_rate": 2.9641530202678626e-07,
"loss": 0.4484,
"step": 1627
},
{
"epoch": 0.9014396456256921,
"grad_norm": 0.29738712310791016,
"learning_rate": 2.9314530929450137e-07,
"loss": 0.443,
"step": 1628
},
{
"epoch": 0.9019933554817275,
"grad_norm": 0.3146614134311676,
"learning_rate": 2.8989290890467314e-07,
"loss": 0.4699,
"step": 1629
},
{
"epoch": 0.902547065337763,
"grad_norm": 0.2757892906665802,
"learning_rate": 2.86658113013476e-07,
"loss": 0.4328,
"step": 1630
},
{
"epoch": 0.9031007751937985,
"grad_norm": 0.30202972888946533,
"learning_rate": 2.834409337112842e-07,
"loss": 0.4416,
"step": 1631
},
{
"epoch": 0.9036544850498339,
"grad_norm": 0.31334805488586426,
"learning_rate": 2.8024138302262913e-07,
"loss": 0.4592,
"step": 1632
},
{
"epoch": 0.9042081949058693,
"grad_norm": 0.3020130693912506,
"learning_rate": 2.770594729061532e-07,
"loss": 0.4496,
"step": 1633
},
{
"epoch": 0.9047619047619048,
"grad_norm": 0.31226208806037903,
"learning_rate": 2.738952152545643e-07,
"loss": 0.4484,
"step": 1634
},
{
"epoch": 0.9053156146179402,
"grad_norm": 0.3205219805240631,
"learning_rate": 2.7074862189459426e-07,
"loss": 0.4908,
"step": 1635
},
{
"epoch": 0.9058693244739756,
"grad_norm": 0.2950468063354492,
"learning_rate": 2.676197045869511e-07,
"loss": 0.4308,
"step": 1636
},
{
"epoch": 0.906423034330011,
"grad_norm": 0.2958923876285553,
"learning_rate": 2.6450847502627883e-07,
"loss": 0.4338,
"step": 1637
},
{
"epoch": 0.9069767441860465,
"grad_norm": 0.29932379722595215,
"learning_rate": 2.6141494484111017e-07,
"loss": 0.4521,
"step": 1638
},
{
"epoch": 0.907530454042082,
"grad_norm": 0.3040353059768677,
"learning_rate": 2.5833912559382444e-07,
"loss": 0.4603,
"step": 1639
},
{
"epoch": 0.9080841638981174,
"grad_norm": 0.28034743666648865,
"learning_rate": 2.5528102878060626e-07,
"loss": 0.4446,
"step": 1640
},
{
"epoch": 0.9086378737541528,
"grad_norm": 0.35027840733528137,
"learning_rate": 2.522406658313997e-07,
"loss": 0.4989,
"step": 1641
},
{
"epoch": 0.9091915836101883,
"grad_norm": 0.3450046181678772,
"learning_rate": 2.492180481098655e-07,
"loss": 0.4531,
"step": 1642
},
{
"epoch": 0.9097452934662237,
"grad_norm": 0.3023633062839508,
"learning_rate": 2.462131869133427e-07,
"loss": 0.4493,
"step": 1643
},
{
"epoch": 0.9102990033222591,
"grad_norm": 0.2984960079193115,
"learning_rate": 2.4322609347280204e-07,
"loss": 0.439,
"step": 1644
},
{
"epoch": 0.9108527131782945,
"grad_norm": 0.2857573926448822,
"learning_rate": 2.4025677895280377e-07,
"loss": 0.4474,
"step": 1645
},
{
"epoch": 0.9114064230343301,
"grad_norm": 0.31294727325439453,
"learning_rate": 2.3730525445146146e-07,
"loss": 0.4427,
"step": 1646
},
{
"epoch": 0.9119601328903655,
"grad_norm": 0.29343825578689575,
"learning_rate": 2.3437153100039244e-07,
"loss": 0.4839,
"step": 1647
},
{
"epoch": 0.9125138427464009,
"grad_norm": 0.2941429615020752,
"learning_rate": 2.3145561956468555e-07,
"loss": 0.4744,
"step": 1648
},
{
"epoch": 0.9130675526024363,
"grad_norm": 0.2945179045200348,
"learning_rate": 2.2855753104285062e-07,
"loss": 0.4654,
"step": 1649
},
{
"epoch": 0.9136212624584718,
"grad_norm": 0.288025438785553,
"learning_rate": 2.2567727626678527e-07,
"loss": 0.4441,
"step": 1650
},
{
"epoch": 0.9141749723145072,
"grad_norm": 0.2953549921512604,
"learning_rate": 2.2281486600173207e-07,
"loss": 0.4745,
"step": 1651
},
{
"epoch": 0.9147286821705426,
"grad_norm": 0.3306369483470917,
"learning_rate": 2.199703109462359e-07,
"loss": 0.4593,
"step": 1652
},
{
"epoch": 0.915282392026578,
"grad_norm": 0.31291428208351135,
"learning_rate": 2.1714362173210824e-07,
"loss": 0.4909,
"step": 1653
},
{
"epoch": 0.9158361018826136,
"grad_norm": 0.2797071635723114,
"learning_rate": 2.1433480892438353e-07,
"loss": 0.446,
"step": 1654
},
{
"epoch": 0.916389811738649,
"grad_norm": 0.2792530953884125,
"learning_rate": 2.1154388302128126e-07,
"loss": 0.4325,
"step": 1655
},
{
"epoch": 0.9169435215946844,
"grad_norm": 0.2993029057979584,
"learning_rate": 2.0877085445416889e-07,
"loss": 0.4564,
"step": 1656
},
{
"epoch": 0.9174972314507198,
"grad_norm": 0.2832440435886383,
"learning_rate": 2.0601573358751904e-07,
"loss": 0.4597,
"step": 1657
},
{
"epoch": 0.9180509413067552,
"grad_norm": 0.2859455943107605,
"learning_rate": 2.0327853071887172e-07,
"loss": 0.4505,
"step": 1658
},
{
"epoch": 0.9186046511627907,
"grad_norm": 0.2933015823364258,
"learning_rate": 2.0055925607879888e-07,
"loss": 0.4547,
"step": 1659
},
{
"epoch": 0.9191583610188261,
"grad_norm": 0.29843026399612427,
"learning_rate": 1.978579198308622e-07,
"loss": 0.444,
"step": 1660
},
{
"epoch": 0.9197120708748616,
"grad_norm": 0.30015841126441956,
"learning_rate": 1.9517453207157865e-07,
"loss": 0.4544,
"step": 1661
},
{
"epoch": 0.920265780730897,
"grad_norm": 0.30778196454048157,
"learning_rate": 1.9250910283037826e-07,
"loss": 0.4556,
"step": 1662
},
{
"epoch": 0.9208194905869325,
"grad_norm": 0.3055875897407532,
"learning_rate": 1.8986164206957037e-07,
"loss": 0.452,
"step": 1663
},
{
"epoch": 0.9213732004429679,
"grad_norm": 0.33771249651908875,
"learning_rate": 1.8723215968430687e-07,
"loss": 0.4612,
"step": 1664
},
{
"epoch": 0.9219269102990033,
"grad_norm": 0.31438618898391724,
"learning_rate": 1.8462066550254232e-07,
"loss": 0.4467,
"step": 1665
},
{
"epoch": 0.9224806201550387,
"grad_norm": 0.298490971326828,
"learning_rate": 1.8202716928499842e-07,
"loss": 0.4786,
"step": 1666
},
{
"epoch": 0.9230343300110742,
"grad_norm": 0.293795108795166,
"learning_rate": 1.7945168072512732e-07,
"loss": 0.4509,
"step": 1667
},
{
"epoch": 0.9235880398671097,
"grad_norm": 0.31182658672332764,
"learning_rate": 1.7689420944907666e-07,
"loss": 0.4548,
"step": 1668
},
{
"epoch": 0.9241417497231451,
"grad_norm": 0.29164862632751465,
"learning_rate": 1.743547650156535e-07,
"loss": 0.4418,
"step": 1669
},
{
"epoch": 0.9246954595791805,
"grad_norm": 0.3175080716609955,
"learning_rate": 1.7183335691628556e-07,
"loss": 0.4611,
"step": 1670
},
{
"epoch": 0.925249169435216,
"grad_norm": 0.2677769958972931,
"learning_rate": 1.6932999457498823e-07,
"loss": 0.439,
"step": 1671
},
{
"epoch": 0.9258028792912514,
"grad_norm": 0.3190261423587799,
"learning_rate": 1.6684468734833149e-07,
"loss": 0.4751,
"step": 1672
},
{
"epoch": 0.9263565891472868,
"grad_norm": 0.28873810172080994,
"learning_rate": 1.6437744452539983e-07,
"loss": 0.4619,
"step": 1673
},
{
"epoch": 0.9269102990033222,
"grad_norm": 0.2862297594547272,
"learning_rate": 1.6192827532776235e-07,
"loss": 0.4522,
"step": 1674
},
{
"epoch": 0.9274640088593576,
"grad_norm": 0.26955926418304443,
"learning_rate": 1.5949718890943377e-07,
"loss": 0.4688,
"step": 1675
},
{
"epoch": 0.9280177187153932,
"grad_norm": 0.3056979477405548,
"learning_rate": 1.5708419435684463e-07,
"loss": 0.4674,
"step": 1676
},
{
"epoch": 0.9285714285714286,
"grad_norm": 0.2731686532497406,
"learning_rate": 1.546893006888045e-07,
"loss": 0.4383,
"step": 1677
},
{
"epoch": 0.929125138427464,
"grad_norm": 0.31720587611198425,
"learning_rate": 1.523125168564693e-07,
"loss": 0.4678,
"step": 1678
},
{
"epoch": 0.9296788482834994,
"grad_norm": 0.3040732145309448,
"learning_rate": 1.499538517433069e-07,
"loss": 0.4598,
"step": 1679
},
{
"epoch": 0.9302325581395349,
"grad_norm": 0.309842973947525,
"learning_rate": 1.4761331416506596e-07,
"loss": 0.4482,
"step": 1680
},
{
"epoch": 0.9307862679955703,
"grad_norm": 0.2698879837989807,
"learning_rate": 1.4529091286973994e-07,
"loss": 0.4442,
"step": 1681
},
{
"epoch": 0.9313399778516057,
"grad_norm": 0.31583163142204285,
"learning_rate": 1.4298665653753818e-07,
"loss": 0.4895,
"step": 1682
},
{
"epoch": 0.9318936877076412,
"grad_norm": 0.3226719796657562,
"learning_rate": 1.407005537808498e-07,
"loss": 0.4477,
"step": 1683
},
{
"epoch": 0.9324473975636767,
"grad_norm": 0.30792638659477234,
"learning_rate": 1.384326131442132e-07,
"loss": 0.4736,
"step": 1684
},
{
"epoch": 0.9330011074197121,
"grad_norm": 0.3065618574619293,
"learning_rate": 1.3618284310428554e-07,
"loss": 0.4637,
"step": 1685
},
{
"epoch": 0.9335548172757475,
"grad_norm": 0.2993546724319458,
"learning_rate": 1.3395125206980774e-07,
"loss": 0.4272,
"step": 1686
},
{
"epoch": 0.9341085271317829,
"grad_norm": 0.3157908618450165,
"learning_rate": 1.317378483815751e-07,
"loss": 0.44,
"step": 1687
},
{
"epoch": 0.9346622369878184,
"grad_norm": 0.3331739604473114,
"learning_rate": 1.2954264031240727e-07,
"loss": 0.4629,
"step": 1688
},
{
"epoch": 0.9352159468438538,
"grad_norm": 0.31486618518829346,
"learning_rate": 1.2736563606711384e-07,
"loss": 0.4528,
"step": 1689
},
{
"epoch": 0.9357696566998892,
"grad_norm": 0.31547436118125916,
"learning_rate": 1.2520684378246884e-07,
"loss": 0.4645,
"step": 1690
},
{
"epoch": 0.9363233665559247,
"grad_norm": 0.3062952160835266,
"learning_rate": 1.230662715271741e-07,
"loss": 0.4522,
"step": 1691
},
{
"epoch": 0.9368770764119602,
"grad_norm": 0.313684344291687,
"learning_rate": 1.2094392730183312e-07,
"loss": 0.4547,
"step": 1692
},
{
"epoch": 0.9374307862679956,
"grad_norm": 0.2886435389518738,
"learning_rate": 1.1883981903892228e-07,
"loss": 0.4552,
"step": 1693
},
{
"epoch": 0.937984496124031,
"grad_norm": 0.33046281337738037,
"learning_rate": 1.1675395460275629e-07,
"loss": 0.496,
"step": 1694
},
{
"epoch": 0.9385382059800664,
"grad_norm": 0.3314034044742584,
"learning_rate": 1.1468634178946392e-07,
"loss": 0.482,
"step": 1695
},
{
"epoch": 0.9390919158361019,
"grad_norm": 0.30235835909843445,
"learning_rate": 1.1263698832695513e-07,
"loss": 0.4503,
"step": 1696
},
{
"epoch": 0.9396456256921373,
"grad_norm": 0.3231419324874878,
"learning_rate": 1.1060590187489562e-07,
"loss": 0.475,
"step": 1697
},
{
"epoch": 0.9401993355481728,
"grad_norm": 0.2842075228691101,
"learning_rate": 1.08593090024674e-07,
"loss": 0.4566,
"step": 1698
},
{
"epoch": 0.9407530454042082,
"grad_norm": 0.3157922625541687,
"learning_rate": 1.0659856029937688e-07,
"loss": 0.4773,
"step": 1699
},
{
"epoch": 0.9413067552602437,
"grad_norm": 0.30469900369644165,
"learning_rate": 1.046223201537605e-07,
"loss": 0.4837,
"step": 1700
},
{
"epoch": 0.9418604651162791,
"grad_norm": 0.3071087598800659,
"learning_rate": 1.0266437697422026e-07,
"loss": 0.4611,
"step": 1701
},
{
"epoch": 0.9424141749723145,
"grad_norm": 0.27550947666168213,
"learning_rate": 1.007247380787657e-07,
"loss": 0.4418,
"step": 1702
},
{
"epoch": 0.9429678848283499,
"grad_norm": 0.27691787481307983,
"learning_rate": 9.880341071699274e-08,
"loss": 0.4683,
"step": 1703
},
{
"epoch": 0.9435215946843853,
"grad_norm": 0.3100975751876831,
"learning_rate": 9.690040207005425e-08,
"loss": 0.4815,
"step": 1704
},
{
"epoch": 0.9440753045404208,
"grad_norm": 0.26599806547164917,
"learning_rate": 9.501571925063735e-08,
"loss": 0.4554,
"step": 1705
},
{
"epoch": 0.9446290143964563,
"grad_norm": 0.30481967329978943,
"learning_rate": 9.314936930293283e-08,
"loss": 0.4633,
"step": 1706
},
{
"epoch": 0.9451827242524917,
"grad_norm": 0.3184366226196289,
"learning_rate": 9.130135920261185e-08,
"loss": 0.4567,
"step": 1707
},
{
"epoch": 0.9457364341085271,
"grad_norm": 0.2845808267593384,
"learning_rate": 8.94716958567976e-08,
"loss": 0.4686,
"step": 1708
},
{
"epoch": 0.9462901439645626,
"grad_norm": 0.3194059729576111,
"learning_rate": 8.766038610404037e-08,
"loss": 0.4747,
"step": 1709
},
{
"epoch": 0.946843853820598,
"grad_norm": 0.2878986597061157,
"learning_rate": 8.58674367142931e-08,
"loss": 0.4453,
"step": 1710
},
{
"epoch": 0.9473975636766334,
"grad_norm": 0.30820009112358093,
"learning_rate": 8.40928543888836e-08,
"loss": 0.4616,
"step": 1711
},
{
"epoch": 0.9479512735326688,
"grad_norm": 0.34989163279533386,
"learning_rate": 8.233664576049239e-08,
"loss": 0.4737,
"step": 1712
},
{
"epoch": 0.9485049833887044,
"grad_norm": 0.2795168161392212,
"learning_rate": 8.059881739312492e-08,
"loss": 0.4454,
"step": 1713
},
{
"epoch": 0.9490586932447398,
"grad_norm": 0.28132033348083496,
"learning_rate": 7.887937578208992e-08,
"loss": 0.4487,
"step": 1714
},
{
"epoch": 0.9496124031007752,
"grad_norm": 0.2817052900791168,
"learning_rate": 7.717832735397335e-08,
"loss": 0.4594,
"step": 1715
},
{
"epoch": 0.9501661129568106,
"grad_norm": 0.3057515621185303,
"learning_rate": 7.549567846661388e-08,
"loss": 0.4434,
"step": 1716
},
{
"epoch": 0.9507198228128461,
"grad_norm": 0.2940051257610321,
"learning_rate": 7.383143540908189e-08,
"loss": 0.4566,
"step": 1717
},
{
"epoch": 0.9512735326688815,
"grad_norm": 0.30491095781326294,
"learning_rate": 7.218560440165223e-08,
"loss": 0.4598,
"step": 1718
},
{
"epoch": 0.9518272425249169,
"grad_norm": 0.2990501821041107,
"learning_rate": 7.055819159578425e-08,
"loss": 0.4486,
"step": 1719
},
{
"epoch": 0.9523809523809523,
"grad_norm": 0.29552608728408813,
"learning_rate": 6.894920307409624e-08,
"loss": 0.4769,
"step": 1720
},
{
"epoch": 0.9529346622369879,
"grad_norm": 0.31582629680633545,
"learning_rate": 6.735864485034493e-08,
"loss": 0.4729,
"step": 1721
},
{
"epoch": 0.9534883720930233,
"grad_norm": 0.28552842140197754,
"learning_rate": 6.578652286940213e-08,
"loss": 0.486,
"step": 1722
},
{
"epoch": 0.9540420819490587,
"grad_norm": 0.2903539538383484,
"learning_rate": 6.423284300723087e-08,
"loss": 0.4664,
"step": 1723
},
{
"epoch": 0.9545957918050941,
"grad_norm": 0.2721926271915436,
"learning_rate": 6.269761107086548e-08,
"loss": 0.4703,
"step": 1724
},
{
"epoch": 0.9551495016611296,
"grad_norm": 0.2885482907295227,
"learning_rate": 6.118083279839094e-08,
"loss": 0.4686,
"step": 1725
},
{
"epoch": 0.955703211517165,
"grad_norm": 0.28239181637763977,
"learning_rate": 5.968251385891744e-08,
"loss": 0.4543,
"step": 1726
},
{
"epoch": 0.9562569213732004,
"grad_norm": 0.32002443075180054,
"learning_rate": 5.820265985256257e-08,
"loss": 0.4776,
"step": 1727
},
{
"epoch": 0.9568106312292359,
"grad_norm": 0.2961246967315674,
"learning_rate": 5.674127631043025e-08,
"loss": 0.4498,
"step": 1728
},
{
"epoch": 0.9573643410852714,
"grad_norm": 0.3170013129711151,
"learning_rate": 5.529836869458738e-08,
"loss": 0.4737,
"step": 1729
},
{
"epoch": 0.9579180509413068,
"grad_norm": 0.2816410958766937,
"learning_rate": 5.387394239804666e-08,
"loss": 0.4628,
"step": 1730
},
{
"epoch": 0.9584717607973422,
"grad_norm": 0.3266596794128418,
"learning_rate": 5.246800274474439e-08,
"loss": 0.4755,
"step": 1731
},
{
"epoch": 0.9590254706533776,
"grad_norm": 0.30849379301071167,
"learning_rate": 5.1080554989522136e-08,
"loss": 0.4629,
"step": 1732
},
{
"epoch": 0.959579180509413,
"grad_norm": 0.31269577145576477,
"learning_rate": 4.971160431810396e-08,
"loss": 0.4689,
"step": 1733
},
{
"epoch": 0.9601328903654485,
"grad_norm": 0.28276434540748596,
"learning_rate": 4.8361155847080896e-08,
"loss": 0.4354,
"step": 1734
},
{
"epoch": 0.9606866002214839,
"grad_norm": 0.2990057170391083,
"learning_rate": 4.7029214623890984e-08,
"loss": 0.4491,
"step": 1735
},
{
"epoch": 0.9612403100775194,
"grad_norm": 0.2993354797363281,
"learning_rate": 4.571578562679757e-08,
"loss": 0.4614,
"step": 1736
},
{
"epoch": 0.9617940199335548,
"grad_norm": 0.2988957464694977,
"learning_rate": 4.44208737648727e-08,
"loss": 0.4296,
"step": 1737
},
{
"epoch": 0.9623477297895903,
"grad_norm": 0.2898132801055908,
"learning_rate": 4.314448387798098e-08,
"loss": 0.4654,
"step": 1738
},
{
"epoch": 0.9629014396456257,
"grad_norm": 0.2892858684062958,
"learning_rate": 4.1886620736757425e-08,
"loss": 0.4774,
"step": 1739
},
{
"epoch": 0.9634551495016611,
"grad_norm": 0.33196502923965454,
"learning_rate": 4.064728904259185e-08,
"loss": 0.4687,
"step": 1740
},
{
"epoch": 0.9640088593576965,
"grad_norm": 0.30135905742645264,
"learning_rate": 3.9426493427611177e-08,
"loss": 0.468,
"step": 1741
},
{
"epoch": 0.964562569213732,
"grad_norm": 0.295409619808197,
"learning_rate": 3.8224238454661056e-08,
"loss": 0.4378,
"step": 1742
},
{
"epoch": 0.9651162790697675,
"grad_norm": 0.3131703734397888,
"learning_rate": 3.704052861729146e-08,
"loss": 0.4601,
"step": 1743
},
{
"epoch": 0.9656699889258029,
"grad_norm": 0.3141990602016449,
"learning_rate": 3.587536833973559e-08,
"loss": 0.4844,
"step": 1744
},
{
"epoch": 0.9662236987818383,
"grad_norm": 0.32879433035850525,
"learning_rate": 3.472876197689823e-08,
"loss": 0.4773,
"step": 1745
},
{
"epoch": 0.9667774086378738,
"grad_norm": 0.29594501852989197,
"learning_rate": 3.360071381433516e-08,
"loss": 0.4628,
"step": 1746
},
{
"epoch": 0.9673311184939092,
"grad_norm": 0.29919105768203735,
"learning_rate": 3.24912280682399e-08,
"loss": 0.437,
"step": 1747
},
{
"epoch": 0.9678848283499446,
"grad_norm": 0.3089485764503479,
"learning_rate": 3.1400308885427e-08,
"loss": 0.4666,
"step": 1748
},
{
"epoch": 0.96843853820598,
"grad_norm": 0.2961753308773041,
"learning_rate": 3.0327960343317084e-08,
"loss": 0.4576,
"step": 1749
},
{
"epoch": 0.9689922480620154,
"grad_norm": 0.2940447926521301,
"learning_rate": 2.927418644991964e-08,
"loss": 0.4601,
"step": 1750
},
{
"epoch": 0.969545957918051,
"grad_norm": 0.28923407196998596,
"learning_rate": 2.823899114382078e-08,
"loss": 0.4709,
"step": 1751
},
{
"epoch": 0.9700996677740864,
"grad_norm": 0.33245790004730225,
"learning_rate": 2.722237829416774e-08,
"loss": 0.4832,
"step": 1752
},
{
"epoch": 0.9706533776301218,
"grad_norm": 0.3206743001937866,
"learning_rate": 2.6224351700652184e-08,
"loss": 0.4695,
"step": 1753
},
{
"epoch": 0.9712070874861573,
"grad_norm": 0.30548450350761414,
"learning_rate": 2.5244915093499134e-08,
"loss": 0.4379,
"step": 1754
},
{
"epoch": 0.9717607973421927,
"grad_norm": 0.30433905124664307,
"learning_rate": 2.4284072133451408e-08,
"loss": 0.4377,
"step": 1755
},
{
"epoch": 0.9723145071982281,
"grad_norm": 0.29879283905029297,
"learning_rate": 2.3341826411756863e-08,
"loss": 0.4641,
"step": 1756
},
{
"epoch": 0.9728682170542635,
"grad_norm": 0.3096272647380829,
"learning_rate": 2.241818145015284e-08,
"loss": 0.4694,
"step": 1757
},
{
"epoch": 0.973421926910299,
"grad_norm": 0.29536715149879456,
"learning_rate": 2.1513140700855085e-08,
"loss": 0.4439,
"step": 1758
},
{
"epoch": 0.9739756367663345,
"grad_norm": 0.31942957639694214,
"learning_rate": 2.0626707546543833e-08,
"loss": 0.4351,
"step": 1759
},
{
"epoch": 0.9745293466223699,
"grad_norm": 0.29551762342453003,
"learning_rate": 1.9758885300353858e-08,
"loss": 0.4833,
"step": 1760
},
{
"epoch": 0.9750830564784053,
"grad_norm": 0.3155532479286194,
"learning_rate": 1.8909677205856682e-08,
"loss": 0.4722,
"step": 1761
},
{
"epoch": 0.9756367663344407,
"grad_norm": 0.30302658677101135,
"learning_rate": 1.8079086437052805e-08,
"loss": 0.4622,
"step": 1762
},
{
"epoch": 0.9761904761904762,
"grad_norm": 0.3077971041202545,
"learning_rate": 1.7267116098359516e-08,
"loss": 0.4336,
"step": 1763
},
{
"epoch": 0.9767441860465116,
"grad_norm": 0.28718769550323486,
"learning_rate": 1.647376922459698e-08,
"loss": 0.4413,
"step": 1764
},
{
"epoch": 0.9772978959025471,
"grad_norm": 0.3143884837627411,
"learning_rate": 1.5699048780979388e-08,
"loss": 0.4567,
"step": 1765
},
{
"epoch": 0.9778516057585825,
"grad_norm": 0.30350565910339355,
"learning_rate": 1.494295766310161e-08,
"loss": 0.4637,
"step": 1766
},
{
"epoch": 0.978405315614618,
"grad_norm": 0.30483105778694153,
"learning_rate": 1.4205498696930332e-08,
"loss": 0.4617,
"step": 1767
},
{
"epoch": 0.9789590254706534,
"grad_norm": 0.30251219868659973,
"learning_rate": 1.348667463879294e-08,
"loss": 0.4829,
"step": 1768
},
{
"epoch": 0.9795127353266888,
"grad_norm": 0.3110658824443817,
"learning_rate": 1.2786488175366429e-08,
"loss": 0.4719,
"step": 1769
},
{
"epoch": 0.9800664451827242,
"grad_norm": 0.2927253544330597,
"learning_rate": 1.2104941923667956e-08,
"loss": 0.4556,
"step": 1770
},
{
"epoch": 0.9806201550387597,
"grad_norm": 0.2785692512989044,
"learning_rate": 1.1442038431044856e-08,
"loss": 0.4622,
"step": 1771
},
{
"epoch": 0.9811738648947951,
"grad_norm": 0.30565908551216125,
"learning_rate": 1.0797780175166305e-08,
"loss": 0.4356,
"step": 1772
},
{
"epoch": 0.9817275747508306,
"grad_norm": 0.28984615206718445,
"learning_rate": 1.017216956401168e-08,
"loss": 0.4377,
"step": 1773
},
{
"epoch": 0.982281284606866,
"grad_norm": 0.31095460057258606,
"learning_rate": 9.565208935863878e-09,
"loss": 0.453,
"step": 1774
},
{
"epoch": 0.9828349944629015,
"grad_norm": 0.3264619708061218,
"learning_rate": 8.97690055929934e-09,
"loss": 0.4563,
"step": 1775
},
{
"epoch": 0.9833887043189369,
"grad_norm": 0.309268057346344,
"learning_rate": 8.407246633178601e-09,
"loss": 0.4777,
"step": 1776
},
{
"epoch": 0.9839424141749723,
"grad_norm": 0.30327308177948,
"learning_rate": 7.856249286642414e-09,
"loss": 0.465,
"step": 1777
},
{
"epoch": 0.9844961240310077,
"grad_norm": 0.31233885884284973,
"learning_rate": 7.32391057909676e-09,
"loss": 0.4541,
"step": 1778
},
{
"epoch": 0.9850498338870431,
"grad_norm": 0.2879256010055542,
"learning_rate": 6.810232500212288e-09,
"loss": 0.4271,
"step": 1779
},
{
"epoch": 0.9856035437430787,
"grad_norm": 0.3132525086402893,
"learning_rate": 6.315216969912663e-09,
"loss": 0.4738,
"step": 1780
},
{
"epoch": 0.9861572535991141,
"grad_norm": 0.31130507588386536,
"learning_rate": 5.838865838366792e-09,
"loss": 0.4787,
"step": 1781
},
{
"epoch": 0.9867109634551495,
"grad_norm": 0.2845606803894043,
"learning_rate": 5.3811808859866035e-09,
"loss": 0.4547,
"step": 1782
},
{
"epoch": 0.987264673311185,
"grad_norm": 0.3038474917411804,
"learning_rate": 4.942163823414281e-09,
"loss": 0.4498,
"step": 1783
},
{
"epoch": 0.9878183831672204,
"grad_norm": 0.2789425253868103,
"learning_rate": 4.521816291520597e-09,
"loss": 0.4579,
"step": 1784
},
{
"epoch": 0.9883720930232558,
"grad_norm": 0.29215115308761597,
"learning_rate": 4.120139861397143e-09,
"loss": 0.491,
"step": 1785
},
{
"epoch": 0.9889258028792912,
"grad_norm": 0.3025710880756378,
"learning_rate": 3.737136034349109e-09,
"loss": 0.4745,
"step": 1786
},
{
"epoch": 0.9894795127353266,
"grad_norm": 0.2971054017543793,
"learning_rate": 3.372806241892512e-09,
"loss": 0.49,
"step": 1787
},
{
"epoch": 0.9900332225913622,
"grad_norm": 0.2840878963470459,
"learning_rate": 3.0271518457464235e-09,
"loss": 0.4816,
"step": 1788
},
{
"epoch": 0.9905869324473976,
"grad_norm": 0.3151835501194,
"learning_rate": 2.7001741378290815e-09,
"loss": 0.4736,
"step": 1789
},
{
"epoch": 0.991140642303433,
"grad_norm": 0.3166757822036743,
"learning_rate": 2.3918743402517874e-09,
"loss": 0.4668,
"step": 1790
},
{
"epoch": 0.9916943521594684,
"grad_norm": 0.295709490776062,
"learning_rate": 2.102253605316684e-09,
"loss": 0.4622,
"step": 1791
},
{
"epoch": 0.9922480620155039,
"grad_norm": 0.31266582012176514,
"learning_rate": 1.8313130155100944e-09,
"loss": 0.4752,
"step": 1792
},
{
"epoch": 0.9928017718715393,
"grad_norm": 0.31861746311187744,
"learning_rate": 1.5790535835003006e-09,
"loss": 0.4417,
"step": 1793
},
{
"epoch": 0.9933554817275747,
"grad_norm": 0.30592185258865356,
"learning_rate": 1.3454762521314391e-09,
"loss": 0.4751,
"step": 1794
},
{
"epoch": 0.9939091915836102,
"grad_norm": 0.29663553833961487,
"learning_rate": 1.130581894422389e-09,
"loss": 0.4939,
"step": 1795
},
{
"epoch": 0.9944629014396457,
"grad_norm": 0.30990320444107056,
"learning_rate": 9.343713135623323e-10,
"loss": 0.4533,
"step": 1796
},
{
"epoch": 0.9950166112956811,
"grad_norm": 0.2900349795818329,
"learning_rate": 7.568452429090877e-10,
"loss": 0.4802,
"step": 1797
},
{
"epoch": 0.9955703211517165,
"grad_norm": 0.2999788224697113,
"learning_rate": 5.980043459830054e-10,
"loss": 0.4733,
"step": 1798
},
{
"epoch": 0.9961240310077519,
"grad_norm": 0.3328549265861511,
"learning_rate": 4.578492164680759e-10,
"loss": 0.4518,
"step": 1799
},
{
"epoch": 0.9966777408637874,
"grad_norm": 0.28633761405944824,
"learning_rate": 3.363803782086006e-10,
"loss": 0.4466,
"step": 1800
},
{
"epoch": 0.9972314507198228,
"grad_norm": 0.3402009606361389,
"learning_rate": 2.335982852064156e-10,
"loss": 0.45,
"step": 1801
},
{
"epoch": 0.9977851605758582,
"grad_norm": 0.2974521517753601,
"learning_rate": 1.4950332161978164e-10,
"loss": 0.4425,
"step": 1802
},
{
"epoch": 0.9983388704318937,
"grad_norm": 0.29301029443740845,
"learning_rate": 8.409580176282905e-11,
"loss": 0.4514,
"step": 1803
},
{
"epoch": 0.9988925802879292,
"grad_norm": 0.2983539402484894,
"learning_rate": 3.737597010111671e-11,
"loss": 0.4824,
"step": 1804
},
{
"epoch": 0.9994462901439646,
"grad_norm": 0.2854422628879547,
"learning_rate": 9.344001256628154e-12,
"loss": 0.4563,
"step": 1805
},
{
"epoch": 1.0,
"grad_norm": 0.2957075536251068,
"learning_rate": 0.0,
"loss": 0.4749,
"step": 1806
},
{
"epoch": 1.0,
"step": 1806,
"total_flos": 2727819468472320.0,
"train_loss": 0.4883633825107006,
"train_runtime": 63393.9102,
"train_samples_per_second": 2.734,
"train_steps_per_second": 0.028
}
],
"logging_steps": 1.0,
"max_steps": 1806,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2727819468472320.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}