{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9928400954653938,
"eval_steps": 500,
"global_step": 418,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00477326968973747,
"grad_norm": 6.016661782287111,
"learning_rate": 9.523809523809523e-08,
"loss": 1.0606,
"step": 1
},
{
"epoch": 0.00954653937947494,
"grad_norm": 6.0953583965166676,
"learning_rate": 1.9047619047619045e-07,
"loss": 1.0205,
"step": 2
},
{
"epoch": 0.014319809069212411,
"grad_norm": 5.545005089565625,
"learning_rate": 2.857142857142857e-07,
"loss": 1.0092,
"step": 3
},
{
"epoch": 0.01909307875894988,
"grad_norm": 6.013893802513984,
"learning_rate": 3.809523809523809e-07,
"loss": 0.9935,
"step": 4
},
{
"epoch": 0.02386634844868735,
"grad_norm": 5.376025361134291,
"learning_rate": 4.761904761904761e-07,
"loss": 1.0184,
"step": 5
},
{
"epoch": 0.028639618138424822,
"grad_norm": 5.360630219441705,
"learning_rate": 5.714285714285714e-07,
"loss": 1.0072,
"step": 6
},
{
"epoch": 0.03341288782816229,
"grad_norm": 5.135874756495844,
"learning_rate": 6.666666666666666e-07,
"loss": 0.9987,
"step": 7
},
{
"epoch": 0.03818615751789976,
"grad_norm": 4.183937042855387,
"learning_rate": 7.619047619047618e-07,
"loss": 0.9639,
"step": 8
},
{
"epoch": 0.04295942720763723,
"grad_norm": 4.099248159117762,
"learning_rate": 8.57142857142857e-07,
"loss": 0.9497,
"step": 9
},
{
"epoch": 0.0477326968973747,
"grad_norm": 4.049680347020253,
"learning_rate": 9.523809523809522e-07,
"loss": 0.9488,
"step": 10
},
{
"epoch": 0.05250596658711217,
"grad_norm": 3.3413136627880506,
"learning_rate": 1.0476190476190476e-06,
"loss": 0.925,
"step": 11
},
{
"epoch": 0.057279236276849645,
"grad_norm": 3.0774268853711955,
"learning_rate": 1.1428571428571428e-06,
"loss": 0.9231,
"step": 12
},
{
"epoch": 0.06205250596658711,
"grad_norm": 2.911361629869161,
"learning_rate": 1.238095238095238e-06,
"loss": 0.9177,
"step": 13
},
{
"epoch": 0.06682577565632458,
"grad_norm": 2.428528787361087,
"learning_rate": 1.3333333333333332e-06,
"loss": 0.8811,
"step": 14
},
{
"epoch": 0.07159904534606205,
"grad_norm": 1.8195746682720535,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.8533,
"step": 15
},
{
"epoch": 0.07637231503579953,
"grad_norm": 1.947836891057091,
"learning_rate": 1.5238095238095236e-06,
"loss": 0.8706,
"step": 16
},
{
"epoch": 0.081145584725537,
"grad_norm": 1.8462478481431221,
"learning_rate": 1.619047619047619e-06,
"loss": 0.8208,
"step": 17
},
{
"epoch": 0.08591885441527446,
"grad_norm": 2.137106522697402,
"learning_rate": 1.714285714285714e-06,
"loss": 0.8181,
"step": 18
},
{
"epoch": 0.09069212410501193,
"grad_norm": 1.9201958176581,
"learning_rate": 1.8095238095238095e-06,
"loss": 0.8245,
"step": 19
},
{
"epoch": 0.0954653937947494,
"grad_norm": 1.523269971944646,
"learning_rate": 1.9047619047619045e-06,
"loss": 0.7545,
"step": 20
},
{
"epoch": 0.10023866348448687,
"grad_norm": 1.8071688523978784,
"learning_rate": 2e-06,
"loss": 0.776,
"step": 21
},
{
"epoch": 0.10501193317422435,
"grad_norm": 1.5489776099138406,
"learning_rate": 1.9999686897547167e-06,
"loss": 0.7445,
"step": 22
},
{
"epoch": 0.10978520286396182,
"grad_norm": 1.483755076083143,
"learning_rate": 1.9998747609795305e-06,
"loss": 0.7351,
"step": 23
},
{
"epoch": 0.11455847255369929,
"grad_norm": 1.4689369885238521,
"learning_rate": 1.999718219556307e-06,
"loss": 0.7332,
"step": 24
},
{
"epoch": 0.11933174224343675,
"grad_norm": 1.4297615546988631,
"learning_rate": 1.999499075287747e-06,
"loss": 0.738,
"step": 25
},
{
"epoch": 0.12410501193317422,
"grad_norm": 1.1356908706873299,
"learning_rate": 1.999217341896772e-06,
"loss": 0.7089,
"step": 26
},
{
"epoch": 0.1288782816229117,
"grad_norm": 1.2895783555833555,
"learning_rate": 1.998873037025665e-06,
"loss": 0.6936,
"step": 27
},
{
"epoch": 0.13365155131264916,
"grad_norm": 1.1235754706376115,
"learning_rate": 1.9984661822349665e-06,
"loss": 0.6785,
"step": 28
},
{
"epoch": 0.13842482100238662,
"grad_norm": 1.0872843150821465,
"learning_rate": 1.997996803002123e-06,
"loss": 0.6978,
"step": 29
},
{
"epoch": 0.1431980906921241,
"grad_norm": 1.0898740583426263,
"learning_rate": 1.9974649287198914e-06,
"loss": 0.669,
"step": 30
},
{
"epoch": 0.14797136038186157,
"grad_norm": 1.050118078989169,
"learning_rate": 1.9968705926945013e-06,
"loss": 0.6674,
"step": 31
},
{
"epoch": 0.15274463007159905,
"grad_norm": 0.8999107812930152,
"learning_rate": 1.9962138321435656e-06,
"loss": 0.6765,
"step": 32
},
{
"epoch": 0.1575178997613365,
"grad_norm": 0.9612552915492341,
"learning_rate": 1.9954946881937524e-06,
"loss": 0.6745,
"step": 33
},
{
"epoch": 0.162291169451074,
"grad_norm": 0.8921943607730816,
"learning_rate": 1.994713205878208e-06,
"loss": 0.6546,
"step": 34
},
{
"epoch": 0.16706443914081145,
"grad_norm": 1.1285406074360596,
"learning_rate": 1.9938694341337393e-06,
"loss": 0.6612,
"step": 35
},
{
"epoch": 0.1718377088305489,
"grad_norm": 0.9414713249176138,
"learning_rate": 1.9929634257977467e-06,
"loss": 0.6525,
"step": 36
},
{
"epoch": 0.1766109785202864,
"grad_norm": 0.8006257830637218,
"learning_rate": 1.991995237604916e-06,
"loss": 0.6465,
"step": 37
},
{
"epoch": 0.18138424821002386,
"grad_norm": 0.9526156911755369,
"learning_rate": 1.9909649301836674e-06,
"loss": 0.6581,
"step": 38
},
{
"epoch": 0.18615751789976134,
"grad_norm": 0.9928551887252647,
"learning_rate": 1.9898725680523566e-06,
"loss": 0.6623,
"step": 39
},
{
"epoch": 0.1909307875894988,
"grad_norm": 0.9455779960744521,
"learning_rate": 1.9887182196152367e-06,
"loss": 0.6527,
"step": 40
},
{
"epoch": 0.1957040572792363,
"grad_norm": 0.8607850079912805,
"learning_rate": 1.9875019571581726e-06,
"loss": 0.6148,
"step": 41
},
{
"epoch": 0.20047732696897375,
"grad_norm": 1.050584563468554,
"learning_rate": 1.9862238568441165e-06,
"loss": 0.6288,
"step": 42
},
{
"epoch": 0.2052505966587112,
"grad_norm": 0.9453773840363461,
"learning_rate": 1.9848839987083364e-06,
"loss": 0.6373,
"step": 43
},
{
"epoch": 0.2100238663484487,
"grad_norm": 0.8585148243018186,
"learning_rate": 1.983482466653407e-06,
"loss": 0.6401,
"step": 44
},
{
"epoch": 0.21479713603818615,
"grad_norm": 0.9814711262628284,
"learning_rate": 1.982019348443952e-06,
"loss": 0.6274,
"step": 45
},
{
"epoch": 0.21957040572792363,
"grad_norm": 0.9528618373675226,
"learning_rate": 1.9804947357011523e-06,
"loss": 0.6694,
"step": 46
},
{
"epoch": 0.2243436754176611,
"grad_norm": 0.7974828002980384,
"learning_rate": 1.978908723897005e-06,
"loss": 0.6236,
"step": 47
},
{
"epoch": 0.22911694510739858,
"grad_norm": 0.8409814486609728,
"learning_rate": 1.9772614123483485e-06,
"loss": 0.6408,
"step": 48
},
{
"epoch": 0.23389021479713604,
"grad_norm": 0.9308103385624037,
"learning_rate": 1.9755529042106393e-06,
"loss": 0.5919,
"step": 49
},
{
"epoch": 0.2386634844868735,
"grad_norm": 0.8733866970343211,
"learning_rate": 1.973783306471495e-06,
"loss": 0.5969,
"step": 50
},
{
"epoch": 0.24343675417661098,
"grad_norm": 0.785222837947662,
"learning_rate": 1.971952729943994e-06,
"loss": 0.5973,
"step": 51
},
{
"epoch": 0.24821002386634844,
"grad_norm": 0.7734659209134986,
"learning_rate": 1.9700612892597372e-06,
"loss": 0.6098,
"step": 52
},
{
"epoch": 0.2529832935560859,
"grad_norm": 0.7186573988976016,
"learning_rate": 1.9681091028616676e-06,
"loss": 0.5991,
"step": 53
},
{
"epoch": 0.2577565632458234,
"grad_norm": 0.7687713083483249,
"learning_rate": 1.966096292996655e-06,
"loss": 0.612,
"step": 54
},
{
"epoch": 0.26252983293556087,
"grad_norm": 0.8621825025712473,
"learning_rate": 1.9640229857078413e-06,
"loss": 0.5949,
"step": 55
},
{
"epoch": 0.26730310262529833,
"grad_norm": 0.8257565967386995,
"learning_rate": 1.9618893108267454e-06,
"loss": 0.6103,
"step": 56
},
{
"epoch": 0.2720763723150358,
"grad_norm": 0.8446529899139308,
"learning_rate": 1.9596954019651354e-06,
"loss": 0.5788,
"step": 57
},
{
"epoch": 0.27684964200477324,
"grad_norm": 0.7679525180581322,
"learning_rate": 1.95744139650666e-06,
"loss": 0.6069,
"step": 58
},
{
"epoch": 0.28162291169451076,
"grad_norm": 2.820852049381465,
"learning_rate": 1.955127435598247e-06,
"loss": 0.5987,
"step": 59
},
{
"epoch": 0.2863961813842482,
"grad_norm": 0.814858484667224,
"learning_rate": 1.9527536641412637e-06,
"loss": 0.6112,
"step": 60
},
{
"epoch": 0.2911694510739857,
"grad_norm": 0.8763795811271151,
"learning_rate": 1.950320230782443e-06,
"loss": 0.5827,
"step": 61
},
{
"epoch": 0.29594272076372313,
"grad_norm": 0.7856677046080051,
"learning_rate": 1.9478272879045763e-06,
"loss": 0.596,
"step": 62
},
{
"epoch": 0.30071599045346065,
"grad_norm": 0.922334054326887,
"learning_rate": 1.9452749916169685e-06,
"loss": 0.6131,
"step": 63
},
{
"epoch": 0.3054892601431981,
"grad_norm": 0.9217839433257945,
"learning_rate": 1.942663501745666e-06,
"loss": 0.6014,
"step": 64
},
{
"epoch": 0.31026252983293556,
"grad_norm": 0.7832581576619595,
"learning_rate": 1.939992981823445e-06,
"loss": 0.599,
"step": 65
},
{
"epoch": 0.315035799522673,
"grad_norm": 0.8859458814902181,
"learning_rate": 1.9372635990795744e-06,
"loss": 0.5606,
"step": 66
},
{
"epoch": 0.3198090692124105,
"grad_norm": 0.8263334442045513,
"learning_rate": 1.934475524429339e-06,
"loss": 0.5845,
"step": 67
},
{
"epoch": 0.324582338902148,
"grad_norm": 0.7750993438508201,
"learning_rate": 1.9316289324633416e-06,
"loss": 0.5938,
"step": 68
},
{
"epoch": 0.32935560859188545,
"grad_norm": 0.8557785609879223,
"learning_rate": 1.928724001436568e-06,
"loss": 0.5971,
"step": 69
},
{
"epoch": 0.3341288782816229,
"grad_norm": 0.8226562290018226,
"learning_rate": 1.925760913257224e-06,
"loss": 0.5896,
"step": 70
},
{
"epoch": 0.33890214797136037,
"grad_norm": 0.7875386984949746,
"learning_rate": 1.922739853475345e-06,
"loss": 0.5957,
"step": 71
},
{
"epoch": 0.3436754176610978,
"grad_norm": 0.7466977444466465,
"learning_rate": 1.919661011271176e-06,
"loss": 0.5782,
"step": 72
},
{
"epoch": 0.34844868735083534,
"grad_norm": 0.8961994451430955,
"learning_rate": 1.916524579443327e-06,
"loss": 0.5912,
"step": 73
},
{
"epoch": 0.3532219570405728,
"grad_norm": 0.80271020509165,
"learning_rate": 1.9133307543966972e-06,
"loss": 0.568,
"step": 74
},
{
"epoch": 0.35799522673031026,
"grad_norm": 9.459451148135054,
"learning_rate": 1.910079736130178e-06,
"loss": 0.5831,
"step": 75
},
{
"epoch": 0.3627684964200477,
"grad_norm": 0.8049759872673024,
"learning_rate": 1.9067717282241275e-06,
"loss": 0.5752,
"step": 76
},
{
"epoch": 0.36754176610978523,
"grad_norm": 0.9365222380955207,
"learning_rate": 1.9034069378276248e-06,
"loss": 0.6037,
"step": 77
},
{
"epoch": 0.3723150357995227,
"grad_norm": 0.7813871400804118,
"learning_rate": 1.8999855756454943e-06,
"loss": 0.5814,
"step": 78
},
{
"epoch": 0.37708830548926014,
"grad_norm": 0.8403752789759832,
"learning_rate": 1.8965078559251141e-06,
"loss": 0.5864,
"step": 79
},
{
"epoch": 0.3818615751789976,
"grad_norm": 0.8051967980548511,
"learning_rate": 1.892973996443e-06,
"loss": 0.5872,
"step": 80
},
{
"epoch": 0.38663484486873506,
"grad_norm": 0.8042594188373205,
"learning_rate": 1.8893842184911652e-06,
"loss": 0.5763,
"step": 81
},
{
"epoch": 0.3914081145584726,
"grad_norm": 0.9020174309993688,
"learning_rate": 1.8857387468632673e-06,
"loss": 0.5663,
"step": 82
},
{
"epoch": 0.39618138424821003,
"grad_norm": 0.7886287092080712,
"learning_rate": 1.8820378098405269e-06,
"loss": 0.5749,
"step": 83
},
{
"epoch": 0.4009546539379475,
"grad_norm": 0.7891386094058271,
"learning_rate": 1.878281639177437e-06,
"loss": 0.5791,
"step": 84
},
{
"epoch": 0.40572792362768495,
"grad_norm": 0.8638559742903111,
"learning_rate": 1.874470470087246e-06,
"loss": 0.594,
"step": 85
},
{
"epoch": 0.4105011933174224,
"grad_norm": 0.8722054176885525,
"learning_rate": 1.8706045412272329e-06,
"loss": 0.5958,
"step": 86
},
{
"epoch": 0.4152744630071599,
"grad_norm": 0.8861516356836725,
"learning_rate": 1.8666840946837588e-06,
"loss": 0.5831,
"step": 87
},
{
"epoch": 0.4200477326968974,
"grad_norm": 1.1646833402992178,
"learning_rate": 1.8627093759571097e-06,
"loss": 0.5773,
"step": 88
},
{
"epoch": 0.42482100238663484,
"grad_norm": 1.015546055180046,
"learning_rate": 1.8586806339461223e-06,
"loss": 0.567,
"step": 89
},
{
"epoch": 0.4295942720763723,
"grad_norm": 0.9466071623549958,
"learning_rate": 1.8545981209325974e-06,
"loss": 0.5859,
"step": 90
},
{
"epoch": 0.4343675417661098,
"grad_norm": 0.7761872762176855,
"learning_rate": 1.850462092565503e-06,
"loss": 0.5786,
"step": 91
},
{
"epoch": 0.43914081145584727,
"grad_norm": 0.7212954328261074,
"learning_rate": 1.846272807844964e-06,
"loss": 0.5643,
"step": 92
},
{
"epoch": 0.4439140811455847,
"grad_norm": 0.9652990021129971,
"learning_rate": 1.8420305291060453e-06,
"loss": 0.5772,
"step": 93
},
{
"epoch": 0.4486873508353222,
"grad_norm": 3.476043998914064,
"learning_rate": 1.837735522002322e-06,
"loss": 0.5973,
"step": 94
},
{
"epoch": 0.45346062052505964,
"grad_norm": 0.9648957060855661,
"learning_rate": 1.8333880554892465e-06,
"loss": 0.5683,
"step": 95
},
{
"epoch": 0.45823389021479716,
"grad_norm": 0.8226895202723103,
"learning_rate": 1.828988401807304e-06,
"loss": 0.5631,
"step": 96
},
{
"epoch": 0.4630071599045346,
"grad_norm": 0.8353418687299229,
"learning_rate": 1.8245368364649672e-06,
"loss": 0.5478,
"step": 97
},
{
"epoch": 0.4677804295942721,
"grad_norm": 0.7861845701165756,
"learning_rate": 1.8200336382214404e-06,
"loss": 0.5814,
"step": 98
},
{
"epoch": 0.47255369928400953,
"grad_norm": 0.7869818557092823,
"learning_rate": 1.815479089069208e-06,
"loss": 0.5831,
"step": 99
},
{
"epoch": 0.477326968973747,
"grad_norm": 1.0793699054838668,
"learning_rate": 1.8108734742163714e-06,
"loss": 0.5711,
"step": 100
},
{
"epoch": 0.4821002386634845,
"grad_norm": 0.9191351283369057,
"learning_rate": 1.8062170820687923e-06,
"loss": 0.5829,
"step": 101
},
{
"epoch": 0.48687350835322196,
"grad_norm": 0.8555793060148964,
"learning_rate": 1.8015102042120314e-06,
"loss": 0.5651,
"step": 102
},
{
"epoch": 0.4916467780429594,
"grad_norm": 0.8381062392654873,
"learning_rate": 1.796753135393089e-06,
"loss": 0.578,
"step": 103
},
{
"epoch": 0.4964200477326969,
"grad_norm": 0.9192300787533598,
"learning_rate": 1.791946173501948e-06,
"loss": 0.549,
"step": 104
},
{
"epoch": 0.5011933174224343,
"grad_norm": 0.8307533286502056,
"learning_rate": 1.7870896195529204e-06,
"loss": 0.5427,
"step": 105
},
{
"epoch": 0.5059665871121718,
"grad_norm": 0.7905696548307439,
"learning_rate": 1.7821837776657967e-06,
"loss": 0.5765,
"step": 106
},
{
"epoch": 0.5107398568019093,
"grad_norm": 0.8311340345264336,
"learning_rate": 1.777228955046803e-06,
"loss": 0.5627,
"step": 107
},
{
"epoch": 0.5155131264916468,
"grad_norm": 1.1408460136923761,
"learning_rate": 1.7722254619693617e-06,
"loss": 0.5615,
"step": 108
},
{
"epoch": 0.5202863961813843,
"grad_norm": 0.9215940982960842,
"learning_rate": 1.7671736117546643e-06,
"loss": 0.559,
"step": 109
},
{
"epoch": 0.5250596658711217,
"grad_norm": 0.9073194364535173,
"learning_rate": 1.7620737207520498e-06,
"loss": 0.5675,
"step": 110
},
{
"epoch": 0.5298329355608592,
"grad_norm": 0.9064733521778133,
"learning_rate": 1.756926108319194e-06,
"loss": 0.564,
"step": 111
},
{
"epoch": 0.5346062052505967,
"grad_norm": 0.8006367733355821,
"learning_rate": 1.751731096802113e-06,
"loss": 0.5697,
"step": 112
},
{
"epoch": 0.5393794749403341,
"grad_norm": 0.7703477827683232,
"learning_rate": 1.7464890115149759e-06,
"loss": 0.5556,
"step": 113
},
{
"epoch": 0.5441527446300716,
"grad_norm": 0.7808625090724881,
"learning_rate": 1.7412001807197361e-06,
"loss": 0.5699,
"step": 114
},
{
"epoch": 0.548926014319809,
"grad_norm": 0.7891354086520267,
"learning_rate": 1.735864935605572e-06,
"loss": 0.5535,
"step": 115
},
{
"epoch": 0.5536992840095465,
"grad_norm": 0.8559410057738829,
"learning_rate": 1.7304836102681493e-06,
"loss": 0.5456,
"step": 116
},
{
"epoch": 0.5584725536992841,
"grad_norm": 1.0113045114994854,
"learning_rate": 1.7250565416887015e-06,
"loss": 0.5724,
"step": 117
},
{
"epoch": 0.5632458233890215,
"grad_norm": 0.8876991951748312,
"learning_rate": 1.719584069712925e-06,
"loss": 0.568,
"step": 118
},
{
"epoch": 0.568019093078759,
"grad_norm": 0.8642199309829095,
"learning_rate": 1.7140665370296992e-06,
"loss": 0.5501,
"step": 119
},
{
"epoch": 0.5727923627684964,
"grad_norm": 0.7976943947559357,
"learning_rate": 1.708504289149628e-06,
"loss": 0.586,
"step": 120
},
{
"epoch": 0.5775656324582339,
"grad_norm": 0.8256312101115841,
"learning_rate": 1.702897674383402e-06,
"loss": 0.5533,
"step": 121
},
{
"epoch": 0.5823389021479713,
"grad_norm": 1.0090990785205396,
"learning_rate": 1.697247043819988e-06,
"loss": 0.5662,
"step": 122
},
{
"epoch": 0.5871121718377088,
"grad_norm": 0.9155456337094188,
"learning_rate": 1.6915527513046443e-06,
"loss": 0.5683,
"step": 123
},
{
"epoch": 0.5918854415274463,
"grad_norm": 0.8131468025811117,
"learning_rate": 1.6858151534167616e-06,
"loss": 0.5621,
"step": 124
},
{
"epoch": 0.5966587112171837,
"grad_norm": 0.8064567687343521,
"learning_rate": 1.6800346094475346e-06,
"loss": 0.5596,
"step": 125
},
{
"epoch": 0.6014319809069213,
"grad_norm": 0.7492395201342102,
"learning_rate": 1.6742114813774618e-06,
"loss": 0.5531,
"step": 126
},
{
"epoch": 0.6062052505966588,
"grad_norm": 0.7647965464540142,
"learning_rate": 1.6683461338536798e-06,
"loss": 0.5832,
"step": 127
},
{
"epoch": 0.6109785202863962,
"grad_norm": 0.7808066517921948,
"learning_rate": 1.6624389341671278e-06,
"loss": 0.5541,
"step": 128
},
{
"epoch": 0.6157517899761337,
"grad_norm": 0.8430152851631113,
"learning_rate": 1.656490252229548e-06,
"loss": 0.5528,
"step": 129
},
{
"epoch": 0.6205250596658711,
"grad_norm": 0.799740321239669,
"learning_rate": 1.6505004605503223e-06,
"loss": 0.5754,
"step": 130
},
{
"epoch": 0.6252983293556086,
"grad_norm": 0.8524369396059758,
"learning_rate": 1.6444699342131428e-06,
"loss": 0.5659,
"step": 131
},
{
"epoch": 0.630071599045346,
"grad_norm": 0.8594592125322017,
"learning_rate": 1.638399050852528e-06,
"loss": 0.5468,
"step": 132
},
{
"epoch": 0.6348448687350835,
"grad_norm": 0.8710890648276657,
"learning_rate": 1.632288190630172e-06,
"loss": 0.5547,
"step": 133
},
{
"epoch": 0.639618138424821,
"grad_norm": 1.3695399621239903,
"learning_rate": 1.6261377362111396e-06,
"loss": 0.5475,
"step": 134
},
{
"epoch": 0.6443914081145584,
"grad_norm": 0.9119912953537386,
"learning_rate": 1.6199480727399032e-06,
"loss": 0.5622,
"step": 135
},
{
"epoch": 0.649164677804296,
"grad_norm": 0.8174877663301265,
"learning_rate": 1.6137195878162267e-06,
"loss": 0.5646,
"step": 136
},
{
"epoch": 0.6539379474940334,
"grad_norm": 0.9968710402813645,
"learning_rate": 1.607452671470891e-06,
"loss": 0.5524,
"step": 137
},
{
"epoch": 0.6587112171837709,
"grad_norm": 0.7838173267581942,
"learning_rate": 1.601147716141272e-06,
"loss": 0.5517,
"step": 138
},
{
"epoch": 0.6634844868735084,
"grad_norm": 0.8600041378892647,
"learning_rate": 1.5948051166467657e-06,
"loss": 0.5664,
"step": 139
},
{
"epoch": 0.6682577565632458,
"grad_norm": 0.7393813982622772,
"learning_rate": 1.5884252701640634e-06,
"loss": 0.5611,
"step": 140
},
{
"epoch": 0.6730310262529833,
"grad_norm": 0.8312116599801993,
"learning_rate": 1.5820085762022823e-06,
"loss": 0.5609,
"step": 141
},
{
"epoch": 0.6778042959427207,
"grad_norm": 0.782610924284724,
"learning_rate": 1.5755554365779455e-06,
"loss": 0.5586,
"step": 142
},
{
"epoch": 0.6825775656324582,
"grad_norm": 0.7869375949652244,
"learning_rate": 1.5690662553898222e-06,
"loss": 0.5557,
"step": 143
},
{
"epoch": 0.6873508353221957,
"grad_norm": 0.7871275055021261,
"learning_rate": 1.5625414389936218e-06,
"loss": 0.5379,
"step": 144
},
{
"epoch": 0.6921241050119332,
"grad_norm": 0.7978567113817064,
"learning_rate": 1.555981395976548e-06,
"loss": 0.5459,
"step": 145
},
{
"epoch": 0.6968973747016707,
"grad_norm": 0.8678454065910531,
"learning_rate": 1.5493865371317123e-06,
"loss": 0.5538,
"step": 146
},
{
"epoch": 0.7016706443914081,
"grad_norm": 0.8640558568867235,
"learning_rate": 1.542757275432411e-06,
"loss": 0.5511,
"step": 147
},
{
"epoch": 0.7064439140811456,
"grad_norm": 0.8257539417151866,
"learning_rate": 1.5360940260062635e-06,
"loss": 0.5395,
"step": 148
},
{
"epoch": 0.711217183770883,
"grad_norm": 0.7735477084244853,
"learning_rate": 1.5293972061092185e-06,
"loss": 0.5487,
"step": 149
},
{
"epoch": 0.7159904534606205,
"grad_norm": 2.21607832896325,
"learning_rate": 1.522667235099422e-06,
"loss": 0.5313,
"step": 150
},
{
"epoch": 0.720763723150358,
"grad_norm": 0.8260305997634725,
"learning_rate": 1.515904534410961e-06,
"loss": 0.548,
"step": 151
},
{
"epoch": 0.7255369928400954,
"grad_norm": 0.9282281415854876,
"learning_rate": 1.5091095275274699e-06,
"loss": 0.5366,
"step": 152
},
{
"epoch": 0.7303102625298329,
"grad_norm": 0.835392664470487,
"learning_rate": 1.5022826399556133e-06,
"loss": 0.5365,
"step": 153
},
{
"epoch": 0.7350835322195705,
"grad_norm": 1.0014547232970634,
"learning_rate": 1.4954242991984396e-06,
"loss": 0.5601,
"step": 154
},
{
"epoch": 0.7398568019093079,
"grad_norm": 0.7999358357306402,
"learning_rate": 1.4885349347286115e-06,
"loss": 0.549,
"step": 155
},
{
"epoch": 0.7446300715990454,
"grad_norm": 0.7456244196208853,
"learning_rate": 1.4816149779615126e-06,
"loss": 0.5516,
"step": 156
},
{
"epoch": 0.7494033412887828,
"grad_norm": 0.7568817924270603,
"learning_rate": 1.474664862228229e-06,
"loss": 0.5572,
"step": 157
},
{
"epoch": 0.7541766109785203,
"grad_norm": 0.9329993871672655,
"learning_rate": 1.467685022748419e-06,
"loss": 0.5617,
"step": 158
},
{
"epoch": 0.7589498806682577,
"grad_norm": 0.7402702977169047,
"learning_rate": 1.4606758966030534e-06,
"loss": 0.5426,
"step": 159
},
{
"epoch": 0.7637231503579952,
"grad_norm": 0.7912657849322988,
"learning_rate": 1.4536379227070509e-06,
"loss": 0.544,
"step": 160
},
{
"epoch": 0.7684964200477327,
"grad_norm": 0.8280839624728757,
"learning_rate": 1.4465715417817888e-06,
"loss": 0.5435,
"step": 161
},
{
"epoch": 0.7732696897374701,
"grad_norm": 0.7376680395132865,
"learning_rate": 1.4394771963275076e-06,
"loss": 0.5199,
"step": 162
},
{
"epoch": 0.7780429594272077,
"grad_norm": 0.7984252215551224,
"learning_rate": 1.4323553305955997e-06,
"loss": 0.5479,
"step": 163
},
{
"epoch": 0.7828162291169452,
"grad_norm": 0.788726316639838,
"learning_rate": 1.4252063905607909e-06,
"loss": 0.5219,
"step": 164
},
{
"epoch": 0.7875894988066826,
"grad_norm": 0.7350598897520126,
"learning_rate": 1.4180308238932135e-06,
"loss": 0.531,
"step": 165
},
{
"epoch": 0.7923627684964201,
"grad_norm": 0.7786806805958749,
"learning_rate": 1.410829079930372e-06,
"loss": 0.5481,
"step": 166
},
{
"epoch": 0.7971360381861575,
"grad_norm": 0.9607237271282482,
"learning_rate": 1.4036016096490064e-06,
"loss": 0.5478,
"step": 167
},
{
"epoch": 0.801909307875895,
"grad_norm": 0.7782148550862285,
"learning_rate": 1.3963488656368517e-06,
"loss": 0.535,
"step": 168
},
{
"epoch": 0.8066825775656324,
"grad_norm": 0.8100946646751193,
"learning_rate": 1.389071302064295e-06,
"loss": 0.5277,
"step": 169
},
{
"epoch": 0.8114558472553699,
"grad_norm": 0.7502947220609039,
"learning_rate": 1.381769374655938e-06,
"loss": 0.5553,
"step": 170
},
{
"epoch": 0.8162291169451074,
"grad_norm": 0.9124000354997026,
"learning_rate": 1.374443540662057e-06,
"loss": 0.5518,
"step": 171
},
{
"epoch": 0.8210023866348448,
"grad_norm": 0.8409623949497625,
"learning_rate": 1.3670942588299705e-06,
"loss": 0.5294,
"step": 172
},
{
"epoch": 0.8257756563245824,
"grad_norm": 0.8018568702519514,
"learning_rate": 1.3597219893753117e-06,
"loss": 0.5121,
"step": 173
},
{
"epoch": 0.8305489260143198,
"grad_norm": 0.9262097539109866,
"learning_rate": 1.352327193953211e-06,
"loss": 0.5259,
"step": 174
},
{
"epoch": 0.8353221957040573,
"grad_norm": 0.7289872898963717,
"learning_rate": 1.3449103356293852e-06,
"loss": 0.5601,
"step": 175
},
{
"epoch": 0.8400954653937948,
"grad_norm": 0.7836398407929648,
"learning_rate": 1.337471878851141e-06,
"loss": 0.5359,
"step": 176
},
{
"epoch": 0.8448687350835322,
"grad_norm": 0.8058359597234802,
"learning_rate": 1.3300122894182909e-06,
"loss": 0.5485,
"step": 177
},
{
"epoch": 0.8496420047732697,
"grad_norm": 0.9118002301436436,
"learning_rate": 1.3225320344539842e-06,
"loss": 0.5562,
"step": 178
},
{
"epoch": 0.8544152744630071,
"grad_norm": 0.7609979767002807,
"learning_rate": 1.315031582375457e-06,
"loss": 0.5485,
"step": 179
},
{
"epoch": 0.8591885441527446,
"grad_norm": 0.7105869344115592,
"learning_rate": 1.3075114028646974e-06,
"loss": 0.5444,
"step": 180
},
{
"epoch": 0.863961813842482,
"grad_norm": 0.8004311294692876,
"learning_rate": 1.299971966839036e-06,
"loss": 0.5481,
"step": 181
},
{
"epoch": 0.8687350835322196,
"grad_norm": 0.7667234252631754,
"learning_rate": 1.292413746421655e-06,
"loss": 0.5345,
"step": 182
},
{
"epoch": 0.8735083532219571,
"grad_norm": 0.7709523318159157,
"learning_rate": 1.2848372149120246e-06,
"loss": 0.512,
"step": 183
},
{
"epoch": 0.8782816229116945,
"grad_norm": 0.8742048693859581,
"learning_rate": 1.2772428467562651e-06,
"loss": 0.55,
"step": 184
},
{
"epoch": 0.883054892601432,
"grad_norm": 0.8768649061250284,
"learning_rate": 1.2696311175174357e-06,
"loss": 0.5365,
"step": 185
},
{
"epoch": 0.8878281622911695,
"grad_norm": 0.8468420712736167,
"learning_rate": 1.2620025038457554e-06,
"loss": 0.5421,
"step": 186
},
{
"epoch": 0.8926014319809069,
"grad_norm": 0.725877140171063,
"learning_rate": 1.254357483448755e-06,
"loss": 0.519,
"step": 187
},
{
"epoch": 0.8973747016706444,
"grad_norm": 0.7168188099187686,
"learning_rate": 1.2466965350613615e-06,
"loss": 0.5651,
"step": 188
},
{
"epoch": 0.9021479713603818,
"grad_norm": 0.8993966404570418,
"learning_rate": 1.2390201384159219e-06,
"loss": 0.5603,
"step": 189
},
{
"epoch": 0.9069212410501193,
"grad_norm": 0.741646072361816,
"learning_rate": 1.231328774212159e-06,
"loss": 0.5157,
"step": 190
},
{
"epoch": 0.9116945107398569,
"grad_norm": 0.7741706595084717,
"learning_rate": 1.223622924087073e-06,
"loss": 0.5367,
"step": 191
},
{
"epoch": 0.9164677804295943,
"grad_norm": 0.760645151447744,
"learning_rate": 1.215903070584779e-06,
"loss": 0.5401,
"step": 192
},
{
"epoch": 0.9212410501193318,
"grad_norm": 0.7462809840684769,
"learning_rate": 1.2081696971262903e-06,
"loss": 0.5458,
"step": 193
},
{
"epoch": 0.9260143198090692,
"grad_norm": 0.867349599337623,
"learning_rate": 1.2004232879792464e-06,
"loss": 0.5398,
"step": 194
},
{
"epoch": 0.9307875894988067,
"grad_norm": 0.7728255267176583,
"learning_rate": 1.1926643282275882e-06,
"loss": 0.5343,
"step": 195
},
{
"epoch": 0.9355608591885441,
"grad_norm": 0.7946709962404823,
"learning_rate": 1.1848933037411825e-06,
"loss": 0.5181,
"step": 196
},
{
"epoch": 0.9403341288782816,
"grad_norm": 0.7159173523126642,
"learning_rate": 1.1771107011453933e-06,
"loss": 0.5442,
"step": 197
},
{
"epoch": 0.9451073985680191,
"grad_norm": 0.8493976289870552,
"learning_rate": 1.1693170077906143e-06,
"loss": 0.5467,
"step": 198
},
{
"epoch": 0.9498806682577565,
"grad_norm": 0.7390118080756048,
"learning_rate": 1.1615127117217463e-06,
"loss": 0.5251,
"step": 199
},
{
"epoch": 0.954653937947494,
"grad_norm": 0.7595495597083671,
"learning_rate": 1.1536983016476373e-06,
"loss": 0.5368,
"step": 200
},
{
"epoch": 0.9594272076372315,
"grad_norm": 0.7399505119485492,
"learning_rate": 1.1458742669104803e-06,
"loss": 0.514,
"step": 201
},
{
"epoch": 0.964200477326969,
"grad_norm": 0.7693531287817772,
"learning_rate": 1.1380410974551682e-06,
"loss": 0.5327,
"step": 202
},
{
"epoch": 0.9689737470167065,
"grad_norm": 0.7361655101073081,
"learning_rate": 1.130199283798615e-06,
"loss": 0.5152,
"step": 203
},
{
"epoch": 0.9737470167064439,
"grad_norm": 0.8174253218643999,
"learning_rate": 1.1223493169990391e-06,
"loss": 0.5376,
"step": 204
},
{
"epoch": 0.9785202863961814,
"grad_norm": 0.7646163527785592,
"learning_rate": 1.1144916886252124e-06,
"loss": 0.5198,
"step": 205
},
{
"epoch": 0.9832935560859188,
"grad_norm": 0.7600726494815581,
"learning_rate": 1.1066268907256782e-06,
"loss": 0.5358,
"step": 206
},
{
"epoch": 0.9880668257756563,
"grad_norm": 0.8292480992474258,
"learning_rate": 1.098755415797939e-06,
"loss": 0.5319,
"step": 207
},
{
"epoch": 0.9928400954653938,
"grad_norm": 0.7584975382780693,
"learning_rate": 1.0908777567576168e-06,
"loss": 0.5453,
"step": 208
},
{
"epoch": 0.9976133651551312,
"grad_norm": 0.7360353406613074,
"learning_rate": 1.0829944069075847e-06,
"loss": 0.5398,
"step": 209
},
{
"epoch": 1.0,
"grad_norm": 0.7360353406613074,
"learning_rate": 1.0751058599070781e-06,
"loss": 0.2683,
"step": 210
},
{
"epoch": 1.0047732696897376,
"grad_norm": 0.7735348980384088,
"learning_rate": 1.0672126097407795e-06,
"loss": 0.4862,
"step": 211
},
{
"epoch": 1.009546539379475,
"grad_norm": 0.6892850244639656,
"learning_rate": 1.0593151506878865e-06,
"loss": 0.4886,
"step": 212
},
{
"epoch": 1.0143198090692125,
"grad_norm": 0.7416432308937427,
"learning_rate": 1.0514139772911597e-06,
"loss": 0.4755,
"step": 213
},
{
"epoch": 1.0190930787589498,
"grad_norm": 0.6788376232914372,
"learning_rate": 1.043509584325953e-06,
"loss": 0.4643,
"step": 214
},
{
"epoch": 1.0238663484486874,
"grad_norm": 0.7328906073842687,
"learning_rate": 1.0356024667692314e-06,
"loss": 0.4934,
"step": 215
},
{
"epoch": 1.0286396181384247,
"grad_norm": 0.7697429459150121,
"learning_rate": 1.0276931197685753e-06,
"loss": 0.4976,
"step": 216
},
{
"epoch": 1.0334128878281623,
"grad_norm": 0.7939705310040335,
"learning_rate": 1.0197820386111737e-06,
"loss": 0.4897,
"step": 217
},
{
"epoch": 1.0381861575178997,
"grad_norm": 0.9752936792347606,
"learning_rate": 1.0118697186928105e-06,
"loss": 0.4632,
"step": 218
},
{
"epoch": 1.0429594272076372,
"grad_norm": 0.810300278966379,
"learning_rate": 1.0039566554868392e-06,
"loss": 0.4667,
"step": 219
},
{
"epoch": 1.0477326968973748,
"grad_norm": 0.7651633767231123,
"learning_rate": 9.960433445131607e-07,
"loss": 0.4913,
"step": 220
},
{
"epoch": 1.0525059665871122,
"grad_norm": 0.7783544485209318,
"learning_rate": 9.881302813071896e-07,
"loss": 0.485,
"step": 221
},
{
"epoch": 1.0572792362768497,
"grad_norm": 0.7728747490030172,
"learning_rate": 9.802179613888262e-07,
"loss": 0.4663,
"step": 222
},
{
"epoch": 1.062052505966587,
"grad_norm": 0.7199803548701269,
"learning_rate": 9.723068802314246e-07,
"loss": 0.4724,
"step": 223
},
{
"epoch": 1.0668257756563246,
"grad_norm": 0.8173682429078198,
"learning_rate": 9.643975332307687e-07,
"loss": 0.4777,
"step": 224
},
{
"epoch": 1.071599045346062,
"grad_norm": 0.9029276240129886,
"learning_rate": 9.564904156740471e-07,
"loss": 0.4664,
"step": 225
},
{
"epoch": 1.0763723150357996,
"grad_norm": 0.7595074592495551,
"learning_rate": 9.485860227088405e-07,
"loss": 0.4808,
"step": 226
},
{
"epoch": 1.081145584725537,
"grad_norm": 0.8019805756491788,
"learning_rate": 9.406848493121134e-07,
"loss": 0.4764,
"step": 227
},
{
"epoch": 1.0859188544152745,
"grad_norm": 0.7750922258239085,
"learning_rate": 9.327873902592205e-07,
"loss": 0.4711,
"step": 228
},
{
"epoch": 1.0906921241050118,
"grad_norm": 0.7272348247085987,
"learning_rate": 9.248941400929222e-07,
"loss": 0.4753,
"step": 229
},
{
"epoch": 1.0954653937947494,
"grad_norm": 0.8135968715591004,
"learning_rate": 9.17005593092415e-07,
"loss": 0.49,
"step": 230
},
{
"epoch": 1.100238663484487,
"grad_norm": 0.784517413630989,
"learning_rate": 9.09122243242383e-07,
"loss": 0.4636,
"step": 231
},
{
"epoch": 1.1050119331742243,
"grad_norm": 0.7967633635464352,
"learning_rate": 9.01244584202061e-07,
"loss": 0.4638,
"step": 232
},
{
"epoch": 1.1097852028639619,
"grad_norm": 0.7347125585892648,
"learning_rate": 8.933731092743219e-07,
"loss": 0.4951,
"step": 233
},
{
"epoch": 1.1145584725536992,
"grad_norm": 0.762506543894173,
"learning_rate": 8.855083113747875e-07,
"loss": 0.4715,
"step": 234
},
{
"epoch": 1.1193317422434368,
"grad_norm": 0.7657159811972606,
"learning_rate": 8.776506830009607e-07,
"loss": 0.4792,
"step": 235
},
{
"epoch": 1.1241050119331741,
"grad_norm": 0.7746280343348994,
"learning_rate": 8.698007162013849e-07,
"loss": 0.4734,
"step": 236
},
{
"epoch": 1.1288782816229117,
"grad_norm": 0.7287940319917965,
"learning_rate": 8.619589025448318e-07,
"loss": 0.4899,
"step": 237
},
{
"epoch": 1.1336515513126493,
"grad_norm": 0.7283506274833321,
"learning_rate": 8.541257330895197e-07,
"loss": 0.461,
"step": 238
},
{
"epoch": 1.1384248210023866,
"grad_norm": 1.109020964160513,
"learning_rate": 8.463016983523627e-07,
"loss": 0.4789,
"step": 239
},
{
"epoch": 1.1431980906921242,
"grad_norm": 0.8916069268430648,
"learning_rate": 8.384872882782541e-07,
"loss": 0.4951,
"step": 240
},
{
"epoch": 1.1479713603818615,
"grad_norm": 0.7832561259348029,
"learning_rate": 8.306829922093857e-07,
"loss": 0.4666,
"step": 241
},
{
"epoch": 1.152744630071599,
"grad_norm": 0.7246823419762234,
"learning_rate": 8.228892988546067e-07,
"loss": 0.475,
"step": 242
},
{
"epoch": 1.1575178997613365,
"grad_norm": 0.705366097498364,
"learning_rate": 8.15106696258818e-07,
"loss": 0.4727,
"step": 243
},
{
"epoch": 1.162291169451074,
"grad_norm": 0.7563603316000965,
"learning_rate": 8.073356717724115e-07,
"loss": 0.4779,
"step": 244
},
{
"epoch": 1.1670644391408114,
"grad_norm": 0.7463996376621957,
"learning_rate": 7.995767120207536e-07,
"loss": 0.4647,
"step": 245
},
{
"epoch": 1.171837708830549,
"grad_norm": 0.7117618711530662,
"learning_rate": 7.918303028737096e-07,
"loss": 0.4712,
"step": 246
},
{
"epoch": 1.1766109785202863,
"grad_norm": 0.7445420769436453,
"learning_rate": 7.840969294152211e-07,
"loss": 0.4747,
"step": 247
},
{
"epoch": 1.1813842482100239,
"grad_norm": 0.7339272409779617,
"learning_rate": 7.763770759129269e-07,
"loss": 0.4732,
"step": 248
},
{
"epoch": 1.1861575178997614,
"grad_norm": 0.7680499628702099,
"learning_rate": 7.68671225787841e-07,
"loss": 0.4677,
"step": 249
},
{
"epoch": 1.1909307875894988,
"grad_norm": 0.7289596879207738,
"learning_rate": 7.609798615840785e-07,
"loss": 0.4788,
"step": 250
},
{
"epoch": 1.1957040572792363,
"grad_norm": 0.7375098113291024,
"learning_rate": 7.533034649386384e-07,
"loss": 0.456,
"step": 251
},
{
"epoch": 1.2004773269689737,
"grad_norm": 0.7788484912408599,
"learning_rate": 7.456425165512452e-07,
"loss": 0.4768,
"step": 252
},
{
"epoch": 1.2052505966587113,
"grad_norm": 0.7545300469644135,
"learning_rate": 7.379974961542447e-07,
"loss": 0.4864,
"step": 253
},
{
"epoch": 1.2100238663484486,
"grad_norm": 0.8818787967594464,
"learning_rate": 7.303688824825646e-07,
"loss": 0.4768,
"step": 254
},
{
"epoch": 1.2147971360381862,
"grad_norm": 0.7762788166887581,
"learning_rate": 7.227571532437349e-07,
"loss": 0.4676,
"step": 255
},
{
"epoch": 1.2195704057279237,
"grad_norm": 0.674374793234199,
"learning_rate": 7.151627850879755e-07,
"loss": 0.4688,
"step": 256
},
{
"epoch": 1.224343675417661,
"grad_norm": 0.7391271163895584,
"learning_rate": 7.075862535783453e-07,
"loss": 0.4545,
"step": 257
},
{
"epoch": 1.2291169451073987,
"grad_norm": 0.7377869581736503,
"learning_rate": 7.00028033160964e-07,
"loss": 0.4842,
"step": 258
},
{
"epoch": 1.233890214797136,
"grad_norm": 0.7182033053068443,
"learning_rate": 6.924885971353026e-07,
"loss": 0.4841,
"step": 259
},
{
"epoch": 1.2386634844868736,
"grad_norm": 0.7165206421556828,
"learning_rate": 6.849684176245431e-07,
"loss": 0.4485,
"step": 260
},
{
"epoch": 1.243436754176611,
"grad_norm": 0.8274126483370449,
"learning_rate": 6.774679655460158e-07,
"loss": 0.4632,
"step": 261
},
{
"epoch": 1.2482100238663485,
"grad_norm": 0.7849668814937834,
"learning_rate": 6.699877105817092e-07,
"loss": 0.4701,
"step": 262
},
{
"epoch": 1.2529832935560858,
"grad_norm": 0.7246643685451561,
"learning_rate": 6.625281211488591e-07,
"loss": 0.4884,
"step": 263
},
{
"epoch": 1.2577565632458234,
"grad_norm": 0.7413214893244733,
"learning_rate": 6.55089664370615e-07,
"loss": 0.4821,
"step": 264
},
{
"epoch": 1.2625298329355608,
"grad_norm": 0.7307541408287506,
"learning_rate": 6.476728060467888e-07,
"loss": 0.4585,
"step": 265
},
{
"epoch": 1.2673031026252983,
"grad_norm": 0.7439228818529052,
"learning_rate": 6.402780106246884e-07,
"loss": 0.4688,
"step": 266
},
{
"epoch": 1.272076372315036,
"grad_norm": 0.7075632105234686,
"learning_rate": 6.329057411700298e-07,
"loss": 0.4813,
"step": 267
},
{
"epoch": 1.2768496420047732,
"grad_norm": 0.757650326028371,
"learning_rate": 6.255564593379429e-07,
"loss": 0.4878,
"step": 268
},
{
"epoch": 1.2816229116945108,
"grad_norm": 0.729712295017678,
"learning_rate": 6.182306253440619e-07,
"loss": 0.4629,
"step": 269
},
{
"epoch": 1.2863961813842482,
"grad_norm": 0.8230987908171445,
"learning_rate": 6.109286979357051e-07,
"loss": 0.4842,
"step": 270
},
{
"epoch": 1.2911694510739857,
"grad_norm": 0.7878144207218812,
"learning_rate": 6.036511343631488e-07,
"loss": 0.4588,
"step": 271
},
{
"epoch": 1.295942720763723,
"grad_norm": 0.7162555025211284,
"learning_rate": 5.963983903509935e-07,
"loss": 0.4817,
"step": 272
},
{
"epoch": 1.3007159904534606,
"grad_norm": 0.7352227500252277,
"learning_rate": 5.89170920069628e-07,
"loss": 0.4781,
"step": 273
},
{
"epoch": 1.3054892601431982,
"grad_norm": 0.7097358431174013,
"learning_rate": 5.819691761067865e-07,
"loss": 0.46,
"step": 274
},
{
"epoch": 1.3102625298329356,
"grad_norm": 1.146161188184777,
"learning_rate": 5.747936094392089e-07,
"loss": 0.4647,
"step": 275
},
{
"epoch": 1.315035799522673,
"grad_norm": 0.7072592435264768,
"learning_rate": 5.676446694044002e-07,
"loss": 0.4639,
"step": 276
},
{
"epoch": 1.3198090692124105,
"grad_norm": 0.7215149618117556,
"learning_rate": 5.605228036724927e-07,
"loss": 0.4652,
"step": 277
},
{
"epoch": 1.324582338902148,
"grad_norm": 0.670785774408122,
"learning_rate": 5.534284582182114e-07,
"loss": 0.4717,
"step": 278
},
{
"epoch": 1.3293556085918854,
"grad_norm": 0.747767864677791,
"learning_rate": 5.463620772929494e-07,
"loss": 0.4536,
"step": 279
},
{
"epoch": 1.334128878281623,
"grad_norm": 0.8516514509018951,
"learning_rate": 5.393241033969466e-07,
"loss": 0.4649,
"step": 280
},
{
"epoch": 1.3389021479713603,
"grad_norm": 0.8138001829719436,
"learning_rate": 5.323149772515812e-07,
"loss": 0.4668,
"step": 281
},
{
"epoch": 1.3436754176610979,
"grad_norm": 0.7576171145048753,
"learning_rate": 5.253351377717706e-07,
"loss": 0.4761,
"step": 282
},
{
"epoch": 1.3484486873508352,
"grad_norm": 0.8613520066962265,
"learning_rate": 5.183850220384873e-07,
"loss": 0.469,
"step": 283
},
{
"epoch": 1.3532219570405728,
"grad_norm": 0.766228885306893,
"learning_rate": 5.114650652713884e-07,
"loss": 0.4802,
"step": 284
},
{
"epoch": 1.3579952267303104,
"grad_norm": 0.7068637893292556,
"learning_rate": 5.045757008015606e-07,
"loss": 0.4773,
"step": 285
},
{
"epoch": 1.3627684964200477,
"grad_norm": 0.8429657657602729,
"learning_rate": 4.977173600443868e-07,
"loss": 0.4605,
"step": 286
},
{
"epoch": 1.3675417661097853,
"grad_norm": 0.7007932505507933,
"learning_rate": 4.908904724725299e-07,
"loss": 0.4767,
"step": 287
},
{
"epoch": 1.3723150357995226,
"grad_norm": 0.7671222670718428,
"learning_rate": 4.840954655890391e-07,
"loss": 0.4682,
"step": 288
},
{
"epoch": 1.3770883054892602,
"grad_norm": 0.694265618019185,
"learning_rate": 4.773327649005777e-07,
"loss": 0.4855,
"step": 289
},
{
"epoch": 1.3818615751789975,
"grad_norm": 0.7519150028535938,
"learning_rate": 4.7060279389078184e-07,
"loss": 0.4761,
"step": 290
},
{
"epoch": 1.3866348448687351,
"grad_norm": 0.7486630511459641,
"learning_rate": 4.6390597399373644e-07,
"loss": 0.4565,
"step": 291
},
{
"epoch": 1.3914081145584727,
"grad_norm": 0.7422555751664944,
"learning_rate": 4.5724272456758907e-07,
"loss": 0.4826,
"step": 292
},
{
"epoch": 1.39618138424821,
"grad_norm": 0.77856112043872,
"learning_rate": 4.506134628682877e-07,
"loss": 0.4763,
"step": 293
},
{
"epoch": 1.4009546539379474,
"grad_norm": 0.7684572854516972,
"learning_rate": 4.440186040234524e-07,
"loss": 0.4672,
"step": 294
},
{
"epoch": 1.405727923627685,
"grad_norm": 0.7665847058665568,
"learning_rate": 4.3745856100637834e-07,
"loss": 0.4656,
"step": 295
},
{
"epoch": 1.4105011933174225,
"grad_norm": 0.733469970387663,
"learning_rate": 4.3093374461017785e-07,
"loss": 0.4676,
"step": 296
},
{
"epoch": 1.4152744630071599,
"grad_norm": 0.8421640257156171,
"learning_rate": 4.244445634220545e-07,
"loss": 0.4843,
"step": 297
},
{
"epoch": 1.4200477326968974,
"grad_norm": 0.8009564109297522,
"learning_rate": 4.1799142379771766e-07,
"loss": 0.4809,
"step": 298
},
{
"epoch": 1.4248210023866348,
"grad_norm": 0.7033349702559853,
"learning_rate": 4.115747298359363e-07,
"loss": 0.464,
"step": 299
},
{
"epoch": 1.4295942720763724,
"grad_norm": 0.7437100788001662,
"learning_rate": 4.0519488335323415e-07,
"loss": 0.4851,
"step": 300
},
{
"epoch": 1.4343675417661097,
"grad_norm": 0.7732697984175376,
"learning_rate": 3.9885228385872806e-07,
"loss": 0.4594,
"step": 301
},
{
"epoch": 1.4391408114558473,
"grad_norm": 0.7940793070581448,
"learning_rate": 3.925473285291091e-07,
"loss": 0.4661,
"step": 302
},
{
"epoch": 1.4439140811455848,
"grad_norm": 0.7351909971969558,
"learning_rate": 3.862804121837733e-07,
"loss": 0.4757,
"step": 303
},
{
"epoch": 1.4486873508353222,
"grad_norm": 0.781207875542895,
"learning_rate": 3.8005192726009663e-07,
"loss": 0.4787,
"step": 304
},
{
"epoch": 1.4534606205250595,
"grad_norm": 0.7991516861553173,
"learning_rate": 3.738622637888608e-07,
"loss": 0.4668,
"step": 305
},
{
"epoch": 1.458233890214797,
"grad_norm": 0.8987252432386614,
"learning_rate": 3.677118093698278e-07,
"loss": 0.4606,
"step": 306
},
{
"epoch": 1.4630071599045347,
"grad_norm": 0.698103668533834,
"learning_rate": 3.61600949147472e-07,
"loss": 0.4683,
"step": 307
},
{
"epoch": 1.467780429594272,
"grad_norm": 0.7560261667555234,
"learning_rate": 3.5553006578685706e-07,
"loss": 0.4519,
"step": 308
},
{
"epoch": 1.4725536992840096,
"grad_norm": 0.7382407678980342,
"learning_rate": 3.494995394496778e-07,
"loss": 0.469,
"step": 309
},
{
"epoch": 1.477326968973747,
"grad_norm": 0.720898348204588,
"learning_rate": 3.435097477704517e-07,
"loss": 0.449,
"step": 310
},
{
"epoch": 1.4821002386634845,
"grad_norm": 0.7319822241837816,
"learning_rate": 3.3756106583287205e-07,
"loss": 0.4745,
"step": 311
},
{
"epoch": 1.4868735083532219,
"grad_norm": 0.7518826329514531,
"learning_rate": 3.316538661463204e-07,
"loss": 0.4918,
"step": 312
},
{
"epoch": 1.4916467780429594,
"grad_norm": 0.8013086574909619,
"learning_rate": 3.2578851862253796e-07,
"loss": 0.4846,
"step": 313
},
{
"epoch": 1.496420047732697,
"grad_norm": 0.7101861238945232,
"learning_rate": 3.199653905524654e-07,
"loss": 0.4604,
"step": 314
},
{
"epoch": 1.5011933174224343,
"grad_norm": 0.7204781171906866,
"learning_rate": 3.1418484658323806e-07,
"loss": 0.4772,
"step": 315
},
{
"epoch": 1.5059665871121717,
"grad_norm": 0.73033687450555,
"learning_rate": 3.0844724869535577e-07,
"loss": 0.468,
"step": 316
},
{
"epoch": 1.5107398568019093,
"grad_norm": 0.7700114197888783,
"learning_rate": 3.027529561800117e-07,
"loss": 0.4808,
"step": 317
},
{
"epoch": 1.5155131264916468,
"grad_norm": 0.8599415830432524,
"learning_rate": 2.971023256165983e-07,
"loss": 0.469,
"step": 318
},
{
"epoch": 1.5202863961813842,
"grad_norm": 0.7490557961852297,
"learning_rate": 2.9149571085037215e-07,
"loss": 0.4758,
"step": 319
},
{
"epoch": 1.5250596658711217,
"grad_norm": 0.6911043116400506,
"learning_rate": 2.8593346297030073e-07,
"loss": 0.4662,
"step": 320
},
{
"epoch": 1.5298329355608593,
"grad_norm": 0.7444306144257443,
"learning_rate": 2.804159302870751e-07,
"loss": 0.4638,
"step": 321
},
{
"epoch": 1.5346062052505967,
"grad_norm": 0.6930295325600317,
"learning_rate": 2.7494345831129837e-07,
"loss": 0.4584,
"step": 322
},
{
"epoch": 1.539379474940334,
"grad_norm": 0.7461580524158721,
"learning_rate": 2.6951638973185073e-07,
"loss": 0.4757,
"step": 323
},
{
"epoch": 1.5441527446300716,
"grad_norm": 0.7678530858976563,
"learning_rate": 2.64135064394428e-07,
"loss": 0.4807,
"step": 324
},
{
"epoch": 1.5489260143198091,
"grad_norm": 1.8144860255245707,
"learning_rate": 2.587998192802638e-07,
"loss": 0.4605,
"step": 325
},
{
"epoch": 1.5536992840095465,
"grad_norm": 0.7200319691236525,
"learning_rate": 2.5351098848502386e-07,
"loss": 0.474,
"step": 326
},
{
"epoch": 1.558472553699284,
"grad_norm": 0.7134577877268367,
"learning_rate": 2.482689031978872e-07,
"loss": 0.4715,
"step": 327
},
{
"epoch": 1.5632458233890216,
"grad_norm": 0.9468756981275396,
"learning_rate": 2.4307389168080606e-07,
"loss": 0.4656,
"step": 328
},
{
"epoch": 1.568019093078759,
"grad_norm": 0.6688722309384391,
"learning_rate": 2.3792627924795038e-07,
"loss": 0.4922,
"step": 329
},
{
"epoch": 1.5727923627684963,
"grad_norm": 0.7125789762828182,
"learning_rate": 2.3282638824533529e-07,
"loss": 0.4692,
"step": 330
},
{
"epoch": 1.577565632458234,
"grad_norm": 0.8844333458882234,
"learning_rate": 2.277745380306383e-07,
"loss": 0.4876,
"step": 331
},
{
"epoch": 1.5823389021479715,
"grad_norm": 0.7950308834961601,
"learning_rate": 2.227710449531971e-07,
"loss": 0.4918,
"step": 332
},
{
"epoch": 1.5871121718377088,
"grad_norm": 0.796382860942759,
"learning_rate": 2.178162223342035e-07,
"loss": 0.4641,
"step": 333
},
{
"epoch": 1.5918854415274462,
"grad_norm": 0.7285520770077796,
"learning_rate": 2.1291038044707965e-07,
"loss": 0.4661,
"step": 334
},
{
"epoch": 1.5966587112171837,
"grad_norm": 0.6921820001369808,
"learning_rate": 2.0805382649805225e-07,
"loss": 0.4681,
"step": 335
},
{
"epoch": 1.6014319809069213,
"grad_norm": 0.7552481890637776,
"learning_rate": 2.032468646069112e-07,
"loss": 0.4672,
"step": 336
},
{
"epoch": 1.6062052505966586,
"grad_norm": 0.7155745101307224,
"learning_rate": 1.9848979578796865e-07,
"loss": 0.4767,
"step": 337
},
{
"epoch": 1.6109785202863962,
"grad_norm": 0.6993076336434562,
"learning_rate": 1.937829179312076e-07,
"loss": 0.4822,
"step": 338
},
{
"epoch": 1.6157517899761338,
"grad_norm": 0.7530303728674003,
"learning_rate": 1.8912652578362853e-07,
"loss": 0.4709,
"step": 339
},
{
"epoch": 1.6205250596658711,
"grad_norm": 0.7510327363849882,
"learning_rate": 1.8452091093079215e-07,
"loss": 0.4604,
"step": 340
},
{
"epoch": 1.6252983293556085,
"grad_norm": 0.7282910633876013,
"learning_rate": 1.7996636177855928e-07,
"loss": 0.4984,
"step": 341
},
{
"epoch": 1.630071599045346,
"grad_norm": 0.7524297400825809,
"learning_rate": 1.75463163535033e-07,
"loss": 0.4823,
"step": 342
},
{
"epoch": 1.6348448687350836,
"grad_norm": 0.7049222733481684,
"learning_rate": 1.7101159819269583e-07,
"loss": 0.4635,
"step": 343
},
{
"epoch": 1.639618138424821,
"grad_norm": 1.1034453594616451,
"learning_rate": 1.6661194451075345e-07,
"loss": 0.4765,
"step": 344
},
{
"epoch": 1.6443914081145583,
"grad_norm": 0.83013391018154,
"learning_rate": 1.6226447799767772e-07,
"loss": 0.4533,
"step": 345
},
{
"epoch": 1.649164677804296,
"grad_norm": 2.858030289791699,
"learning_rate": 1.5796947089395475e-07,
"loss": 0.4691,
"step": 346
},
{
"epoch": 1.6539379474940334,
"grad_norm": 0.7332905568570133,
"learning_rate": 1.5372719215503582e-07,
"loss": 0.4544,
"step": 347
},
{
"epoch": 1.6587112171837708,
"grad_norm": 0.7481224605220782,
"learning_rate": 1.4953790743449702e-07,
"loss": 0.4806,
"step": 348
},
{
"epoch": 1.6634844868735084,
"grad_norm": 0.9099408876904721,
"learning_rate": 1.4540187906740241e-07,
"loss": 0.4569,
"step": 349
},
{
"epoch": 1.668257756563246,
"grad_norm": 0.6921320546034447,
"learning_rate": 1.4131936605387762e-07,
"loss": 0.4897,
"step": 350
},
{
"epoch": 1.6730310262529833,
"grad_norm": 0.7172188028374827,
"learning_rate": 1.3729062404289017e-07,
"loss": 0.4799,
"step": 351
},
{
"epoch": 1.6778042959427206,
"grad_norm": 0.7348308299387173,
"learning_rate": 1.3331590531624115e-07,
"loss": 0.4714,
"step": 352
},
{
"epoch": 1.6825775656324582,
"grad_norm": 0.7524117454719962,
"learning_rate": 1.2939545877276726e-07,
"loss": 0.4679,
"step": 353
},
{
"epoch": 1.6873508353221958,
"grad_norm": 0.7609980327732692,
"learning_rate": 1.25529529912754e-07,
"loss": 0.4678,
"step": 354
},
{
"epoch": 1.692124105011933,
"grad_norm": 0.7906234591099575,
"learning_rate": 1.2171836082256316e-07,
"loss": 0.4754,
"step": 355
},
{
"epoch": 1.6968973747016707,
"grad_norm": 0.7519337557814546,
"learning_rate": 1.1796219015947285e-07,
"loss": 0.4803,
"step": 356
},
{
"epoch": 1.7016706443914082,
"grad_norm": 0.6859134821445197,
"learning_rate": 1.1426125313673285e-07,
"loss": 0.4939,
"step": 357
},
{
"epoch": 1.7064439140811456,
"grad_norm": 0.8229493204752176,
"learning_rate": 1.1061578150883444e-07,
"loss": 0.4372,
"step": 358
},
{
"epoch": 1.711217183770883,
"grad_norm": 0.692317996696451,
"learning_rate": 1.070260035570002e-07,
"loss": 0.4792,
"step": 359
},
{
"epoch": 1.7159904534606205,
"grad_norm": 0.7390705342617898,
"learning_rate": 1.0349214407488571e-07,
"loss": 0.4719,
"step": 360
},
{
"epoch": 1.720763723150358,
"grad_norm": 0.7057263439063961,
"learning_rate": 1.000144243545058e-07,
"loss": 0.4724,
"step": 361
},
{
"epoch": 1.7255369928400954,
"grad_norm": 0.707795857913463,
"learning_rate": 9.659306217237517e-08,
"loss": 0.4717,
"step": 362
},
{
"epoch": 1.7303102625298328,
"grad_norm": 0.7912536951606031,
"learning_rate": 9.322827177587212e-08,
"loss": 0.4623,
"step": 363
},
{
"epoch": 1.7350835322195706,
"grad_norm": 0.746736598206851,
"learning_rate": 8.992026386982221e-08,
"loss": 0.4735,
"step": 364
},
{
"epoch": 1.739856801909308,
"grad_norm": 0.6948885657819285,
"learning_rate": 8.66692456033029e-08,
"loss": 0.4825,
"step": 365
},
{
"epoch": 1.7446300715990453,
"grad_norm": 0.7262491961744311,
"learning_rate": 8.347542055667311e-08,
"loss": 0.4699,
"step": 366
},
{
"epoch": 1.7494033412887828,
"grad_norm": 0.7863038143235231,
"learning_rate": 8.033898872882394e-08,
"loss": 0.4679,
"step": 367
},
{
"epoch": 1.7541766109785204,
"grad_norm": 0.6727626949269937,
"learning_rate": 7.726014652465507e-08,
"loss": 0.4421,
"step": 368
},
{
"epoch": 1.7589498806682577,
"grad_norm": 0.6867145980818331,
"learning_rate": 7.423908674277579e-08,
"loss": 0.4778,
"step": 369
},
{
"epoch": 1.763723150357995,
"grad_norm": 1.4213029472300538,
"learning_rate": 7.127599856343192e-08,
"loss": 0.4727,
"step": 370
},
{
"epoch": 1.7684964200477327,
"grad_norm": 0.692012937763345,
"learning_rate": 6.837106753665823e-08,
"loss": 0.4741,
"step": 371
},
{
"epoch": 1.7732696897374702,
"grad_norm": 0.7092148893859065,
"learning_rate": 6.552447557066109e-08,
"loss": 0.4697,
"step": 372
},
{
"epoch": 1.7780429594272076,
"grad_norm": 0.6973356829898804,
"learning_rate": 6.273640092042575e-08,
"loss": 0.4544,
"step": 373
},
{
"epoch": 1.7828162291169452,
"grad_norm": 1.5448551643686548,
"learning_rate": 6.000701817655474e-08,
"loss": 0.4523,
"step": 374
},
{
"epoch": 1.7875894988066827,
"grad_norm": 1.4827724692081619,
"learning_rate": 5.733649825433384e-08,
"loss": 0.4551,
"step": 375
},
{
"epoch": 1.79236276849642,
"grad_norm": 0.7790516793749164,
"learning_rate": 5.47250083830314e-08,
"loss": 0.494,
"step": 376
},
{
"epoch": 1.7971360381861574,
"grad_norm": 0.7365514384441436,
"learning_rate": 5.217271209542384e-08,
"loss": 0.4735,
"step": 377
},
{
"epoch": 1.801909307875895,
"grad_norm": 0.7707502808832377,
"learning_rate": 4.967976921755679e-08,
"loss": 0.4501,
"step": 378
},
{
"epoch": 1.8066825775656326,
"grad_norm": 0.7176835200739754,
"learning_rate": 4.724633585873627e-08,
"loss": 0.4686,
"step": 379
},
{
"epoch": 1.81145584725537,
"grad_norm": 0.6889468337016494,
"learning_rate": 4.487256440175291e-08,
"loss": 0.4771,
"step": 380
},
{
"epoch": 1.8162291169451072,
"grad_norm": 1.0649529564643607,
"learning_rate": 4.255860349334006e-08,
"loss": 0.4661,
"step": 381
},
{
"epoch": 1.8210023866348448,
"grad_norm": 1.1333041301606328,
"learning_rate": 4.030459803486464e-08,
"loss": 0.4606,
"step": 382
},
{
"epoch": 1.8257756563245824,
"grad_norm": 0.765268616008849,
"learning_rate": 3.811068917325444e-08,
"loss": 0.4442,
"step": 383
},
{
"epoch": 1.8305489260143197,
"grad_norm": 0.701547689578903,
"learning_rate": 3.5977014292158495e-08,
"loss": 0.4739,
"step": 384
},
{
"epoch": 1.8353221957040573,
"grad_norm": 0.7141975076446941,
"learning_rate": 3.3903707003344774e-08,
"loss": 0.4719,
"step": 385
},
{
"epoch": 1.8400954653937949,
"grad_norm": 0.6918753885495199,
"learning_rate": 3.189089713833226e-08,
"loss": 0.4772,
"step": 386
},
{
"epoch": 1.8448687350835322,
"grad_norm": 0.714964202433507,
"learning_rate": 2.9938710740262884e-08,
"loss": 0.4561,
"step": 387
},
{
"epoch": 1.8496420047732696,
"grad_norm": 0.7838822438811583,
"learning_rate": 2.8047270056005934e-08,
"loss": 0.4565,
"step": 388
},
{
"epoch": 1.8544152744630071,
"grad_norm": 0.7061577623995287,
"learning_rate": 2.6216693528505195e-08,
"loss": 0.4648,
"step": 389
},
{
"epoch": 1.8591885441527447,
"grad_norm": 0.9071757882196184,
"learning_rate": 2.4447095789360884e-08,
"loss": 0.4711,
"step": 390
},
{
"epoch": 1.863961813842482,
"grad_norm": 0.765845128347514,
"learning_rate": 2.2738587651651487e-08,
"loss": 0.4577,
"step": 391
},
{
"epoch": 1.8687350835322196,
"grad_norm": 0.7650600946027074,
"learning_rate": 2.109127610299466e-08,
"loss": 0.4679,
"step": 392
},
{
"epoch": 1.8735083532219572,
"grad_norm": 0.6957819402359949,
"learning_rate": 1.950526429884769e-08,
"loss": 0.4559,
"step": 393
},
{
"epoch": 1.8782816229116945,
"grad_norm": 0.7430172728751436,
"learning_rate": 1.7980651556048e-08,
"loss": 0.4732,
"step": 394
},
{
"epoch": 1.8830548926014319,
"grad_norm": 0.6767278663023139,
"learning_rate": 1.6517533346593226e-08,
"loss": 0.4758,
"step": 395
},
{
"epoch": 1.8878281622911695,
"grad_norm": 0.7619777582419104,
"learning_rate": 1.5116001291663462e-08,
"loss": 0.4932,
"step": 396
},
{
"epoch": 1.892601431980907,
"grad_norm": 0.790159743362526,
"learning_rate": 1.3776143155883491e-08,
"loss": 0.4558,
"step": 397
},
{
"epoch": 1.8973747016706444,
"grad_norm": 0.7261843559497824,
"learning_rate": 1.2498042841827317e-08,
"loss": 0.4595,
"step": 398
},
{
"epoch": 1.9021479713603817,
"grad_norm": 0.7017669980294373,
"learning_rate": 1.128178038476324e-08,
"loss": 0.4625,
"step": 399
},
{
"epoch": 1.9069212410501193,
"grad_norm": 0.6784318458229694,
"learning_rate": 1.0127431947643316e-08,
"loss": 0.4671,
"step": 400
},
{
"epoch": 1.9116945107398569,
"grad_norm": 0.675130035717692,
"learning_rate": 9.035069816332619e-09,
"loss": 0.464,
"step": 401
},
{
"epoch": 1.9164677804295942,
"grad_norm": 0.8221120490850481,
"learning_rate": 8.004762395083963e-09,
"loss": 0.4537,
"step": 402
},
{
"epoch": 1.9212410501193318,
"grad_norm": 0.7612136405972405,
"learning_rate": 7.036574202253343e-09,
"loss": 0.4914,
"step": 403
},
{
"epoch": 1.9260143198090693,
"grad_norm": 0.8291594902189451,
"learning_rate": 6.130565866260484e-09,
"loss": 0.4727,
"step": 404
},
{
"epoch": 1.9307875894988067,
"grad_norm": 1.0329364100399496,
"learning_rate": 5.286794121791782e-09,
"loss": 0.4767,
"step": 405
},
{
"epoch": 1.935560859188544,
"grad_norm": 0.8758229910700595,
"learning_rate": 4.5053118062478025e-09,
"loss": 0.4501,
"step": 406
},
{
"epoch": 1.9403341288782816,
"grad_norm": 0.7067697193260255,
"learning_rate": 3.786167856434375e-09,
"loss": 0.4747,
"step": 407
},
{
"epoch": 1.9451073985680192,
"grad_norm": 0.7459961970155857,
"learning_rate": 3.1294073054987102e-09,
"loss": 0.4605,
"step": 408
},
{
"epoch": 1.9498806682577565,
"grad_norm": 0.7585534385150827,
"learning_rate": 2.5350712801084363e-09,
"loss": 0.4528,
"step": 409
},
{
"epoch": 1.9546539379474939,
"grad_norm": 0.6767868247269999,
"learning_rate": 2.003196997877099e-09,
"loss": 0.4585,
"step": 410
},
{
"epoch": 1.9594272076372317,
"grad_norm": 0.7126370902337825,
"learning_rate": 1.5338177650332517e-09,
"loss": 0.4591,
"step": 411
},
{
"epoch": 1.964200477326969,
"grad_norm": 0.7172728813954358,
"learning_rate": 1.1269629743346777e-09,
"loss": 0.4589,
"step": 412
},
{
"epoch": 1.9689737470167064,
"grad_norm": 0.8158860106123756,
"learning_rate": 7.826581032279734e-10,
"loss": 0.4601,
"step": 413
},
{
"epoch": 1.973747016706444,
"grad_norm": 0.8261699459606863,
"learning_rate": 5.00924712252937e-10,
"loss": 0.4731,
"step": 414
},
{
"epoch": 1.9785202863961815,
"grad_norm": 0.7168072767819187,
"learning_rate": 2.8178044369286945e-10,
"loss": 0.4657,
"step": 415
},
{
"epoch": 1.9832935560859188,
"grad_norm": 0.6783006404134123,
"learning_rate": 1.2523902046934763e-10,
"loss": 0.452,
"step": 416
},
{
"epoch": 1.9880668257756562,
"grad_norm": 0.7080089156985594,
"learning_rate": 3.131024528302273e-11,
"loss": 0.4737,
"step": 417
},
{
"epoch": 1.9928400954653938,
"grad_norm": 0.7031897431837284,
"learning_rate": 0.0,
"loss": 0.4817,
"step": 418
},
{
"epoch": 1.9928400954653938,
"step": 418,
"total_flos": 3166299160051712.0,
"train_loss": 0.5405693022828353,
"train_runtime": 17864.3337,
"train_samples_per_second": 2.995,
"train_steps_per_second": 0.023
}
],
"logging_steps": 1,
"max_steps": 418,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3166299160051712.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}