{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9949177877428999,
"eval_steps": 500,
"global_step": 418,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004783258594917788,
"grad_norm": 0.709558174057161,
"learning_rate": 9.523809523809523e-08,
"loss": 1.2153,
"step": 1
},
{
"epoch": 0.009566517189835576,
"grad_norm": 0.7136115199943648,
"learning_rate": 1.9047619047619045e-07,
"loss": 1.1666,
"step": 2
},
{
"epoch": 0.014349775784753363,
"grad_norm": 0.675693723617585,
"learning_rate": 2.857142857142857e-07,
"loss": 1.1641,
"step": 3
},
{
"epoch": 0.019133034379671152,
"grad_norm": 0.6920682930548318,
"learning_rate": 3.809523809523809e-07,
"loss": 1.1522,
"step": 4
},
{
"epoch": 0.02391629297458894,
"grad_norm": 0.7102565482472595,
"learning_rate": 4.761904761904761e-07,
"loss": 1.1849,
"step": 5
},
{
"epoch": 0.028699551569506727,
"grad_norm": 0.7063832004098226,
"learning_rate": 5.714285714285714e-07,
"loss": 1.1832,
"step": 6
},
{
"epoch": 0.03348281016442452,
"grad_norm": 0.7273064919061014,
"learning_rate": 6.666666666666666e-07,
"loss": 1.1646,
"step": 7
},
{
"epoch": 0.038266068759342305,
"grad_norm": 0.6515215643931184,
"learning_rate": 7.619047619047618e-07,
"loss": 1.1492,
"step": 8
},
{
"epoch": 0.04304932735426009,
"grad_norm": 0.662055704210126,
"learning_rate": 8.57142857142857e-07,
"loss": 1.1356,
"step": 9
},
{
"epoch": 0.04783258594917788,
"grad_norm": 0.6717882944749636,
"learning_rate": 9.523809523809522e-07,
"loss": 1.1373,
"step": 10
},
{
"epoch": 0.052615844544095666,
"grad_norm": 0.5448741661939914,
"learning_rate": 1.0476190476190476e-06,
"loss": 1.1133,
"step": 11
},
{
"epoch": 0.05739910313901345,
"grad_norm": 0.5462157636865493,
"learning_rate": 1.1428571428571428e-06,
"loss": 1.1025,
"step": 12
},
{
"epoch": 0.06218236173393124,
"grad_norm": 0.5384420206966651,
"learning_rate": 1.238095238095238e-06,
"loss": 1.1102,
"step": 13
},
{
"epoch": 0.06696562032884903,
"grad_norm": 0.5199166835309963,
"learning_rate": 1.3333333333333332e-06,
"loss": 1.0716,
"step": 14
},
{
"epoch": 0.07174887892376682,
"grad_norm": 0.3132434598969462,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.0198,
"step": 15
},
{
"epoch": 0.07653213751868461,
"grad_norm": 0.2906366968418868,
"learning_rate": 1.5238095238095236e-06,
"loss": 1.0402,
"step": 16
},
{
"epoch": 0.08131539611360239,
"grad_norm": 0.27762786913003945,
"learning_rate": 1.619047619047619e-06,
"loss": 1.0027,
"step": 17
},
{
"epoch": 0.08609865470852018,
"grad_norm": 0.27399342611565175,
"learning_rate": 1.714285714285714e-06,
"loss": 1.0022,
"step": 18
},
{
"epoch": 0.09088191330343796,
"grad_norm": 0.24537662923792491,
"learning_rate": 1.8095238095238095e-06,
"loss": 1.0272,
"step": 19
},
{
"epoch": 0.09566517189835576,
"grad_norm": 0.20010147123002528,
"learning_rate": 1.9047619047619045e-06,
"loss": 0.9569,
"step": 20
},
{
"epoch": 0.10044843049327354,
"grad_norm": 0.25587399010113915,
"learning_rate": 2e-06,
"loss": 0.9848,
"step": 21
},
{
"epoch": 0.10523168908819133,
"grad_norm": 0.3007962756012024,
"learning_rate": 1.9999686897547167e-06,
"loss": 0.9581,
"step": 22
},
{
"epoch": 0.11001494768310911,
"grad_norm": 0.3006848188189002,
"learning_rate": 1.9998747609795305e-06,
"loss": 0.9478,
"step": 23
},
{
"epoch": 0.1147982062780269,
"grad_norm": 0.30781981272131237,
"learning_rate": 1.999718219556307e-06,
"loss": 0.9834,
"step": 24
},
{
"epoch": 0.11958146487294469,
"grad_norm": 0.3118837626745979,
"learning_rate": 1.999499075287747e-06,
"loss": 0.9852,
"step": 25
},
{
"epoch": 0.12436472346786248,
"grad_norm": 0.26545213805928825,
"learning_rate": 1.999217341896772e-06,
"loss": 0.9549,
"step": 26
},
{
"epoch": 0.12914798206278028,
"grad_norm": 0.28369668711343804,
"learning_rate": 1.998873037025665e-06,
"loss": 0.9395,
"step": 27
},
{
"epoch": 0.13393124065769807,
"grad_norm": 0.228056096803738,
"learning_rate": 1.9984661822349665e-06,
"loss": 0.9124,
"step": 28
},
{
"epoch": 0.13871449925261584,
"grad_norm": 0.23393624794141885,
"learning_rate": 1.997996803002123e-06,
"loss": 0.9306,
"step": 29
},
{
"epoch": 0.14349775784753363,
"grad_norm": 0.20376330319941563,
"learning_rate": 1.9974649287198914e-06,
"loss": 0.8882,
"step": 30
},
{
"epoch": 0.14828101644245142,
"grad_norm": 0.19033042713450593,
"learning_rate": 1.9968705926945013e-06,
"loss": 0.8699,
"step": 31
},
{
"epoch": 0.15306427503736922,
"grad_norm": 0.20517384186395837,
"learning_rate": 1.9962138321435656e-06,
"loss": 0.8919,
"step": 32
},
{
"epoch": 0.15784753363228698,
"grad_norm": 0.19219397333283395,
"learning_rate": 1.9954946881937524e-06,
"loss": 0.8985,
"step": 33
},
{
"epoch": 0.16263079222720478,
"grad_norm": 0.18095506989716384,
"learning_rate": 1.994713205878208e-06,
"loss": 0.8504,
"step": 34
},
{
"epoch": 0.16741405082212257,
"grad_norm": 0.1722529909885032,
"learning_rate": 1.9938694341337393e-06,
"loss": 0.8743,
"step": 35
},
{
"epoch": 0.17219730941704037,
"grad_norm": 0.16508567356320156,
"learning_rate": 1.9929634257977467e-06,
"loss": 0.857,
"step": 36
},
{
"epoch": 0.17698056801195813,
"grad_norm": 0.15380307949646846,
"learning_rate": 1.991995237604916e-06,
"loss": 0.8487,
"step": 37
},
{
"epoch": 0.18176382660687593,
"grad_norm": 0.14856130486975244,
"learning_rate": 1.9909649301836674e-06,
"loss": 0.8692,
"step": 38
},
{
"epoch": 0.18654708520179372,
"grad_norm": 0.1518842900714723,
"learning_rate": 1.9898725680523566e-06,
"loss": 0.8679,
"step": 39
},
{
"epoch": 0.19133034379671152,
"grad_norm": 0.1443106182213824,
"learning_rate": 1.9887182196152367e-06,
"loss": 0.8504,
"step": 40
},
{
"epoch": 0.1961136023916293,
"grad_norm": 0.14664015617981188,
"learning_rate": 1.9875019571581726e-06,
"loss": 0.8125,
"step": 41
},
{
"epoch": 0.20089686098654708,
"grad_norm": 0.14692793192413753,
"learning_rate": 1.9862238568441165e-06,
"loss": 0.8257,
"step": 42
},
{
"epoch": 0.20568011958146487,
"grad_norm": 0.13896889627771705,
"learning_rate": 1.9848839987083364e-06,
"loss": 0.8329,
"step": 43
},
{
"epoch": 0.21046337817638266,
"grad_norm": 0.14943974659921427,
"learning_rate": 1.983482466653407e-06,
"loss": 0.8409,
"step": 44
},
{
"epoch": 0.21524663677130046,
"grad_norm": 0.138210028938997,
"learning_rate": 1.982019348443952e-06,
"loss": 0.8323,
"step": 45
},
{
"epoch": 0.22002989536621823,
"grad_norm": 0.1250406305407292,
"learning_rate": 1.9804947357011523e-06,
"loss": 0.8673,
"step": 46
},
{
"epoch": 0.22481315396113602,
"grad_norm": 0.12719252526959784,
"learning_rate": 1.978908723897005e-06,
"loss": 0.8192,
"step": 47
},
{
"epoch": 0.2295964125560538,
"grad_norm": 0.10853106729801387,
"learning_rate": 1.9772614123483485e-06,
"loss": 0.8384,
"step": 48
},
{
"epoch": 0.2343796711509716,
"grad_norm": 0.11375286279894396,
"learning_rate": 1.9755529042106393e-06,
"loss": 0.7854,
"step": 49
},
{
"epoch": 0.23916292974588937,
"grad_norm": 0.11326113932314119,
"learning_rate": 1.973783306471495e-06,
"loss": 0.795,
"step": 50
},
{
"epoch": 0.24394618834080717,
"grad_norm": 0.12664705711535487,
"learning_rate": 1.971952729943994e-06,
"loss": 0.783,
"step": 51
},
{
"epoch": 0.24872944693572496,
"grad_norm": 0.11119059988645158,
"learning_rate": 1.9700612892597372e-06,
"loss": 0.8059,
"step": 52
},
{
"epoch": 0.25351270553064276,
"grad_norm": 0.10545114737351395,
"learning_rate": 1.9681091028616676e-06,
"loss": 0.7885,
"step": 53
},
{
"epoch": 0.25829596412556055,
"grad_norm": 0.11679452392637804,
"learning_rate": 1.966096292996655e-06,
"loss": 0.8031,
"step": 54
},
{
"epoch": 0.26307922272047835,
"grad_norm": 0.11363287552532539,
"learning_rate": 1.9640229857078413e-06,
"loss": 0.7774,
"step": 55
},
{
"epoch": 0.26786248131539614,
"grad_norm": 0.1164225509000403,
"learning_rate": 1.9618893108267454e-06,
"loss": 0.7949,
"step": 56
},
{
"epoch": 0.2726457399103139,
"grad_norm": 0.11077425052933487,
"learning_rate": 1.9596954019651354e-06,
"loss": 0.7674,
"step": 57
},
{
"epoch": 0.27742899850523167,
"grad_norm": 0.10576177825898277,
"learning_rate": 1.95744139650666e-06,
"loss": 0.7953,
"step": 58
},
{
"epoch": 0.28221225710014947,
"grad_norm": 0.10359885133841641,
"learning_rate": 1.955127435598247e-06,
"loss": 0.7881,
"step": 59
},
{
"epoch": 0.28699551569506726,
"grad_norm": 0.10586032252156977,
"learning_rate": 1.9527536641412637e-06,
"loss": 0.7984,
"step": 60
},
{
"epoch": 0.29177877428998505,
"grad_norm": 0.10642116844371083,
"learning_rate": 1.950320230782443e-06,
"loss": 0.7666,
"step": 61
},
{
"epoch": 0.29656203288490285,
"grad_norm": 0.11202675632435576,
"learning_rate": 1.9478272879045763e-06,
"loss": 0.7809,
"step": 62
},
{
"epoch": 0.30134529147982064,
"grad_norm": 0.10728322195233368,
"learning_rate": 1.9452749916169685e-06,
"loss": 0.7948,
"step": 63
},
{
"epoch": 0.30612855007473844,
"grad_norm": 0.10427886124668943,
"learning_rate": 1.942663501745666e-06,
"loss": 0.7843,
"step": 64
},
{
"epoch": 0.3109118086696562,
"grad_norm": 0.09150641957182463,
"learning_rate": 1.939992981823445e-06,
"loss": 0.7713,
"step": 65
},
{
"epoch": 0.31569506726457397,
"grad_norm": 0.10652939965487439,
"learning_rate": 1.9372635990795744e-06,
"loss": 0.7338,
"step": 66
},
{
"epoch": 0.32047832585949176,
"grad_norm": 0.12224668990837938,
"learning_rate": 1.934475524429339e-06,
"loss": 0.7651,
"step": 67
},
{
"epoch": 0.32526158445440956,
"grad_norm": 0.09554788331952155,
"learning_rate": 1.9316289324633416e-06,
"loss": 0.7743,
"step": 68
},
{
"epoch": 0.33004484304932735,
"grad_norm": 0.10311314948775388,
"learning_rate": 1.928724001436568e-06,
"loss": 0.7818,
"step": 69
},
{
"epoch": 0.33482810164424515,
"grad_norm": 0.11402809897006772,
"learning_rate": 1.925760913257224e-06,
"loss": 0.7738,
"step": 70
},
{
"epoch": 0.33961136023916294,
"grad_norm": 0.10099702778225672,
"learning_rate": 1.922739853475345e-06,
"loss": 0.7694,
"step": 71
},
{
"epoch": 0.34439461883408073,
"grad_norm": 0.09669133625846159,
"learning_rate": 1.919661011271176e-06,
"loss": 0.7695,
"step": 72
},
{
"epoch": 0.34917787742899853,
"grad_norm": 0.10013746372306316,
"learning_rate": 1.916524579443327e-06,
"loss": 0.7762,
"step": 73
},
{
"epoch": 0.35396113602391627,
"grad_norm": 0.09840254254939616,
"learning_rate": 1.9133307543966972e-06,
"loss": 0.7465,
"step": 74
},
{
"epoch": 0.35874439461883406,
"grad_norm": 0.10348087475535427,
"learning_rate": 1.910079736130178e-06,
"loss": 0.7591,
"step": 75
},
{
"epoch": 0.36352765321375186,
"grad_norm": 0.09831488128647803,
"learning_rate": 1.9067717282241275e-06,
"loss": 0.7473,
"step": 76
},
{
"epoch": 0.36831091180866965,
"grad_norm": 0.10747256347092367,
"learning_rate": 1.9034069378276248e-06,
"loss": 0.7899,
"step": 77
},
{
"epoch": 0.37309417040358744,
"grad_norm": 0.10145726153107046,
"learning_rate": 1.8999855756454943e-06,
"loss": 0.759,
"step": 78
},
{
"epoch": 0.37787742899850524,
"grad_norm": 0.09521749859691808,
"learning_rate": 1.8965078559251141e-06,
"loss": 0.765,
"step": 79
},
{
"epoch": 0.38266068759342303,
"grad_norm": 0.09559204768504546,
"learning_rate": 1.892973996443e-06,
"loss": 0.7653,
"step": 80
},
{
"epoch": 0.3874439461883408,
"grad_norm": 0.09893961689958143,
"learning_rate": 1.8893842184911652e-06,
"loss": 0.7585,
"step": 81
},
{
"epoch": 0.3922272047832586,
"grad_norm": 0.10469293200053865,
"learning_rate": 1.8857387468632673e-06,
"loss": 0.7396,
"step": 82
},
{
"epoch": 0.39701046337817636,
"grad_norm": 0.09881168266263542,
"learning_rate": 1.8820378098405269e-06,
"loss": 0.7449,
"step": 83
},
{
"epoch": 0.40179372197309415,
"grad_norm": 0.09472923155314936,
"learning_rate": 1.878281639177437e-06,
"loss": 0.7536,
"step": 84
},
{
"epoch": 0.40657698056801195,
"grad_norm": 0.09940252508830999,
"learning_rate": 1.874470470087246e-06,
"loss": 0.7695,
"step": 85
},
{
"epoch": 0.41136023916292974,
"grad_norm": 0.10835992130612712,
"learning_rate": 1.8706045412272329e-06,
"loss": 0.7804,
"step": 86
},
{
"epoch": 0.41614349775784754,
"grad_norm": 0.09850260645852206,
"learning_rate": 1.8666840946837588e-06,
"loss": 0.7581,
"step": 87
},
{
"epoch": 0.42092675635276533,
"grad_norm": 0.10663807706116737,
"learning_rate": 1.8627093759571097e-06,
"loss": 0.7486,
"step": 88
},
{
"epoch": 0.4257100149476831,
"grad_norm": 0.09576966700987803,
"learning_rate": 1.8586806339461223e-06,
"loss": 0.7393,
"step": 89
},
{
"epoch": 0.4304932735426009,
"grad_norm": 0.13616509255793824,
"learning_rate": 1.8545981209325974e-06,
"loss": 0.7412,
"step": 90
},
{
"epoch": 0.43527653213751866,
"grad_norm": 0.10078747049635026,
"learning_rate": 1.850462092565503e-06,
"loss": 0.7522,
"step": 91
},
{
"epoch": 0.44005979073243645,
"grad_norm": 0.09590506182617801,
"learning_rate": 1.846272807844964e-06,
"loss": 0.7361,
"step": 92
},
{
"epoch": 0.44484304932735425,
"grad_norm": 0.09599938671410663,
"learning_rate": 1.8420305291060453e-06,
"loss": 0.7454,
"step": 93
},
{
"epoch": 0.44962630792227204,
"grad_norm": 0.10175459960116054,
"learning_rate": 1.837735522002322e-06,
"loss": 0.7776,
"step": 94
},
{
"epoch": 0.45440956651718983,
"grad_norm": 0.10921604960602464,
"learning_rate": 1.8333880554892465e-06,
"loss": 0.7284,
"step": 95
},
{
"epoch": 0.4591928251121076,
"grad_norm": 0.10701793438795469,
"learning_rate": 1.828988401807304e-06,
"loss": 0.7275,
"step": 96
},
{
"epoch": 0.4639760837070254,
"grad_norm": 0.10671158442373065,
"learning_rate": 1.8245368364649672e-06,
"loss": 0.7176,
"step": 97
},
{
"epoch": 0.4687593423019432,
"grad_norm": 0.09323865008012455,
"learning_rate": 1.8200336382214404e-06,
"loss": 0.7558,
"step": 98
},
{
"epoch": 0.473542600896861,
"grad_norm": 0.09924243426975013,
"learning_rate": 1.815479089069208e-06,
"loss": 0.7477,
"step": 99
},
{
"epoch": 0.47832585949177875,
"grad_norm": 0.10034019533981096,
"learning_rate": 1.8108734742163714e-06,
"loss": 0.7302,
"step": 100
},
{
"epoch": 0.48310911808669654,
"grad_norm": 0.09289950458176202,
"learning_rate": 1.8062170820687923e-06,
"loss": 0.7461,
"step": 101
},
{
"epoch": 0.48789237668161434,
"grad_norm": 0.10063821105969947,
"learning_rate": 1.8015102042120314e-06,
"loss": 0.7374,
"step": 102
},
{
"epoch": 0.49267563527653213,
"grad_norm": 0.10431764482912426,
"learning_rate": 1.796753135393089e-06,
"loss": 0.753,
"step": 103
},
{
"epoch": 0.4974588938714499,
"grad_norm": 0.09777703419526715,
"learning_rate": 1.791946173501948e-06,
"loss": 0.7172,
"step": 104
},
{
"epoch": 0.5022421524663677,
"grad_norm": 0.09880039694565383,
"learning_rate": 1.7870896195529204e-06,
"loss": 0.7157,
"step": 105
},
{
"epoch": 0.5070254110612855,
"grad_norm": 0.10103523012523379,
"learning_rate": 1.7821837776657967e-06,
"loss": 0.7522,
"step": 106
},
{
"epoch": 0.5118086696562033,
"grad_norm": 0.09953632352625874,
"learning_rate": 1.777228955046803e-06,
"loss": 0.7215,
"step": 107
},
{
"epoch": 0.5165919282511211,
"grad_norm": 0.09448842637214858,
"learning_rate": 1.7722254619693617e-06,
"loss": 0.7311,
"step": 108
},
{
"epoch": 0.5213751868460389,
"grad_norm": 0.09926544596139777,
"learning_rate": 1.7671736117546643e-06,
"loss": 0.7242,
"step": 109
},
{
"epoch": 0.5261584454409567,
"grad_norm": 0.09420983432319698,
"learning_rate": 1.7620737207520498e-06,
"loss": 0.7302,
"step": 110
},
{
"epoch": 0.5309417040358745,
"grad_norm": 0.09391867567605319,
"learning_rate": 1.756926108319194e-06,
"loss": 0.7222,
"step": 111
},
{
"epoch": 0.5357249626307923,
"grad_norm": 0.09479652603956866,
"learning_rate": 1.751731096802113e-06,
"loss": 0.7361,
"step": 112
},
{
"epoch": 0.54050822122571,
"grad_norm": 0.09440230389077435,
"learning_rate": 1.7464890115149759e-06,
"loss": 0.7183,
"step": 113
},
{
"epoch": 0.5452914798206278,
"grad_norm": 0.09514244364363002,
"learning_rate": 1.7412001807197361e-06,
"loss": 0.7342,
"step": 114
},
{
"epoch": 0.5500747384155455,
"grad_norm": 0.10939831006494534,
"learning_rate": 1.735864935605572e-06,
"loss": 0.7251,
"step": 115
},
{
"epoch": 0.5548579970104633,
"grad_norm": 0.10066676165355973,
"learning_rate": 1.7304836102681493e-06,
"loss": 0.7081,
"step": 116
},
{
"epoch": 0.5596412556053811,
"grad_norm": 0.10100361164339053,
"learning_rate": 1.7250565416887015e-06,
"loss": 0.742,
"step": 117
},
{
"epoch": 0.5644245142002989,
"grad_norm": 0.09740229601345607,
"learning_rate": 1.719584069712925e-06,
"loss": 0.7314,
"step": 118
},
{
"epoch": 0.5692077727952167,
"grad_norm": 0.1012821496567702,
"learning_rate": 1.7140665370296992e-06,
"loss": 0.7167,
"step": 119
},
{
"epoch": 0.5739910313901345,
"grad_norm": 0.09994075838359362,
"learning_rate": 1.708504289149628e-06,
"loss": 0.7421,
"step": 120
},
{
"epoch": 0.5787742899850523,
"grad_norm": 0.09513046173828367,
"learning_rate": 1.702897674383402e-06,
"loss": 0.7067,
"step": 121
},
{
"epoch": 0.5835575485799701,
"grad_norm": 0.10488877885042427,
"learning_rate": 1.697247043819988e-06,
"loss": 0.7283,
"step": 122
},
{
"epoch": 0.5883408071748879,
"grad_norm": 0.10017563354892535,
"learning_rate": 1.6915527513046443e-06,
"loss": 0.7289,
"step": 123
},
{
"epoch": 0.5931240657698057,
"grad_norm": 0.09910676006320021,
"learning_rate": 1.6858151534167616e-06,
"loss": 0.7258,
"step": 124
},
{
"epoch": 0.5979073243647235,
"grad_norm": 0.10226756484228856,
"learning_rate": 1.6800346094475346e-06,
"loss": 0.7294,
"step": 125
},
{
"epoch": 0.6026905829596413,
"grad_norm": 0.0941277312513867,
"learning_rate": 1.6742114813774618e-06,
"loss": 0.7059,
"step": 126
},
{
"epoch": 0.6074738415545591,
"grad_norm": 0.10468386708851042,
"learning_rate": 1.6683461338536798e-06,
"loss": 0.76,
"step": 127
},
{
"epoch": 0.6122571001494769,
"grad_norm": 0.09546912003315239,
"learning_rate": 1.6624389341671278e-06,
"loss": 0.7199,
"step": 128
},
{
"epoch": 0.6170403587443947,
"grad_norm": 0.09278710008849092,
"learning_rate": 1.656490252229548e-06,
"loss": 0.71,
"step": 129
},
{
"epoch": 0.6218236173393124,
"grad_norm": 0.09629578223078193,
"learning_rate": 1.6505004605503223e-06,
"loss": 0.7297,
"step": 130
},
{
"epoch": 0.6266068759342301,
"grad_norm": 0.10564515959559177,
"learning_rate": 1.6444699342131428e-06,
"loss": 0.7323,
"step": 131
},
{
"epoch": 0.6313901345291479,
"grad_norm": 0.11359024419098725,
"learning_rate": 1.638399050852528e-06,
"loss": 0.7091,
"step": 132
},
{
"epoch": 0.6361733931240657,
"grad_norm": 0.11261022540293862,
"learning_rate": 1.632288190630172e-06,
"loss": 0.7092,
"step": 133
},
{
"epoch": 0.6409566517189835,
"grad_norm": 0.11356374624941931,
"learning_rate": 1.6261377362111396e-06,
"loss": 0.7226,
"step": 134
},
{
"epoch": 0.6457399103139013,
"grad_norm": 0.09628738165774237,
"learning_rate": 1.6199480727399032e-06,
"loss": 0.7313,
"step": 135
},
{
"epoch": 0.6505231689088191,
"grad_norm": 0.09955265729242128,
"learning_rate": 1.6137195878162267e-06,
"loss": 0.7264,
"step": 136
},
{
"epoch": 0.6553064275037369,
"grad_norm": 0.10088157860044299,
"learning_rate": 1.607452671470891e-06,
"loss": 0.72,
"step": 137
},
{
"epoch": 0.6600896860986547,
"grad_norm": 0.09316854100471951,
"learning_rate": 1.601147716141272e-06,
"loss": 0.7043,
"step": 138
},
{
"epoch": 0.6648729446935725,
"grad_norm": 0.09866104920600266,
"learning_rate": 1.5948051166467657e-06,
"loss": 0.7314,
"step": 139
},
{
"epoch": 0.6696562032884903,
"grad_norm": 0.09908667617176863,
"learning_rate": 1.5884252701640634e-06,
"loss": 0.7223,
"step": 140
},
{
"epoch": 0.6744394618834081,
"grad_norm": 0.10108043693556777,
"learning_rate": 1.5820085762022823e-06,
"loss": 0.7145,
"step": 141
},
{
"epoch": 0.6792227204783259,
"grad_norm": 0.09483321797525981,
"learning_rate": 1.5755554365779455e-06,
"loss": 0.712,
"step": 142
},
{
"epoch": 0.6840059790732437,
"grad_norm": 0.09772063438530315,
"learning_rate": 1.5690662553898222e-06,
"loss": 0.7262,
"step": 143
},
{
"epoch": 0.6887892376681615,
"grad_norm": 0.09547210509162248,
"learning_rate": 1.5625414389936218e-06,
"loss": 0.6881,
"step": 144
},
{
"epoch": 0.6935724962630793,
"grad_norm": 0.10198333563773951,
"learning_rate": 1.555981395976548e-06,
"loss": 0.7023,
"step": 145
},
{
"epoch": 0.6983557548579971,
"grad_norm": 0.0960216671080163,
"learning_rate": 1.5493865371317123e-06,
"loss": 0.7041,
"step": 146
},
{
"epoch": 0.7031390134529149,
"grad_norm": 0.10811878950887173,
"learning_rate": 1.542757275432411e-06,
"loss": 0.7121,
"step": 147
},
{
"epoch": 0.7079222720478325,
"grad_norm": 0.09745342759060693,
"learning_rate": 1.5360940260062635e-06,
"loss": 0.7,
"step": 148
},
{
"epoch": 0.7127055306427503,
"grad_norm": 0.10002068890855158,
"learning_rate": 1.5293972061092185e-06,
"loss": 0.7174,
"step": 149
},
{
"epoch": 0.7174887892376681,
"grad_norm": 0.094440761646848,
"learning_rate": 1.522667235099422e-06,
"loss": 0.6842,
"step": 150
},
{
"epoch": 0.7222720478325859,
"grad_norm": 0.09714805521617614,
"learning_rate": 1.515904534410961e-06,
"loss": 0.6917,
"step": 151
},
{
"epoch": 0.7270553064275037,
"grad_norm": 0.09206634939711936,
"learning_rate": 1.5091095275274699e-06,
"loss": 0.6807,
"step": 152
},
{
"epoch": 0.7318385650224215,
"grad_norm": 0.09811924963451824,
"learning_rate": 1.5022826399556133e-06,
"loss": 0.6938,
"step": 153
},
{
"epoch": 0.7366218236173393,
"grad_norm": 0.09469018906462104,
"learning_rate": 1.4954242991984396e-06,
"loss": 0.7262,
"step": 154
},
{
"epoch": 0.7414050822122571,
"grad_norm": 0.09900495842570976,
"learning_rate": 1.4885349347286115e-06,
"loss": 0.6928,
"step": 155
},
{
"epoch": 0.7461883408071749,
"grad_norm": 0.09813499443182924,
"learning_rate": 1.4816149779615126e-06,
"loss": 0.7041,
"step": 156
},
{
"epoch": 0.7509715994020927,
"grad_norm": 0.09285509032551069,
"learning_rate": 1.474664862228229e-06,
"loss": 0.7157,
"step": 157
},
{
"epoch": 0.7557548579970105,
"grad_norm": 0.09930227957877516,
"learning_rate": 1.467685022748419e-06,
"loss": 0.7077,
"step": 158
},
{
"epoch": 0.7605381165919283,
"grad_norm": 0.09336816965151891,
"learning_rate": 1.4606758966030534e-06,
"loss": 0.6905,
"step": 159
},
{
"epoch": 0.7653213751868461,
"grad_norm": 0.09584860785157516,
"learning_rate": 1.4536379227070509e-06,
"loss": 0.704,
"step": 160
},
{
"epoch": 0.7701046337817639,
"grad_norm": 0.09906164552724124,
"learning_rate": 1.4465715417817888e-06,
"loss": 0.7014,
"step": 161
},
{
"epoch": 0.7748878923766817,
"grad_norm": 0.09920929186360831,
"learning_rate": 1.4394771963275076e-06,
"loss": 0.6711,
"step": 162
},
{
"epoch": 0.7796711509715994,
"grad_norm": 0.09312914704123235,
"learning_rate": 1.4323553305955997e-06,
"loss": 0.704,
"step": 163
},
{
"epoch": 0.7844544095665172,
"grad_norm": 0.09380001375870357,
"learning_rate": 1.4252063905607909e-06,
"loss": 0.6769,
"step": 164
},
{
"epoch": 0.7892376681614349,
"grad_norm": 0.09383108087011895,
"learning_rate": 1.4180308238932135e-06,
"loss": 0.6903,
"step": 165
},
{
"epoch": 0.7940209267563527,
"grad_norm": 0.09761627284743495,
"learning_rate": 1.410829079930372e-06,
"loss": 0.7126,
"step": 166
},
{
"epoch": 0.7988041853512705,
"grad_norm": 0.09591926993818495,
"learning_rate": 1.4036016096490064e-06,
"loss": 0.6936,
"step": 167
},
{
"epoch": 0.8035874439461883,
"grad_norm": 0.09463907898930997,
"learning_rate": 1.3963488656368517e-06,
"loss": 0.6918,
"step": 168
},
{
"epoch": 0.8083707025411061,
"grad_norm": 0.10314575539858357,
"learning_rate": 1.389071302064295e-06,
"loss": 0.6837,
"step": 169
},
{
"epoch": 0.8131539611360239,
"grad_norm": 0.0964154089668258,
"learning_rate": 1.381769374655938e-06,
"loss": 0.7087,
"step": 170
},
{
"epoch": 0.8179372197309417,
"grad_norm": 0.10458955759891816,
"learning_rate": 1.374443540662057e-06,
"loss": 0.7132,
"step": 171
},
{
"epoch": 0.8227204783258595,
"grad_norm": 0.11118113052583456,
"learning_rate": 1.3670942588299705e-06,
"loss": 0.689,
"step": 172
},
{
"epoch": 0.8275037369207773,
"grad_norm": 0.09430050647819165,
"learning_rate": 1.3597219893753117e-06,
"loss": 0.6669,
"step": 173
},
{
"epoch": 0.8322869955156951,
"grad_norm": 0.10018122520539552,
"learning_rate": 1.352327193953211e-06,
"loss": 0.675,
"step": 174
},
{
"epoch": 0.8370702541106129,
"grad_norm": 0.1036112926787395,
"learning_rate": 1.3449103356293852e-06,
"loss": 0.7151,
"step": 175
},
{
"epoch": 0.8418535127055307,
"grad_norm": 0.09652117392718416,
"learning_rate": 1.337471878851141e-06,
"loss": 0.6819,
"step": 176
},
{
"epoch": 0.8466367713004485,
"grad_norm": 0.11467070226240633,
"learning_rate": 1.3300122894182909e-06,
"loss": 0.7063,
"step": 177
},
{
"epoch": 0.8514200298953662,
"grad_norm": 0.0974406950357686,
"learning_rate": 1.3225320344539842e-06,
"loss": 0.7154,
"step": 178
},
{
"epoch": 0.856203288490284,
"grad_norm": 0.10056923973958724,
"learning_rate": 1.315031582375457e-06,
"loss": 0.7119,
"step": 179
},
{
"epoch": 0.8609865470852018,
"grad_norm": 0.10289512917324216,
"learning_rate": 1.3075114028646974e-06,
"loss": 0.6872,
"step": 180
},
{
"epoch": 0.8657698056801196,
"grad_norm": 0.10284996024746469,
"learning_rate": 1.299971966839036e-06,
"loss": 0.6995,
"step": 181
},
{
"epoch": 0.8705530642750373,
"grad_norm": 0.09442402879665361,
"learning_rate": 1.292413746421655e-06,
"loss": 0.6788,
"step": 182
},
{
"epoch": 0.8753363228699551,
"grad_norm": 0.09221585066528634,
"learning_rate": 1.2848372149120246e-06,
"loss": 0.6625,
"step": 183
},
{
"epoch": 0.8801195814648729,
"grad_norm": 0.09614590670948946,
"learning_rate": 1.2772428467562651e-06,
"loss": 0.6993,
"step": 184
},
{
"epoch": 0.8849028400597907,
"grad_norm": 0.09884964743533457,
"learning_rate": 1.2696311175174357e-06,
"loss": 0.6826,
"step": 185
},
{
"epoch": 0.8896860986547085,
"grad_norm": 0.10049262287084837,
"learning_rate": 1.2620025038457554e-06,
"loss": 0.6875,
"step": 186
},
{
"epoch": 0.8944693572496263,
"grad_norm": 0.0951319815934962,
"learning_rate": 1.254357483448755e-06,
"loss": 0.6763,
"step": 187
},
{
"epoch": 0.8992526158445441,
"grad_norm": 0.0935897850203258,
"learning_rate": 1.2466965350613615e-06,
"loss": 0.7191,
"step": 188
},
{
"epoch": 0.9040358744394619,
"grad_norm": 0.10488228598924217,
"learning_rate": 1.2390201384159219e-06,
"loss": 0.7031,
"step": 189
},
{
"epoch": 0.9088191330343797,
"grad_norm": 0.09803611282531831,
"learning_rate": 1.231328774212159e-06,
"loss": 0.6596,
"step": 190
},
{
"epoch": 0.9136023916292975,
"grad_norm": 0.10982924572402691,
"learning_rate": 1.223622924087073e-06,
"loss": 0.685,
"step": 191
},
{
"epoch": 0.9183856502242153,
"grad_norm": 0.0990057467989385,
"learning_rate": 1.215903070584779e-06,
"loss": 0.6905,
"step": 192
},
{
"epoch": 0.923168908819133,
"grad_norm": 0.09806799076875558,
"learning_rate": 1.2081696971262903e-06,
"loss": 0.6888,
"step": 193
},
{
"epoch": 0.9279521674140508,
"grad_norm": 0.09725950749183558,
"learning_rate": 1.2004232879792464e-06,
"loss": 0.6897,
"step": 194
},
{
"epoch": 0.9327354260089686,
"grad_norm": 0.09998658118754998,
"learning_rate": 1.1926643282275882e-06,
"loss": 0.6808,
"step": 195
},
{
"epoch": 0.9375186846038864,
"grad_norm": 0.09991311679692257,
"learning_rate": 1.1848933037411825e-06,
"loss": 0.6721,
"step": 196
},
{
"epoch": 0.9423019431988042,
"grad_norm": 0.09570773453199784,
"learning_rate": 1.1771107011453933e-06,
"loss": 0.6943,
"step": 197
},
{
"epoch": 0.947085201793722,
"grad_norm": 0.09891331359398514,
"learning_rate": 1.1693170077906143e-06,
"loss": 0.6989,
"step": 198
},
{
"epoch": 0.9518684603886398,
"grad_norm": 0.09162536810525922,
"learning_rate": 1.1615127117217463e-06,
"loss": 0.6705,
"step": 199
},
{
"epoch": 0.9566517189835575,
"grad_norm": 0.08903988395053124,
"learning_rate": 1.1536983016476373e-06,
"loss": 0.679,
"step": 200
},
{
"epoch": 0.9614349775784753,
"grad_norm": 0.09042806424104788,
"learning_rate": 1.1458742669104803e-06,
"loss": 0.6652,
"step": 201
},
{
"epoch": 0.9662182361733931,
"grad_norm": 0.10347050843667145,
"learning_rate": 1.1380410974551682e-06,
"loss": 0.6891,
"step": 202
},
{
"epoch": 0.9710014947683109,
"grad_norm": 0.0937785288147842,
"learning_rate": 1.130199283798615e-06,
"loss": 0.662,
"step": 203
},
{
"epoch": 0.9757847533632287,
"grad_norm": 0.10125646071292,
"learning_rate": 1.1223493169990391e-06,
"loss": 0.6857,
"step": 204
},
{
"epoch": 0.9805680119581465,
"grad_norm": 0.09552098120941739,
"learning_rate": 1.1144916886252124e-06,
"loss": 0.6693,
"step": 205
},
{
"epoch": 0.9853512705530643,
"grad_norm": 0.0939464203547695,
"learning_rate": 1.1066268907256782e-06,
"loss": 0.689,
"step": 206
},
{
"epoch": 0.9901345291479821,
"grad_norm": 0.1083244661837491,
"learning_rate": 1.098755415797939e-06,
"loss": 0.6795,
"step": 207
},
{
"epoch": 0.9949177877428999,
"grad_norm": 0.09671011359258122,
"learning_rate": 1.0908777567576168e-06,
"loss": 0.697,
"step": 208
},
{
"epoch": 0.9997010463378176,
"grad_norm": 0.09491067631505212,
"learning_rate": 1.0829944069075847e-06,
"loss": 0.6913,
"step": 209
},
{
"epoch": 1.0,
"grad_norm": 0.09491067631505212,
"learning_rate": 1.0751058599070781e-06,
"loss": 0.0398,
"step": 210
},
{
"epoch": 1.0047832585949177,
"grad_norm": 0.09568291564665689,
"learning_rate": 1.0672126097407795e-06,
"loss": 0.6558,
"step": 211
},
{
"epoch": 1.0095665171898356,
"grad_norm": 0.0890899262566247,
"learning_rate": 1.0593151506878865e-06,
"loss": 0.6742,
"step": 212
},
{
"epoch": 1.0143497757847533,
"grad_norm": 0.08951496407842846,
"learning_rate": 1.0514139772911597e-06,
"loss": 0.6589,
"step": 213
},
{
"epoch": 1.0191330343796712,
"grad_norm": 0.09303979677050327,
"learning_rate": 1.043509584325953e-06,
"loss": 0.6526,
"step": 214
},
{
"epoch": 1.0239162929745889,
"grad_norm": 0.10551892280989528,
"learning_rate": 1.0356024667692314e-06,
"loss": 0.6849,
"step": 215
},
{
"epoch": 1.0286995515695068,
"grad_norm": 0.10560698057117009,
"learning_rate": 1.0276931197685753e-06,
"loss": 0.6947,
"step": 216
},
{
"epoch": 1.0334828101644244,
"grad_norm": 0.09055248425609617,
"learning_rate": 1.0197820386111737e-06,
"loss": 0.6692,
"step": 217
},
{
"epoch": 1.0382660687593424,
"grad_norm": 0.08952534903326591,
"learning_rate": 1.0118697186928105e-06,
"loss": 0.6481,
"step": 218
},
{
"epoch": 1.04304932735426,
"grad_norm": 0.0949207133753394,
"learning_rate": 1.0039566554868392e-06,
"loss": 0.6561,
"step": 219
},
{
"epoch": 1.047832585949178,
"grad_norm": 0.09247582314260705,
"learning_rate": 9.960433445131607e-07,
"loss": 0.6727,
"step": 220
},
{
"epoch": 1.0526158445440956,
"grad_norm": 0.0922431854223743,
"learning_rate": 9.881302813071896e-07,
"loss": 0.6786,
"step": 221
},
{
"epoch": 1.0573991031390135,
"grad_norm": 0.09921340856730206,
"learning_rate": 9.802179613888262e-07,
"loss": 0.6492,
"step": 222
},
{
"epoch": 1.0621823617339312,
"grad_norm": 0.09405904196612806,
"learning_rate": 9.723068802314246e-07,
"loss": 0.6435,
"step": 223
},
{
"epoch": 1.0669656203288491,
"grad_norm": 0.10252064804861775,
"learning_rate": 9.643975332307687e-07,
"loss": 0.6693,
"step": 224
},
{
"epoch": 1.0717488789237668,
"grad_norm": 0.09137882604103069,
"learning_rate": 9.564904156740471e-07,
"loss": 0.6554,
"step": 225
},
{
"epoch": 1.0765321375186847,
"grad_norm": 0.09506143141231545,
"learning_rate": 9.485860227088405e-07,
"loss": 0.6524,
"step": 226
},
{
"epoch": 1.0813153961136024,
"grad_norm": 0.09471266291722098,
"learning_rate": 9.406848493121134e-07,
"loss": 0.6598,
"step": 227
},
{
"epoch": 1.08609865470852,
"grad_norm": 0.09374158444399681,
"learning_rate": 9.327873902592205e-07,
"loss": 0.6546,
"step": 228
},
{
"epoch": 1.090881913303438,
"grad_norm": 0.0988485463507574,
"learning_rate": 9.248941400929222e-07,
"loss": 0.6659,
"step": 229
},
{
"epoch": 1.0956651718983557,
"grad_norm": 0.09989186431558944,
"learning_rate": 9.17005593092415e-07,
"loss": 0.6789,
"step": 230
},
{
"epoch": 1.1004484304932736,
"grad_norm": 0.09577210416129449,
"learning_rate": 9.09122243242383e-07,
"loss": 0.6395,
"step": 231
},
{
"epoch": 1.1052316890881912,
"grad_norm": 0.09417460653116495,
"learning_rate": 9.01244584202061e-07,
"loss": 0.6351,
"step": 232
},
{
"epoch": 1.1100149476831092,
"grad_norm": 0.1060296134876217,
"learning_rate": 8.933731092743219e-07,
"loss": 0.6843,
"step": 233
},
{
"epoch": 1.1147982062780268,
"grad_norm": 0.1015156854708665,
"learning_rate": 8.855083113747875e-07,
"loss": 0.6533,
"step": 234
},
{
"epoch": 1.1195814648729447,
"grad_norm": 0.09252864648733664,
"learning_rate": 8.776506830009607e-07,
"loss": 0.6529,
"step": 235
},
{
"epoch": 1.1243647234678624,
"grad_norm": 0.09810040579156247,
"learning_rate": 8.698007162013849e-07,
"loss": 0.6622,
"step": 236
},
{
"epoch": 1.1291479820627803,
"grad_norm": 0.10333456832019272,
"learning_rate": 8.619589025448318e-07,
"loss": 0.6698,
"step": 237
},
{
"epoch": 1.133931240657698,
"grad_norm": 0.09369526359642345,
"learning_rate": 8.541257330895197e-07,
"loss": 0.6397,
"step": 238
},
{
"epoch": 1.138714499252616,
"grad_norm": 0.0934070849673633,
"learning_rate": 8.463016983523627e-07,
"loss": 0.6724,
"step": 239
},
{
"epoch": 1.1434977578475336,
"grad_norm": 0.0968568071003159,
"learning_rate": 8.384872882782541e-07,
"loss": 0.6651,
"step": 240
},
{
"epoch": 1.1482810164424515,
"grad_norm": 0.09218848184783551,
"learning_rate": 8.306829922093857e-07,
"loss": 0.6482,
"step": 241
},
{
"epoch": 1.1530642750373692,
"grad_norm": 0.09367162146496326,
"learning_rate": 8.228892988546067e-07,
"loss": 0.6532,
"step": 242
},
{
"epoch": 1.157847533632287,
"grad_norm": 0.09179870741014423,
"learning_rate": 8.15106696258818e-07,
"loss": 0.6458,
"step": 243
},
{
"epoch": 1.1626307922272048,
"grad_norm": 0.10425982157218257,
"learning_rate": 8.073356717724115e-07,
"loss": 0.6476,
"step": 244
},
{
"epoch": 1.1674140508221225,
"grad_norm": 0.10785978296392415,
"learning_rate": 7.995767120207536e-07,
"loss": 0.6542,
"step": 245
},
{
"epoch": 1.1721973094170404,
"grad_norm": 0.09053925155843066,
"learning_rate": 7.918303028737096e-07,
"loss": 0.6444,
"step": 246
},
{
"epoch": 1.176980568011958,
"grad_norm": 0.11054671698924359,
"learning_rate": 7.840969294152211e-07,
"loss": 0.6546,
"step": 247
},
{
"epoch": 1.181763826606876,
"grad_norm": 0.09190168624229306,
"learning_rate": 7.763770759129269e-07,
"loss": 0.6483,
"step": 248
},
{
"epoch": 1.1865470852017936,
"grad_norm": 0.10112895278117082,
"learning_rate": 7.68671225787841e-07,
"loss": 0.6607,
"step": 249
},
{
"epoch": 1.1913303437967115,
"grad_norm": 0.09521368142452571,
"learning_rate": 7.609798615840785e-07,
"loss": 0.6632,
"step": 250
},
{
"epoch": 1.1961136023916292,
"grad_norm": 0.09631678500828386,
"learning_rate": 7.533034649386384e-07,
"loss": 0.6271,
"step": 251
},
{
"epoch": 1.2008968609865471,
"grad_norm": 0.09402110237205977,
"learning_rate": 7.456425165512452e-07,
"loss": 0.649,
"step": 252
},
{
"epoch": 1.2056801195814648,
"grad_norm": 0.10452266128761932,
"learning_rate": 7.379974961542447e-07,
"loss": 0.6744,
"step": 253
},
{
"epoch": 1.2104633781763827,
"grad_norm": 0.09522707743392524,
"learning_rate": 7.303688824825646e-07,
"loss": 0.6608,
"step": 254
},
{
"epoch": 1.2152466367713004,
"grad_norm": 0.09573208889216732,
"learning_rate": 7.227571532437349e-07,
"loss": 0.652,
"step": 255
},
{
"epoch": 1.2200298953662183,
"grad_norm": 0.08917908293059873,
"learning_rate": 7.151627850879755e-07,
"loss": 0.6543,
"step": 256
},
{
"epoch": 1.224813153961136,
"grad_norm": 0.09616438435062312,
"learning_rate": 7.075862535783453e-07,
"loss": 0.6337,
"step": 257
},
{
"epoch": 1.229596412556054,
"grad_norm": 0.09640367364080155,
"learning_rate": 7.00028033160964e-07,
"loss": 0.6839,
"step": 258
},
{
"epoch": 1.2343796711509716,
"grad_norm": 0.09586353497663917,
"learning_rate": 6.924885971353026e-07,
"loss": 0.6669,
"step": 259
},
{
"epoch": 1.2391629297458895,
"grad_norm": 0.09267059238961081,
"learning_rate": 6.849684176245431e-07,
"loss": 0.6314,
"step": 260
},
{
"epoch": 1.2439461883408072,
"grad_norm": 0.09031407329588002,
"learning_rate": 6.774679655460158e-07,
"loss": 0.6449,
"step": 261
},
{
"epoch": 1.2487294469357249,
"grad_norm": 0.09470627715876291,
"learning_rate": 6.699877105817092e-07,
"loss": 0.6502,
"step": 262
},
{
"epoch": 1.2535127055306428,
"grad_norm": 0.10074811226580811,
"learning_rate": 6.625281211488591e-07,
"loss": 0.6686,
"step": 263
},
{
"epoch": 1.2582959641255607,
"grad_norm": 0.10063396201285223,
"learning_rate": 6.55089664370615e-07,
"loss": 0.6695,
"step": 264
},
{
"epoch": 1.2630792227204783,
"grad_norm": 0.0918463846096307,
"learning_rate": 6.476728060467888e-07,
"loss": 0.6451,
"step": 265
},
{
"epoch": 1.267862481315396,
"grad_norm": 0.09328601851356563,
"learning_rate": 6.402780106246884e-07,
"loss": 0.6532,
"step": 266
},
{
"epoch": 1.272645739910314,
"grad_norm": 0.09424847785405825,
"learning_rate": 6.329057411700298e-07,
"loss": 0.6673,
"step": 267
},
{
"epoch": 1.2774289985052316,
"grad_norm": 0.10008134051501576,
"learning_rate": 6.255564593379429e-07,
"loss": 0.6672,
"step": 268
},
{
"epoch": 1.2822122571001495,
"grad_norm": 0.09294984655524738,
"learning_rate": 6.182306253440619e-07,
"loss": 0.6395,
"step": 269
},
{
"epoch": 1.2869955156950672,
"grad_norm": 0.10285895388747343,
"learning_rate": 6.109286979357051e-07,
"loss": 0.6637,
"step": 270
},
{
"epoch": 1.291778774289985,
"grad_norm": 0.11139784795321246,
"learning_rate": 6.036511343631488e-07,
"loss": 0.6455,
"step": 271
},
{
"epoch": 1.2965620328849028,
"grad_norm": 0.09212296328590026,
"learning_rate": 5.963983903509935e-07,
"loss": 0.6638,
"step": 272
},
{
"epoch": 1.3013452914798207,
"grad_norm": 0.0949968377343012,
"learning_rate": 5.89170920069628e-07,
"loss": 0.6548,
"step": 273
},
{
"epoch": 1.3061285500747384,
"grad_norm": 0.09690303299554558,
"learning_rate": 5.819691761067865e-07,
"loss": 0.6388,
"step": 274
},
{
"epoch": 1.310911808669656,
"grad_norm": 0.09255296263795812,
"learning_rate": 5.747936094392089e-07,
"loss": 0.6435,
"step": 275
},
{
"epoch": 1.315695067264574,
"grad_norm": 0.09503263182638313,
"learning_rate": 5.676446694044002e-07,
"loss": 0.638,
"step": 276
},
{
"epoch": 1.3204783258594919,
"grad_norm": 0.09478054996201758,
"learning_rate": 5.605228036724927e-07,
"loss": 0.6502,
"step": 277
},
{
"epoch": 1.3252615844544096,
"grad_norm": 0.0933411883471192,
"learning_rate": 5.534284582182114e-07,
"loss": 0.6511,
"step": 278
},
{
"epoch": 1.3300448430493272,
"grad_norm": 0.09944351370813859,
"learning_rate": 5.463620772929494e-07,
"loss": 0.6325,
"step": 279
},
{
"epoch": 1.3348281016442451,
"grad_norm": 0.10023032726854744,
"learning_rate": 5.393241033969466e-07,
"loss": 0.6418,
"step": 280
},
{
"epoch": 1.339611360239163,
"grad_norm": 0.09729398494948012,
"learning_rate": 5.323149772515812e-07,
"loss": 0.6372,
"step": 281
},
{
"epoch": 1.3443946188340807,
"grad_norm": 0.09323209082587747,
"learning_rate": 5.253351377717706e-07,
"loss": 0.6504,
"step": 282
},
{
"epoch": 1.3491778774289984,
"grad_norm": 0.08940562070783202,
"learning_rate": 5.183850220384873e-07,
"loss": 0.6461,
"step": 283
},
{
"epoch": 1.3539611360239163,
"grad_norm": 0.09092518318025446,
"learning_rate": 5.114650652713884e-07,
"loss": 0.6542,
"step": 284
},
{
"epoch": 1.358744394618834,
"grad_norm": 0.0957083892879257,
"learning_rate": 5.045757008015606e-07,
"loss": 0.6627,
"step": 285
},
{
"epoch": 1.363527653213752,
"grad_norm": 0.09918131125769998,
"learning_rate": 4.977173600443868e-07,
"loss": 0.6447,
"step": 286
},
{
"epoch": 1.3683109118086696,
"grad_norm": 0.09079455495976413,
"learning_rate": 4.908904724725299e-07,
"loss": 0.651,
"step": 287
},
{
"epoch": 1.3730941704035875,
"grad_norm": 0.09533039778556848,
"learning_rate": 4.840954655890391e-07,
"loss": 0.6518,
"step": 288
},
{
"epoch": 1.3778774289985052,
"grad_norm": 0.09328409620590697,
"learning_rate": 4.773327649005777e-07,
"loss": 0.6712,
"step": 289
},
{
"epoch": 1.382660687593423,
"grad_norm": 0.10546886430926707,
"learning_rate": 4.7060279389078184e-07,
"loss": 0.6594,
"step": 290
},
{
"epoch": 1.3874439461883408,
"grad_norm": 0.09513157037379577,
"learning_rate": 4.6390597399373644e-07,
"loss": 0.6311,
"step": 291
},
{
"epoch": 1.3922272047832587,
"grad_norm": 0.0910714399276055,
"learning_rate": 4.5724272456758907e-07,
"loss": 0.6524,
"step": 292
},
{
"epoch": 1.3970104633781764,
"grad_norm": 0.08960044994197404,
"learning_rate": 4.506134628682877e-07,
"loss": 0.6515,
"step": 293
},
{
"epoch": 1.4017937219730943,
"grad_norm": 0.0939439987196228,
"learning_rate": 4.440186040234524e-07,
"loss": 0.6487,
"step": 294
},
{
"epoch": 1.406576980568012,
"grad_norm": 0.10645194425387064,
"learning_rate": 4.3745856100637834e-07,
"loss": 0.629,
"step": 295
},
{
"epoch": 1.4113602391629296,
"grad_norm": 0.1047763121754449,
"learning_rate": 4.3093374461017785e-07,
"loss": 0.6466,
"step": 296
},
{
"epoch": 1.4161434977578475,
"grad_norm": 0.09982639743024341,
"learning_rate": 4.244445634220545e-07,
"loss": 0.6504,
"step": 297
},
{
"epoch": 1.4209267563527654,
"grad_norm": 0.094704337085837,
"learning_rate": 4.1799142379771766e-07,
"loss": 0.6675,
"step": 298
},
{
"epoch": 1.4257100149476831,
"grad_norm": 0.09542340607816273,
"learning_rate": 4.115747298359363e-07,
"loss": 0.6379,
"step": 299
},
{
"epoch": 1.4304932735426008,
"grad_norm": 0.09975848410849608,
"learning_rate": 4.0519488335323415e-07,
"loss": 0.6684,
"step": 300
},
{
"epoch": 1.4352765321375187,
"grad_norm": 0.09564133208363568,
"learning_rate": 3.9885228385872806e-07,
"loss": 0.6345,
"step": 301
},
{
"epoch": 1.4400597907324364,
"grad_norm": 0.0955432935737647,
"learning_rate": 3.925473285291091e-07,
"loss": 0.6419,
"step": 302
},
{
"epoch": 1.4448430493273543,
"grad_norm": 0.0971708074341661,
"learning_rate": 3.862804121837733e-07,
"loss": 0.6568,
"step": 303
},
{
"epoch": 1.449626307922272,
"grad_norm": 0.09654206097129785,
"learning_rate": 3.8005192726009663e-07,
"loss": 0.6526,
"step": 304
},
{
"epoch": 1.45440956651719,
"grad_norm": 0.1047844291301578,
"learning_rate": 3.738622637888608e-07,
"loss": 0.6554,
"step": 305
},
{
"epoch": 1.4591928251121076,
"grad_norm": 0.10495835343403974,
"learning_rate": 3.677118093698278e-07,
"loss": 0.639,
"step": 306
},
{
"epoch": 1.4639760837070255,
"grad_norm": 0.09312185978330073,
"learning_rate": 3.61600949147472e-07,
"loss": 0.6534,
"step": 307
},
{
"epoch": 1.4687593423019432,
"grad_norm": 0.0914400067851364,
"learning_rate": 3.5553006578685706e-07,
"loss": 0.6364,
"step": 308
},
{
"epoch": 1.473542600896861,
"grad_norm": 0.10168751711517944,
"learning_rate": 3.494995394496778e-07,
"loss": 0.6438,
"step": 309
},
{
"epoch": 1.4783258594917787,
"grad_norm": 0.08777082505313431,
"learning_rate": 3.435097477704517e-07,
"loss": 0.6159,
"step": 310
},
{
"epoch": 1.4831091180866967,
"grad_norm": 0.0992483436164171,
"learning_rate": 3.3756106583287205e-07,
"loss": 0.6692,
"step": 311
},
{
"epoch": 1.4878923766816143,
"grad_norm": 0.09763140125702534,
"learning_rate": 3.316538661463204e-07,
"loss": 0.6704,
"step": 312
},
{
"epoch": 1.492675635276532,
"grad_norm": 0.103958466638517,
"learning_rate": 3.2578851862253796e-07,
"loss": 0.6582,
"step": 313
},
{
"epoch": 1.49745889387145,
"grad_norm": 0.09058417960194183,
"learning_rate": 3.199653905524654e-07,
"loss": 0.6353,
"step": 314
},
{
"epoch": 1.5022421524663678,
"grad_norm": 0.10131403619552605,
"learning_rate": 3.1418484658323806e-07,
"loss": 0.6566,
"step": 315
},
{
"epoch": 1.5070254110612855,
"grad_norm": 0.09681513597634411,
"learning_rate": 3.0844724869535577e-07,
"loss": 0.6437,
"step": 316
},
{
"epoch": 1.5118086696562032,
"grad_norm": 0.10073309195120103,
"learning_rate": 3.027529561800117e-07,
"loss": 0.6541,
"step": 317
},
{
"epoch": 1.516591928251121,
"grad_norm": 0.09187767379862512,
"learning_rate": 2.971023256165983e-07,
"loss": 0.6429,
"step": 318
},
{
"epoch": 1.521375186846039,
"grad_norm": 0.09322468814151724,
"learning_rate": 2.9149571085037215e-07,
"loss": 0.6536,
"step": 319
},
{
"epoch": 1.5261584454409567,
"grad_norm": 0.09535864278016615,
"learning_rate": 2.8593346297030073e-07,
"loss": 0.6448,
"step": 320
},
{
"epoch": 1.5309417040358744,
"grad_norm": 0.09853757658051235,
"learning_rate": 2.804159302870751e-07,
"loss": 0.6361,
"step": 321
},
{
"epoch": 1.5357249626307923,
"grad_norm": 0.08652865663588583,
"learning_rate": 2.7494345831129837e-07,
"loss": 0.6275,
"step": 322
},
{
"epoch": 1.54050822122571,
"grad_norm": 0.09209381258321075,
"learning_rate": 2.6951638973185073e-07,
"loss": 0.6528,
"step": 323
},
{
"epoch": 1.5452914798206279,
"grad_norm": 0.09568385273192681,
"learning_rate": 2.64135064394428e-07,
"loss": 0.6632,
"step": 324
},
{
"epoch": 1.5500747384155455,
"grad_norm": 0.0947277435093391,
"learning_rate": 2.587998192802638e-07,
"loss": 0.6306,
"step": 325
},
{
"epoch": 1.5548579970104632,
"grad_norm": 0.0985703474276344,
"learning_rate": 2.5351098848502386e-07,
"loss": 0.6511,
"step": 326
},
{
"epoch": 1.5596412556053811,
"grad_norm": 0.09427610648180619,
"learning_rate": 2.482689031978872e-07,
"loss": 0.6533,
"step": 327
},
{
"epoch": 1.564424514200299,
"grad_norm": 0.09520925811802433,
"learning_rate": 2.4307389168080606e-07,
"loss": 0.6603,
"step": 328
},
{
"epoch": 1.5692077727952167,
"grad_norm": 0.0907369263004915,
"learning_rate": 2.3792627924795038e-07,
"loss": 0.6818,
"step": 329
},
{
"epoch": 1.5739910313901344,
"grad_norm": 0.09440279581013306,
"learning_rate": 2.3282638824533529e-07,
"loss": 0.6531,
"step": 330
},
{
"epoch": 1.5787742899850523,
"grad_norm": 0.09614745051429147,
"learning_rate": 2.277745380306383e-07,
"loss": 0.6795,
"step": 331
},
{
"epoch": 1.5835575485799702,
"grad_norm": 0.09778941686336041,
"learning_rate": 2.227710449531971e-07,
"loss": 0.6778,
"step": 332
},
{
"epoch": 1.588340807174888,
"grad_norm": 0.09575250682717351,
"learning_rate": 2.178162223342035e-07,
"loss": 0.6404,
"step": 333
},
{
"epoch": 1.5931240657698056,
"grad_norm": 0.09627217057571222,
"learning_rate": 2.1291038044707965e-07,
"loss": 0.6528,
"step": 334
},
{
"epoch": 1.5979073243647235,
"grad_norm": 0.09572743591446818,
"learning_rate": 2.0805382649805225e-07,
"loss": 0.6461,
"step": 335
},
{
"epoch": 1.6026905829596414,
"grad_norm": 0.09528928099830879,
"learning_rate": 2.032468646069112e-07,
"loss": 0.6425,
"step": 336
},
{
"epoch": 1.607473841554559,
"grad_norm": 0.09652866769512121,
"learning_rate": 1.9848979578796865e-07,
"loss": 0.6548,
"step": 337
},
{
"epoch": 1.6122571001494768,
"grad_norm": 0.0954083836089715,
"learning_rate": 1.937829179312076e-07,
"loss": 0.6633,
"step": 338
},
{
"epoch": 1.6170403587443947,
"grad_norm": 0.09389212828330971,
"learning_rate": 1.8912652578362853e-07,
"loss": 0.653,
"step": 339
},
{
"epoch": 1.6218236173393124,
"grad_norm": 0.09323975661872334,
"learning_rate": 1.8452091093079215e-07,
"loss": 0.6405,
"step": 340
},
{
"epoch": 1.6266068759342303,
"grad_norm": 0.1030124431981675,
"learning_rate": 1.7996636177855928e-07,
"loss": 0.6776,
"step": 341
},
{
"epoch": 1.631390134529148,
"grad_norm": 0.09627742650338285,
"learning_rate": 1.75463163535033e-07,
"loss": 0.6579,
"step": 342
},
{
"epoch": 1.6361733931240656,
"grad_norm": 0.09724021609427144,
"learning_rate": 1.7101159819269583e-07,
"loss": 0.6432,
"step": 343
},
{
"epoch": 1.6409566517189835,
"grad_norm": 0.09615121849981347,
"learning_rate": 1.6661194451075345e-07,
"loss": 0.6628,
"step": 344
},
{
"epoch": 1.6457399103139014,
"grad_norm": 0.11302849698050037,
"learning_rate": 1.6226447799767772e-07,
"loss": 0.6306,
"step": 345
},
{
"epoch": 1.6505231689088191,
"grad_norm": 0.10400127614773519,
"learning_rate": 1.5796947089395475e-07,
"loss": 0.6462,
"step": 346
},
{
"epoch": 1.6553064275037368,
"grad_norm": 0.08798479350296001,
"learning_rate": 1.5372719215503582e-07,
"loss": 0.6309,
"step": 347
},
{
"epoch": 1.6600896860986547,
"grad_norm": 0.09514870211869147,
"learning_rate": 1.4953790743449702e-07,
"loss": 0.6631,
"step": 348
},
{
"epoch": 1.6648729446935726,
"grad_norm": 0.09749807157916107,
"learning_rate": 1.4540187906740241e-07,
"loss": 0.6285,
"step": 349
},
{
"epoch": 1.6696562032884903,
"grad_norm": 0.0901583318721974,
"learning_rate": 1.4131936605387762e-07,
"loss": 0.6731,
"step": 350
},
{
"epoch": 1.674439461883408,
"grad_norm": 0.09526536450165937,
"learning_rate": 1.3729062404289017e-07,
"loss": 0.6729,
"step": 351
},
{
"epoch": 1.6792227204783259,
"grad_norm": 0.09836491336123554,
"learning_rate": 1.3331590531624115e-07,
"loss": 0.6515,
"step": 352
},
{
"epoch": 1.6840059790732438,
"grad_norm": 0.10075181987095727,
"learning_rate": 1.2939545877276726e-07,
"loss": 0.6452,
"step": 353
},
{
"epoch": 1.6887892376681615,
"grad_norm": 0.09365016014154177,
"learning_rate": 1.25529529912754e-07,
"loss": 0.6477,
"step": 354
},
{
"epoch": 1.6935724962630792,
"grad_norm": 0.09704957910910289,
"learning_rate": 1.2171836082256316e-07,
"loss": 0.6678,
"step": 355
},
{
"epoch": 1.698355754857997,
"grad_norm": 0.0902657671425916,
"learning_rate": 1.1796219015947285e-07,
"loss": 0.6515,
"step": 356
},
{
"epoch": 1.703139013452915,
"grad_norm": 0.09237650202510098,
"learning_rate": 1.1426125313673285e-07,
"loss": 0.6645,
"step": 357
},
{
"epoch": 1.7079222720478326,
"grad_norm": 0.09196231975892524,
"learning_rate": 1.1061578150883444e-07,
"loss": 0.6092,
"step": 358
},
{
"epoch": 1.7127055306427503,
"grad_norm": 0.10378820492061246,
"learning_rate": 1.070260035570002e-07,
"loss": 0.6539,
"step": 359
},
{
"epoch": 1.717488789237668,
"grad_norm": 0.09091589756400278,
"learning_rate": 1.0349214407488571e-07,
"loss": 0.6454,
"step": 360
},
{
"epoch": 1.722272047832586,
"grad_norm": 0.09881444337923977,
"learning_rate": 1.000144243545058e-07,
"loss": 0.6486,
"step": 361
},
{
"epoch": 1.7270553064275038,
"grad_norm": 0.09311309771551186,
"learning_rate": 9.659306217237517e-08,
"loss": 0.6402,
"step": 362
},
{
"epoch": 1.7318385650224215,
"grad_norm": 0.09631340848121332,
"learning_rate": 9.322827177587212e-08,
"loss": 0.6469,
"step": 363
},
{
"epoch": 1.7366218236173392,
"grad_norm": 0.08882699558772723,
"learning_rate": 8.992026386982221e-08,
"loss": 0.6535,
"step": 364
},
{
"epoch": 1.741405082212257,
"grad_norm": 0.09280206311141305,
"learning_rate": 8.66692456033029e-08,
"loss": 0.648,
"step": 365
},
{
"epoch": 1.746188340807175,
"grad_norm": 0.0909402496845187,
"learning_rate": 8.347542055667311e-08,
"loss": 0.6529,
"step": 366
},
{
"epoch": 1.7509715994020927,
"grad_norm": 0.09512784479004122,
"learning_rate": 8.033898872882394e-08,
"loss": 0.6383,
"step": 367
},
{
"epoch": 1.7557548579970104,
"grad_norm": 0.09252600518424785,
"learning_rate": 7.726014652465507e-08,
"loss": 0.6202,
"step": 368
},
{
"epoch": 1.7605381165919283,
"grad_norm": 0.09450252582803388,
"learning_rate": 7.423908674277579e-08,
"loss": 0.6494,
"step": 369
},
{
"epoch": 1.7653213751868462,
"grad_norm": 0.09089301547199258,
"learning_rate": 7.127599856343192e-08,
"loss": 0.6583,
"step": 370
},
{
"epoch": 1.7701046337817639,
"grad_norm": 0.0917284963739844,
"learning_rate": 6.837106753665823e-08,
"loss": 0.666,
"step": 371
},
{
"epoch": 1.7748878923766815,
"grad_norm": 0.09493041895710681,
"learning_rate": 6.552447557066109e-08,
"loss": 0.6464,
"step": 372
},
{
"epoch": 1.7796711509715994,
"grad_norm": 0.08941486424509316,
"learning_rate": 6.273640092042575e-08,
"loss": 0.6367,
"step": 373
},
{
"epoch": 1.7844544095665174,
"grad_norm": 0.08812104207206783,
"learning_rate": 6.000701817655474e-08,
"loss": 0.6259,
"step": 374
},
{
"epoch": 1.789237668161435,
"grad_norm": 0.09772722276760373,
"learning_rate": 5.733649825433384e-08,
"loss": 0.6316,
"step": 375
},
{
"epoch": 1.7940209267563527,
"grad_norm": 0.09550366242600927,
"learning_rate": 5.47250083830314e-08,
"loss": 0.6764,
"step": 376
},
{
"epoch": 1.7988041853512704,
"grad_norm": 0.09529244067030168,
"learning_rate": 5.217271209542384e-08,
"loss": 0.6581,
"step": 377
},
{
"epoch": 1.8035874439461883,
"grad_norm": 0.09484969927499808,
"learning_rate": 4.967976921755679e-08,
"loss": 0.6238,
"step": 378
},
{
"epoch": 1.8083707025411062,
"grad_norm": 0.0922584352432481,
"learning_rate": 4.724633585873627e-08,
"loss": 0.6417,
"step": 379
},
{
"epoch": 1.813153961136024,
"grad_norm": 0.09178466251978876,
"learning_rate": 4.487256440175291e-08,
"loss": 0.6563,
"step": 380
},
{
"epoch": 1.8179372197309416,
"grad_norm": 0.0945223759439494,
"learning_rate": 4.255860349334006e-08,
"loss": 0.6479,
"step": 381
},
{
"epoch": 1.8227204783258595,
"grad_norm": 0.08929357609354767,
"learning_rate": 4.030459803486464e-08,
"loss": 0.6378,
"step": 382
},
{
"epoch": 1.8275037369207774,
"grad_norm": 0.08950252320624025,
"learning_rate": 3.811068917325444e-08,
"loss": 0.6128,
"step": 383
},
{
"epoch": 1.832286995515695,
"grad_norm": 0.09959763380863362,
"learning_rate": 3.5977014292158495e-08,
"loss": 0.6493,
"step": 384
},
{
"epoch": 1.8370702541106128,
"grad_norm": 0.09877239003895597,
"learning_rate": 3.3903707003344774e-08,
"loss": 0.6453,
"step": 385
},
{
"epoch": 1.8418535127055307,
"grad_norm": 0.09253710326481404,
"learning_rate": 3.189089713833226e-08,
"loss": 0.6564,
"step": 386
},
{
"epoch": 1.8466367713004486,
"grad_norm": 0.09295026609135121,
"learning_rate": 2.9938710740262884e-08,
"loss": 0.6286,
"step": 387
},
{
"epoch": 1.8514200298953662,
"grad_norm": 0.0931563883337063,
"learning_rate": 2.8047270056005934e-08,
"loss": 0.6431,
"step": 388
},
{
"epoch": 1.856203288490284,
"grad_norm": 0.10071203031568553,
"learning_rate": 2.6216693528505195e-08,
"loss": 0.6419,
"step": 389
},
{
"epoch": 1.8609865470852018,
"grad_norm": 0.0926672982724561,
"learning_rate": 2.4447095789360884e-08,
"loss": 0.6426,
"step": 390
},
{
"epoch": 1.8657698056801197,
"grad_norm": 0.10839157436286975,
"learning_rate": 2.2738587651651487e-08,
"loss": 0.6418,
"step": 391
},
{
"epoch": 1.8705530642750374,
"grad_norm": 0.09452841812388145,
"learning_rate": 2.109127610299466e-08,
"loss": 0.6534,
"step": 392
},
{
"epoch": 1.875336322869955,
"grad_norm": 0.09059164967961951,
"learning_rate": 1.950526429884769e-08,
"loss": 0.6385,
"step": 393
},
{
"epoch": 1.8801195814648728,
"grad_norm": 0.09541292286319235,
"learning_rate": 1.7980651556048e-08,
"loss": 0.6533,
"step": 394
},
{
"epoch": 1.8849028400597907,
"grad_norm": 0.09352871341544354,
"learning_rate": 1.6517533346593226e-08,
"loss": 0.6533,
"step": 395
},
{
"epoch": 1.8896860986547086,
"grad_norm": 0.09830540898676399,
"learning_rate": 1.5116001291663462e-08,
"loss": 0.686,
"step": 396
},
{
"epoch": 1.8944693572496263,
"grad_norm": 0.09186784336874675,
"learning_rate": 1.3776143155883491e-08,
"loss": 0.6265,
"step": 397
},
{
"epoch": 1.899252615844544,
"grad_norm": 0.0903805903035563,
"learning_rate": 1.2498042841827317e-08,
"loss": 0.6444,
"step": 398
},
{
"epoch": 1.9040358744394619,
"grad_norm": 0.09251729842752435,
"learning_rate": 1.128178038476324e-08,
"loss": 0.643,
"step": 399
},
{
"epoch": 1.9088191330343798,
"grad_norm": 0.08909847951509034,
"learning_rate": 1.0127431947643316e-08,
"loss": 0.643,
"step": 400
},
{
"epoch": 1.9136023916292975,
"grad_norm": 0.09779029431433935,
"learning_rate": 9.035069816332619e-09,
"loss": 0.6312,
"step": 401
},
{
"epoch": 1.9183856502242151,
"grad_norm": 0.09602092233428558,
"learning_rate": 8.004762395083963e-09,
"loss": 0.629,
"step": 402
},
{
"epoch": 1.923168908819133,
"grad_norm": 0.09003448698278545,
"learning_rate": 7.036574202253343e-09,
"loss": 0.6706,
"step": 403
},
{
"epoch": 1.927952167414051,
"grad_norm": 0.09531787472090986,
"learning_rate": 6.130565866260484e-09,
"loss": 0.65,
"step": 404
},
{
"epoch": 1.9327354260089686,
"grad_norm": 0.09179251340184746,
"learning_rate": 5.286794121791782e-09,
"loss": 0.6574,
"step": 405
},
{
"epoch": 1.9375186846038863,
"grad_norm": 0.09493544791044316,
"learning_rate": 4.5053118062478025e-09,
"loss": 0.6322,
"step": 406
},
{
"epoch": 1.9423019431988042,
"grad_norm": 0.09306468796228341,
"learning_rate": 3.786167856434375e-09,
"loss": 0.6634,
"step": 407
},
{
"epoch": 1.9470852017937221,
"grad_norm": 0.09006826318963117,
"learning_rate": 3.1294073054987102e-09,
"loss": 0.6418,
"step": 408
},
{
"epoch": 1.9518684603886398,
"grad_norm": 0.09638156976673805,
"learning_rate": 2.5350712801084363e-09,
"loss": 0.631,
"step": 409
},
{
"epoch": 1.9566517189835575,
"grad_norm": 0.09585138354438733,
"learning_rate": 2.003196997877099e-09,
"loss": 0.6405,
"step": 410
},
{
"epoch": 1.9614349775784752,
"grad_norm": 0.0982765637161277,
"learning_rate": 1.5338177650332517e-09,
"loss": 0.631,
"step": 411
},
{
"epoch": 1.966218236173393,
"grad_norm": 0.0924075594922873,
"learning_rate": 1.1269629743346777e-09,
"loss": 0.6433,
"step": 412
},
{
"epoch": 1.971001494768311,
"grad_norm": 0.09407079001673903,
"learning_rate": 7.826581032279734e-10,
"loss": 0.6422,
"step": 413
},
{
"epoch": 1.9757847533632287,
"grad_norm": 0.09103323653600585,
"learning_rate": 5.00924712252937e-10,
"loss": 0.6645,
"step": 414
},
{
"epoch": 1.9805680119581464,
"grad_norm": 0.09999729799669839,
"learning_rate": 2.8178044369286945e-10,
"loss": 0.6495,
"step": 415
},
{
"epoch": 1.9853512705530643,
"grad_norm": 0.0958229669734574,
"learning_rate": 1.2523902046934763e-10,
"loss": 0.6238,
"step": 416
},
{
"epoch": 1.9901345291479822,
"grad_norm": 0.08983387781419207,
"learning_rate": 3.131024528302273e-11,
"loss": 0.6478,
"step": 417
},
{
"epoch": 1.9949177877428999,
"grad_norm": 0.09621386225221452,
"learning_rate": 0.0,
"loss": 0.6557,
"step": 418
},
{
"epoch": 1.9949177877428999,
"step": 418,
"total_flos": 862605439369216.0,
"train_loss": 0.715426175948678,
"train_runtime": 10328.0995,
"train_samples_per_second": 5.181,
"train_steps_per_second": 0.04
}
],
"logging_steps": 1,
"max_steps": 418,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 862605439369216.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}