{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999100800287744,
"eval_steps": 500,
"global_step": 2780,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0003596798849024368,
"grad_norm": 24.657894057885567,
"learning_rate": 2.3809523809523807e-08,
"loss": 1.9628,
"step": 1
},
{
"epoch": 0.0007193597698048736,
"grad_norm": 30.667354093274785,
"learning_rate": 4.7619047619047613e-08,
"loss": 2.072,
"step": 2
},
{
"epoch": 0.0010790396547073105,
"grad_norm": 24.314787012232703,
"learning_rate": 7.142857142857142e-08,
"loss": 1.8467,
"step": 3
},
{
"epoch": 0.0014387195396097473,
"grad_norm": 24.64290123054978,
"learning_rate": 9.523809523809523e-08,
"loss": 1.6867,
"step": 4
},
{
"epoch": 0.001798399424512184,
"grad_norm": 35.88136055532647,
"learning_rate": 1.1904761904761903e-07,
"loss": 1.806,
"step": 5
},
{
"epoch": 0.002158079309414621,
"grad_norm": 27.677265995459226,
"learning_rate": 1.4285714285714285e-07,
"loss": 1.8982,
"step": 6
},
{
"epoch": 0.0025177591943170577,
"grad_norm": 23.480436398186388,
"learning_rate": 1.6666666666666665e-07,
"loss": 1.9013,
"step": 7
},
{
"epoch": 0.0028774390792194945,
"grad_norm": 24.984895340397184,
"learning_rate": 1.9047619047619045e-07,
"loss": 1.841,
"step": 8
},
{
"epoch": 0.0032371189641219314,
"grad_norm": 27.260529495430646,
"learning_rate": 2.1428571428571426e-07,
"loss": 1.6571,
"step": 9
},
{
"epoch": 0.003596798849024368,
"grad_norm": 27.54796671224951,
"learning_rate": 2.3809523809523806e-07,
"loss": 2.0824,
"step": 10
},
{
"epoch": 0.003956478733926805,
"grad_norm": 33.69190420427175,
"learning_rate": 2.619047619047619e-07,
"loss": 1.8739,
"step": 11
},
{
"epoch": 0.004316158618829242,
"grad_norm": 30.568165478816997,
"learning_rate": 2.857142857142857e-07,
"loss": 1.7997,
"step": 12
},
{
"epoch": 0.004675838503731679,
"grad_norm": 27.70479526484769,
"learning_rate": 3.095238095238095e-07,
"loss": 1.9171,
"step": 13
},
{
"epoch": 0.0050355183886341155,
"grad_norm": 24.018535717962276,
"learning_rate": 3.333333333333333e-07,
"loss": 1.8336,
"step": 14
},
{
"epoch": 0.005395198273536552,
"grad_norm": 26.97998601552695,
"learning_rate": 3.5714285714285716e-07,
"loss": 1.7935,
"step": 15
},
{
"epoch": 0.005754878158438989,
"grad_norm": 24.331417458830614,
"learning_rate": 3.809523809523809e-07,
"loss": 1.9425,
"step": 16
},
{
"epoch": 0.006114558043341426,
"grad_norm": 29.93694414395371,
"learning_rate": 4.0476190476190476e-07,
"loss": 2.0054,
"step": 17
},
{
"epoch": 0.006474237928243863,
"grad_norm": 24.869232469336673,
"learning_rate": 4.285714285714285e-07,
"loss": 1.8427,
"step": 18
},
{
"epoch": 0.0068339178131462995,
"grad_norm": 33.75206502968894,
"learning_rate": 4.5238095238095237e-07,
"loss": 1.8214,
"step": 19
},
{
"epoch": 0.007193597698048736,
"grad_norm": 55.90355991587864,
"learning_rate": 4.761904761904761e-07,
"loss": 1.917,
"step": 20
},
{
"epoch": 0.007553277582951173,
"grad_norm": 41.74883319338933,
"learning_rate": 5e-07,
"loss": 1.6926,
"step": 21
},
{
"epoch": 0.00791295746785361,
"grad_norm": 25.198019181519257,
"learning_rate": 5.238095238095238e-07,
"loss": 1.8096,
"step": 22
},
{
"epoch": 0.008272637352756048,
"grad_norm": 30.935660001988825,
"learning_rate": 5.476190476190477e-07,
"loss": 1.8552,
"step": 23
},
{
"epoch": 0.008632317237658484,
"grad_norm": 27.76638468602567,
"learning_rate": 5.714285714285714e-07,
"loss": 1.8051,
"step": 24
},
{
"epoch": 0.008991997122560921,
"grad_norm": 31.429674327113258,
"learning_rate": 5.952380952380952e-07,
"loss": 1.8426,
"step": 25
},
{
"epoch": 0.009351677007463357,
"grad_norm": 25.38911642996415,
"learning_rate": 6.19047619047619e-07,
"loss": 1.8152,
"step": 26
},
{
"epoch": 0.009711356892365795,
"grad_norm": 26.199219756218067,
"learning_rate": 6.428571428571429e-07,
"loss": 1.915,
"step": 27
},
{
"epoch": 0.010071036777268231,
"grad_norm": 49.64436899684689,
"learning_rate": 6.666666666666666e-07,
"loss": 1.61,
"step": 28
},
{
"epoch": 0.010430716662170669,
"grad_norm": 25.55310912363886,
"learning_rate": 6.904761904761904e-07,
"loss": 1.9478,
"step": 29
},
{
"epoch": 0.010790396547073105,
"grad_norm": 29.322538544916103,
"learning_rate": 7.142857142857143e-07,
"loss": 1.9344,
"step": 30
},
{
"epoch": 0.011150076431975542,
"grad_norm": 40.34553988667848,
"learning_rate": 7.380952380952381e-07,
"loss": 1.7039,
"step": 31
},
{
"epoch": 0.011509756316877978,
"grad_norm": 31.75709179665269,
"learning_rate": 7.619047619047618e-07,
"loss": 1.8525,
"step": 32
},
{
"epoch": 0.011869436201780416,
"grad_norm": 28.65644980651086,
"learning_rate": 7.857142857142856e-07,
"loss": 1.6667,
"step": 33
},
{
"epoch": 0.012229116086682852,
"grad_norm": 25.909345062352944,
"learning_rate": 8.095238095238095e-07,
"loss": 1.74,
"step": 34
},
{
"epoch": 0.01258879597158529,
"grad_norm": 26.409036934947846,
"learning_rate": 8.333333333333333e-07,
"loss": 1.9297,
"step": 35
},
{
"epoch": 0.012948475856487725,
"grad_norm": 31.591347593753483,
"learning_rate": 8.57142857142857e-07,
"loss": 1.9569,
"step": 36
},
{
"epoch": 0.013308155741390163,
"grad_norm": 27.929599025076882,
"learning_rate": 8.809523809523809e-07,
"loss": 1.7864,
"step": 37
},
{
"epoch": 0.013667835626292599,
"grad_norm": 25.184971911331967,
"learning_rate": 9.047619047619047e-07,
"loss": 1.7775,
"step": 38
},
{
"epoch": 0.014027515511195037,
"grad_norm": 37.127863696545276,
"learning_rate": 9.285714285714285e-07,
"loss": 1.6813,
"step": 39
},
{
"epoch": 0.014387195396097473,
"grad_norm": 35.24940880196834,
"learning_rate": 9.523809523809522e-07,
"loss": 1.6952,
"step": 40
},
{
"epoch": 0.01474687528099991,
"grad_norm": 37.49249332791816,
"learning_rate": 9.761904761904762e-07,
"loss": 1.7087,
"step": 41
},
{
"epoch": 0.015106555165902346,
"grad_norm": 26.412721137721803,
"learning_rate": 1e-06,
"loss": 1.6767,
"step": 42
},
{
"epoch": 0.015466235050804784,
"grad_norm": 25.768557436561615,
"learning_rate": 1.0238095238095238e-06,
"loss": 2.1104,
"step": 43
},
{
"epoch": 0.01582591493570722,
"grad_norm": 23.10779963872319,
"learning_rate": 1.0476190476190476e-06,
"loss": 1.7659,
"step": 44
},
{
"epoch": 0.016185594820609658,
"grad_norm": 26.556637934978006,
"learning_rate": 1.0714285714285714e-06,
"loss": 1.7681,
"step": 45
},
{
"epoch": 0.016545274705512095,
"grad_norm": 28.212255142491433,
"learning_rate": 1.0952380952380954e-06,
"loss": 1.6662,
"step": 46
},
{
"epoch": 0.01690495459041453,
"grad_norm": 25.607079101859597,
"learning_rate": 1.119047619047619e-06,
"loss": 1.8303,
"step": 47
},
{
"epoch": 0.017264634475316967,
"grad_norm": 21.710874177286694,
"learning_rate": 1.1428571428571428e-06,
"loss": 1.6683,
"step": 48
},
{
"epoch": 0.017624314360219405,
"grad_norm": 39.50586583131758,
"learning_rate": 1.1666666666666668e-06,
"loss": 1.8498,
"step": 49
},
{
"epoch": 0.017983994245121843,
"grad_norm": 24.230732356415412,
"learning_rate": 1.1904761904761904e-06,
"loss": 1.6856,
"step": 50
},
{
"epoch": 0.018343674130024277,
"grad_norm": 25.21450860021096,
"learning_rate": 1.2142857142857142e-06,
"loss": 1.7257,
"step": 51
},
{
"epoch": 0.018703354014926715,
"grad_norm": 21.76255172359649,
"learning_rate": 1.238095238095238e-06,
"loss": 1.7963,
"step": 52
},
{
"epoch": 0.019063033899829152,
"grad_norm": 24.898114689531777,
"learning_rate": 1.2619047619047618e-06,
"loss": 1.7276,
"step": 53
},
{
"epoch": 0.01942271378473159,
"grad_norm": 23.3232999678589,
"learning_rate": 1.2857142857142858e-06,
"loss": 1.7695,
"step": 54
},
{
"epoch": 0.019782393669634024,
"grad_norm": 20.509465613870823,
"learning_rate": 1.3095238095238094e-06,
"loss": 1.6424,
"step": 55
},
{
"epoch": 0.020142073554536462,
"grad_norm": 20.398195215022653,
"learning_rate": 1.3333333333333332e-06,
"loss": 1.717,
"step": 56
},
{
"epoch": 0.0205017534394389,
"grad_norm": 31.327297532044103,
"learning_rate": 1.3571428571428572e-06,
"loss": 1.7493,
"step": 57
},
{
"epoch": 0.020861433324341337,
"grad_norm": 24.707276082229445,
"learning_rate": 1.3809523809523808e-06,
"loss": 1.5969,
"step": 58
},
{
"epoch": 0.02122111320924377,
"grad_norm": 28.030731682265625,
"learning_rate": 1.4047619047619046e-06,
"loss": 1.5655,
"step": 59
},
{
"epoch": 0.02158079309414621,
"grad_norm": 20.344002649738734,
"learning_rate": 1.4285714285714286e-06,
"loss": 1.6057,
"step": 60
},
{
"epoch": 0.021940472979048647,
"grad_norm": 22.72060239339675,
"learning_rate": 1.4523809523809522e-06,
"loss": 1.5706,
"step": 61
},
{
"epoch": 0.022300152863951084,
"grad_norm": 17.85007758440868,
"learning_rate": 1.4761904761904762e-06,
"loss": 1.4835,
"step": 62
},
{
"epoch": 0.02265983274885352,
"grad_norm": 20.159874324160384,
"learning_rate": 1.5e-06,
"loss": 1.5554,
"step": 63
},
{
"epoch": 0.023019512633755956,
"grad_norm": 19.439259199449676,
"learning_rate": 1.5238095238095236e-06,
"loss": 1.6593,
"step": 64
},
{
"epoch": 0.023379192518658394,
"grad_norm": 34.04377995684072,
"learning_rate": 1.5476190476190476e-06,
"loss": 1.7608,
"step": 65
},
{
"epoch": 0.02373887240356083,
"grad_norm": 77.75542556402162,
"learning_rate": 1.5714285714285712e-06,
"loss": 1.4184,
"step": 66
},
{
"epoch": 0.02409855228846327,
"grad_norm": 17.931373921754478,
"learning_rate": 1.5952380952380953e-06,
"loss": 1.4383,
"step": 67
},
{
"epoch": 0.024458232173365704,
"grad_norm": 47.325411387811286,
"learning_rate": 1.619047619047619e-06,
"loss": 1.607,
"step": 68
},
{
"epoch": 0.02481791205826814,
"grad_norm": 19.14892383129688,
"learning_rate": 1.6428571428571426e-06,
"loss": 1.672,
"step": 69
},
{
"epoch": 0.02517759194317058,
"grad_norm": 20.795136990227526,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.5908,
"step": 70
},
{
"epoch": 0.025537271828073017,
"grad_norm": 245.7142651154435,
"learning_rate": 1.6904761904761905e-06,
"loss": 1.5133,
"step": 71
},
{
"epoch": 0.02589695171297545,
"grad_norm": 18.120188391248455,
"learning_rate": 1.714285714285714e-06,
"loss": 1.4675,
"step": 72
},
{
"epoch": 0.02625663159787789,
"grad_norm": 37.54469099426714,
"learning_rate": 1.738095238095238e-06,
"loss": 1.476,
"step": 73
},
{
"epoch": 0.026616311482780326,
"grad_norm": 20.079174089707283,
"learning_rate": 1.7619047619047619e-06,
"loss": 1.6322,
"step": 74
},
{
"epoch": 0.026975991367682764,
"grad_norm": 26.452336796901825,
"learning_rate": 1.7857142857142857e-06,
"loss": 1.494,
"step": 75
},
{
"epoch": 0.027335671252585198,
"grad_norm": 15.633645614281596,
"learning_rate": 1.8095238095238095e-06,
"loss": 1.4588,
"step": 76
},
{
"epoch": 0.027695351137487636,
"grad_norm": 16.04992319096084,
"learning_rate": 1.833333333333333e-06,
"loss": 1.3675,
"step": 77
},
{
"epoch": 0.028055031022390074,
"grad_norm": 14.054679148021458,
"learning_rate": 1.857142857142857e-06,
"loss": 1.4436,
"step": 78
},
{
"epoch": 0.02841471090729251,
"grad_norm": 34.68375069175576,
"learning_rate": 1.8809523809523809e-06,
"loss": 1.4779,
"step": 79
},
{
"epoch": 0.028774390792194945,
"grad_norm": 17.221014096550647,
"learning_rate": 1.9047619047619045e-06,
"loss": 1.4736,
"step": 80
},
{
"epoch": 0.029134070677097383,
"grad_norm": 25.417838482669037,
"learning_rate": 1.9285714285714285e-06,
"loss": 1.4455,
"step": 81
},
{
"epoch": 0.02949375056199982,
"grad_norm": 22.554088669119682,
"learning_rate": 1.9523809523809523e-06,
"loss": 1.3496,
"step": 82
},
{
"epoch": 0.02985343044690226,
"grad_norm": 13.2745362959651,
"learning_rate": 1.976190476190476e-06,
"loss": 1.4045,
"step": 83
},
{
"epoch": 0.030213110331804693,
"grad_norm": 18.116068926216787,
"learning_rate": 2e-06,
"loss": 1.4533,
"step": 84
},
{
"epoch": 0.03057279021670713,
"grad_norm": 11.651678918563489,
"learning_rate": 1.9999993210623e-06,
"loss": 1.3389,
"step": 85
},
{
"epoch": 0.030932470101609568,
"grad_norm": 18.39855056566554,
"learning_rate": 1.9999972842501214e-06,
"loss": 1.486,
"step": 86
},
{
"epoch": 0.031292149986512,
"grad_norm": 43.28759772676706,
"learning_rate": 1.9999938895662306e-06,
"loss": 1.4033,
"step": 87
},
{
"epoch": 0.03165182987141444,
"grad_norm": 20.562126556479306,
"learning_rate": 1.999989137015237e-06,
"loss": 1.3676,
"step": 88
},
{
"epoch": 0.03201150975631688,
"grad_norm": 17.960377843247592,
"learning_rate": 1.999983026603594e-06,
"loss": 1.4498,
"step": 89
},
{
"epoch": 0.032371189641219315,
"grad_norm": 62.91438352499693,
"learning_rate": 1.9999755583395984e-06,
"loss": 1.5197,
"step": 90
},
{
"epoch": 0.03273086952612175,
"grad_norm": 15.735368933888223,
"learning_rate": 1.9999667322333914e-06,
"loss": 1.3134,
"step": 91
},
{
"epoch": 0.03309054941102419,
"grad_norm": 25.610197392574353,
"learning_rate": 1.9999565482969577e-06,
"loss": 1.3616,
"step": 92
},
{
"epoch": 0.03345022929592663,
"grad_norm": 16.073082498652944,
"learning_rate": 1.999945006544126e-06,
"loss": 1.3045,
"step": 93
},
{
"epoch": 0.03380990918082906,
"grad_norm": 41.00521294340134,
"learning_rate": 1.9999321069905685e-06,
"loss": 1.4665,
"step": 94
},
{
"epoch": 0.0341695890657315,
"grad_norm": 22.559526486442397,
"learning_rate": 1.999917849653801e-06,
"loss": 1.4456,
"step": 95
},
{
"epoch": 0.034529268950633935,
"grad_norm": 19.801315354513516,
"learning_rate": 1.999902234553183e-06,
"loss": 1.354,
"step": 96
},
{
"epoch": 0.03488894883553637,
"grad_norm": 14.632262747530882,
"learning_rate": 1.9998852617099185e-06,
"loss": 1.2639,
"step": 97
},
{
"epoch": 0.03524862872043881,
"grad_norm": 15.246448969933628,
"learning_rate": 1.9998669311470544e-06,
"loss": 1.2934,
"step": 98
},
{
"epoch": 0.03560830860534125,
"grad_norm": 21.648660776413315,
"learning_rate": 1.9998472428894807e-06,
"loss": 1.2827,
"step": 99
},
{
"epoch": 0.035967988490243685,
"grad_norm": 44.669300603767674,
"learning_rate": 1.9998261969639324e-06,
"loss": 1.3849,
"step": 100
},
{
"epoch": 0.03632766837514612,
"grad_norm": 13.223212712604015,
"learning_rate": 1.9998037933989864e-06,
"loss": 1.3349,
"step": 101
},
{
"epoch": 0.036687348260048554,
"grad_norm": 26.408214197921858,
"learning_rate": 1.9997800322250646e-06,
"loss": 1.3088,
"step": 102
},
{
"epoch": 0.03704702814495099,
"grad_norm": 39.847526841472714,
"learning_rate": 1.9997549134744313e-06,
"loss": 1.2439,
"step": 103
},
{
"epoch": 0.03740670802985343,
"grad_norm": 19.032937778796384,
"learning_rate": 1.9997284371811954e-06,
"loss": 1.3567,
"step": 104
},
{
"epoch": 0.03776638791475587,
"grad_norm": 16.573596403359943,
"learning_rate": 1.9997006033813074e-06,
"loss": 1.3914,
"step": 105
},
{
"epoch": 0.038126067799658304,
"grad_norm": 15.605607152976063,
"learning_rate": 1.9996714121125624e-06,
"loss": 1.3379,
"step": 106
},
{
"epoch": 0.03848574768456074,
"grad_norm": 25.202668710987126,
"learning_rate": 1.9996408634145993e-06,
"loss": 1.2859,
"step": 107
},
{
"epoch": 0.03884542756946318,
"grad_norm": 17.576552133251525,
"learning_rate": 1.9996089573288983e-06,
"loss": 1.3148,
"step": 108
},
{
"epoch": 0.03920510745436562,
"grad_norm": 64.99202894639889,
"learning_rate": 1.9995756938987842e-06,
"loss": 1.2813,
"step": 109
},
{
"epoch": 0.03956478733926805,
"grad_norm": 15.179557087516176,
"learning_rate": 1.9995410731694255e-06,
"loss": 1.3602,
"step": 110
},
{
"epoch": 0.039924467224170486,
"grad_norm": 42.7405319283968,
"learning_rate": 1.9995050951878317e-06,
"loss": 1.2673,
"step": 111
},
{
"epoch": 0.040284147109072924,
"grad_norm": 12.367805261335855,
"learning_rate": 1.9994677600028566e-06,
"loss": 1.2554,
"step": 112
},
{
"epoch": 0.04064382699397536,
"grad_norm": 91.0547066038717,
"learning_rate": 1.9994290676651974e-06,
"loss": 1.3692,
"step": 113
},
{
"epoch": 0.0410035068788778,
"grad_norm": 16.55349276096106,
"learning_rate": 1.9993890182273932e-06,
"loss": 1.2412,
"step": 114
},
{
"epoch": 0.04136318676378024,
"grad_norm": 17.297600766623404,
"learning_rate": 1.9993476117438255e-06,
"loss": 1.2596,
"step": 115
},
{
"epoch": 0.041722866648682674,
"grad_norm": 16.00366028375049,
"learning_rate": 1.9993048482707196e-06,
"loss": 1.2698,
"step": 116
},
{
"epoch": 0.04208254653358511,
"grad_norm": 15.196236021120892,
"learning_rate": 1.9992607278661437e-06,
"loss": 1.1785,
"step": 117
},
{
"epoch": 0.04244222641848754,
"grad_norm": 22.149541341272936,
"learning_rate": 1.999215250590006e-06,
"loss": 1.3311,
"step": 118
},
{
"epoch": 0.04280190630338998,
"grad_norm": 21.977962462219512,
"learning_rate": 1.9991684165040613e-06,
"loss": 1.2641,
"step": 119
},
{
"epoch": 0.04316158618829242,
"grad_norm": 14.111609395101187,
"learning_rate": 1.999120225671903e-06,
"loss": 1.1405,
"step": 120
},
{
"epoch": 0.043521266073194856,
"grad_norm": 20.62292555056753,
"learning_rate": 1.999070678158968e-06,
"loss": 1.3071,
"step": 121
},
{
"epoch": 0.043880945958097294,
"grad_norm": 23.38451805462871,
"learning_rate": 1.9990197740325364e-06,
"loss": 1.108,
"step": 122
},
{
"epoch": 0.04424062584299973,
"grad_norm": 13.558092001639237,
"learning_rate": 1.9989675133617293e-06,
"loss": 1.2362,
"step": 123
},
{
"epoch": 0.04460030572790217,
"grad_norm": 14.905071611078164,
"learning_rate": 1.9989138962175105e-06,
"loss": 1.1732,
"step": 124
},
{
"epoch": 0.04495998561280461,
"grad_norm": 39.48608516481057,
"learning_rate": 1.9988589226726847e-06,
"loss": 1.2309,
"step": 125
},
{
"epoch": 0.04531966549770704,
"grad_norm": 12.779123007395382,
"learning_rate": 1.9988025928019e-06,
"loss": 1.1954,
"step": 126
},
{
"epoch": 0.045679345382609475,
"grad_norm": 17.35216140700479,
"learning_rate": 1.9987449066816448e-06,
"loss": 1.1587,
"step": 127
},
{
"epoch": 0.04603902526751191,
"grad_norm": 18.05552910414134,
"learning_rate": 1.99868586439025e-06,
"loss": 1.1195,
"step": 128
},
{
"epoch": 0.04639870515241435,
"grad_norm": 25.563244537484564,
"learning_rate": 1.9986254660078872e-06,
"loss": 1.0623,
"step": 129
},
{
"epoch": 0.04675838503731679,
"grad_norm": 11.114112784403885,
"learning_rate": 1.9985637116165705e-06,
"loss": 1.1891,
"step": 130
},
{
"epoch": 0.047118064922219226,
"grad_norm": 11.694158316033255,
"learning_rate": 1.998500601300154e-06,
"loss": 1.2493,
"step": 131
},
{
"epoch": 0.04747774480712166,
"grad_norm": 18.678734986125306,
"learning_rate": 1.998436135144334e-06,
"loss": 1.1552,
"step": 132
},
{
"epoch": 0.0478374246920241,
"grad_norm": 11.975265356576603,
"learning_rate": 1.998370313236648e-06,
"loss": 1.1419,
"step": 133
},
{
"epoch": 0.04819710457692654,
"grad_norm": 11.566190390478454,
"learning_rate": 1.998303135666473e-06,
"loss": 1.1454,
"step": 134
},
{
"epoch": 0.04855678446182897,
"grad_norm": 9.162200598633703,
"learning_rate": 1.9982346025250284e-06,
"loss": 1.1242,
"step": 135
},
{
"epoch": 0.04891646434673141,
"grad_norm": 11.9348502171034,
"learning_rate": 1.9981647139053736e-06,
"loss": 1.12,
"step": 136
},
{
"epoch": 0.049276144231633845,
"grad_norm": 22.085669626967896,
"learning_rate": 1.998093469902408e-06,
"loss": 1.0606,
"step": 137
},
{
"epoch": 0.04963582411653628,
"grad_norm": 16.735251001069905,
"learning_rate": 1.998020870612873e-06,
"loss": 1.0911,
"step": 138
},
{
"epoch": 0.04999550400143872,
"grad_norm": 9.426173644069113,
"learning_rate": 1.997946916135349e-06,
"loss": 1.112,
"step": 139
},
{
"epoch": 0.05035518388634116,
"grad_norm": 20.191952895136268,
"learning_rate": 1.9978716065702566e-06,
"loss": 1.1656,
"step": 140
},
{
"epoch": 0.050714863771243596,
"grad_norm": 13.677037264490226,
"learning_rate": 1.9977949420198572e-06,
"loss": 1.0637,
"step": 141
},
{
"epoch": 0.05107454365614603,
"grad_norm": 12.033881085585996,
"learning_rate": 1.997716922588252e-06,
"loss": 1.1782,
"step": 142
},
{
"epoch": 0.051434223541048464,
"grad_norm": 14.640616921424659,
"learning_rate": 1.9976375483813812e-06,
"loss": 1.0661,
"step": 143
},
{
"epoch": 0.0517939034259509,
"grad_norm": 17.648574437085983,
"learning_rate": 1.9975568195070253e-06,
"loss": 1.101,
"step": 144
},
{
"epoch": 0.05215358331085334,
"grad_norm": 12.44101732999933,
"learning_rate": 1.997474736074804e-06,
"loss": 1.0811,
"step": 145
},
{
"epoch": 0.05251326319575578,
"grad_norm": 17.633040231320773,
"learning_rate": 1.997391298196176e-06,
"loss": 1.024,
"step": 146
},
{
"epoch": 0.052872943080658215,
"grad_norm": 10.134440849280935,
"learning_rate": 1.99730650598444e-06,
"loss": 1.0928,
"step": 147
},
{
"epoch": 0.05323262296556065,
"grad_norm": 77.64793512021097,
"learning_rate": 1.9972203595547333e-06,
"loss": 1.0911,
"step": 148
},
{
"epoch": 0.05359230285046309,
"grad_norm": 19.966471706357808,
"learning_rate": 1.997132859024032e-06,
"loss": 1.0408,
"step": 149
},
{
"epoch": 0.05395198273536553,
"grad_norm": 11.447878411704954,
"learning_rate": 1.99704400451115e-06,
"loss": 1.0656,
"step": 150
},
{
"epoch": 0.05431166262026796,
"grad_norm": 41.450180842662306,
"learning_rate": 1.9969537961367422e-06,
"loss": 1.1232,
"step": 151
},
{
"epoch": 0.054671342505170396,
"grad_norm": 26.578399242870077,
"learning_rate": 1.9968622340232992e-06,
"loss": 1.0539,
"step": 152
},
{
"epoch": 0.055031022390072834,
"grad_norm": 9.67652417545913,
"learning_rate": 1.9967693182951516e-06,
"loss": 1.0482,
"step": 153
},
{
"epoch": 0.05539070227497527,
"grad_norm": 10.301154397842641,
"learning_rate": 1.996675049078467e-06,
"loss": 1.0324,
"step": 154
},
{
"epoch": 0.05575038215987771,
"grad_norm": 10.804979711551514,
"learning_rate": 1.9965794265012514e-06,
"loss": 1.0874,
"step": 155
},
{
"epoch": 0.05611006204478015,
"grad_norm": 29.866331694883616,
"learning_rate": 1.9964824506933476e-06,
"loss": 0.9488,
"step": 156
},
{
"epoch": 0.056469741929682585,
"grad_norm": 16.536655370052248,
"learning_rate": 1.9963841217864383e-06,
"loss": 1.1381,
"step": 157
},
{
"epoch": 0.05682942181458502,
"grad_norm": 8.454711341009409,
"learning_rate": 1.9962844399140403e-06,
"loss": 1.0745,
"step": 158
},
{
"epoch": 0.05718910169948745,
"grad_norm": 10.592894543320343,
"learning_rate": 1.99618340521151e-06,
"loss": 1.1185,
"step": 159
},
{
"epoch": 0.05754878158438989,
"grad_norm": 15.744873745417562,
"learning_rate": 1.99608101781604e-06,
"loss": 1.0278,
"step": 160
},
{
"epoch": 0.05790846146929233,
"grad_norm": 35.28796719476405,
"learning_rate": 1.995977277866659e-06,
"loss": 1.0456,
"step": 161
},
{
"epoch": 0.058268141354194766,
"grad_norm": 8.937312674896397,
"learning_rate": 1.9958721855042337e-06,
"loss": 1.0146,
"step": 162
},
{
"epoch": 0.058627821239097204,
"grad_norm": 15.188344915353202,
"learning_rate": 1.9957657408714654e-06,
"loss": 1.0028,
"step": 163
},
{
"epoch": 0.05898750112399964,
"grad_norm": 21.617655880303055,
"learning_rate": 1.995657944112894e-06,
"loss": 1.0596,
"step": 164
},
{
"epoch": 0.05934718100890208,
"grad_norm": 24.033180560114804,
"learning_rate": 1.995548795374893e-06,
"loss": 0.9765,
"step": 165
},
{
"epoch": 0.05970686089380452,
"grad_norm": 18.980234992969166,
"learning_rate": 1.9954382948056734e-06,
"loss": 1.0193,
"step": 166
},
{
"epoch": 0.06006654077870695,
"grad_norm": 10.685604514771951,
"learning_rate": 1.9953264425552803e-06,
"loss": 1.0075,
"step": 167
},
{
"epoch": 0.060426220663609385,
"grad_norm": 20.39061216772055,
"learning_rate": 1.9952132387755962e-06,
"loss": 1.1359,
"step": 168
},
{
"epoch": 0.06078590054851182,
"grad_norm": 94.23235824636771,
"learning_rate": 1.995098683620337e-06,
"loss": 0.9968,
"step": 169
},
{
"epoch": 0.06114558043341426,
"grad_norm": 20.38255400065285,
"learning_rate": 1.994982777245055e-06,
"loss": 1.0343,
"step": 170
},
{
"epoch": 0.0615052603183167,
"grad_norm": 29.676079933187594,
"learning_rate": 1.994865519807136e-06,
"loss": 1.0832,
"step": 171
},
{
"epoch": 0.061864940203219136,
"grad_norm": 12.901534441904019,
"learning_rate": 1.9947469114658014e-06,
"loss": 1.0846,
"step": 172
},
{
"epoch": 0.062224620088121574,
"grad_norm": 10.486038081142027,
"learning_rate": 1.9946269523821066e-06,
"loss": 1.0267,
"step": 173
},
{
"epoch": 0.062584299973024,
"grad_norm": 22.73799500944774,
"learning_rate": 1.9945056427189404e-06,
"loss": 0.9535,
"step": 174
},
{
"epoch": 0.06294397985792645,
"grad_norm": 16.85081562222787,
"learning_rate": 1.994382982641027e-06,
"loss": 0.9932,
"step": 175
},
{
"epoch": 0.06330365974282888,
"grad_norm": 16.251880100755784,
"learning_rate": 1.994258972314923e-06,
"loss": 1.0392,
"step": 176
},
{
"epoch": 0.06366333962773132,
"grad_norm": 32.41304280526982,
"learning_rate": 1.994133611909019e-06,
"loss": 1.0575,
"step": 177
},
{
"epoch": 0.06402301951263376,
"grad_norm": 14.947138189495567,
"learning_rate": 1.994006901593539e-06,
"loss": 0.9463,
"step": 178
},
{
"epoch": 0.06438269939753619,
"grad_norm": 10.534537683564043,
"learning_rate": 1.99387884154054e-06,
"loss": 1.0129,
"step": 179
},
{
"epoch": 0.06474237928243863,
"grad_norm": 18.770367176923315,
"learning_rate": 1.993749431923911e-06,
"loss": 1.0142,
"step": 180
},
{
"epoch": 0.06510205916734106,
"grad_norm": 11.869627533386478,
"learning_rate": 1.993618672919375e-06,
"loss": 1.0921,
"step": 181
},
{
"epoch": 0.0654617390522435,
"grad_norm": 12.213531444916233,
"learning_rate": 1.993486564704486e-06,
"loss": 0.9263,
"step": 182
},
{
"epoch": 0.06582141893714594,
"grad_norm": 8.304800269697004,
"learning_rate": 1.9933531074586295e-06,
"loss": 1.0208,
"step": 183
},
{
"epoch": 0.06618109882204838,
"grad_norm": 11.210120271679068,
"learning_rate": 1.9932183013630252e-06,
"loss": 0.949,
"step": 184
},
{
"epoch": 0.06654077870695081,
"grad_norm": 10.674834342779953,
"learning_rate": 1.993082146600723e-06,
"loss": 0.9653,
"step": 185
},
{
"epoch": 0.06690045859185326,
"grad_norm": 9.157668727064559,
"learning_rate": 1.9929446433566033e-06,
"loss": 0.9695,
"step": 186
},
{
"epoch": 0.06726013847675569,
"grad_norm": 35.81790736149004,
"learning_rate": 1.9928057918173785e-06,
"loss": 1.0061,
"step": 187
},
{
"epoch": 0.06761981836165812,
"grad_norm": 14.232279118378148,
"learning_rate": 1.992665592171592e-06,
"loss": 1.0155,
"step": 188
},
{
"epoch": 0.06797949824656056,
"grad_norm": 15.793796762381849,
"learning_rate": 1.9925240446096176e-06,
"loss": 1.0623,
"step": 189
},
{
"epoch": 0.068339178131463,
"grad_norm": 39.43576277183396,
"learning_rate": 1.992381149323659e-06,
"loss": 1.0001,
"step": 190
},
{
"epoch": 0.06869885801636544,
"grad_norm": 31.001449639725706,
"learning_rate": 1.9922369065077497e-06,
"loss": 1.0187,
"step": 191
},
{
"epoch": 0.06905853790126787,
"grad_norm": 8.228870313870372,
"learning_rate": 1.992091316357754e-06,
"loss": 0.9295,
"step": 192
},
{
"epoch": 0.06941821778617031,
"grad_norm": 29.05814131108659,
"learning_rate": 1.9919443790713656e-06,
"loss": 0.9984,
"step": 193
},
{
"epoch": 0.06977789767107274,
"grad_norm": 110.22787600422478,
"learning_rate": 1.991796094848106e-06,
"loss": 0.9936,
"step": 194
},
{
"epoch": 0.07013757755597518,
"grad_norm": 12.618535381583916,
"learning_rate": 1.9916464638893276e-06,
"loss": 0.9948,
"step": 195
},
{
"epoch": 0.07049725744087762,
"grad_norm": 16.072952603434494,
"learning_rate": 1.9914954863982104e-06,
"loss": 0.9892,
"step": 196
},
{
"epoch": 0.07085693732578005,
"grad_norm": 96.02536139809511,
"learning_rate": 1.991343162579763e-06,
"loss": 0.9598,
"step": 197
},
{
"epoch": 0.0712166172106825,
"grad_norm": 38.460090688210364,
"learning_rate": 1.9911894926408216e-06,
"loss": 1.0457,
"step": 198
},
{
"epoch": 0.07157629709558493,
"grad_norm": 8.085643587932019,
"learning_rate": 1.9910344767900516e-06,
"loss": 0.9666,
"step": 199
},
{
"epoch": 0.07193597698048737,
"grad_norm": 9.409402571435368,
"learning_rate": 1.990878115237945e-06,
"loss": 1.0249,
"step": 200
},
{
"epoch": 0.0722956568653898,
"grad_norm": 7.734707494269557,
"learning_rate": 1.9907204081968206e-06,
"loss": 0.9392,
"step": 201
},
{
"epoch": 0.07265533675029225,
"grad_norm": 26.66088795484755,
"learning_rate": 1.990561355880826e-06,
"loss": 1.0388,
"step": 202
},
{
"epoch": 0.07301501663519468,
"grad_norm": 10.493587163710068,
"learning_rate": 1.990400958505934e-06,
"loss": 1.0221,
"step": 203
},
{
"epoch": 0.07337469652009711,
"grad_norm": 12.851538063052784,
"learning_rate": 1.9902392162899436e-06,
"loss": 1.0027,
"step": 204
},
{
"epoch": 0.07373437640499955,
"grad_norm": 16.34581335012308,
"learning_rate": 1.9900761294524815e-06,
"loss": 0.8526,
"step": 205
},
{
"epoch": 0.07409405628990198,
"grad_norm": 15.465585604721932,
"learning_rate": 1.989911698214999e-06,
"loss": 1.0492,
"step": 206
},
{
"epoch": 0.07445373617480443,
"grad_norm": 13.85049814171573,
"learning_rate": 1.9897459228007732e-06,
"loss": 0.9283,
"step": 207
},
{
"epoch": 0.07481341605970686,
"grad_norm": 12.445607790320484,
"learning_rate": 1.989578803434907e-06,
"loss": 0.9975,
"step": 208
},
{
"epoch": 0.0751730959446093,
"grad_norm": 8.566268373195546,
"learning_rate": 1.9894103403443263e-06,
"loss": 0.9703,
"step": 209
},
{
"epoch": 0.07553277582951173,
"grad_norm": 10.895634757757644,
"learning_rate": 1.9892405337577844e-06,
"loss": 1.0748,
"step": 210
},
{
"epoch": 0.07589245571441418,
"grad_norm": 10.356787874855785,
"learning_rate": 1.989069383905856e-06,
"loss": 0.9088,
"step": 211
},
{
"epoch": 0.07625213559931661,
"grad_norm": 11.44872572247474,
"learning_rate": 1.9888968910209433e-06,
"loss": 0.9444,
"step": 212
},
{
"epoch": 0.07661181548421904,
"grad_norm": 29.747404916680768,
"learning_rate": 1.988723055337268e-06,
"loss": 0.9275,
"step": 213
},
{
"epoch": 0.07697149536912148,
"grad_norm": 8.787407613024634,
"learning_rate": 1.988547877090879e-06,
"loss": 0.9263,
"step": 214
},
{
"epoch": 0.07733117525402392,
"grad_norm": 13.958900773060638,
"learning_rate": 1.988371356519646e-06,
"loss": 0.9726,
"step": 215
},
{
"epoch": 0.07769085513892636,
"grad_norm": 24.499239616110906,
"learning_rate": 1.988193493863261e-06,
"loss": 0.9279,
"step": 216
},
{
"epoch": 0.07805053502382879,
"grad_norm": 15.629383308803032,
"learning_rate": 1.988014289363241e-06,
"loss": 0.99,
"step": 217
},
{
"epoch": 0.07841021490873124,
"grad_norm": 14.741040148864387,
"learning_rate": 1.987833743262922e-06,
"loss": 0.9291,
"step": 218
},
{
"epoch": 0.07876989479363367,
"grad_norm": 10.400173804859792,
"learning_rate": 1.9876518558074634e-06,
"loss": 0.9717,
"step": 219
},
{
"epoch": 0.0791295746785361,
"grad_norm": 219.94552736960952,
"learning_rate": 1.9874686272438462e-06,
"loss": 0.9617,
"step": 220
},
{
"epoch": 0.07948925456343854,
"grad_norm": 16.654361262872754,
"learning_rate": 1.987284057820872e-06,
"loss": 0.954,
"step": 221
},
{
"epoch": 0.07984893444834097,
"grad_norm": 19.913358977779705,
"learning_rate": 1.9870981477891625e-06,
"loss": 0.9919,
"step": 222
},
{
"epoch": 0.08020861433324342,
"grad_norm": 26.51927334904652,
"learning_rate": 1.9869108974011603e-06,
"loss": 0.9587,
"step": 223
},
{
"epoch": 0.08056829421814585,
"grad_norm": 7.743524165285064,
"learning_rate": 1.9867223069111286e-06,
"loss": 0.9628,
"step": 224
},
{
"epoch": 0.08092797410304829,
"grad_norm": 44.7631620880274,
"learning_rate": 1.98653237657515e-06,
"loss": 0.9877,
"step": 225
},
{
"epoch": 0.08128765398795072,
"grad_norm": 15.42485811260832,
"learning_rate": 1.9863411066511254e-06,
"loss": 1.0684,
"step": 226
},
{
"epoch": 0.08164733387285317,
"grad_norm": 8.709227213717968,
"learning_rate": 1.986148497398776e-06,
"loss": 0.9141,
"step": 227
},
{
"epoch": 0.0820070137577556,
"grad_norm": 32.36945680273078,
"learning_rate": 1.985954549079641e-06,
"loss": 0.9186,
"step": 228
},
{
"epoch": 0.08236669364265803,
"grad_norm": 11.980831305408493,
"learning_rate": 1.9857592619570782e-06,
"loss": 0.9199,
"step": 229
},
{
"epoch": 0.08272637352756047,
"grad_norm": 11.952525858137294,
"learning_rate": 1.9855626362962634e-06,
"loss": 0.9792,
"step": 230
},
{
"epoch": 0.0830860534124629,
"grad_norm": 11.27300140826299,
"learning_rate": 1.9853646723641893e-06,
"loss": 0.9684,
"step": 231
},
{
"epoch": 0.08344573329736535,
"grad_norm": 13.15367279550054,
"learning_rate": 1.9851653704296664e-06,
"loss": 0.9036,
"step": 232
},
{
"epoch": 0.08380541318226778,
"grad_norm": 11.20570851960056,
"learning_rate": 1.9849647307633218e-06,
"loss": 0.8904,
"step": 233
},
{
"epoch": 0.08416509306717022,
"grad_norm": 36.5714456594361,
"learning_rate": 1.9847627536375997e-06,
"loss": 0.9049,
"step": 234
},
{
"epoch": 0.08452477295207265,
"grad_norm": 14.09924306368202,
"learning_rate": 1.9845594393267594e-06,
"loss": 0.9414,
"step": 235
},
{
"epoch": 0.08488445283697509,
"grad_norm": 9.18220985691734,
"learning_rate": 1.984354788106876e-06,
"loss": 0.9278,
"step": 236
},
{
"epoch": 0.08524413272187753,
"grad_norm": 10.030956245273718,
"learning_rate": 1.9841488002558414e-06,
"loss": 0.9914,
"step": 237
},
{
"epoch": 0.08560381260677996,
"grad_norm": 25.38489005292741,
"learning_rate": 1.9839414760533604e-06,
"loss": 0.9513,
"step": 238
},
{
"epoch": 0.0859634924916824,
"grad_norm": 28.306112709700646,
"learning_rate": 1.9837328157809545e-06,
"loss": 0.9452,
"step": 239
},
{
"epoch": 0.08632317237658484,
"grad_norm": 8.256455535562468,
"learning_rate": 1.983522819721957e-06,
"loss": 0.8817,
"step": 240
},
{
"epoch": 0.08668285226148728,
"grad_norm": 11.283564132645212,
"learning_rate": 1.9833114881615176e-06,
"loss": 0.8688,
"step": 241
},
{
"epoch": 0.08704253214638971,
"grad_norm": 8.739851010743765,
"learning_rate": 1.9830988213865977e-06,
"loss": 0.901,
"step": 242
},
{
"epoch": 0.08740221203129216,
"grad_norm": 13.429560807663824,
"learning_rate": 1.9828848196859724e-06,
"loss": 0.9433,
"step": 243
},
{
"epoch": 0.08776189191619459,
"grad_norm": 9.762304293644194,
"learning_rate": 1.9826694833502295e-06,
"loss": 0.8557,
"step": 244
},
{
"epoch": 0.08812157180109702,
"grad_norm": 18.78325006841531,
"learning_rate": 1.9824528126717683e-06,
"loss": 0.8549,
"step": 245
},
{
"epoch": 0.08848125168599946,
"grad_norm": 9.560722810791209,
"learning_rate": 1.9822348079448013e-06,
"loss": 0.9296,
"step": 246
},
{
"epoch": 0.0888409315709019,
"grad_norm": 27.147006922557473,
"learning_rate": 1.982015469465351e-06,
"loss": 0.8618,
"step": 247
},
{
"epoch": 0.08920061145580434,
"grad_norm": 20.124472837337276,
"learning_rate": 1.9817947975312526e-06,
"loss": 0.911,
"step": 248
},
{
"epoch": 0.08956029134070677,
"grad_norm": 12.301748162630073,
"learning_rate": 1.9815727924421506e-06,
"loss": 0.8915,
"step": 249
},
{
"epoch": 0.08991997122560921,
"grad_norm": 9.574588238888932,
"learning_rate": 1.9813494544995e-06,
"loss": 0.8568,
"step": 250
},
{
"epoch": 0.09027965111051164,
"grad_norm": 8.558822505649715,
"learning_rate": 1.9811247840065667e-06,
"loss": 0.9092,
"step": 251
},
{
"epoch": 0.09063933099541407,
"grad_norm": 10.687295740498575,
"learning_rate": 1.9808987812684244e-06,
"loss": 0.8546,
"step": 252
},
{
"epoch": 0.09099901088031652,
"grad_norm": 8.278440678155265,
"learning_rate": 1.980671446591957e-06,
"loss": 0.9665,
"step": 253
},
{
"epoch": 0.09135869076521895,
"grad_norm": 10.179982442382315,
"learning_rate": 1.9804427802858566e-06,
"loss": 1.0027,
"step": 254
},
{
"epoch": 0.0917183706501214,
"grad_norm": 33.91581096469286,
"learning_rate": 1.980212782660624e-06,
"loss": 0.9011,
"step": 255
},
{
"epoch": 0.09207805053502383,
"grad_norm": 11.022829190878873,
"learning_rate": 1.9799814540285666e-06,
"loss": 0.9284,
"step": 256
},
{
"epoch": 0.09243773041992627,
"grad_norm": 11.446092031229947,
"learning_rate": 1.9797487947038e-06,
"loss": 0.8747,
"step": 257
},
{
"epoch": 0.0927974103048287,
"grad_norm": 12.379375654292929,
"learning_rate": 1.9795148050022473e-06,
"loss": 0.9398,
"step": 258
},
{
"epoch": 0.09315709018973115,
"grad_norm": 10.665168488416498,
"learning_rate": 1.9792794852416368e-06,
"loss": 0.898,
"step": 259
},
{
"epoch": 0.09351677007463358,
"grad_norm": 16.559548126823888,
"learning_rate": 1.979042835741503e-06,
"loss": 0.923,
"step": 260
},
{
"epoch": 0.093876449959536,
"grad_norm": 14.215417562017038,
"learning_rate": 1.978804856823187e-06,
"loss": 0.9251,
"step": 261
},
{
"epoch": 0.09423612984443845,
"grad_norm": 9.201940638447635,
"learning_rate": 1.9785655488098346e-06,
"loss": 0.8465,
"step": 262
},
{
"epoch": 0.09459580972934088,
"grad_norm": 11.452188467726437,
"learning_rate": 1.978324912026396e-06,
"loss": 0.8753,
"step": 263
},
{
"epoch": 0.09495548961424333,
"grad_norm": 17.15325293077976,
"learning_rate": 1.9780829467996257e-06,
"loss": 0.9477,
"step": 264
},
{
"epoch": 0.09531516949914576,
"grad_norm": 10.004227224146033,
"learning_rate": 1.977839653458083e-06,
"loss": 0.9036,
"step": 265
},
{
"epoch": 0.0956748493840482,
"grad_norm": 8.616156980769292,
"learning_rate": 1.9775950323321298e-06,
"loss": 0.939,
"step": 266
},
{
"epoch": 0.09603452926895063,
"grad_norm": 14.128821933261587,
"learning_rate": 1.9773490837539304e-06,
"loss": 0.9686,
"step": 267
},
{
"epoch": 0.09639420915385308,
"grad_norm": 14.668127731373431,
"learning_rate": 1.9771018080574533e-06,
"loss": 0.8869,
"step": 268
},
{
"epoch": 0.09675388903875551,
"grad_norm": 12.018016633334687,
"learning_rate": 1.9768532055784677e-06,
"loss": 0.9346,
"step": 269
},
{
"epoch": 0.09711356892365794,
"grad_norm": 16.455704186742906,
"learning_rate": 1.976603276654544e-06,
"loss": 0.88,
"step": 270
},
{
"epoch": 0.09747324880856038,
"grad_norm": 34.040812158589354,
"learning_rate": 1.976352021625056e-06,
"loss": 0.8906,
"step": 271
},
{
"epoch": 0.09783292869346281,
"grad_norm": 9.26169790660666,
"learning_rate": 1.9760994408311753e-06,
"loss": 0.9632,
"step": 272
},
{
"epoch": 0.09819260857836526,
"grad_norm": 10.463662543924356,
"learning_rate": 1.9758455346158764e-06,
"loss": 0.9285,
"step": 273
},
{
"epoch": 0.09855228846326769,
"grad_norm": 11.246341713615287,
"learning_rate": 1.9755903033239315e-06,
"loss": 0.9197,
"step": 274
},
{
"epoch": 0.09891196834817013,
"grad_norm": 9.598455481645157,
"learning_rate": 1.975333747301913e-06,
"loss": 0.8562,
"step": 275
},
{
"epoch": 0.09927164823307257,
"grad_norm": 12.452620100983056,
"learning_rate": 1.9750758668981923e-06,
"loss": 0.9208,
"step": 276
},
{
"epoch": 0.099631328117975,
"grad_norm": 16.907941826083178,
"learning_rate": 1.974816662462939e-06,
"loss": 0.8863,
"step": 277
},
{
"epoch": 0.09999100800287744,
"grad_norm": 16.54484269720697,
"learning_rate": 1.9745561343481196e-06,
"loss": 0.9162,
"step": 278
},
{
"epoch": 0.10035068788777987,
"grad_norm": 16.024180075889007,
"learning_rate": 1.974294282907499e-06,
"loss": 0.9985,
"step": 279
},
{
"epoch": 0.10071036777268232,
"grad_norm": 14.096905052340583,
"learning_rate": 1.97403110849664e-06,
"loss": 0.9025,
"step": 280
},
{
"epoch": 0.10107004765758475,
"grad_norm": 25.494600045804013,
"learning_rate": 1.973766611472899e-06,
"loss": 0.892,
"step": 281
},
{
"epoch": 0.10142972754248719,
"grad_norm": 11.168220744518289,
"learning_rate": 1.9735007921954314e-06,
"loss": 0.8846,
"step": 282
},
{
"epoch": 0.10178940742738962,
"grad_norm": 12.99935475702517,
"learning_rate": 1.9732336510251863e-06,
"loss": 0.8938,
"step": 283
},
{
"epoch": 0.10214908731229207,
"grad_norm": 13.287549329776299,
"learning_rate": 1.9729651883249074e-06,
"loss": 0.8901,
"step": 284
},
{
"epoch": 0.1025087671971945,
"grad_norm": 9.310937580536969,
"learning_rate": 1.972695404459134e-06,
"loss": 0.9032,
"step": 285
},
{
"epoch": 0.10286844708209693,
"grad_norm": 22.642549782369525,
"learning_rate": 1.9724242997941994e-06,
"loss": 0.8748,
"step": 286
},
{
"epoch": 0.10322812696699937,
"grad_norm": 11.391713797213066,
"learning_rate": 1.9721518746982296e-06,
"loss": 0.9434,
"step": 287
},
{
"epoch": 0.1035878068519018,
"grad_norm": 10.277355772563425,
"learning_rate": 1.971878129541144e-06,
"loss": 0.8842,
"step": 288
},
{
"epoch": 0.10394748673680425,
"grad_norm": 51.70100815959815,
"learning_rate": 1.971603064694654e-06,
"loss": 0.9127,
"step": 289
},
{
"epoch": 0.10430716662170668,
"grad_norm": 13.914980770347539,
"learning_rate": 1.971326680532264e-06,
"loss": 0.9081,
"step": 290
},
{
"epoch": 0.10466684650660912,
"grad_norm": 10.948719016310948,
"learning_rate": 1.971048977429269e-06,
"loss": 0.8867,
"step": 291
},
{
"epoch": 0.10502652639151155,
"grad_norm": 11.543650466595189,
"learning_rate": 1.970769955762755e-06,
"loss": 0.9236,
"step": 292
},
{
"epoch": 0.10538620627641399,
"grad_norm": 17.480050188284167,
"learning_rate": 1.9704896159115995e-06,
"loss": 0.8941,
"step": 293
},
{
"epoch": 0.10574588616131643,
"grad_norm": 11.361668925439416,
"learning_rate": 1.970207958256468e-06,
"loss": 0.8741,
"step": 294
},
{
"epoch": 0.10610556604621886,
"grad_norm": 11.004344996715577,
"learning_rate": 1.969924983179817e-06,
"loss": 0.8695,
"step": 295
},
{
"epoch": 0.1064652459311213,
"grad_norm": 12.097699216737382,
"learning_rate": 1.9696406910658916e-06,
"loss": 0.861,
"step": 296
},
{
"epoch": 0.10682492581602374,
"grad_norm": 8.257198644737876,
"learning_rate": 1.9693550823007247e-06,
"loss": 0.8375,
"step": 297
},
{
"epoch": 0.10718460570092618,
"grad_norm": 22.0001141358469,
"learning_rate": 1.9690681572721376e-06,
"loss": 0.8442,
"step": 298
},
{
"epoch": 0.10754428558582861,
"grad_norm": 14.233797906171292,
"learning_rate": 1.9687799163697384e-06,
"loss": 0.9239,
"step": 299
},
{
"epoch": 0.10790396547073106,
"grad_norm": 11.770942295021396,
"learning_rate": 1.968490359984923e-06,
"loss": 0.9098,
"step": 300
},
{
"epoch": 0.10826364535563349,
"grad_norm": 23.60257924918355,
"learning_rate": 1.9681994885108726e-06,
"loss": 0.8693,
"step": 301
},
{
"epoch": 0.10862332524053592,
"grad_norm": 14.960614091519401,
"learning_rate": 1.967907302342554e-06,
"loss": 0.8315,
"step": 302
},
{
"epoch": 0.10898300512543836,
"grad_norm": 20.45544688064615,
"learning_rate": 1.96761380187672e-06,
"loss": 0.9464,
"step": 303
},
{
"epoch": 0.10934268501034079,
"grad_norm": 14.957920336704825,
"learning_rate": 1.967318987511908e-06,
"loss": 0.9121,
"step": 304
},
{
"epoch": 0.10970236489524324,
"grad_norm": 26.138065669695667,
"learning_rate": 1.967022859648438e-06,
"loss": 0.8295,
"step": 305
},
{
"epoch": 0.11006204478014567,
"grad_norm": 18.768323476716652,
"learning_rate": 1.966725418688416e-06,
"loss": 0.8819,
"step": 306
},
{
"epoch": 0.11042172466504811,
"grad_norm": 17.74264546609931,
"learning_rate": 1.9664266650357295e-06,
"loss": 0.9849,
"step": 307
},
{
"epoch": 0.11078140454995054,
"grad_norm": 14.258252350797768,
"learning_rate": 1.966126599096048e-06,
"loss": 0.9227,
"step": 308
},
{
"epoch": 0.11114108443485297,
"grad_norm": 10.994564258836762,
"learning_rate": 1.965825221276825e-06,
"loss": 0.9719,
"step": 309
},
{
"epoch": 0.11150076431975542,
"grad_norm": 15.122948478081264,
"learning_rate": 1.9655225319872925e-06,
"loss": 0.9255,
"step": 310
},
{
"epoch": 0.11186044420465785,
"grad_norm": 12.314006203316827,
"learning_rate": 1.9652185316384657e-06,
"loss": 0.9275,
"step": 311
},
{
"epoch": 0.1122201240895603,
"grad_norm": 23.25875296453776,
"learning_rate": 1.964913220643139e-06,
"loss": 0.9677,
"step": 312
},
{
"epoch": 0.11257980397446272,
"grad_norm": 17.358740759679804,
"learning_rate": 1.964606599415887e-06,
"loss": 0.9023,
"step": 313
},
{
"epoch": 0.11293948385936517,
"grad_norm": 12.346597085386588,
"learning_rate": 1.9642986683730623e-06,
"loss": 0.906,
"step": 314
},
{
"epoch": 0.1132991637442676,
"grad_norm": 16.91901558140758,
"learning_rate": 1.963989427932798e-06,
"loss": 0.9201,
"step": 315
},
{
"epoch": 0.11365884362917004,
"grad_norm": 9.741199157688925,
"learning_rate": 1.9636788785150034e-06,
"loss": 0.9499,
"step": 316
},
{
"epoch": 0.11401852351407248,
"grad_norm": 13.262595663362411,
"learning_rate": 1.963367020541366e-06,
"loss": 0.914,
"step": 317
},
{
"epoch": 0.1143782033989749,
"grad_norm": 19.22847042204688,
"learning_rate": 1.9630538544353504e-06,
"loss": 0.9313,
"step": 318
},
{
"epoch": 0.11473788328387735,
"grad_norm": 13.02293930823946,
"learning_rate": 1.9627393806221965e-06,
"loss": 0.8973,
"step": 319
},
{
"epoch": 0.11509756316877978,
"grad_norm": 76.118414409978,
"learning_rate": 1.962423599528921e-06,
"loss": 0.8645,
"step": 320
},
{
"epoch": 0.11545724305368223,
"grad_norm": 19.283564453806168,
"learning_rate": 1.9621065115843153e-06,
"loss": 0.9104,
"step": 321
},
{
"epoch": 0.11581692293858466,
"grad_norm": 21.313272273661486,
"learning_rate": 1.961788117218945e-06,
"loss": 0.8843,
"step": 322
},
{
"epoch": 0.1161766028234871,
"grad_norm": 51.153040936522814,
"learning_rate": 1.96146841686515e-06,
"loss": 0.9051,
"step": 323
},
{
"epoch": 0.11653628270838953,
"grad_norm": 11.133909813691858,
"learning_rate": 1.9611474109570443e-06,
"loss": 0.862,
"step": 324
},
{
"epoch": 0.11689596259329196,
"grad_norm": 10.558210832457515,
"learning_rate": 1.9608250999305127e-06,
"loss": 0.8813,
"step": 325
},
{
"epoch": 0.11725564247819441,
"grad_norm": 16.281419689047077,
"learning_rate": 1.9605014842232148e-06,
"loss": 0.8851,
"step": 326
},
{
"epoch": 0.11761532236309684,
"grad_norm": 16.565785391837323,
"learning_rate": 1.9601765642745793e-06,
"loss": 1.0049,
"step": 327
},
{
"epoch": 0.11797500224799928,
"grad_norm": 14.5572818578826,
"learning_rate": 1.9598503405258075e-06,
"loss": 0.9034,
"step": 328
},
{
"epoch": 0.11833468213290171,
"grad_norm": 11.868932975556522,
"learning_rate": 1.9595228134198705e-06,
"loss": 0.8458,
"step": 329
},
{
"epoch": 0.11869436201780416,
"grad_norm": 9.98695028024214,
"learning_rate": 1.9591939834015094e-06,
"loss": 0.8226,
"step": 330
},
{
"epoch": 0.11905404190270659,
"grad_norm": 9.223681177074399,
"learning_rate": 1.958863850917234e-06,
"loss": 0.8638,
"step": 331
},
{
"epoch": 0.11941372178760903,
"grad_norm": 12.89149079121029,
"learning_rate": 1.9585324164153234e-06,
"loss": 0.8801,
"step": 332
},
{
"epoch": 0.11977340167251146,
"grad_norm": 18.140293724032134,
"learning_rate": 1.9581996803458243e-06,
"loss": 0.9153,
"step": 333
},
{
"epoch": 0.1201330815574139,
"grad_norm": 33.52740780248001,
"learning_rate": 1.957865643160551e-06,
"loss": 0.8737,
"step": 334
},
{
"epoch": 0.12049276144231634,
"grad_norm": 8.280847090521378,
"learning_rate": 1.9575303053130846e-06,
"loss": 0.8294,
"step": 335
},
{
"epoch": 0.12085244132721877,
"grad_norm": 7.678778659702913,
"learning_rate": 1.9571936672587716e-06,
"loss": 0.8831,
"step": 336
},
{
"epoch": 0.12121212121212122,
"grad_norm": 15.784348499821462,
"learning_rate": 1.9568557294547242e-06,
"loss": 0.9263,
"step": 337
},
{
"epoch": 0.12157180109702365,
"grad_norm": 12.10096678184232,
"learning_rate": 1.9565164923598204e-06,
"loss": 0.8828,
"step": 338
},
{
"epoch": 0.12193148098192609,
"grad_norm": 8.816663905269253,
"learning_rate": 1.956175956434702e-06,
"loss": 0.8907,
"step": 339
},
{
"epoch": 0.12229116086682852,
"grad_norm": 29.90225834951316,
"learning_rate": 1.955834122141774e-06,
"loss": 0.7933,
"step": 340
},
{
"epoch": 0.12265084075173097,
"grad_norm": 10.292172711015832,
"learning_rate": 1.9554909899452053e-06,
"loss": 0.8857,
"step": 341
},
{
"epoch": 0.1230105206366334,
"grad_norm": 39.15541881489192,
"learning_rate": 1.955146560310926e-06,
"loss": 0.8972,
"step": 342
},
{
"epoch": 0.12337020052153583,
"grad_norm": 11.893045761955474,
"learning_rate": 1.954800833706629e-06,
"loss": 0.8419,
"step": 343
},
{
"epoch": 0.12372988040643827,
"grad_norm": 68.55531647862284,
"learning_rate": 1.954453810601768e-06,
"loss": 0.8472,
"step": 344
},
{
"epoch": 0.1240895602913407,
"grad_norm": 10.504320449547713,
"learning_rate": 1.954105491467557e-06,
"loss": 0.9026,
"step": 345
},
{
"epoch": 0.12444924017624315,
"grad_norm": 12.598885612377023,
"learning_rate": 1.9537558767769696e-06,
"loss": 0.8975,
"step": 346
},
{
"epoch": 0.12480892006114558,
"grad_norm": 23.969375774147824,
"learning_rate": 1.95340496700474e-06,
"loss": 0.916,
"step": 347
},
{
"epoch": 0.125168599946048,
"grad_norm": 13.999278996159775,
"learning_rate": 1.953052762627359e-06,
"loss": 0.7556,
"step": 348
},
{
"epoch": 0.12552827983095047,
"grad_norm": 16.442131135298013,
"learning_rate": 1.9526992641230767e-06,
"loss": 0.8536,
"step": 349
},
{
"epoch": 0.1258879597158529,
"grad_norm": 13.146056020396799,
"learning_rate": 1.9523444719719e-06,
"loss": 0.9139,
"step": 350
},
{
"epoch": 0.12624763960075533,
"grad_norm": 12.700247793000134,
"learning_rate": 1.951988386655592e-06,
"loss": 0.8299,
"step": 351
},
{
"epoch": 0.12660731948565776,
"grad_norm": 11.963896379243318,
"learning_rate": 1.951631008657673e-06,
"loss": 0.8569,
"step": 352
},
{
"epoch": 0.1269669993705602,
"grad_norm": 52.23592527717699,
"learning_rate": 1.9512723384634173e-06,
"loss": 0.8283,
"step": 353
},
{
"epoch": 0.12732667925546265,
"grad_norm": 10.03356695138902,
"learning_rate": 1.9509123765598543e-06,
"loss": 0.8658,
"step": 354
},
{
"epoch": 0.12768635914036508,
"grad_norm": 9.293741966429987,
"learning_rate": 1.9505511234357674e-06,
"loss": 0.7894,
"step": 355
},
{
"epoch": 0.1280460390252675,
"grad_norm": 10.747400133317525,
"learning_rate": 1.9501885795816933e-06,
"loss": 0.7884,
"step": 356
},
{
"epoch": 0.12840571891016994,
"grad_norm": 83.51327413134986,
"learning_rate": 1.949824745489922e-06,
"loss": 0.8946,
"step": 357
},
{
"epoch": 0.12876539879507237,
"grad_norm": 30.717710225026778,
"learning_rate": 1.949459621654494e-06,
"loss": 0.8959,
"step": 358
},
{
"epoch": 0.12912507867997483,
"grad_norm": 30.82688771552335,
"learning_rate": 1.949093208571202e-06,
"loss": 0.8639,
"step": 359
},
{
"epoch": 0.12948475856487726,
"grad_norm": 13.061305788972607,
"learning_rate": 1.9487255067375904e-06,
"loss": 0.9012,
"step": 360
},
{
"epoch": 0.1298444384497797,
"grad_norm": 13.398942621099714,
"learning_rate": 1.9483565166529515e-06,
"loss": 0.9357,
"step": 361
},
{
"epoch": 0.13020411833468212,
"grad_norm": 12.669122185267923,
"learning_rate": 1.947986238818328e-06,
"loss": 0.8448,
"step": 362
},
{
"epoch": 0.13056379821958458,
"grad_norm": 13.424355636023794,
"learning_rate": 1.947614673736511e-06,
"loss": 0.8521,
"step": 363
},
{
"epoch": 0.130923478104487,
"grad_norm": 9.175568758023797,
"learning_rate": 1.94724182191204e-06,
"loss": 0.8636,
"step": 364
},
{
"epoch": 0.13128315798938944,
"grad_norm": 12.328782554527889,
"learning_rate": 1.946867683851201e-06,
"loss": 0.9347,
"step": 365
},
{
"epoch": 0.13164283787429187,
"grad_norm": 12.013763613914739,
"learning_rate": 1.9464922600620267e-06,
"loss": 0.8363,
"step": 366
},
{
"epoch": 0.1320025177591943,
"grad_norm": 9.269438078892499,
"learning_rate": 1.946115551054296e-06,
"loss": 0.817,
"step": 367
},
{
"epoch": 0.13236219764409676,
"grad_norm": 10.963559708168836,
"learning_rate": 1.945737557339533e-06,
"loss": 0.8938,
"step": 368
},
{
"epoch": 0.1327218775289992,
"grad_norm": 15.66670813226693,
"learning_rate": 1.9453582794310064e-06,
"loss": 0.9166,
"step": 369
},
{
"epoch": 0.13308155741390162,
"grad_norm": 12.100424658153491,
"learning_rate": 1.9449777178437274e-06,
"loss": 0.7974,
"step": 370
},
{
"epoch": 0.13344123729880406,
"grad_norm": 15.678206138304043,
"learning_rate": 1.9445958730944515e-06,
"loss": 0.8629,
"step": 371
},
{
"epoch": 0.1338009171837065,
"grad_norm": 11.550421434718123,
"learning_rate": 1.9442127457016765e-06,
"loss": 0.8583,
"step": 372
},
{
"epoch": 0.13416059706860894,
"grad_norm": 11.909500141730392,
"learning_rate": 1.943828336185642e-06,
"loss": 0.914,
"step": 373
},
{
"epoch": 0.13452027695351138,
"grad_norm": 9.940660597221434,
"learning_rate": 1.9434426450683275e-06,
"loss": 0.7436,
"step": 374
},
{
"epoch": 0.1348799568384138,
"grad_norm": 10.34248930812127,
"learning_rate": 1.943055672873454e-06,
"loss": 0.8108,
"step": 375
},
{
"epoch": 0.13523963672331624,
"grad_norm": 12.457340019224596,
"learning_rate": 1.942667420126481e-06,
"loss": 0.8272,
"step": 376
},
{
"epoch": 0.1355993166082187,
"grad_norm": 9.465393908237939,
"learning_rate": 1.942277887354608e-06,
"loss": 0.8666,
"step": 377
},
{
"epoch": 0.13595899649312113,
"grad_norm": 14.936676198112885,
"learning_rate": 1.941887075086772e-06,
"loss": 0.7892,
"step": 378
},
{
"epoch": 0.13631867637802356,
"grad_norm": 16.657805002799737,
"learning_rate": 1.9414949838536467e-06,
"loss": 0.8826,
"step": 379
},
{
"epoch": 0.136678356262926,
"grad_norm": 14.465245609908502,
"learning_rate": 1.9411016141876437e-06,
"loss": 0.8851,
"step": 380
},
{
"epoch": 0.13703803614782845,
"grad_norm": 12.225630986738413,
"learning_rate": 1.9407069666229093e-06,
"loss": 0.8197,
"step": 381
},
{
"epoch": 0.13739771603273088,
"grad_norm": 9.515207261688014,
"learning_rate": 1.9403110416953264e-06,
"loss": 0.8497,
"step": 382
},
{
"epoch": 0.1377573959176333,
"grad_norm": 15.813970332166535,
"learning_rate": 1.939913839942512e-06,
"loss": 0.8754,
"step": 383
},
{
"epoch": 0.13811707580253574,
"grad_norm": 12.87993595588049,
"learning_rate": 1.9395153619038154e-06,
"loss": 0.9,
"step": 384
},
{
"epoch": 0.13847675568743817,
"grad_norm": 21.12374700170286,
"learning_rate": 1.939115608120321e-06,
"loss": 0.8271,
"step": 385
},
{
"epoch": 0.13883643557234063,
"grad_norm": 8.357132133993698,
"learning_rate": 1.938714579134845e-06,
"loss": 0.8227,
"step": 386
},
{
"epoch": 0.13919611545724306,
"grad_norm": 9.359497020025131,
"learning_rate": 1.9383122754919338e-06,
"loss": 0.8339,
"step": 387
},
{
"epoch": 0.1395557953421455,
"grad_norm": 270.40586750608423,
"learning_rate": 1.9379086977378663e-06,
"loss": 0.9283,
"step": 388
},
{
"epoch": 0.13991547522704792,
"grad_norm": 9.080060254672393,
"learning_rate": 1.9375038464206504e-06,
"loss": 0.8596,
"step": 389
},
{
"epoch": 0.14027515511195035,
"grad_norm": 9.878471431037067,
"learning_rate": 1.9370977220900236e-06,
"loss": 0.9245,
"step": 390
},
{
"epoch": 0.1406348349968528,
"grad_norm": 10.555714443464495,
"learning_rate": 1.936690325297453e-06,
"loss": 0.9154,
"step": 391
},
{
"epoch": 0.14099451488175524,
"grad_norm": 10.700724258109885,
"learning_rate": 1.936281656596132e-06,
"loss": 0.9078,
"step": 392
},
{
"epoch": 0.14135419476665767,
"grad_norm": 8.07202896315807,
"learning_rate": 1.9358717165409816e-06,
"loss": 0.7941,
"step": 393
},
{
"epoch": 0.1417138746515601,
"grad_norm": 13.946094269658285,
"learning_rate": 1.93546050568865e-06,
"loss": 0.8197,
"step": 394
},
{
"epoch": 0.14207355453646256,
"grad_norm": 9.373248085121805,
"learning_rate": 1.93504802459751e-06,
"loss": 0.8974,
"step": 395
},
{
"epoch": 0.142433234421365,
"grad_norm": 20.94564386485015,
"learning_rate": 1.934634273827659e-06,
"loss": 0.8379,
"step": 396
},
{
"epoch": 0.14279291430626742,
"grad_norm": 10.643078401471191,
"learning_rate": 1.93421925394092e-06,
"loss": 0.8434,
"step": 397
},
{
"epoch": 0.14315259419116985,
"grad_norm": 12.292583457238141,
"learning_rate": 1.9338029655008375e-06,
"loss": 0.8723,
"step": 398
},
{
"epoch": 0.14351227407607228,
"grad_norm": 11.303797133940236,
"learning_rate": 1.9333854090726796e-06,
"loss": 0.8724,
"step": 399
},
{
"epoch": 0.14387195396097474,
"grad_norm": 7.762057454413017,
"learning_rate": 1.9329665852234356e-06,
"loss": 0.86,
"step": 400
},
{
"epoch": 0.14423163384587717,
"grad_norm": 14.321946837228882,
"learning_rate": 1.9325464945218168e-06,
"loss": 0.9621,
"step": 401
},
{
"epoch": 0.1445913137307796,
"grad_norm": 6.825717053445401,
"learning_rate": 1.9321251375382536e-06,
"loss": 0.8112,
"step": 402
},
{
"epoch": 0.14495099361568203,
"grad_norm": 14.49694123220213,
"learning_rate": 1.931702514844896e-06,
"loss": 0.856,
"step": 403
},
{
"epoch": 0.1453106735005845,
"grad_norm": 11.663421014632888,
"learning_rate": 1.9312786270156135e-06,
"loss": 0.8233,
"step": 404
},
{
"epoch": 0.14567035338548692,
"grad_norm": 14.164098459094538,
"learning_rate": 1.9308534746259925e-06,
"loss": 0.8564,
"step": 405
},
{
"epoch": 0.14603003327038935,
"grad_norm": 12.359332513895684,
"learning_rate": 1.9304270582533377e-06,
"loss": 0.865,
"step": 406
},
{
"epoch": 0.14638971315529178,
"grad_norm": 12.139694859275288,
"learning_rate": 1.929999378476668e-06,
"loss": 0.8756,
"step": 407
},
{
"epoch": 0.14674939304019421,
"grad_norm": 21.360173742690147,
"learning_rate": 1.9295704358767207e-06,
"loss": 0.8693,
"step": 408
},
{
"epoch": 0.14710907292509667,
"grad_norm": 11.100332470691239,
"learning_rate": 1.9291402310359458e-06,
"loss": 0.9017,
"step": 409
},
{
"epoch": 0.1474687528099991,
"grad_norm": 43.95482902174156,
"learning_rate": 1.9287087645385083e-06,
"loss": 0.856,
"step": 410
},
{
"epoch": 0.14782843269490153,
"grad_norm": 9.177097100103019,
"learning_rate": 1.928276036970285e-06,
"loss": 0.876,
"step": 411
},
{
"epoch": 0.14818811257980397,
"grad_norm": 10.285115271330506,
"learning_rate": 1.927842048918867e-06,
"loss": 0.8525,
"step": 412
},
{
"epoch": 0.14854779246470642,
"grad_norm": 8.03920252705612,
"learning_rate": 1.9274068009735547e-06,
"loss": 0.8298,
"step": 413
},
{
"epoch": 0.14890747234960885,
"grad_norm": 13.19357438089089,
"learning_rate": 1.926970293725362e-06,
"loss": 0.8565,
"step": 414
},
{
"epoch": 0.14926715223451129,
"grad_norm": 11.014939550913528,
"learning_rate": 1.926532527767011e-06,
"loss": 0.9041,
"step": 415
},
{
"epoch": 0.14962683211941372,
"grad_norm": 34.139306188602696,
"learning_rate": 1.926093503692933e-06,
"loss": 0.8974,
"step": 416
},
{
"epoch": 0.14998651200431615,
"grad_norm": 126.99384682987264,
"learning_rate": 1.925653222099268e-06,
"loss": 0.8406,
"step": 417
},
{
"epoch": 0.1503461918892186,
"grad_norm": 7.92044174289043,
"learning_rate": 1.9252116835838636e-06,
"loss": 0.8821,
"step": 418
},
{
"epoch": 0.15070587177412104,
"grad_norm": 7.348465342243873,
"learning_rate": 1.9247688887462746e-06,
"loss": 0.8577,
"step": 419
},
{
"epoch": 0.15106555165902347,
"grad_norm": 7.970909157239155,
"learning_rate": 1.9243248381877606e-06,
"loss": 0.937,
"step": 420
},
{
"epoch": 0.1514252315439259,
"grad_norm": 27.181556708867408,
"learning_rate": 1.9238795325112867e-06,
"loss": 0.904,
"step": 421
},
{
"epoch": 0.15178491142882836,
"grad_norm": 38.73261243697153,
"learning_rate": 1.9234329723215234e-06,
"loss": 0.8058,
"step": 422
},
{
"epoch": 0.1521445913137308,
"grad_norm": 8.612041746525732,
"learning_rate": 1.922985158224843e-06,
"loss": 0.8325,
"step": 423
},
{
"epoch": 0.15250427119863322,
"grad_norm": 9.35579094951297,
"learning_rate": 1.9225360908293216e-06,
"loss": 0.9131,
"step": 424
},
{
"epoch": 0.15286395108353565,
"grad_norm": 10.069976190703061,
"learning_rate": 1.922085770744737e-06,
"loss": 0.8255,
"step": 425
},
{
"epoch": 0.15322363096843808,
"grad_norm": 8.493391879280153,
"learning_rate": 1.921634198582567e-06,
"loss": 0.7657,
"step": 426
},
{
"epoch": 0.15358331085334054,
"grad_norm": 6.5652776306338305,
"learning_rate": 1.9211813749559914e-06,
"loss": 0.8071,
"step": 427
},
{
"epoch": 0.15394299073824297,
"grad_norm": 9.892167099058437,
"learning_rate": 1.9207273004798874e-06,
"loss": 0.8277,
"step": 428
},
{
"epoch": 0.1543026706231454,
"grad_norm": 12.10168551634377,
"learning_rate": 1.9202719757708315e-06,
"loss": 0.8913,
"step": 429
},
{
"epoch": 0.15466235050804783,
"grad_norm": 8.400143155457233,
"learning_rate": 1.919815401447099e-06,
"loss": 0.8193,
"step": 430
},
{
"epoch": 0.15502203039295026,
"grad_norm": 8.67439915761609,
"learning_rate": 1.91935757812866e-06,
"loss": 0.9119,
"step": 431
},
{
"epoch": 0.15538171027785272,
"grad_norm": 6.6432568009165305,
"learning_rate": 1.9188985064371815e-06,
"loss": 0.8462,
"step": 432
},
{
"epoch": 0.15574139016275515,
"grad_norm": 10.370296729266624,
"learning_rate": 1.9184381869960256e-06,
"loss": 0.8642,
"step": 433
},
{
"epoch": 0.15610107004765758,
"grad_norm": 7.986242744346441,
"learning_rate": 1.9179766204302495e-06,
"loss": 0.7959,
"step": 434
},
{
"epoch": 0.15646074993256,
"grad_norm": 17.1456213560963,
"learning_rate": 1.9175138073666027e-06,
"loss": 0.8015,
"step": 435
},
{
"epoch": 0.15682042981746247,
"grad_norm": 8.916530755054769,
"learning_rate": 1.9170497484335276e-06,
"loss": 0.7821,
"step": 436
},
{
"epoch": 0.1571801097023649,
"grad_norm": 35.16726375571258,
"learning_rate": 1.9165844442611584e-06,
"loss": 0.8719,
"step": 437
},
{
"epoch": 0.15753978958726733,
"grad_norm": 9.395656545549057,
"learning_rate": 1.91611789548132e-06,
"loss": 0.7904,
"step": 438
},
{
"epoch": 0.15789946947216976,
"grad_norm": 13.635627198065489,
"learning_rate": 1.9156501027275274e-06,
"loss": 0.8651,
"step": 439
},
{
"epoch": 0.1582591493570722,
"grad_norm": 9.22448593403895,
"learning_rate": 1.915181066634986e-06,
"loss": 0.8507,
"step": 440
},
{
"epoch": 0.15861882924197465,
"grad_norm": 14.688094562563721,
"learning_rate": 1.914710787840587e-06,
"loss": 0.7956,
"step": 441
},
{
"epoch": 0.15897850912687708,
"grad_norm": 8.911901702895749,
"learning_rate": 1.914239266982911e-06,
"loss": 0.8144,
"step": 442
},
{
"epoch": 0.1593381890117795,
"grad_norm": 12.767614911255095,
"learning_rate": 1.913766504702225e-06,
"loss": 0.8392,
"step": 443
},
{
"epoch": 0.15969786889668194,
"grad_norm": 11.049958977930125,
"learning_rate": 1.9132925016404803e-06,
"loss": 0.9043,
"step": 444
},
{
"epoch": 0.1600575487815844,
"grad_norm": 24.35116451385261,
"learning_rate": 1.9128172584413146e-06,
"loss": 0.8345,
"step": 445
},
{
"epoch": 0.16041722866648683,
"grad_norm": 9.82011280699428,
"learning_rate": 1.912340775750049e-06,
"loss": 0.7961,
"step": 446
},
{
"epoch": 0.16077690855138926,
"grad_norm": 7.902611715439257,
"learning_rate": 1.9118630542136872e-06,
"loss": 0.911,
"step": 447
},
{
"epoch": 0.1611365884362917,
"grad_norm": 70.31395398173764,
"learning_rate": 1.911384094480916e-06,
"loss": 0.8239,
"step": 448
},
{
"epoch": 0.16149626832119413,
"grad_norm": 33.338731991358,
"learning_rate": 1.9109038972021027e-06,
"loss": 0.8022,
"step": 449
},
{
"epoch": 0.16185594820609658,
"grad_norm": 11.33718969042203,
"learning_rate": 1.9104224630292957e-06,
"loss": 0.8216,
"step": 450
},
{
"epoch": 0.16221562809099901,
"grad_norm": 8.452034969158248,
"learning_rate": 1.9099397926162225e-06,
"loss": 0.8445,
"step": 451
},
{
"epoch": 0.16257530797590145,
"grad_norm": 11.4488154951291,
"learning_rate": 1.909455886618289e-06,
"loss": 0.8916,
"step": 452
},
{
"epoch": 0.16293498786080388,
"grad_norm": 12.31967554618124,
"learning_rate": 1.90897074569258e-06,
"loss": 0.8674,
"step": 453
},
{
"epoch": 0.16329466774570633,
"grad_norm": 10.697499242046135,
"learning_rate": 1.9084843704978553e-06,
"loss": 0.902,
"step": 454
},
{
"epoch": 0.16365434763060877,
"grad_norm": 10.085248528236896,
"learning_rate": 1.9079967616945532e-06,
"loss": 0.8762,
"step": 455
},
{
"epoch": 0.1640140275155112,
"grad_norm": 20.071003715679723,
"learning_rate": 1.9075079199447843e-06,
"loss": 0.8851,
"step": 456
},
{
"epoch": 0.16437370740041363,
"grad_norm": 7.493447739075657,
"learning_rate": 1.9070178459123362e-06,
"loss": 0.8059,
"step": 457
},
{
"epoch": 0.16473338728531606,
"grad_norm": 16.7299671266415,
"learning_rate": 1.9065265402626673e-06,
"loss": 0.8399,
"step": 458
},
{
"epoch": 0.16509306717021852,
"grad_norm": 11.39504360829656,
"learning_rate": 1.9060340036629098e-06,
"loss": 0.9617,
"step": 459
},
{
"epoch": 0.16545274705512095,
"grad_norm": 20.206501898154706,
"learning_rate": 1.9055402367818672e-06,
"loss": 0.8597,
"step": 460
},
{
"epoch": 0.16581242694002338,
"grad_norm": 13.324287414295295,
"learning_rate": 1.9050452402900132e-06,
"loss": 0.9062,
"step": 461
},
{
"epoch": 0.1661721068249258,
"grad_norm": 12.33113766605342,
"learning_rate": 1.9045490148594916e-06,
"loss": 0.8202,
"step": 462
},
{
"epoch": 0.16653178670982827,
"grad_norm": 16.117376983943213,
"learning_rate": 1.9040515611641142e-06,
"loss": 0.863,
"step": 463
},
{
"epoch": 0.1668914665947307,
"grad_norm": 14.213075056424216,
"learning_rate": 1.9035528798793616e-06,
"loss": 0.7904,
"step": 464
},
{
"epoch": 0.16725114647963313,
"grad_norm": 12.164792118405405,
"learning_rate": 1.9030529716823803e-06,
"loss": 0.8742,
"step": 465
},
{
"epoch": 0.16761082636453556,
"grad_norm": 11.496257274302343,
"learning_rate": 1.9025518372519844e-06,
"loss": 0.7988,
"step": 466
},
{
"epoch": 0.167970506249438,
"grad_norm": 10.35968011430579,
"learning_rate": 1.902049477268651e-06,
"loss": 0.9183,
"step": 467
},
{
"epoch": 0.16833018613434045,
"grad_norm": 8.60003816280571,
"learning_rate": 1.9015458924145226e-06,
"loss": 0.8665,
"step": 468
},
{
"epoch": 0.16868986601924288,
"grad_norm": 10.042650282563095,
"learning_rate": 1.901041083373405e-06,
"loss": 0.871,
"step": 469
},
{
"epoch": 0.1690495459041453,
"grad_norm": 8.07460363018041,
"learning_rate": 1.9005350508307658e-06,
"loss": 0.7647,
"step": 470
},
{
"epoch": 0.16940922578904774,
"grad_norm": 14.764742702869542,
"learning_rate": 1.900027795473734e-06,
"loss": 0.8248,
"step": 471
},
{
"epoch": 0.16976890567395017,
"grad_norm": 12.707930104442196,
"learning_rate": 1.8995193179910996e-06,
"loss": 0.8234,
"step": 472
},
{
"epoch": 0.17012858555885263,
"grad_norm": 9.221906057488562,
"learning_rate": 1.8990096190733111e-06,
"loss": 0.901,
"step": 473
},
{
"epoch": 0.17048826544375506,
"grad_norm": 8.934916040042465,
"learning_rate": 1.8984986994124764e-06,
"loss": 0.8368,
"step": 474
},
{
"epoch": 0.1708479453286575,
"grad_norm": 10.708952670356158,
"learning_rate": 1.8979865597023607e-06,
"loss": 0.8035,
"step": 475
},
{
"epoch": 0.17120762521355992,
"grad_norm": 9.897343600602069,
"learning_rate": 1.897473200638386e-06,
"loss": 0.8099,
"step": 476
},
{
"epoch": 0.17156730509846238,
"grad_norm": 9.0242865083605,
"learning_rate": 1.89695862291763e-06,
"loss": 0.8575,
"step": 477
},
{
"epoch": 0.1719269849833648,
"grad_norm": 11.294148942631974,
"learning_rate": 1.896442827238825e-06,
"loss": 0.8016,
"step": 478
},
{
"epoch": 0.17228666486826724,
"grad_norm": 8.181165543542155,
"learning_rate": 1.895925814302357e-06,
"loss": 0.8631,
"step": 479
},
{
"epoch": 0.17264634475316967,
"grad_norm": 10.916696791012598,
"learning_rate": 1.8954075848102654e-06,
"loss": 0.7915,
"step": 480
},
{
"epoch": 0.1730060246380721,
"grad_norm": 9.32733756404691,
"learning_rate": 1.8948881394662416e-06,
"loss": 0.7793,
"step": 481
},
{
"epoch": 0.17336570452297456,
"grad_norm": 12.052464990784205,
"learning_rate": 1.8943674789756274e-06,
"loss": 0.8623,
"step": 482
},
{
"epoch": 0.173725384407877,
"grad_norm": 19.7058645389506,
"learning_rate": 1.8938456040454148e-06,
"loss": 0.836,
"step": 483
},
{
"epoch": 0.17408506429277942,
"grad_norm": 26.01931607553377,
"learning_rate": 1.8933225153842444e-06,
"loss": 0.8655,
"step": 484
},
{
"epoch": 0.17444474417768185,
"grad_norm": 18.735199040250077,
"learning_rate": 1.8927982137024066e-06,
"loss": 0.839,
"step": 485
},
{
"epoch": 0.1748044240625843,
"grad_norm": 15.61104075774782,
"learning_rate": 1.8922726997118368e-06,
"loss": 0.9053,
"step": 486
},
{
"epoch": 0.17516410394748674,
"grad_norm": 13.842802256021539,
"learning_rate": 1.891745974126118e-06,
"loss": 0.8572,
"step": 487
},
{
"epoch": 0.17552378383238917,
"grad_norm": 18.7217105013184,
"learning_rate": 1.8912180376604776e-06,
"loss": 0.879,
"step": 488
},
{
"epoch": 0.1758834637172916,
"grad_norm": 13.134751258066478,
"learning_rate": 1.890688891031788e-06,
"loss": 0.7968,
"step": 489
},
{
"epoch": 0.17624314360219404,
"grad_norm": 13.500016989697537,
"learning_rate": 1.890158534958564e-06,
"loss": 0.7834,
"step": 490
},
{
"epoch": 0.1766028234870965,
"grad_norm": 27.62671510273035,
"learning_rate": 1.8896269701609632e-06,
"loss": 0.8369,
"step": 491
},
{
"epoch": 0.17696250337199892,
"grad_norm": 22.6387971806034,
"learning_rate": 1.8890941973607842e-06,
"loss": 0.7804,
"step": 492
},
{
"epoch": 0.17732218325690136,
"grad_norm": 80.5144766426964,
"learning_rate": 1.8885602172814663e-06,
"loss": 0.9074,
"step": 493
},
{
"epoch": 0.1776818631418038,
"grad_norm": 8.581003452540251,
"learning_rate": 1.888025030648088e-06,
"loss": 0.8666,
"step": 494
},
{
"epoch": 0.17804154302670624,
"grad_norm": 12.721986027130344,
"learning_rate": 1.8874886381873654e-06,
"loss": 0.8943,
"step": 495
},
{
"epoch": 0.17840122291160868,
"grad_norm": 10.857560555875054,
"learning_rate": 1.8869510406276535e-06,
"loss": 0.8547,
"step": 496
},
{
"epoch": 0.1787609027965111,
"grad_norm": 9.487050734369223,
"learning_rate": 1.8864122386989423e-06,
"loss": 0.8365,
"step": 497
},
{
"epoch": 0.17912058268141354,
"grad_norm": 10.327204356972482,
"learning_rate": 1.8858722331328577e-06,
"loss": 0.8412,
"step": 498
},
{
"epoch": 0.17948026256631597,
"grad_norm": 9.95664866490052,
"learning_rate": 1.8853310246626604e-06,
"loss": 0.8252,
"step": 499
},
{
"epoch": 0.17983994245121843,
"grad_norm": 12.777054919130249,
"learning_rate": 1.8847886140232436e-06,
"loss": 0.8191,
"step": 500
},
{
"epoch": 0.18019962233612086,
"grad_norm": 11.663994161516209,
"learning_rate": 1.8842450019511333e-06,
"loss": 0.8332,
"step": 501
},
{
"epoch": 0.1805593022210233,
"grad_norm": 13.433640005929696,
"learning_rate": 1.8837001891844872e-06,
"loss": 0.8071,
"step": 502
},
{
"epoch": 0.18091898210592572,
"grad_norm": 10.554856785551745,
"learning_rate": 1.8831541764630935e-06,
"loss": 0.8987,
"step": 503
},
{
"epoch": 0.18127866199082815,
"grad_norm": 9.383086892636745,
"learning_rate": 1.8826069645283686e-06,
"loss": 0.8411,
"step": 504
},
{
"epoch": 0.1816383418757306,
"grad_norm": 10.170715585820707,
"learning_rate": 1.8820585541233589e-06,
"loss": 0.8284,
"step": 505
},
{
"epoch": 0.18199802176063304,
"grad_norm": 15.744352966889148,
"learning_rate": 1.8815089459927369e-06,
"loss": 0.8144,
"step": 506
},
{
"epoch": 0.18235770164553547,
"grad_norm": 9.770670238104348,
"learning_rate": 1.8809581408828024e-06,
"loss": 0.8206,
"step": 507
},
{
"epoch": 0.1827173815304379,
"grad_norm": 11.333576038323153,
"learning_rate": 1.8804061395414793e-06,
"loss": 0.8452,
"step": 508
},
{
"epoch": 0.18307706141534036,
"grad_norm": 12.453020924416958,
"learning_rate": 1.8798529427183176e-06,
"loss": 0.8419,
"step": 509
},
{
"epoch": 0.1834367413002428,
"grad_norm": 10.426367414034607,
"learning_rate": 1.8792985511644894e-06,
"loss": 0.8769,
"step": 510
},
{
"epoch": 0.18379642118514522,
"grad_norm": 16.90275020796146,
"learning_rate": 1.878742965632789e-06,
"loss": 0.9086,
"step": 511
},
{
"epoch": 0.18415610107004765,
"grad_norm": 8.978038437485434,
"learning_rate": 1.8781861868776326e-06,
"loss": 0.8797,
"step": 512
},
{
"epoch": 0.18451578095495008,
"grad_norm": 126.89374658111018,
"learning_rate": 1.877628215655056e-06,
"loss": 0.8737,
"step": 513
},
{
"epoch": 0.18487546083985254,
"grad_norm": 6.941430734744093,
"learning_rate": 1.8770690527227154e-06,
"loss": 0.8342,
"step": 514
},
{
"epoch": 0.18523514072475497,
"grad_norm": 10.359513217199083,
"learning_rate": 1.8765086988398835e-06,
"loss": 0.8977,
"step": 515
},
{
"epoch": 0.1855948206096574,
"grad_norm": 25.151243920382832,
"learning_rate": 1.8759471547674517e-06,
"loss": 0.8332,
"step": 516
},
{
"epoch": 0.18595450049455983,
"grad_norm": 18.948640562206315,
"learning_rate": 1.8753844212679267e-06,
"loss": 0.867,
"step": 517
},
{
"epoch": 0.1863141803794623,
"grad_norm": 9.948782803153867,
"learning_rate": 1.8748204991054302e-06,
"loss": 0.8165,
"step": 518
},
{
"epoch": 0.18667386026436472,
"grad_norm": 13.71406044526494,
"learning_rate": 1.8742553890456985e-06,
"loss": 0.864,
"step": 519
},
{
"epoch": 0.18703354014926715,
"grad_norm": 66.03135486053179,
"learning_rate": 1.8736890918560806e-06,
"loss": 0.8248,
"step": 520
},
{
"epoch": 0.18739322003416958,
"grad_norm": 7.404838929316901,
"learning_rate": 1.8731216083055372e-06,
"loss": 0.7933,
"step": 521
},
{
"epoch": 0.187752899919072,
"grad_norm": 8.790376905298293,
"learning_rate": 1.8725529391646411e-06,
"loss": 0.8204,
"step": 522
},
{
"epoch": 0.18811257980397447,
"grad_norm": 11.767370844635321,
"learning_rate": 1.8719830852055734e-06,
"loss": 0.7924,
"step": 523
},
{
"epoch": 0.1884722596888769,
"grad_norm": 16.86102096800946,
"learning_rate": 1.8714120472021249e-06,
"loss": 0.8037,
"step": 524
},
{
"epoch": 0.18883193957377933,
"grad_norm": 11.200751122260533,
"learning_rate": 1.870839825929694e-06,
"loss": 0.867,
"step": 525
},
{
"epoch": 0.18919161945868176,
"grad_norm": 12.847431421238989,
"learning_rate": 1.8702664221652863e-06,
"loss": 0.7669,
"step": 526
},
{
"epoch": 0.18955129934358422,
"grad_norm": 8.507177391831126,
"learning_rate": 1.869691836687512e-06,
"loss": 0.827,
"step": 527
},
{
"epoch": 0.18991097922848665,
"grad_norm": 13.681998332161132,
"learning_rate": 1.8691160702765875e-06,
"loss": 0.8366,
"step": 528
},
{
"epoch": 0.19027065911338908,
"grad_norm": 8.016047400192488,
"learning_rate": 1.868539123714331e-06,
"loss": 0.8178,
"step": 529
},
{
"epoch": 0.19063033899829152,
"grad_norm": 11.598905703734358,
"learning_rate": 1.8679609977841643e-06,
"loss": 0.8801,
"step": 530
},
{
"epoch": 0.19099001888319395,
"grad_norm": 8.344476621453797,
"learning_rate": 1.8673816932711106e-06,
"loss": 0.8611,
"step": 531
},
{
"epoch": 0.1913496987680964,
"grad_norm": 220.24124707635522,
"learning_rate": 1.866801210961793e-06,
"loss": 0.7756,
"step": 532
},
{
"epoch": 0.19170937865299884,
"grad_norm": 7.7309606873755525,
"learning_rate": 1.8662195516444344e-06,
"loss": 0.8871,
"step": 533
},
{
"epoch": 0.19206905853790127,
"grad_norm": 12.87691886716015,
"learning_rate": 1.8656367161088556e-06,
"loss": 0.7906,
"step": 534
},
{
"epoch": 0.1924287384228037,
"grad_norm": 15.505546334154891,
"learning_rate": 1.8650527051464742e-06,
"loss": 0.8578,
"step": 535
},
{
"epoch": 0.19278841830770616,
"grad_norm": 13.605855564448348,
"learning_rate": 1.8644675195503047e-06,
"loss": 0.8955,
"step": 536
},
{
"epoch": 0.19314809819260859,
"grad_norm": 9.750630774470594,
"learning_rate": 1.8638811601149565e-06,
"loss": 0.9195,
"step": 537
},
{
"epoch": 0.19350777807751102,
"grad_norm": 8.441215495649715,
"learning_rate": 1.863293627636632e-06,
"loss": 0.8546,
"step": 538
},
{
"epoch": 0.19386745796241345,
"grad_norm": 19.253028018882947,
"learning_rate": 1.8627049229131276e-06,
"loss": 0.8292,
"step": 539
},
{
"epoch": 0.19422713784731588,
"grad_norm": 23.313553166247857,
"learning_rate": 1.8621150467438307e-06,
"loss": 0.803,
"step": 540
},
{
"epoch": 0.19458681773221834,
"grad_norm": 24.909947074852294,
"learning_rate": 1.8615239999297199e-06,
"loss": 0.8334,
"step": 541
},
{
"epoch": 0.19494649761712077,
"grad_norm": 14.208748631565902,
"learning_rate": 1.8609317832733628e-06,
"loss": 0.849,
"step": 542
},
{
"epoch": 0.1953061775020232,
"grad_norm": 72.2808028401263,
"learning_rate": 1.8603383975789164e-06,
"loss": 0.9316,
"step": 543
},
{
"epoch": 0.19566585738692563,
"grad_norm": 8.863515350100679,
"learning_rate": 1.8597438436521238e-06,
"loss": 0.8319,
"step": 544
},
{
"epoch": 0.19602553727182806,
"grad_norm": 11.343673945919084,
"learning_rate": 1.8591481223003155e-06,
"loss": 0.8225,
"step": 545
},
{
"epoch": 0.19638521715673052,
"grad_norm": 12.638901386572595,
"learning_rate": 1.858551234332407e-06,
"loss": 0.9001,
"step": 546
},
{
"epoch": 0.19674489704163295,
"grad_norm": 8.83864644270351,
"learning_rate": 1.8579531805588978e-06,
"loss": 0.8782,
"step": 547
},
{
"epoch": 0.19710457692653538,
"grad_norm": 17.835291838539035,
"learning_rate": 1.8573539617918699e-06,
"loss": 0.7971,
"step": 548
},
{
"epoch": 0.1974642568114378,
"grad_norm": 13.19953651388432,
"learning_rate": 1.8567535788449881e-06,
"loss": 0.829,
"step": 549
},
{
"epoch": 0.19782393669634027,
"grad_norm": 8.124384602473048,
"learning_rate": 1.8561520325334977e-06,
"loss": 0.8405,
"step": 550
},
{
"epoch": 0.1981836165812427,
"grad_norm": 8.187675496849428,
"learning_rate": 1.8555493236742238e-06,
"loss": 0.8524,
"step": 551
},
{
"epoch": 0.19854329646614513,
"grad_norm": 56.26064098328229,
"learning_rate": 1.8549454530855694e-06,
"loss": 0.8115,
"step": 552
},
{
"epoch": 0.19890297635104756,
"grad_norm": 9.973278810508788,
"learning_rate": 1.854340421587516e-06,
"loss": 0.8622,
"step": 553
},
{
"epoch": 0.19926265623595,
"grad_norm": 15.73714198878386,
"learning_rate": 1.8537342300016206e-06,
"loss": 0.8425,
"step": 554
},
{
"epoch": 0.19962233612085245,
"grad_norm": 20.407221284007207,
"learning_rate": 1.8531268791510163e-06,
"loss": 0.9294,
"step": 555
},
{
"epoch": 0.19998201600575488,
"grad_norm": 15.744069716766877,
"learning_rate": 1.8525183698604094e-06,
"loss": 0.8993,
"step": 556
},
{
"epoch": 0.2003416958906573,
"grad_norm": 42.07300620484659,
"learning_rate": 1.8519087029560798e-06,
"loss": 0.8133,
"step": 557
},
{
"epoch": 0.20070137577555974,
"grad_norm": 13.508080414174918,
"learning_rate": 1.8512978792658798e-06,
"loss": 0.7981,
"step": 558
},
{
"epoch": 0.2010610556604622,
"grad_norm": 15.37269219328954,
"learning_rate": 1.850685899619231e-06,
"loss": 0.8778,
"step": 559
},
{
"epoch": 0.20142073554536463,
"grad_norm": 17.113977534513946,
"learning_rate": 1.8500727648471257e-06,
"loss": 0.8015,
"step": 560
},
{
"epoch": 0.20178041543026706,
"grad_norm": 17.954420979530887,
"learning_rate": 1.849458475782125e-06,
"loss": 0.8618,
"step": 561
},
{
"epoch": 0.2021400953151695,
"grad_norm": 12.661761182791771,
"learning_rate": 1.8488430332583564e-06,
"loss": 0.8386,
"step": 562
},
{
"epoch": 0.20249977520007192,
"grad_norm": 16.593297444328744,
"learning_rate": 1.8482264381115146e-06,
"loss": 0.8486,
"step": 563
},
{
"epoch": 0.20285945508497438,
"grad_norm": 166.1578676776432,
"learning_rate": 1.8476086911788584e-06,
"loss": 0.8574,
"step": 564
},
{
"epoch": 0.2032191349698768,
"grad_norm": 9.232219788176112,
"learning_rate": 1.8469897932992118e-06,
"loss": 0.8386,
"step": 565
},
{
"epoch": 0.20357881485477924,
"grad_norm": 803.6760447649217,
"learning_rate": 1.8463697453129607e-06,
"loss": 0.8087,
"step": 566
},
{
"epoch": 0.20393849473968167,
"grad_norm": 8.897839753882822,
"learning_rate": 1.8457485480620529e-06,
"loss": 0.8682,
"step": 567
},
{
"epoch": 0.20429817462458413,
"grad_norm": 9.607245818294034,
"learning_rate": 1.8451262023899971e-06,
"loss": 0.7801,
"step": 568
},
{
"epoch": 0.20465785450948656,
"grad_norm": 16.666904794818137,
"learning_rate": 1.844502709141861e-06,
"loss": 0.8333,
"step": 569
},
{
"epoch": 0.205017534394389,
"grad_norm": 49.6053151118419,
"learning_rate": 1.843878069164271e-06,
"loss": 0.827,
"step": 570
},
{
"epoch": 0.20537721427929143,
"grad_norm": 21.17528972453623,
"learning_rate": 1.84325228330541e-06,
"loss": 0.8328,
"step": 571
},
{
"epoch": 0.20573689416419386,
"grad_norm": 113.77732313185646,
"learning_rate": 1.8426253524150176e-06,
"loss": 0.8754,
"step": 572
},
{
"epoch": 0.20609657404909631,
"grad_norm": 14.576975005767219,
"learning_rate": 1.8419972773443876e-06,
"loss": 0.8254,
"step": 573
},
{
"epoch": 0.20645625393399875,
"grad_norm": 11.251350372431052,
"learning_rate": 1.8413680589463673e-06,
"loss": 0.8429,
"step": 574
},
{
"epoch": 0.20681593381890118,
"grad_norm": 10.082724721956959,
"learning_rate": 1.8407376980753577e-06,
"loss": 0.8393,
"step": 575
},
{
"epoch": 0.2071756137038036,
"grad_norm": 26.295582863141938,
"learning_rate": 1.8401061955873099e-06,
"loss": 0.8415,
"step": 576
},
{
"epoch": 0.20753529358870604,
"grad_norm": 13.31851233479887,
"learning_rate": 1.8394735523397256e-06,
"loss": 0.8582,
"step": 577
},
{
"epoch": 0.2078949734736085,
"grad_norm": 19.282797406828767,
"learning_rate": 1.8388397691916552e-06,
"loss": 0.8357,
"step": 578
},
{
"epoch": 0.20825465335851093,
"grad_norm": 11.977505691018543,
"learning_rate": 1.8382048470036979e-06,
"loss": 0.8085,
"step": 579
},
{
"epoch": 0.20861433324341336,
"grad_norm": 13.059344673081393,
"learning_rate": 1.8375687866379988e-06,
"loss": 0.8757,
"step": 580
},
{
"epoch": 0.2089740131283158,
"grad_norm": 18.053152775353162,
"learning_rate": 1.8369315889582481e-06,
"loss": 0.772,
"step": 581
},
{
"epoch": 0.20933369301321825,
"grad_norm": 9.52888367295666,
"learning_rate": 1.8362932548296813e-06,
"loss": 0.8515,
"step": 582
},
{
"epoch": 0.20969337289812068,
"grad_norm": 23.13394593879355,
"learning_rate": 1.8356537851190761e-06,
"loss": 0.8452,
"step": 583
},
{
"epoch": 0.2100530527830231,
"grad_norm": 10.933333046608094,
"learning_rate": 1.8350131806947533e-06,
"loss": 0.8864,
"step": 584
},
{
"epoch": 0.21041273266792554,
"grad_norm": 7.363294631767518,
"learning_rate": 1.834371442426574e-06,
"loss": 0.7625,
"step": 585
},
{
"epoch": 0.21077241255282797,
"grad_norm": 25.090864142853984,
"learning_rate": 1.833728571185938e-06,
"loss": 0.8253,
"step": 586
},
{
"epoch": 0.21113209243773043,
"grad_norm": 12.565351258808075,
"learning_rate": 1.833084567845785e-06,
"loss": 0.7504,
"step": 587
},
{
"epoch": 0.21149177232263286,
"grad_norm": 16.15110531469703,
"learning_rate": 1.8324394332805911e-06,
"loss": 0.8094,
"step": 588
},
{
"epoch": 0.2118514522075353,
"grad_norm": 12.752498938310502,
"learning_rate": 1.8317931683663686e-06,
"loss": 0.7901,
"step": 589
},
{
"epoch": 0.21221113209243772,
"grad_norm": 8.229226597208003,
"learning_rate": 1.8311457739806645e-06,
"loss": 0.7494,
"step": 590
},
{
"epoch": 0.21257081197734018,
"grad_norm": 9.919964958921899,
"learning_rate": 1.8304972510025605e-06,
"loss": 0.8116,
"step": 591
},
{
"epoch": 0.2129304918622426,
"grad_norm": 9.539232699936269,
"learning_rate": 1.8298476003126692e-06,
"loss": 0.835,
"step": 592
},
{
"epoch": 0.21329017174714504,
"grad_norm": 10.808501663044773,
"learning_rate": 1.8291968227931357e-06,
"loss": 0.8389,
"step": 593
},
{
"epoch": 0.21364985163204747,
"grad_norm": 12.414953510669138,
"learning_rate": 1.8285449193276348e-06,
"loss": 0.8508,
"step": 594
},
{
"epoch": 0.2140095315169499,
"grad_norm": 25.29400974823337,
"learning_rate": 1.8278918908013695e-06,
"loss": 0.8451,
"step": 595
},
{
"epoch": 0.21436921140185236,
"grad_norm": 22.802866918366973,
"learning_rate": 1.8272377381010724e-06,
"loss": 0.8105,
"step": 596
},
{
"epoch": 0.2147288912867548,
"grad_norm": 13.833252675358032,
"learning_rate": 1.8265824621150003e-06,
"loss": 0.8345,
"step": 597
},
{
"epoch": 0.21508857117165722,
"grad_norm": 11.973137632215282,
"learning_rate": 1.8259260637329367e-06,
"loss": 0.8041,
"step": 598
},
{
"epoch": 0.21544825105655965,
"grad_norm": 9.071656710108867,
"learning_rate": 1.825268543846189e-06,
"loss": 0.8871,
"step": 599
},
{
"epoch": 0.2158079309414621,
"grad_norm": 38.103980714930515,
"learning_rate": 1.824609903347587e-06,
"loss": 0.8215,
"step": 600
},
{
"epoch": 0.21616761082636454,
"grad_norm": 24.149403872754025,
"learning_rate": 1.8239501431314825e-06,
"loss": 0.8871,
"step": 601
},
{
"epoch": 0.21652729071126697,
"grad_norm": 34.62534097027228,
"learning_rate": 1.8232892640937481e-06,
"loss": 0.8904,
"step": 602
},
{
"epoch": 0.2168869705961694,
"grad_norm": 8.736596796911465,
"learning_rate": 1.8226272671317744e-06,
"loss": 0.788,
"step": 603
},
{
"epoch": 0.21724665048107183,
"grad_norm": 15.23005069606993,
"learning_rate": 1.8219641531444712e-06,
"loss": 0.7981,
"step": 604
},
{
"epoch": 0.2176063303659743,
"grad_norm": 10.319790765410714,
"learning_rate": 1.8212999230322648e-06,
"loss": 0.8544,
"step": 605
},
{
"epoch": 0.21796601025087672,
"grad_norm": 15.292884480203284,
"learning_rate": 1.8206345776970968e-06,
"loss": 0.8734,
"step": 606
},
{
"epoch": 0.21832569013577915,
"grad_norm": 41.4760877649855,
"learning_rate": 1.8199681180424231e-06,
"loss": 0.8189,
"step": 607
},
{
"epoch": 0.21868537002068159,
"grad_norm": 57.998102387960245,
"learning_rate": 1.8193005449732133e-06,
"loss": 0.7815,
"step": 608
},
{
"epoch": 0.21904504990558404,
"grad_norm": 14.687290384292297,
"learning_rate": 1.818631859395948e-06,
"loss": 0.8302,
"step": 609
},
{
"epoch": 0.21940472979048647,
"grad_norm": 11.204183156820001,
"learning_rate": 1.817962062218619e-06,
"loss": 0.8424,
"step": 610
},
{
"epoch": 0.2197644096753889,
"grad_norm": 19.133163995291312,
"learning_rate": 1.8172911543507274e-06,
"loss": 0.7586,
"step": 611
},
{
"epoch": 0.22012408956029134,
"grad_norm": 10.37788675490883,
"learning_rate": 1.8166191367032826e-06,
"loss": 0.8972,
"step": 612
},
{
"epoch": 0.22048376944519377,
"grad_norm": 16.856991399854,
"learning_rate": 1.8159460101888012e-06,
"loss": 0.9049,
"step": 613
},
{
"epoch": 0.22084344933009623,
"grad_norm": 9.580492105108986,
"learning_rate": 1.815271775721304e-06,
"loss": 0.8372,
"step": 614
},
{
"epoch": 0.22120312921499866,
"grad_norm": 10.686207643013471,
"learning_rate": 1.8145964342163186e-06,
"loss": 0.8238,
"step": 615
},
{
"epoch": 0.2215628090999011,
"grad_norm": 77.3835210722144,
"learning_rate": 1.813919986590874e-06,
"loss": 0.7959,
"step": 616
},
{
"epoch": 0.22192248898480352,
"grad_norm": 15.988387697616838,
"learning_rate": 1.813242433763502e-06,
"loss": 0.8358,
"step": 617
},
{
"epoch": 0.22228216886970595,
"grad_norm": 10.826804108361445,
"learning_rate": 1.812563776654235e-06,
"loss": 0.8695,
"step": 618
},
{
"epoch": 0.2226418487546084,
"grad_norm": 27.447521435342345,
"learning_rate": 1.8118840161846047e-06,
"loss": 0.8576,
"step": 619
},
{
"epoch": 0.22300152863951084,
"grad_norm": 11.387854391226709,
"learning_rate": 1.811203153277641e-06,
"loss": 0.8884,
"step": 620
},
{
"epoch": 0.22336120852441327,
"grad_norm": 22.688520547371507,
"learning_rate": 1.8105211888578706e-06,
"loss": 0.8684,
"step": 621
},
{
"epoch": 0.2237208884093157,
"grad_norm": 19.7224020082186,
"learning_rate": 1.809838123851317e-06,
"loss": 0.8326,
"step": 622
},
{
"epoch": 0.22408056829421816,
"grad_norm": 10.103024522841077,
"learning_rate": 1.8091539591854968e-06,
"loss": 0.814,
"step": 623
},
{
"epoch": 0.2244402481791206,
"grad_norm": 12.91559714015716,
"learning_rate": 1.8084686957894205e-06,
"loss": 0.853,
"step": 624
},
{
"epoch": 0.22479992806402302,
"grad_norm": 8.383315946503124,
"learning_rate": 1.8077823345935903e-06,
"loss": 0.8612,
"step": 625
},
{
"epoch": 0.22515960794892545,
"grad_norm": 15.542093256359411,
"learning_rate": 1.8070948765299994e-06,
"loss": 0.8629,
"step": 626
},
{
"epoch": 0.22551928783382788,
"grad_norm": 19.90685107530346,
"learning_rate": 1.8064063225321303e-06,
"loss": 0.766,
"step": 627
},
{
"epoch": 0.22587896771873034,
"grad_norm": 9.54510925955737,
"learning_rate": 1.805716673534953e-06,
"loss": 0.8849,
"step": 628
},
{
"epoch": 0.22623864760363277,
"grad_norm": 14.31665729605147,
"learning_rate": 1.8050259304749251e-06,
"loss": 0.8654,
"step": 629
},
{
"epoch": 0.2265983274885352,
"grad_norm": 16.528827262123798,
"learning_rate": 1.8043340942899902e-06,
"loss": 0.7675,
"step": 630
},
{
"epoch": 0.22695800737343763,
"grad_norm": 10.048294889909558,
"learning_rate": 1.8036411659195749e-06,
"loss": 0.8595,
"step": 631
},
{
"epoch": 0.2273176872583401,
"grad_norm": 28.43706659648747,
"learning_rate": 1.80294714630459e-06,
"loss": 0.8223,
"step": 632
},
{
"epoch": 0.22767736714324252,
"grad_norm": 71.59101307653549,
"learning_rate": 1.8022520363874273e-06,
"loss": 0.8443,
"step": 633
},
{
"epoch": 0.22803704702814495,
"grad_norm": 10.0449247754985,
"learning_rate": 1.8015558371119602e-06,
"loss": 0.7915,
"step": 634
},
{
"epoch": 0.22839672691304738,
"grad_norm": 13.741192708054545,
"learning_rate": 1.8008585494235396e-06,
"loss": 0.834,
"step": 635
},
{
"epoch": 0.2287564067979498,
"grad_norm": 9.262015952475151,
"learning_rate": 1.8001601742689957e-06,
"loss": 0.8289,
"step": 636
},
{
"epoch": 0.22911608668285227,
"grad_norm": 10.601838370846096,
"learning_rate": 1.7994607125966353e-06,
"loss": 0.7977,
"step": 637
},
{
"epoch": 0.2294757665677547,
"grad_norm": 15.376559229012283,
"learning_rate": 1.7987601653562399e-06,
"loss": 0.797,
"step": 638
},
{
"epoch": 0.22983544645265713,
"grad_norm": 30.38370495537623,
"learning_rate": 1.798058533499065e-06,
"loss": 0.9089,
"step": 639
},
{
"epoch": 0.23019512633755956,
"grad_norm": 9.21023757987484,
"learning_rate": 1.79735581797784e-06,
"loss": 0.8337,
"step": 640
},
{
"epoch": 0.23055480622246202,
"grad_norm": 13.636729599543848,
"learning_rate": 1.7966520197467646e-06,
"loss": 0.8558,
"step": 641
},
{
"epoch": 0.23091448610736445,
"grad_norm": 9.349057503980244,
"learning_rate": 1.795947139761509e-06,
"loss": 0.7809,
"step": 642
},
{
"epoch": 0.23127416599226688,
"grad_norm": 11.595908969999318,
"learning_rate": 1.7952411789792123e-06,
"loss": 0.8439,
"step": 643
},
{
"epoch": 0.23163384587716931,
"grad_norm": 10.612729019075717,
"learning_rate": 1.7945341383584816e-06,
"loss": 0.8131,
"step": 644
},
{
"epoch": 0.23199352576207175,
"grad_norm": 24.365221574146226,
"learning_rate": 1.7938260188593901e-06,
"loss": 0.8217,
"step": 645
},
{
"epoch": 0.2323532056469742,
"grad_norm": 9.952610882034666,
"learning_rate": 1.7931168214434753e-06,
"loss": 0.9276,
"step": 646
},
{
"epoch": 0.23271288553187663,
"grad_norm": 11.51841503044773,
"learning_rate": 1.7924065470737396e-06,
"loss": 0.8442,
"step": 647
},
{
"epoch": 0.23307256541677907,
"grad_norm": 18.199580289712692,
"learning_rate": 1.7916951967146464e-06,
"loss": 0.817,
"step": 648
},
{
"epoch": 0.2334322453016815,
"grad_norm": 21.40225469340493,
"learning_rate": 1.7909827713321214e-06,
"loss": 0.8596,
"step": 649
},
{
"epoch": 0.23379192518658393,
"grad_norm": 9.322355475761332,
"learning_rate": 1.7902692718935493e-06,
"loss": 0.7432,
"step": 650
},
{
"epoch": 0.23415160507148639,
"grad_norm": 42.78840005902247,
"learning_rate": 1.7895546993677733e-06,
"loss": 0.8617,
"step": 651
},
{
"epoch": 0.23451128495638882,
"grad_norm": 12.912124674274,
"learning_rate": 1.788839054725094e-06,
"loss": 0.8521,
"step": 652
},
{
"epoch": 0.23487096484129125,
"grad_norm": 26.057742118486228,
"learning_rate": 1.7881223389372676e-06,
"loss": 0.7674,
"step": 653
},
{
"epoch": 0.23523064472619368,
"grad_norm": 9.457050216997986,
"learning_rate": 1.787404552977505e-06,
"loss": 0.868,
"step": 654
},
{
"epoch": 0.23559032461109614,
"grad_norm": 48.60675393536446,
"learning_rate": 1.7866856978204697e-06,
"loss": 0.8065,
"step": 655
},
{
"epoch": 0.23595000449599857,
"grad_norm": 8.940770729593302,
"learning_rate": 1.785965774442278e-06,
"loss": 0.8632,
"step": 656
},
{
"epoch": 0.236309684380901,
"grad_norm": 30.24633174828573,
"learning_rate": 1.7852447838204956e-06,
"loss": 0.7572,
"step": 657
},
{
"epoch": 0.23666936426580343,
"grad_norm": 28.318681389494447,
"learning_rate": 1.7845227269341383e-06,
"loss": 0.8503,
"step": 658
},
{
"epoch": 0.23702904415070586,
"grad_norm": 27.320400067616706,
"learning_rate": 1.7837996047636695e-06,
"loss": 0.8268,
"step": 659
},
{
"epoch": 0.23738872403560832,
"grad_norm": 24.962246438076164,
"learning_rate": 1.7830754182909985e-06,
"loss": 0.9086,
"step": 660
},
{
"epoch": 0.23774840392051075,
"grad_norm": 9.45182507885013,
"learning_rate": 1.7823501684994804e-06,
"loss": 0.8342,
"step": 661
},
{
"epoch": 0.23810808380541318,
"grad_norm": 9.158151559844153,
"learning_rate": 1.7816238563739144e-06,
"loss": 0.8722,
"step": 662
},
{
"epoch": 0.2384677636903156,
"grad_norm": 8.873008087691057,
"learning_rate": 1.7808964829005414e-06,
"loss": 0.897,
"step": 663
},
{
"epoch": 0.23882744357521807,
"grad_norm": 16.74472931542851,
"learning_rate": 1.7801680490670447e-06,
"loss": 0.905,
"step": 664
},
{
"epoch": 0.2391871234601205,
"grad_norm": 15.274376064023013,
"learning_rate": 1.779438555862546e-06,
"loss": 0.9388,
"step": 665
},
{
"epoch": 0.23954680334502293,
"grad_norm": 8.126377106569372,
"learning_rate": 1.7787080042776062e-06,
"loss": 0.803,
"step": 666
},
{
"epoch": 0.23990648322992536,
"grad_norm": 19.720922442725232,
"learning_rate": 1.7779763953042237e-06,
"loss": 0.8114,
"step": 667
},
{
"epoch": 0.2402661631148278,
"grad_norm": 6.70288102967279,
"learning_rate": 1.777243729935832e-06,
"loss": 0.8596,
"step": 668
},
{
"epoch": 0.24062584299973025,
"grad_norm": 12.445261188563348,
"learning_rate": 1.7765100091672999e-06,
"loss": 0.8305,
"step": 669
},
{
"epoch": 0.24098552288463268,
"grad_norm": 14.470509216945313,
"learning_rate": 1.7757752339949281e-06,
"loss": 0.8205,
"step": 670
},
{
"epoch": 0.2413452027695351,
"grad_norm": 75.73337553871086,
"learning_rate": 1.77503940541645e-06,
"loss": 0.8631,
"step": 671
},
{
"epoch": 0.24170488265443754,
"grad_norm": 9.689768060150353,
"learning_rate": 1.7743025244310292e-06,
"loss": 0.7986,
"step": 672
},
{
"epoch": 0.24206456253934,
"grad_norm": 31.777854668635033,
"learning_rate": 1.7735645920392584e-06,
"loss": 0.8363,
"step": 673
},
{
"epoch": 0.24242424242424243,
"grad_norm": 17.7545677230147,
"learning_rate": 1.7728256092431574e-06,
"loss": 0.8747,
"step": 674
},
{
"epoch": 0.24278392230914486,
"grad_norm": 13.33007316226056,
"learning_rate": 1.772085577046173e-06,
"loss": 0.9042,
"step": 675
},
{
"epoch": 0.2431436021940473,
"grad_norm": 10.22093374125928,
"learning_rate": 1.771344496453177e-06,
"loss": 0.8308,
"step": 676
},
{
"epoch": 0.24350328207894972,
"grad_norm": 41.301615171234594,
"learning_rate": 1.770602368470464e-06,
"loss": 0.8183,
"step": 677
},
{
"epoch": 0.24386296196385218,
"grad_norm": 10.371878412263237,
"learning_rate": 1.7698591941057516e-06,
"loss": 0.8899,
"step": 678
},
{
"epoch": 0.2442226418487546,
"grad_norm": 39.69708829055009,
"learning_rate": 1.7691149743681782e-06,
"loss": 0.8367,
"step": 679
},
{
"epoch": 0.24458232173365704,
"grad_norm": 25.888723986232677,
"learning_rate": 1.768369710268301e-06,
"loss": 0.8365,
"step": 680
},
{
"epoch": 0.24494200161855947,
"grad_norm": 18.206774975675085,
"learning_rate": 1.767623402818096e-06,
"loss": 0.8885,
"step": 681
},
{
"epoch": 0.24530168150346193,
"grad_norm": 8.935330841122331,
"learning_rate": 1.766876053030956e-06,
"loss": 0.903,
"step": 682
},
{
"epoch": 0.24566136138836436,
"grad_norm": 33.95519093624401,
"learning_rate": 1.7661276619216885e-06,
"loss": 0.7565,
"step": 683
},
{
"epoch": 0.2460210412732668,
"grad_norm": 11.177340777055564,
"learning_rate": 1.7653782305065156e-06,
"loss": 0.85,
"step": 684
},
{
"epoch": 0.24638072115816922,
"grad_norm": 12.859137623323068,
"learning_rate": 1.7646277598030715e-06,
"loss": 0.7715,
"step": 685
},
{
"epoch": 0.24674040104307166,
"grad_norm": 7.650967682616375,
"learning_rate": 1.7638762508304023e-06,
"loss": 0.7208,
"step": 686
},
{
"epoch": 0.24710008092797411,
"grad_norm": 10.767439185965463,
"learning_rate": 1.7631237046089632e-06,
"loss": 0.8317,
"step": 687
},
{
"epoch": 0.24745976081287654,
"grad_norm": 11.672712327778948,
"learning_rate": 1.7623701221606187e-06,
"loss": 0.7783,
"step": 688
},
{
"epoch": 0.24781944069777898,
"grad_norm": 16.700574370186114,
"learning_rate": 1.7616155045086392e-06,
"loss": 0.8823,
"step": 689
},
{
"epoch": 0.2481791205826814,
"grad_norm": 10.467456780761605,
"learning_rate": 1.7608598526777017e-06,
"loss": 0.8705,
"step": 690
},
{
"epoch": 0.24853880046758384,
"grad_norm": 39.549181218877564,
"learning_rate": 1.7601031676938875e-06,
"loss": 0.8143,
"step": 691
},
{
"epoch": 0.2488984803524863,
"grad_norm": 12.8898510426591,
"learning_rate": 1.7593454505846803e-06,
"loss": 0.7451,
"step": 692
},
{
"epoch": 0.24925816023738873,
"grad_norm": 21.893261282880133,
"learning_rate": 1.7585867023789655e-06,
"loss": 0.8776,
"step": 693
},
{
"epoch": 0.24961784012229116,
"grad_norm": 16.328323744615236,
"learning_rate": 1.7578269241070287e-06,
"loss": 0.8591,
"step": 694
},
{
"epoch": 0.2499775200071936,
"grad_norm": 21.12995839406831,
"learning_rate": 1.7570661168005541e-06,
"loss": 0.8083,
"step": 695
},
{
"epoch": 0.250337199892096,
"grad_norm": 15.584874342335457,
"learning_rate": 1.7563042814926233e-06,
"loss": 0.8413,
"step": 696
},
{
"epoch": 0.25069687977699845,
"grad_norm": 298.07947482680925,
"learning_rate": 1.7555414192177137e-06,
"loss": 0.8321,
"step": 697
},
{
"epoch": 0.25105655966190094,
"grad_norm": 16.30630259438712,
"learning_rate": 1.754777531011697e-06,
"loss": 0.8303,
"step": 698
},
{
"epoch": 0.25141623954680337,
"grad_norm": 15.99104046759504,
"learning_rate": 1.7540126179118384e-06,
"loss": 0.8423,
"step": 699
},
{
"epoch": 0.2517759194317058,
"grad_norm": 7.132549295527994,
"learning_rate": 1.7532466809567948e-06,
"loss": 0.8063,
"step": 700
},
{
"epoch": 0.2521355993166082,
"grad_norm": 12.305338193314348,
"learning_rate": 1.7524797211866126e-06,
"loss": 0.8488,
"step": 701
},
{
"epoch": 0.25249527920151066,
"grad_norm": 8.474168280891881,
"learning_rate": 1.751711739642728e-06,
"loss": 0.7771,
"step": 702
},
{
"epoch": 0.2528549590864131,
"grad_norm": 8.74400685739328,
"learning_rate": 1.7509427373679642e-06,
"loss": 0.8312,
"step": 703
},
{
"epoch": 0.2532146389713155,
"grad_norm": 18.048435642397564,
"learning_rate": 1.7501727154065303e-06,
"loss": 0.799,
"step": 704
},
{
"epoch": 0.25357431885621795,
"grad_norm": 7.860827866052275,
"learning_rate": 1.7494016748040203e-06,
"loss": 0.8036,
"step": 705
},
{
"epoch": 0.2539339987411204,
"grad_norm": 9.074872447593906,
"learning_rate": 1.7486296166074115e-06,
"loss": 0.8129,
"step": 706
},
{
"epoch": 0.2542936786260228,
"grad_norm": 8.108719393583089,
"learning_rate": 1.747856541865062e-06,
"loss": 0.9203,
"step": 707
},
{
"epoch": 0.2546533585109253,
"grad_norm": 25.11094384341223,
"learning_rate": 1.7470824516267122e-06,
"loss": 0.8529,
"step": 708
},
{
"epoch": 0.25501303839582773,
"grad_norm": 15.553324645409731,
"learning_rate": 1.746307346943479e-06,
"loss": 0.8235,
"step": 709
},
{
"epoch": 0.25537271828073016,
"grad_norm": 16.569294273110824,
"learning_rate": 1.7455312288678586e-06,
"loss": 0.8668,
"step": 710
},
{
"epoch": 0.2557323981656326,
"grad_norm": 21.109256363359002,
"learning_rate": 1.7447540984537222e-06,
"loss": 0.8766,
"step": 711
},
{
"epoch": 0.256092078050535,
"grad_norm": 10.89201806284621,
"learning_rate": 1.7439759567563167e-06,
"loss": 0.8322,
"step": 712
},
{
"epoch": 0.25645175793543745,
"grad_norm": 10.637309913168227,
"learning_rate": 1.7431968048322615e-06,
"loss": 0.8087,
"step": 713
},
{
"epoch": 0.2568114378203399,
"grad_norm": 51.93925558999249,
"learning_rate": 1.742416643739547e-06,
"loss": 0.8422,
"step": 714
},
{
"epoch": 0.2571711177052423,
"grad_norm": 15.458855078161623,
"learning_rate": 1.7416354745375355e-06,
"loss": 0.8765,
"step": 715
},
{
"epoch": 0.25753079759014474,
"grad_norm": 9.67024721977002,
"learning_rate": 1.7408532982869573e-06,
"loss": 0.7901,
"step": 716
},
{
"epoch": 0.25789047747504723,
"grad_norm": 19.090486922876156,
"learning_rate": 1.7400701160499102e-06,
"loss": 0.6879,
"step": 717
},
{
"epoch": 0.25825015735994966,
"grad_norm": 18.300311821518722,
"learning_rate": 1.7392859288898585e-06,
"loss": 0.8627,
"step": 718
},
{
"epoch": 0.2586098372448521,
"grad_norm": 36.93267430999449,
"learning_rate": 1.73850073787163e-06,
"loss": 0.8248,
"step": 719
},
{
"epoch": 0.2589695171297545,
"grad_norm": 15.184990783948566,
"learning_rate": 1.7377145440614162e-06,
"loss": 0.9503,
"step": 720
},
{
"epoch": 0.25932919701465695,
"grad_norm": 9.000596816529308,
"learning_rate": 1.7369273485267712e-06,
"loss": 0.7975,
"step": 721
},
{
"epoch": 0.2596888768995594,
"grad_norm": 9.340713980030525,
"learning_rate": 1.7361391523366079e-06,
"loss": 0.8181,
"step": 722
},
{
"epoch": 0.2600485567844618,
"grad_norm": 10.548205661898189,
"learning_rate": 1.7353499565611984e-06,
"loss": 0.818,
"step": 723
},
{
"epoch": 0.26040823666936425,
"grad_norm": 13.929969767317461,
"learning_rate": 1.7345597622721727e-06,
"loss": 0.8653,
"step": 724
},
{
"epoch": 0.2607679165542667,
"grad_norm": 9.336168977450368,
"learning_rate": 1.7337685705425156e-06,
"loss": 0.7825,
"step": 725
},
{
"epoch": 0.26112759643916916,
"grad_norm": 10.31428096377927,
"learning_rate": 1.7329763824465673e-06,
"loss": 0.8273,
"step": 726
},
{
"epoch": 0.2614872763240716,
"grad_norm": 34.68010377643249,
"learning_rate": 1.7321831990600204e-06,
"loss": 0.8133,
"step": 727
},
{
"epoch": 0.261846956208974,
"grad_norm": 18.793582469750675,
"learning_rate": 1.7313890214599191e-06,
"loss": 0.8366,
"step": 728
},
{
"epoch": 0.26220663609387646,
"grad_norm": 13.557344626433261,
"learning_rate": 1.7305938507246576e-06,
"loss": 0.8497,
"step": 729
},
{
"epoch": 0.2625663159787789,
"grad_norm": 12.896197123309461,
"learning_rate": 1.7297976879339787e-06,
"loss": 0.8175,
"step": 730
},
{
"epoch": 0.2629259958636813,
"grad_norm": 8.93948575659342,
"learning_rate": 1.7290005341689722e-06,
"loss": 0.725,
"step": 731
},
{
"epoch": 0.26328567574858375,
"grad_norm": 10.865900403804043,
"learning_rate": 1.728202390512074e-06,
"loss": 0.8266,
"step": 732
},
{
"epoch": 0.2636453556334862,
"grad_norm": 12.367235565753614,
"learning_rate": 1.727403258047063e-06,
"loss": 0.7802,
"step": 733
},
{
"epoch": 0.2640050355183886,
"grad_norm": 13.355752507775879,
"learning_rate": 1.7266031378590623e-06,
"loss": 0.8084,
"step": 734
},
{
"epoch": 0.2643647154032911,
"grad_norm": 15.835476175252543,
"learning_rate": 1.7258020310345347e-06,
"loss": 0.8823,
"step": 735
},
{
"epoch": 0.2647243952881935,
"grad_norm": 15.452252506761473,
"learning_rate": 1.7249999386612841e-06,
"loss": 0.8159,
"step": 736
},
{
"epoch": 0.26508407517309596,
"grad_norm": 16.121510751539834,
"learning_rate": 1.7241968618284517e-06,
"loss": 0.9353,
"step": 737
},
{
"epoch": 0.2654437550579984,
"grad_norm": 9.91924725908932,
"learning_rate": 1.7233928016265157e-06,
"loss": 0.8034,
"step": 738
},
{
"epoch": 0.2658034349429008,
"grad_norm": 30.814627689517852,
"learning_rate": 1.7225877591472897e-06,
"loss": 0.899,
"step": 739
},
{
"epoch": 0.26616311482780325,
"grad_norm": 11.583573012041843,
"learning_rate": 1.721781735483921e-06,
"loss": 0.8618,
"step": 740
},
{
"epoch": 0.2665227947127057,
"grad_norm": 22.63758120139141,
"learning_rate": 1.7209747317308895e-06,
"loss": 0.87,
"step": 741
},
{
"epoch": 0.2668824745976081,
"grad_norm": 10.887909712467465,
"learning_rate": 1.7201667489840057e-06,
"loss": 0.794,
"step": 742
},
{
"epoch": 0.26724215448251054,
"grad_norm": 13.833672774372257,
"learning_rate": 1.7193577883404096e-06,
"loss": 0.8163,
"step": 743
},
{
"epoch": 0.267601834367413,
"grad_norm": 10.67679191166491,
"learning_rate": 1.7185478508985686e-06,
"loss": 0.8903,
"step": 744
},
{
"epoch": 0.26796151425231546,
"grad_norm": 19.764225276756495,
"learning_rate": 1.7177369377582774e-06,
"loss": 0.8141,
"step": 745
},
{
"epoch": 0.2683211941372179,
"grad_norm": 7.453247870573237,
"learning_rate": 1.7169250500206543e-06,
"loss": 0.7496,
"step": 746
},
{
"epoch": 0.2686808740221203,
"grad_norm": 11.858782522727958,
"learning_rate": 1.7161121887881423e-06,
"loss": 0.8056,
"step": 747
},
{
"epoch": 0.26904055390702275,
"grad_norm": 562.7981120857124,
"learning_rate": 1.715298355164505e-06,
"loss": 0.7878,
"step": 748
},
{
"epoch": 0.2694002337919252,
"grad_norm": 29.610471689590998,
"learning_rate": 1.7144835502548278e-06,
"loss": 0.817,
"step": 749
},
{
"epoch": 0.2697599136768276,
"grad_norm": 9.888278314422525,
"learning_rate": 1.713667775165514e-06,
"loss": 0.8582,
"step": 750
},
{
"epoch": 0.27011959356173004,
"grad_norm": 18.158214855130225,
"learning_rate": 1.7128510310042842e-06,
"loss": 0.7803,
"step": 751
},
{
"epoch": 0.2704792734466325,
"grad_norm": 13.538079639632755,
"learning_rate": 1.7120333188801755e-06,
"loss": 0.8486,
"step": 752
},
{
"epoch": 0.27083895333153496,
"grad_norm": 11.011178771660498,
"learning_rate": 1.711214639903539e-06,
"loss": 0.8576,
"step": 753
},
{
"epoch": 0.2711986332164374,
"grad_norm": 19.191293483671636,
"learning_rate": 1.7103949951860388e-06,
"loss": 0.8468,
"step": 754
},
{
"epoch": 0.2715583131013398,
"grad_norm": 31.547073310234587,
"learning_rate": 1.7095743858406504e-06,
"loss": 0.8026,
"step": 755
},
{
"epoch": 0.27191799298624225,
"grad_norm": 12.222187320095932,
"learning_rate": 1.7087528129816589e-06,
"loss": 0.7992,
"step": 756
},
{
"epoch": 0.2722776728711447,
"grad_norm": 10.245665096008041,
"learning_rate": 1.7079302777246577e-06,
"loss": 0.7533,
"step": 757
},
{
"epoch": 0.2726373527560471,
"grad_norm": 12.281298617797551,
"learning_rate": 1.7071067811865474e-06,
"loss": 0.8466,
"step": 758
},
{
"epoch": 0.27299703264094954,
"grad_norm": 31.92308596034205,
"learning_rate": 1.7062823244855338e-06,
"loss": 0.8583,
"step": 759
},
{
"epoch": 0.273356712525852,
"grad_norm": 19.03299009694274,
"learning_rate": 1.705456908741126e-06,
"loss": 0.8752,
"step": 760
},
{
"epoch": 0.2737163924107544,
"grad_norm": 11.873500571277848,
"learning_rate": 1.7046305350741364e-06,
"loss": 0.7947,
"step": 761
},
{
"epoch": 0.2740760722956569,
"grad_norm": 8.234454258923366,
"learning_rate": 1.7038032046066766e-06,
"loss": 0.8159,
"step": 762
},
{
"epoch": 0.2744357521805593,
"grad_norm": 14.261554962936753,
"learning_rate": 1.7029749184621589e-06,
"loss": 0.8358,
"step": 763
},
{
"epoch": 0.27479543206546175,
"grad_norm": 8.924596703742441,
"learning_rate": 1.7021456777652925e-06,
"loss": 0.8722,
"step": 764
},
{
"epoch": 0.2751551119503642,
"grad_norm": 16.004147890619333,
"learning_rate": 1.7013154836420828e-06,
"loss": 0.8113,
"step": 765
},
{
"epoch": 0.2755147918352666,
"grad_norm": 25.725102108427837,
"learning_rate": 1.7004843372198306e-06,
"loss": 0.8038,
"step": 766
},
{
"epoch": 0.27587447172016905,
"grad_norm": 20.592545691085085,
"learning_rate": 1.6996522396271282e-06,
"loss": 0.8962,
"step": 767
},
{
"epoch": 0.2762341516050715,
"grad_norm": 14.580869058386387,
"learning_rate": 1.6988191919938614e-06,
"loss": 0.8223,
"step": 768
},
{
"epoch": 0.2765938314899739,
"grad_norm": 13.042675904203861,
"learning_rate": 1.6979851954512046e-06,
"loss": 0.7806,
"step": 769
},
{
"epoch": 0.27695351137487634,
"grad_norm": 20.874681915407717,
"learning_rate": 1.697150251131621e-06,
"loss": 0.8159,
"step": 770
},
{
"epoch": 0.2773131912597788,
"grad_norm": 18.01054025980178,
"learning_rate": 1.6963143601688613e-06,
"loss": 0.7994,
"step": 771
},
{
"epoch": 0.27767287114468125,
"grad_norm": 9.86441283566478,
"learning_rate": 1.6954775236979613e-06,
"loss": 0.7713,
"step": 772
},
{
"epoch": 0.2780325510295837,
"grad_norm": 9.603505030837669,
"learning_rate": 1.6946397428552403e-06,
"loss": 0.8715,
"step": 773
},
{
"epoch": 0.2783922309144861,
"grad_norm": 20.907997863761388,
"learning_rate": 1.6938010187783008e-06,
"loss": 0.8625,
"step": 774
},
{
"epoch": 0.27875191079938855,
"grad_norm": 19.27890273228633,
"learning_rate": 1.692961352606025e-06,
"loss": 0.8799,
"step": 775
},
{
"epoch": 0.279111590684291,
"grad_norm": 10.928377651485217,
"learning_rate": 1.6921207454785754e-06,
"loss": 0.7825,
"step": 776
},
{
"epoch": 0.2794712705691934,
"grad_norm": 10.287586539812986,
"learning_rate": 1.6912791985373915e-06,
"loss": 0.7821,
"step": 777
},
{
"epoch": 0.27983095045409584,
"grad_norm": 26.2017756984771,
"learning_rate": 1.6904367129251894e-06,
"loss": 0.8283,
"step": 778
},
{
"epoch": 0.28019063033899827,
"grad_norm": 29.916481658741684,
"learning_rate": 1.6895932897859595e-06,
"loss": 0.8102,
"step": 779
},
{
"epoch": 0.2805503102239007,
"grad_norm": 29.738609268126158,
"learning_rate": 1.6887489302649653e-06,
"loss": 0.8027,
"step": 780
},
{
"epoch": 0.2809099901088032,
"grad_norm": 14.32653397494833,
"learning_rate": 1.6879036355087419e-06,
"loss": 0.8453,
"step": 781
},
{
"epoch": 0.2812696699937056,
"grad_norm": 13.982071385865439,
"learning_rate": 1.6870574066650943e-06,
"loss": 0.8141,
"step": 782
},
{
"epoch": 0.28162934987860805,
"grad_norm": 13.895810588892795,
"learning_rate": 1.6862102448830953e-06,
"loss": 0.8058,
"step": 783
},
{
"epoch": 0.2819890297635105,
"grad_norm": 23.242023805795334,
"learning_rate": 1.6853621513130856e-06,
"loss": 0.8829,
"step": 784
},
{
"epoch": 0.2823487096484129,
"grad_norm": 22.034185469781356,
"learning_rate": 1.6845131271066705e-06,
"loss": 0.8221,
"step": 785
},
{
"epoch": 0.28270838953331534,
"grad_norm": 13.888880004280962,
"learning_rate": 1.683663173416719e-06,
"loss": 0.8347,
"step": 786
},
{
"epoch": 0.28306806941821777,
"grad_norm": 15.136351064460394,
"learning_rate": 1.6828122913973624e-06,
"loss": 0.7982,
"step": 787
},
{
"epoch": 0.2834277493031202,
"grad_norm": 17.739420272026308,
"learning_rate": 1.6819604822039924e-06,
"loss": 0.7921,
"step": 788
},
{
"epoch": 0.28378742918802263,
"grad_norm": 12.20138141629904,
"learning_rate": 1.6811077469932599e-06,
"loss": 0.8411,
"step": 789
},
{
"epoch": 0.2841471090729251,
"grad_norm": 67.77069162527476,
"learning_rate": 1.6802540869230727e-06,
"loss": 0.9363,
"step": 790
},
{
"epoch": 0.28450678895782755,
"grad_norm": 11.058784409530341,
"learning_rate": 1.679399503152595e-06,
"loss": 0.7848,
"step": 791
},
{
"epoch": 0.28486646884273,
"grad_norm": 37.726997131440754,
"learning_rate": 1.6785439968422456e-06,
"loss": 0.8963,
"step": 792
},
{
"epoch": 0.2852261487276324,
"grad_norm": 13.837868128904297,
"learning_rate": 1.6776875691536945e-06,
"loss": 0.8425,
"step": 793
},
{
"epoch": 0.28558582861253484,
"grad_norm": 10.65287112444893,
"learning_rate": 1.6768302212498644e-06,
"loss": 0.846,
"step": 794
},
{
"epoch": 0.2859455084974373,
"grad_norm": 9.740983854721788,
"learning_rate": 1.6759719542949267e-06,
"loss": 0.8351,
"step": 795
},
{
"epoch": 0.2863051883823397,
"grad_norm": 15.975826234129812,
"learning_rate": 1.675112769454301e-06,
"loss": 0.8553,
"step": 796
},
{
"epoch": 0.28666486826724213,
"grad_norm": 24.87491128009603,
"learning_rate": 1.6742526678946537e-06,
"loss": 0.8334,
"step": 797
},
{
"epoch": 0.28702454815214457,
"grad_norm": 11.849439163597227,
"learning_rate": 1.673391650783895e-06,
"loss": 0.8172,
"step": 798
},
{
"epoch": 0.28738422803704705,
"grad_norm": 34.28785479628006,
"learning_rate": 1.6725297192911792e-06,
"loss": 0.8297,
"step": 799
},
{
"epoch": 0.2877439079219495,
"grad_norm": 8.532384451679459,
"learning_rate": 1.6716668745869016e-06,
"loss": 0.7997,
"step": 800
},
{
"epoch": 0.2881035878068519,
"grad_norm": 9.696577956984706,
"learning_rate": 1.670803117842698e-06,
"loss": 0.7694,
"step": 801
},
{
"epoch": 0.28846326769175434,
"grad_norm": 10.684746551489631,
"learning_rate": 1.669938450231442e-06,
"loss": 0.8267,
"step": 802
},
{
"epoch": 0.2888229475766568,
"grad_norm": 36.494588193321704,
"learning_rate": 1.6690728729272454e-06,
"loss": 0.8525,
"step": 803
},
{
"epoch": 0.2891826274615592,
"grad_norm": 58.7376340621734,
"learning_rate": 1.6682063871054532e-06,
"loss": 0.8488,
"step": 804
},
{
"epoch": 0.28954230734646164,
"grad_norm": 12.134256747560729,
"learning_rate": 1.667338993942646e-06,
"loss": 0.7648,
"step": 805
},
{
"epoch": 0.28990198723136407,
"grad_norm": 8.948659088851144,
"learning_rate": 1.6664706946166356e-06,
"loss": 0.7751,
"step": 806
},
{
"epoch": 0.2902616671162665,
"grad_norm": 7.572355457329241,
"learning_rate": 1.6656014903064638e-06,
"loss": 0.7932,
"step": 807
},
{
"epoch": 0.290621347001169,
"grad_norm": 16.210024820799884,
"learning_rate": 1.664731382192402e-06,
"loss": 0.7886,
"step": 808
},
{
"epoch": 0.2909810268860714,
"grad_norm": 40.53526437751432,
"learning_rate": 1.6638603714559487e-06,
"loss": 0.8116,
"step": 809
},
{
"epoch": 0.29134070677097385,
"grad_norm": 28.42826510371944,
"learning_rate": 1.662988459279828e-06,
"loss": 0.8047,
"step": 810
},
{
"epoch": 0.2917003866558763,
"grad_norm": 33.46427640754087,
"learning_rate": 1.6621156468479875e-06,
"loss": 0.8323,
"step": 811
},
{
"epoch": 0.2920600665407787,
"grad_norm": 33.90446508938218,
"learning_rate": 1.6612419353455986e-06,
"loss": 0.7955,
"step": 812
},
{
"epoch": 0.29241974642568114,
"grad_norm": 11.872234229630894,
"learning_rate": 1.660367325959052e-06,
"loss": 0.834,
"step": 813
},
{
"epoch": 0.29277942631058357,
"grad_norm": 9.539864324874296,
"learning_rate": 1.6594918198759585e-06,
"loss": 0.7772,
"step": 814
},
{
"epoch": 0.293139106195486,
"grad_norm": 26.351591824713594,
"learning_rate": 1.658615418285146e-06,
"loss": 0.8662,
"step": 815
},
{
"epoch": 0.29349878608038843,
"grad_norm": 15.695857483765826,
"learning_rate": 1.6577381223766589e-06,
"loss": 0.8019,
"step": 816
},
{
"epoch": 0.2938584659652909,
"grad_norm": 12.45686309056222,
"learning_rate": 1.6568599333417558e-06,
"loss": 0.8718,
"step": 817
},
{
"epoch": 0.29421814585019335,
"grad_norm": 16.1517356511705,
"learning_rate": 1.6559808523729078e-06,
"loss": 0.8688,
"step": 818
},
{
"epoch": 0.2945778257350958,
"grad_norm": 13.117121759539312,
"learning_rate": 1.6551008806637973e-06,
"loss": 0.8312,
"step": 819
},
{
"epoch": 0.2949375056199982,
"grad_norm": 15.3307311798555,
"learning_rate": 1.6542200194093167e-06,
"loss": 0.8336,
"step": 820
},
{
"epoch": 0.29529718550490064,
"grad_norm": 20.590254012592517,
"learning_rate": 1.653338269805565e-06,
"loss": 0.8925,
"step": 821
},
{
"epoch": 0.29565686538980307,
"grad_norm": 10.511054798493147,
"learning_rate": 1.6524556330498491e-06,
"loss": 0.7651,
"step": 822
},
{
"epoch": 0.2960165452747055,
"grad_norm": 9.442663727447568,
"learning_rate": 1.6515721103406794e-06,
"loss": 0.7767,
"step": 823
},
{
"epoch": 0.29637622515960793,
"grad_norm": 13.254710699947385,
"learning_rate": 1.6506877028777697e-06,
"loss": 0.8256,
"step": 824
},
{
"epoch": 0.29673590504451036,
"grad_norm": 14.322695950891546,
"learning_rate": 1.6498024118620348e-06,
"loss": 0.8412,
"step": 825
},
{
"epoch": 0.29709558492941285,
"grad_norm": 12.753944063299416,
"learning_rate": 1.6489162384955903e-06,
"loss": 0.8549,
"step": 826
},
{
"epoch": 0.2974552648143153,
"grad_norm": 15.385293947218466,
"learning_rate": 1.6480291839817487e-06,
"loss": 0.8415,
"step": 827
},
{
"epoch": 0.2978149446992177,
"grad_norm": 15.553950849317074,
"learning_rate": 1.6471412495250195e-06,
"loss": 0.7959,
"step": 828
},
{
"epoch": 0.29817462458412014,
"grad_norm": 59.15493918077238,
"learning_rate": 1.646252436331107e-06,
"loss": 0.8605,
"step": 829
},
{
"epoch": 0.29853430446902257,
"grad_norm": 15.926913399975538,
"learning_rate": 1.6453627456069093e-06,
"loss": 0.8099,
"step": 830
},
{
"epoch": 0.298893984353925,
"grad_norm": 10.391601367933777,
"learning_rate": 1.6444721785605147e-06,
"loss": 0.7969,
"step": 831
},
{
"epoch": 0.29925366423882743,
"grad_norm": 19.686532965429244,
"learning_rate": 1.6435807364012033e-06,
"loss": 0.8759,
"step": 832
},
{
"epoch": 0.29961334412372986,
"grad_norm": 59.95400710719536,
"learning_rate": 1.6426884203394416e-06,
"loss": 0.818,
"step": 833
},
{
"epoch": 0.2999730240086323,
"grad_norm": 8.213078614768975,
"learning_rate": 1.6417952315868842e-06,
"loss": 0.8344,
"step": 834
},
{
"epoch": 0.3003327038935348,
"grad_norm": 9.450209629692555,
"learning_rate": 1.6409011713563696e-06,
"loss": 0.7793,
"step": 835
},
{
"epoch": 0.3006923837784372,
"grad_norm": 25.223960723123003,
"learning_rate": 1.6400062408619206e-06,
"loss": 0.805,
"step": 836
},
{
"epoch": 0.30105206366333964,
"grad_norm": 11.274061295553878,
"learning_rate": 1.6391104413187414e-06,
"loss": 0.8034,
"step": 837
},
{
"epoch": 0.3014117435482421,
"grad_norm": 20.489780181155755,
"learning_rate": 1.638213773943216e-06,
"loss": 0.8294,
"step": 838
},
{
"epoch": 0.3017714234331445,
"grad_norm": 13.11232749838915,
"learning_rate": 1.6373162399529065e-06,
"loss": 0.8592,
"step": 839
},
{
"epoch": 0.30213110331804693,
"grad_norm": 34.94053578829066,
"learning_rate": 1.6364178405665533e-06,
"loss": 0.8019,
"step": 840
},
{
"epoch": 0.30249078320294936,
"grad_norm": 8.027001898160389,
"learning_rate": 1.6355185770040696e-06,
"loss": 0.7753,
"step": 841
},
{
"epoch": 0.3028504630878518,
"grad_norm": 11.5424881957026,
"learning_rate": 1.6346184504865442e-06,
"loss": 0.8159,
"step": 842
},
{
"epoch": 0.3032101429727542,
"grad_norm": 38.00803930831098,
"learning_rate": 1.6337174622362364e-06,
"loss": 0.7729,
"step": 843
},
{
"epoch": 0.3035698228576567,
"grad_norm": 11.388205546620558,
"learning_rate": 1.632815613476576e-06,
"loss": 0.842,
"step": 844
},
{
"epoch": 0.30392950274255914,
"grad_norm": 13.345383026363749,
"learning_rate": 1.6319129054321614e-06,
"loss": 0.82,
"step": 845
},
{
"epoch": 0.3042891826274616,
"grad_norm": 15.481172112555491,
"learning_rate": 1.6310093393287572e-06,
"loss": 0.7908,
"step": 846
},
{
"epoch": 0.304648862512364,
"grad_norm": 17.793364757904424,
"learning_rate": 1.6301049163932938e-06,
"loss": 0.7903,
"step": 847
},
{
"epoch": 0.30500854239726644,
"grad_norm": 9.398055804932289,
"learning_rate": 1.629199637853865e-06,
"loss": 0.8063,
"step": 848
},
{
"epoch": 0.30536822228216887,
"grad_norm": 14.577557639396854,
"learning_rate": 1.6282935049397266e-06,
"loss": 0.8735,
"step": 849
},
{
"epoch": 0.3057279021670713,
"grad_norm": 16.84760719078329,
"learning_rate": 1.6273865188812934e-06,
"loss": 0.8729,
"step": 850
},
{
"epoch": 0.30608758205197373,
"grad_norm": 87.16748970514807,
"learning_rate": 1.6264786809101398e-06,
"loss": 0.8031,
"step": 851
},
{
"epoch": 0.30644726193687616,
"grad_norm": 7.324167866399497,
"learning_rate": 1.6255699922589968e-06,
"loss": 0.8374,
"step": 852
},
{
"epoch": 0.3068069418217786,
"grad_norm": 17.704387559171618,
"learning_rate": 1.6246604541617503e-06,
"loss": 0.8714,
"step": 853
},
{
"epoch": 0.3071666217066811,
"grad_norm": 10.960718810059277,
"learning_rate": 1.6237500678534395e-06,
"loss": 0.877,
"step": 854
},
{
"epoch": 0.3075263015915835,
"grad_norm": 13.267520621820575,
"learning_rate": 1.622838834570256e-06,
"loss": 0.7507,
"step": 855
},
{
"epoch": 0.30788598147648594,
"grad_norm": 10.101131660097286,
"learning_rate": 1.6219267555495404e-06,
"loss": 0.8485,
"step": 856
},
{
"epoch": 0.30824566136138837,
"grad_norm": 28.91401873395612,
"learning_rate": 1.6210138320297832e-06,
"loss": 0.8341,
"step": 857
},
{
"epoch": 0.3086053412462908,
"grad_norm": 9.10947963373033,
"learning_rate": 1.62010006525062e-06,
"loss": 0.7946,
"step": 858
},
{
"epoch": 0.30896502113119323,
"grad_norm": 29.423941116569043,
"learning_rate": 1.619185456452833e-06,
"loss": 0.8638,
"step": 859
},
{
"epoch": 0.30932470101609566,
"grad_norm": 70.7487803817577,
"learning_rate": 1.6182700068783461e-06,
"loss": 0.78,
"step": 860
},
{
"epoch": 0.3096843809009981,
"grad_norm": 11.862012856538149,
"learning_rate": 1.6173537177702264e-06,
"loss": 0.8702,
"step": 861
},
{
"epoch": 0.3100440607859005,
"grad_norm": 42.081334101558895,
"learning_rate": 1.6164365903726802e-06,
"loss": 0.8349,
"step": 862
},
{
"epoch": 0.310403740670803,
"grad_norm": 20.841721919283344,
"learning_rate": 1.615518625931052e-06,
"loss": 0.8459,
"step": 863
},
{
"epoch": 0.31076342055570544,
"grad_norm": 18.631068109566076,
"learning_rate": 1.6145998256918235e-06,
"loss": 0.8353,
"step": 864
},
{
"epoch": 0.31112310044060787,
"grad_norm": 13.858882449564776,
"learning_rate": 1.613680190902611e-06,
"loss": 0.7288,
"step": 865
},
{
"epoch": 0.3114827803255103,
"grad_norm": 10.066126035348045,
"learning_rate": 1.6127597228121634e-06,
"loss": 0.8649,
"step": 866
},
{
"epoch": 0.31184246021041273,
"grad_norm": 28.67784112319774,
"learning_rate": 1.611838422670362e-06,
"loss": 0.8838,
"step": 867
},
{
"epoch": 0.31220214009531516,
"grad_norm": 8.108908627388885,
"learning_rate": 1.610916291728218e-06,
"loss": 0.8829,
"step": 868
},
{
"epoch": 0.3125618199802176,
"grad_norm": 87.9045905781619,
"learning_rate": 1.6099933312378692e-06,
"loss": 0.7908,
"step": 869
},
{
"epoch": 0.31292149986512,
"grad_norm": 8.440832703255046,
"learning_rate": 1.6090695424525824e-06,
"loss": 0.8192,
"step": 870
},
{
"epoch": 0.31328117975002245,
"grad_norm": 14.108324533497566,
"learning_rate": 1.6081449266267466e-06,
"loss": 0.8439,
"step": 871
},
{
"epoch": 0.31364085963492494,
"grad_norm": 21.058597406933576,
"learning_rate": 1.6072194850158754e-06,
"loss": 0.7903,
"step": 872
},
{
"epoch": 0.31400053951982737,
"grad_norm": 17.1153669670534,
"learning_rate": 1.606293218876603e-06,
"loss": 0.8127,
"step": 873
},
{
"epoch": 0.3143602194047298,
"grad_norm": 20.063520749174305,
"learning_rate": 1.6053661294666831e-06,
"loss": 0.7895,
"step": 874
},
{
"epoch": 0.31471989928963223,
"grad_norm": 12.09554048451555,
"learning_rate": 1.6044382180449882e-06,
"loss": 0.8388,
"step": 875
},
{
"epoch": 0.31507957917453466,
"grad_norm": 9.740248851394714,
"learning_rate": 1.6035094858715062e-06,
"loss": 0.8546,
"step": 876
},
{
"epoch": 0.3154392590594371,
"grad_norm": 11.899318966703719,
"learning_rate": 1.6025799342073394e-06,
"loss": 0.8245,
"step": 877
},
{
"epoch": 0.3157989389443395,
"grad_norm": 31.6291426460199,
"learning_rate": 1.6016495643147035e-06,
"loss": 0.856,
"step": 878
},
{
"epoch": 0.31615861882924196,
"grad_norm": 8.958829848892421,
"learning_rate": 1.6007183774569243e-06,
"loss": 0.8267,
"step": 879
},
{
"epoch": 0.3165182987141444,
"grad_norm": 8.648010929262487,
"learning_rate": 1.5997863748984384e-06,
"loss": 0.8109,
"step": 880
},
{
"epoch": 0.3168779785990469,
"grad_norm": 11.004894249365195,
"learning_rate": 1.5988535579047886e-06,
"loss": 0.8167,
"step": 881
},
{
"epoch": 0.3172376584839493,
"grad_norm": 50.09024201272788,
"learning_rate": 1.597919927742624e-06,
"loss": 0.8158,
"step": 882
},
{
"epoch": 0.31759733836885173,
"grad_norm": 8.893349558410735,
"learning_rate": 1.5969854856796987e-06,
"loss": 0.8407,
"step": 883
},
{
"epoch": 0.31795701825375416,
"grad_norm": 10.592646511680684,
"learning_rate": 1.596050232984868e-06,
"loss": 0.8009,
"step": 884
},
{
"epoch": 0.3183166981386566,
"grad_norm": 10.323815809361715,
"learning_rate": 1.5951141709280884e-06,
"loss": 0.8136,
"step": 885
},
{
"epoch": 0.318676378023559,
"grad_norm": 12.20503725223687,
"learning_rate": 1.5941773007804163e-06,
"loss": 0.7926,
"step": 886
},
{
"epoch": 0.31903605790846146,
"grad_norm": 18.903991133341332,
"learning_rate": 1.5932396238140039e-06,
"loss": 0.804,
"step": 887
},
{
"epoch": 0.3193957377933639,
"grad_norm": 16.745142488988655,
"learning_rate": 1.5923011413020996e-06,
"loss": 0.8534,
"step": 888
},
{
"epoch": 0.3197554176782663,
"grad_norm": 11.82810409668907,
"learning_rate": 1.5913618545190466e-06,
"loss": 0.8231,
"step": 889
},
{
"epoch": 0.3201150975631688,
"grad_norm": 12.8017171989301,
"learning_rate": 1.5904217647402784e-06,
"loss": 0.8213,
"step": 890
},
{
"epoch": 0.32047477744807124,
"grad_norm": 45.437697875363554,
"learning_rate": 1.5894808732423206e-06,
"loss": 0.9166,
"step": 891
},
{
"epoch": 0.32083445733297367,
"grad_norm": 18.312331492175126,
"learning_rate": 1.5885391813027857e-06,
"loss": 0.8344,
"step": 892
},
{
"epoch": 0.3211941372178761,
"grad_norm": 14.817855719365022,
"learning_rate": 1.587596690200375e-06,
"loss": 0.8171,
"step": 893
},
{
"epoch": 0.3215538171027785,
"grad_norm": 16.579974562069108,
"learning_rate": 1.5866534012148728e-06,
"loss": 0.8675,
"step": 894
},
{
"epoch": 0.32191349698768096,
"grad_norm": 19.738290812053823,
"learning_rate": 1.5857093156271493e-06,
"loss": 0.8253,
"step": 895
},
{
"epoch": 0.3222731768725834,
"grad_norm": 45.64460615929968,
"learning_rate": 1.5847644347191543e-06,
"loss": 0.7757,
"step": 896
},
{
"epoch": 0.3226328567574858,
"grad_norm": 24.035059526207903,
"learning_rate": 1.5838187597739185e-06,
"loss": 0.7758,
"step": 897
},
{
"epoch": 0.32299253664238825,
"grad_norm": 8.548187185744933,
"learning_rate": 1.5828722920755509e-06,
"loss": 0.752,
"step": 898
},
{
"epoch": 0.32335221652729074,
"grad_norm": 10.573658213625091,
"learning_rate": 1.581925032909236e-06,
"loss": 0.8239,
"step": 899
},
{
"epoch": 0.32371189641219317,
"grad_norm": 12.419125746778684,
"learning_rate": 1.5809769835612345e-06,
"loss": 0.821,
"step": 900
},
{
"epoch": 0.3240715762970956,
"grad_norm": 18.363616736374816,
"learning_rate": 1.5800281453188791e-06,
"loss": 0.8306,
"step": 901
},
{
"epoch": 0.32443125618199803,
"grad_norm": 8.317619792668513,
"learning_rate": 1.5790785194705736e-06,
"loss": 0.7871,
"step": 902
},
{
"epoch": 0.32479093606690046,
"grad_norm": 9.104270713842354,
"learning_rate": 1.5781281073057918e-06,
"loss": 0.7563,
"step": 903
},
{
"epoch": 0.3251506159518029,
"grad_norm": 8.197051622130894,
"learning_rate": 1.577176910115075e-06,
"loss": 0.8388,
"step": 904
},
{
"epoch": 0.3255102958367053,
"grad_norm": 17.408789825661064,
"learning_rate": 1.5762249291900303e-06,
"loss": 0.856,
"step": 905
},
{
"epoch": 0.32586997572160775,
"grad_norm": 8.64218462106854,
"learning_rate": 1.5752721658233293e-06,
"loss": 0.7881,
"step": 906
},
{
"epoch": 0.3262296556065102,
"grad_norm": 21.889925421685135,
"learning_rate": 1.574318621308706e-06,
"loss": 0.8433,
"step": 907
},
{
"epoch": 0.32658933549141267,
"grad_norm": 9.032394276112345,
"learning_rate": 1.573364296940955e-06,
"loss": 0.8011,
"step": 908
},
{
"epoch": 0.3269490153763151,
"grad_norm": 7.425594569946747,
"learning_rate": 1.5724091940159302e-06,
"loss": 0.8167,
"step": 909
},
{
"epoch": 0.32730869526121753,
"grad_norm": 9.657849211152683,
"learning_rate": 1.5714533138305417e-06,
"loss": 0.7789,
"step": 910
},
{
"epoch": 0.32766837514611996,
"grad_norm": 12.962398629844566,
"learning_rate": 1.570496657682756e-06,
"loss": 0.7405,
"step": 911
},
{
"epoch": 0.3280280550310224,
"grad_norm": 9.728680873151585,
"learning_rate": 1.5695392268715933e-06,
"loss": 0.8395,
"step": 912
},
{
"epoch": 0.3283877349159248,
"grad_norm": 17.36816506706582,
"learning_rate": 1.5685810226971245e-06,
"loss": 0.8606,
"step": 913
},
{
"epoch": 0.32874741480082725,
"grad_norm": 15.394141419267186,
"learning_rate": 1.5676220464604723e-06,
"loss": 0.8734,
"step": 914
},
{
"epoch": 0.3291070946857297,
"grad_norm": 20.301988586288964,
"learning_rate": 1.5666622994638068e-06,
"loss": 0.7856,
"step": 915
},
{
"epoch": 0.3294667745706321,
"grad_norm": 7.365140544006204,
"learning_rate": 1.5657017830103445e-06,
"loss": 0.7697,
"step": 916
},
{
"epoch": 0.3298264544555346,
"grad_norm": 14.840746023750308,
"learning_rate": 1.564740498404347e-06,
"loss": 0.852,
"step": 917
},
{
"epoch": 0.33018613434043703,
"grad_norm": 11.209068774761208,
"learning_rate": 1.5637784469511197e-06,
"loss": 0.8597,
"step": 918
},
{
"epoch": 0.33054581422533946,
"grad_norm": 12.600439575847025,
"learning_rate": 1.5628156299570078e-06,
"loss": 0.884,
"step": 919
},
{
"epoch": 0.3309054941102419,
"grad_norm": 11.004504418307047,
"learning_rate": 1.5618520487293978e-06,
"loss": 0.767,
"step": 920
},
{
"epoch": 0.3312651739951443,
"grad_norm": 16.47348784457448,
"learning_rate": 1.5608877045767117e-06,
"loss": 0.7997,
"step": 921
},
{
"epoch": 0.33162485388004675,
"grad_norm": 14.172981813767054,
"learning_rate": 1.5599225988084096e-06,
"loss": 0.8803,
"step": 922
},
{
"epoch": 0.3319845337649492,
"grad_norm": 9.312027744107203,
"learning_rate": 1.5589567327349844e-06,
"loss": 0.8002,
"step": 923
},
{
"epoch": 0.3323442136498516,
"grad_norm": 9.462458498320945,
"learning_rate": 1.5579901076679623e-06,
"loss": 0.8819,
"step": 924
},
{
"epoch": 0.33270389353475405,
"grad_norm": 30.483989133537733,
"learning_rate": 1.5570227249198993e-06,
"loss": 0.8576,
"step": 925
},
{
"epoch": 0.33306357341965653,
"grad_norm": 10.674129254191316,
"learning_rate": 1.556054585804381e-06,
"loss": 0.8554,
"step": 926
},
{
"epoch": 0.33342325330455896,
"grad_norm": 18.07952400429078,
"learning_rate": 1.5550856916360193e-06,
"loss": 0.7764,
"step": 927
},
{
"epoch": 0.3337829331894614,
"grad_norm": 35.58945059855189,
"learning_rate": 1.5541160437304521e-06,
"loss": 0.7932,
"step": 928
},
{
"epoch": 0.3341426130743638,
"grad_norm": 11.530320098353496,
"learning_rate": 1.5531456434043402e-06,
"loss": 0.8376,
"step": 929
},
{
"epoch": 0.33450229295926626,
"grad_norm": 14.940415551040594,
"learning_rate": 1.5521744919753665e-06,
"loss": 0.8139,
"step": 930
},
{
"epoch": 0.3348619728441687,
"grad_norm": 19.133265887417053,
"learning_rate": 1.5512025907622337e-06,
"loss": 0.7948,
"step": 931
},
{
"epoch": 0.3352216527290711,
"grad_norm": 20.304909409452367,
"learning_rate": 1.5502299410846625e-06,
"loss": 0.7893,
"step": 932
},
{
"epoch": 0.33558133261397355,
"grad_norm": 18.20029987081357,
"learning_rate": 1.5492565442633894e-06,
"loss": 0.7907,
"step": 933
},
{
"epoch": 0.335941012498876,
"grad_norm": 11.477770769822252,
"learning_rate": 1.5482824016201667e-06,
"loss": 0.7741,
"step": 934
},
{
"epoch": 0.3363006923837784,
"grad_norm": 19.009389322033904,
"learning_rate": 1.5473075144777585e-06,
"loss": 0.8435,
"step": 935
},
{
"epoch": 0.3366603722686809,
"grad_norm": 9.206957963342228,
"learning_rate": 1.5463318841599405e-06,
"loss": 0.7693,
"step": 936
},
{
"epoch": 0.3370200521535833,
"grad_norm": 24.27902007180823,
"learning_rate": 1.5453555119914963e-06,
"loss": 0.8786,
"step": 937
},
{
"epoch": 0.33737973203848576,
"grad_norm": 23.1158786632023,
"learning_rate": 1.544378399298218e-06,
"loss": 0.8064,
"step": 938
},
{
"epoch": 0.3377394119233882,
"grad_norm": 9.442153134819922,
"learning_rate": 1.5434005474069029e-06,
"loss": 0.8602,
"step": 939
},
{
"epoch": 0.3380990918082906,
"grad_norm": 24.385261977107035,
"learning_rate": 1.5424219576453523e-06,
"loss": 0.7806,
"step": 940
},
{
"epoch": 0.33845877169319305,
"grad_norm": 9.629777488734236,
"learning_rate": 1.541442631342369e-06,
"loss": 0.8477,
"step": 941
},
{
"epoch": 0.3388184515780955,
"grad_norm": 13.53376392358462,
"learning_rate": 1.5404625698277557e-06,
"loss": 0.8692,
"step": 942
},
{
"epoch": 0.3391781314629979,
"grad_norm": 42.45227574325178,
"learning_rate": 1.5394817744323146e-06,
"loss": 0.7987,
"step": 943
},
{
"epoch": 0.33953781134790034,
"grad_norm": 9.390799236590395,
"learning_rate": 1.5385002464878427e-06,
"loss": 0.7894,
"step": 944
},
{
"epoch": 0.33989749123280283,
"grad_norm": 11.917413039094523,
"learning_rate": 1.5375179873271333e-06,
"loss": 0.7952,
"step": 945
},
{
"epoch": 0.34025717111770526,
"grad_norm": 15.306425899284218,
"learning_rate": 1.536534998283972e-06,
"loss": 0.8162,
"step": 946
},
{
"epoch": 0.3406168510026077,
"grad_norm": 12.319391145056539,
"learning_rate": 1.5355512806931347e-06,
"loss": 0.7321,
"step": 947
},
{
"epoch": 0.3409765308875101,
"grad_norm": 15.21947795438788,
"learning_rate": 1.5345668358903883e-06,
"loss": 0.8332,
"step": 948
},
{
"epoch": 0.34133621077241255,
"grad_norm": 12.79023939223576,
"learning_rate": 1.5335816652124857e-06,
"loss": 0.7526,
"step": 949
},
{
"epoch": 0.341695890657315,
"grad_norm": 12.241239776171069,
"learning_rate": 1.5325957699971657e-06,
"loss": 0.8083,
"step": 950
},
{
"epoch": 0.3420555705422174,
"grad_norm": 15.981128158826372,
"learning_rate": 1.5316091515831518e-06,
"loss": 0.8536,
"step": 951
},
{
"epoch": 0.34241525042711984,
"grad_norm": 48.50595550604798,
"learning_rate": 1.530621811310148e-06,
"loss": 0.8867,
"step": 952
},
{
"epoch": 0.3427749303120223,
"grad_norm": 11.161134334105054,
"learning_rate": 1.52963375051884e-06,
"loss": 0.7749,
"step": 953
},
{
"epoch": 0.34313461019692476,
"grad_norm": 9.40407424062396,
"learning_rate": 1.5286449705508913e-06,
"loss": 0.8432,
"step": 954
},
{
"epoch": 0.3434942900818272,
"grad_norm": 11.85964243313317,
"learning_rate": 1.5276554727489415e-06,
"loss": 0.8039,
"step": 955
},
{
"epoch": 0.3438539699667296,
"grad_norm": 14.442397679824648,
"learning_rate": 1.5266652584566055e-06,
"loss": 0.7995,
"step": 956
},
{
"epoch": 0.34421364985163205,
"grad_norm": 7.721625200366785,
"learning_rate": 1.525674329018471e-06,
"loss": 0.8102,
"step": 957
},
{
"epoch": 0.3445733297365345,
"grad_norm": 11.322671671562892,
"learning_rate": 1.5246826857800968e-06,
"loss": 0.7819,
"step": 958
},
{
"epoch": 0.3449330096214369,
"grad_norm": 12.528623186014242,
"learning_rate": 1.5236903300880105e-06,
"loss": 0.8461,
"step": 959
},
{
"epoch": 0.34529268950633935,
"grad_norm": 23.142820551204743,
"learning_rate": 1.5226972632897077e-06,
"loss": 0.774,
"step": 960
},
{
"epoch": 0.3456523693912418,
"grad_norm": 11.677380786620736,
"learning_rate": 1.5217034867336497e-06,
"loss": 0.7439,
"step": 961
},
{
"epoch": 0.3460120492761442,
"grad_norm": 13.080329634154356,
"learning_rate": 1.5207090017692603e-06,
"loss": 0.9005,
"step": 962
},
{
"epoch": 0.3463717291610467,
"grad_norm": 8.76279501287922,
"learning_rate": 1.5197138097469273e-06,
"loss": 0.7786,
"step": 963
},
{
"epoch": 0.3467314090459491,
"grad_norm": 9.145020451349145,
"learning_rate": 1.5187179120179966e-06,
"loss": 0.826,
"step": 964
},
{
"epoch": 0.34709108893085155,
"grad_norm": 11.96564424412383,
"learning_rate": 1.517721309934774e-06,
"loss": 0.8053,
"step": 965
},
{
"epoch": 0.347450768815754,
"grad_norm": 22.996590327643915,
"learning_rate": 1.5167240048505198e-06,
"loss": 0.7496,
"step": 966
},
{
"epoch": 0.3478104487006564,
"grad_norm": 46.47732574352865,
"learning_rate": 1.5157259981194511e-06,
"loss": 0.8158,
"step": 967
},
{
"epoch": 0.34817012858555885,
"grad_norm": 9.077840131285068,
"learning_rate": 1.5147272910967365e-06,
"loss": 0.7397,
"step": 968
},
{
"epoch": 0.3485298084704613,
"grad_norm": 38.49321976187874,
"learning_rate": 1.5137278851384957e-06,
"loss": 0.8061,
"step": 969
},
{
"epoch": 0.3488894883553637,
"grad_norm": 14.699600184538111,
"learning_rate": 1.512727781601797e-06,
"loss": 0.8551,
"step": 970
},
{
"epoch": 0.34924916824026614,
"grad_norm": 50.54241823893945,
"learning_rate": 1.5117269818446568e-06,
"loss": 0.785,
"step": 971
},
{
"epoch": 0.3496088481251686,
"grad_norm": 12.219731366639781,
"learning_rate": 1.5107254872260365e-06,
"loss": 0.7976,
"step": 972
},
{
"epoch": 0.34996852801007106,
"grad_norm": 66.37116117852479,
"learning_rate": 1.5097232991058406e-06,
"loss": 0.8306,
"step": 973
},
{
"epoch": 0.3503282078949735,
"grad_norm": 14.744398293903853,
"learning_rate": 1.5087204188449162e-06,
"loss": 0.8264,
"step": 974
},
{
"epoch": 0.3506878877798759,
"grad_norm": 11.939324628211166,
"learning_rate": 1.5077168478050493e-06,
"loss": 0.861,
"step": 975
},
{
"epoch": 0.35104756766477835,
"grad_norm": 15.47147625235276,
"learning_rate": 1.5067125873489648e-06,
"loss": 0.8891,
"step": 976
},
{
"epoch": 0.3514072475496808,
"grad_norm": 21.26032650919122,
"learning_rate": 1.5057076388403228e-06,
"loss": 0.8627,
"step": 977
},
{
"epoch": 0.3517669274345832,
"grad_norm": 10.057782853454887,
"learning_rate": 1.5047020036437185e-06,
"loss": 0.8373,
"step": 978
},
{
"epoch": 0.35212660731948564,
"grad_norm": 49.39308264228791,
"learning_rate": 1.503695683124679e-06,
"loss": 0.8177,
"step": 979
},
{
"epoch": 0.35248628720438807,
"grad_norm": 506.43358996335076,
"learning_rate": 1.5026886786496622e-06,
"loss": 0.8482,
"step": 980
},
{
"epoch": 0.35284596708929056,
"grad_norm": 9.421474554469173,
"learning_rate": 1.5016809915860546e-06,
"loss": 0.8716,
"step": 981
},
{
"epoch": 0.353205646974193,
"grad_norm": 10.756179124311437,
"learning_rate": 1.50067262330217e-06,
"loss": 0.8771,
"step": 982
},
{
"epoch": 0.3535653268590954,
"grad_norm": 11.635019042438932,
"learning_rate": 1.4996635751672466e-06,
"loss": 0.8755,
"step": 983
},
{
"epoch": 0.35392500674399785,
"grad_norm": 12.993547435586098,
"learning_rate": 1.4986538485514464e-06,
"loss": 0.815,
"step": 984
},
{
"epoch": 0.3542846866289003,
"grad_norm": 12.48020482228494,
"learning_rate": 1.4976434448258517e-06,
"loss": 0.7832,
"step": 985
},
{
"epoch": 0.3546443665138027,
"grad_norm": 22.268540662140516,
"learning_rate": 1.4966323653624655e-06,
"loss": 0.8134,
"step": 986
},
{
"epoch": 0.35500404639870514,
"grad_norm": 16.195090971616366,
"learning_rate": 1.4956206115342074e-06,
"loss": 0.8019,
"step": 987
},
{
"epoch": 0.3553637262836076,
"grad_norm": 14.341331507258404,
"learning_rate": 1.4946081847149133e-06,
"loss": 0.7698,
"step": 988
},
{
"epoch": 0.35572340616851,
"grad_norm": 25.95684895405825,
"learning_rate": 1.4935950862793321e-06,
"loss": 0.8292,
"step": 989
},
{
"epoch": 0.3560830860534125,
"grad_norm": 11.501720854734899,
"learning_rate": 1.4925813176031258e-06,
"loss": 0.8614,
"step": 990
},
{
"epoch": 0.3564427659383149,
"grad_norm": 23.09362849823031,
"learning_rate": 1.4915668800628657e-06,
"loss": 0.8302,
"step": 991
},
{
"epoch": 0.35680244582321735,
"grad_norm": 16.445160172029425,
"learning_rate": 1.490551775036032e-06,
"loss": 0.8054,
"step": 992
},
{
"epoch": 0.3571621257081198,
"grad_norm": 71.19646553017826,
"learning_rate": 1.4895360039010098e-06,
"loss": 0.8277,
"step": 993
},
{
"epoch": 0.3575218055930222,
"grad_norm": 12.55336952743194,
"learning_rate": 1.4885195680370912e-06,
"loss": 0.839,
"step": 994
},
{
"epoch": 0.35788148547792464,
"grad_norm": 8.47142278878508,
"learning_rate": 1.4875024688244682e-06,
"loss": 0.7229,
"step": 995
},
{
"epoch": 0.3582411653628271,
"grad_norm": 11.800045379862597,
"learning_rate": 1.4864847076442355e-06,
"loss": 0.7626,
"step": 996
},
{
"epoch": 0.3586008452477295,
"grad_norm": 12.20058748151486,
"learning_rate": 1.4854662858783854e-06,
"loss": 0.8258,
"step": 997
},
{
"epoch": 0.35896052513263194,
"grad_norm": 8.480065883757563,
"learning_rate": 1.4844472049098085e-06,
"loss": 0.8154,
"step": 998
},
{
"epoch": 0.3593202050175344,
"grad_norm": 10.14693035519556,
"learning_rate": 1.4834274661222895e-06,
"loss": 0.7837,
"step": 999
},
{
"epoch": 0.35967988490243685,
"grad_norm": 17.540092433906466,
"learning_rate": 1.4824070709005061e-06,
"loss": 0.757,
"step": 1000
},
{
"epoch": 0.3600395647873393,
"grad_norm": 12.006178562565093,
"learning_rate": 1.4813860206300284e-06,
"loss": 0.8361,
"step": 1001
},
{
"epoch": 0.3603992446722417,
"grad_norm": 11.474505736413265,
"learning_rate": 1.4803643166973152e-06,
"loss": 0.751,
"step": 1002
},
{
"epoch": 0.36075892455714414,
"grad_norm": 12.295639473927698,
"learning_rate": 1.4793419604897137e-06,
"loss": 0.8236,
"step": 1003
},
{
"epoch": 0.3611186044420466,
"grad_norm": 10.585220227913453,
"learning_rate": 1.4783189533954553e-06,
"loss": 0.8428,
"step": 1004
},
{
"epoch": 0.361478284326949,
"grad_norm": 30.23962883155143,
"learning_rate": 1.477295296803657e-06,
"loss": 0.849,
"step": 1005
},
{
"epoch": 0.36183796421185144,
"grad_norm": 10.507032050043298,
"learning_rate": 1.4762709921043163e-06,
"loss": 0.7515,
"step": 1006
},
{
"epoch": 0.36219764409675387,
"grad_norm": 13.380543084981593,
"learning_rate": 1.4752460406883121e-06,
"loss": 0.8287,
"step": 1007
},
{
"epoch": 0.3625573239816563,
"grad_norm": 20.284545047328862,
"learning_rate": 1.4742204439473997e-06,
"loss": 0.872,
"step": 1008
},
{
"epoch": 0.3629170038665588,
"grad_norm": 11.297613284740551,
"learning_rate": 1.4731942032742125e-06,
"loss": 0.8191,
"step": 1009
},
{
"epoch": 0.3632766837514612,
"grad_norm": 14.76800834057125,
"learning_rate": 1.472167320062257e-06,
"loss": 0.8411,
"step": 1010
},
{
"epoch": 0.36363636363636365,
"grad_norm": 9.204006423305321,
"learning_rate": 1.471139795705913e-06,
"loss": 0.8407,
"step": 1011
},
{
"epoch": 0.3639960435212661,
"grad_norm": 8.349398346809124,
"learning_rate": 1.4701116316004306e-06,
"loss": 0.8025,
"step": 1012
},
{
"epoch": 0.3643557234061685,
"grad_norm": 43.767938880585604,
"learning_rate": 1.4690828291419281e-06,
"loss": 0.8662,
"step": 1013
},
{
"epoch": 0.36471540329107094,
"grad_norm": 19.22175395386423,
"learning_rate": 1.4680533897273912e-06,
"loss": 0.7194,
"step": 1014
},
{
"epoch": 0.36507508317597337,
"grad_norm": 19.349534981345588,
"learning_rate": 1.4670233147546707e-06,
"loss": 0.7998,
"step": 1015
},
{
"epoch": 0.3654347630608758,
"grad_norm": 10.127794747695477,
"learning_rate": 1.4659926056224796e-06,
"loss": 0.7606,
"step": 1016
},
{
"epoch": 0.36579444294577823,
"grad_norm": 8.774573308044332,
"learning_rate": 1.4649612637303928e-06,
"loss": 0.823,
"step": 1017
},
{
"epoch": 0.3661541228306807,
"grad_norm": 28.81593303028586,
"learning_rate": 1.4639292904788438e-06,
"loss": 0.7986,
"step": 1018
},
{
"epoch": 0.36651380271558315,
"grad_norm": 8.516115832154505,
"learning_rate": 1.462896687269124e-06,
"loss": 0.7682,
"step": 1019
},
{
"epoch": 0.3668734826004856,
"grad_norm": 11.481660856244018,
"learning_rate": 1.4618634555033799e-06,
"loss": 0.8325,
"step": 1020
},
{
"epoch": 0.367233162485388,
"grad_norm": 13.55050121060625,
"learning_rate": 1.460829596584611e-06,
"loss": 0.8463,
"step": 1021
},
{
"epoch": 0.36759284237029044,
"grad_norm": 31.753290279863542,
"learning_rate": 1.4597951119166694e-06,
"loss": 0.7871,
"step": 1022
},
{
"epoch": 0.36795252225519287,
"grad_norm": 8.851091556426617,
"learning_rate": 1.4587600029042562e-06,
"loss": 0.9002,
"step": 1023
},
{
"epoch": 0.3683122021400953,
"grad_norm": 17.178291585278597,
"learning_rate": 1.4577242709529207e-06,
"loss": 0.8519,
"step": 1024
},
{
"epoch": 0.36867188202499773,
"grad_norm": 8.280756385424112,
"learning_rate": 1.4566879174690575e-06,
"loss": 0.7981,
"step": 1025
},
{
"epoch": 0.36903156190990016,
"grad_norm": 9.944689050913146,
"learning_rate": 1.4556509438599056e-06,
"loss": 0.8026,
"step": 1026
},
{
"epoch": 0.36939124179480265,
"grad_norm": 8.561981316354894,
"learning_rate": 1.454613351533546e-06,
"loss": 0.8253,
"step": 1027
},
{
"epoch": 0.3697509216797051,
"grad_norm": 238.36052734951087,
"learning_rate": 1.4535751418988998e-06,
"loss": 0.8453,
"step": 1028
},
{
"epoch": 0.3701106015646075,
"grad_norm": 10.415708338046755,
"learning_rate": 1.4525363163657263e-06,
"loss": 0.8229,
"step": 1029
},
{
"epoch": 0.37047028144950994,
"grad_norm": 21.256646098290446,
"learning_rate": 1.4514968763446212e-06,
"loss": 0.8246,
"step": 1030
},
{
"epoch": 0.3708299613344124,
"grad_norm": 13.820460952610205,
"learning_rate": 1.4504568232470142e-06,
"loss": 0.8992,
"step": 1031
},
{
"epoch": 0.3711896412193148,
"grad_norm": 7.847945102008554,
"learning_rate": 1.4494161584851686e-06,
"loss": 0.7884,
"step": 1032
},
{
"epoch": 0.37154932110421723,
"grad_norm": 36.901480910997556,
"learning_rate": 1.4483748834721764e-06,
"loss": 0.8843,
"step": 1033
},
{
"epoch": 0.37190900098911966,
"grad_norm": 31.933756144343516,
"learning_rate": 1.4473329996219603e-06,
"loss": 0.8774,
"step": 1034
},
{
"epoch": 0.3722686808740221,
"grad_norm": 12.570281355916684,
"learning_rate": 1.4462905083492682e-06,
"loss": 0.7677,
"step": 1035
},
{
"epoch": 0.3726283607589246,
"grad_norm": 24.448011209673478,
"learning_rate": 1.4452474110696738e-06,
"loss": 0.9001,
"step": 1036
},
{
"epoch": 0.372988040643827,
"grad_norm": 16.64188594311135,
"learning_rate": 1.4442037091995725e-06,
"loss": 0.7804,
"step": 1037
},
{
"epoch": 0.37334772052872944,
"grad_norm": 10.787966777653638,
"learning_rate": 1.443159404156182e-06,
"loss": 0.7997,
"step": 1038
},
{
"epoch": 0.3737074004136319,
"grad_norm": 24.73374128276489,
"learning_rate": 1.4421144973575382e-06,
"loss": 0.8383,
"step": 1039
},
{
"epoch": 0.3740670802985343,
"grad_norm": 12.779768819403833,
"learning_rate": 1.4410689902224946e-06,
"loss": 0.8078,
"step": 1040
},
{
"epoch": 0.37442676018343674,
"grad_norm": 12.334032840610424,
"learning_rate": 1.4400228841707193e-06,
"loss": 0.8493,
"step": 1041
},
{
"epoch": 0.37478644006833917,
"grad_norm": 10.383999430125927,
"learning_rate": 1.438976180622694e-06,
"loss": 0.7998,
"step": 1042
},
{
"epoch": 0.3751461199532416,
"grad_norm": 8.54968246530765,
"learning_rate": 1.4379288809997119e-06,
"loss": 0.7873,
"step": 1043
},
{
"epoch": 0.375505799838144,
"grad_norm": 9.278192896177686,
"learning_rate": 1.4368809867238752e-06,
"loss": 0.7953,
"step": 1044
},
{
"epoch": 0.3758654797230465,
"grad_norm": 12.610003559394263,
"learning_rate": 1.435832499218094e-06,
"loss": 0.8028,
"step": 1045
},
{
"epoch": 0.37622515960794894,
"grad_norm": 10.644761875930058,
"learning_rate": 1.4347834199060833e-06,
"loss": 0.7547,
"step": 1046
},
{
"epoch": 0.3765848394928514,
"grad_norm": 9.931803934694264,
"learning_rate": 1.4337337502123626e-06,
"loss": 0.7901,
"step": 1047
},
{
"epoch": 0.3769445193777538,
"grad_norm": 12.302593898497262,
"learning_rate": 1.432683491562252e-06,
"loss": 0.8149,
"step": 1048
},
{
"epoch": 0.37730419926265624,
"grad_norm": 22.05992852868052,
"learning_rate": 1.4316326453818727e-06,
"loss": 0.8674,
"step": 1049
},
{
"epoch": 0.37766387914755867,
"grad_norm": 17.45343281461148,
"learning_rate": 1.4305812130981415e-06,
"loss": 0.7951,
"step": 1050
},
{
"epoch": 0.3780235590324611,
"grad_norm": 11.71295204378767,
"learning_rate": 1.4295291961387741e-06,
"loss": 0.7485,
"step": 1051
},
{
"epoch": 0.37838323891736353,
"grad_norm": 13.500946291081991,
"learning_rate": 1.4284765959322772e-06,
"loss": 0.8303,
"step": 1052
},
{
"epoch": 0.37874291880226596,
"grad_norm": 10.445069567715905,
"learning_rate": 1.4274234139079511e-06,
"loss": 0.7582,
"step": 1053
},
{
"epoch": 0.37910259868716845,
"grad_norm": 34.764581398060585,
"learning_rate": 1.4263696514958858e-06,
"loss": 0.7737,
"step": 1054
},
{
"epoch": 0.3794622785720709,
"grad_norm": 18.256324339254622,
"learning_rate": 1.4253153101269596e-06,
"loss": 0.7754,
"step": 1055
},
{
"epoch": 0.3798219584569733,
"grad_norm": 9.211567863325408,
"learning_rate": 1.4242603912328365e-06,
"loss": 0.8182,
"step": 1056
},
{
"epoch": 0.38018163834187574,
"grad_norm": 7.852936897662152,
"learning_rate": 1.4232048962459648e-06,
"loss": 0.8347,
"step": 1057
},
{
"epoch": 0.38054131822677817,
"grad_norm": 16.74312021145382,
"learning_rate": 1.4221488265995754e-06,
"loss": 0.8697,
"step": 1058
},
{
"epoch": 0.3809009981116806,
"grad_norm": 11.73914518751653,
"learning_rate": 1.421092183727679e-06,
"loss": 0.8545,
"step": 1059
},
{
"epoch": 0.38126067799658303,
"grad_norm": 8.879225199770659,
"learning_rate": 1.4200349690650653e-06,
"loss": 0.8468,
"step": 1060
},
{
"epoch": 0.38162035788148546,
"grad_norm": 96.90806110127664,
"learning_rate": 1.4189771840472995e-06,
"loss": 0.785,
"step": 1061
},
{
"epoch": 0.3819800377663879,
"grad_norm": 8.306537706936869,
"learning_rate": 1.4179188301107228e-06,
"loss": 0.8579,
"step": 1062
},
{
"epoch": 0.3823397176512904,
"grad_norm": 10.093154336668416,
"learning_rate": 1.416859908692447e-06,
"loss": 0.857,
"step": 1063
},
{
"epoch": 0.3826993975361928,
"grad_norm": 19.53334063430709,
"learning_rate": 1.4158004212303563e-06,
"loss": 0.8353,
"step": 1064
},
{
"epoch": 0.38305907742109524,
"grad_norm": 30.012211191584232,
"learning_rate": 1.414740369163102e-06,
"loss": 0.7892,
"step": 1065
},
{
"epoch": 0.38341875730599767,
"grad_norm": 7.6470646148037815,
"learning_rate": 1.413679753930103e-06,
"loss": 0.8003,
"step": 1066
},
{
"epoch": 0.3837784371909001,
"grad_norm": 15.82080272840552,
"learning_rate": 1.4126185769715426e-06,
"loss": 0.82,
"step": 1067
},
{
"epoch": 0.38413811707580253,
"grad_norm": 7.730309662956251,
"learning_rate": 1.4115568397283668e-06,
"loss": 0.8057,
"step": 1068
},
{
"epoch": 0.38449779696070496,
"grad_norm": 10.327301299623088,
"learning_rate": 1.410494543642283e-06,
"loss": 0.8456,
"step": 1069
},
{
"epoch": 0.3848574768456074,
"grad_norm": 8.781388123477912,
"learning_rate": 1.4094316901557562e-06,
"loss": 0.809,
"step": 1070
},
{
"epoch": 0.3852171567305098,
"grad_norm": 98.0099367935755,
"learning_rate": 1.408368280712009e-06,
"loss": 0.774,
"step": 1071
},
{
"epoch": 0.3855768366154123,
"grad_norm": 21.51930297396018,
"learning_rate": 1.4073043167550196e-06,
"loss": 0.7839,
"step": 1072
},
{
"epoch": 0.38593651650031474,
"grad_norm": 10.168407590746957,
"learning_rate": 1.406239799729518e-06,
"loss": 0.8474,
"step": 1073
},
{
"epoch": 0.38629619638521717,
"grad_norm": 11.143546461481547,
"learning_rate": 1.4051747310809861e-06,
"loss": 0.8234,
"step": 1074
},
{
"epoch": 0.3866558762701196,
"grad_norm": 10.014573885018102,
"learning_rate": 1.4041091122556537e-06,
"loss": 0.8624,
"step": 1075
},
{
"epoch": 0.38701555615502203,
"grad_norm": 9.203697477888994,
"learning_rate": 1.403042944700499e-06,
"loss": 0.7798,
"step": 1076
},
{
"epoch": 0.38737523603992446,
"grad_norm": 17.70155693236071,
"learning_rate": 1.4019762298632442e-06,
"loss": 0.8215,
"step": 1077
},
{
"epoch": 0.3877349159248269,
"grad_norm": 21.144004989509693,
"learning_rate": 1.400908969192356e-06,
"loss": 0.8341,
"step": 1078
},
{
"epoch": 0.3880945958097293,
"grad_norm": 42.44024629613774,
"learning_rate": 1.3998411641370401e-06,
"loss": 0.8266,
"step": 1079
},
{
"epoch": 0.38845427569463176,
"grad_norm": 30.42272061255153,
"learning_rate": 1.398772816147244e-06,
"loss": 0.8196,
"step": 1080
},
{
"epoch": 0.3888139555795342,
"grad_norm": 10.105715515251974,
"learning_rate": 1.3977039266736506e-06,
"loss": 0.7754,
"step": 1081
},
{
"epoch": 0.3891736354644367,
"grad_norm": 11.746605806057,
"learning_rate": 1.3966344971676786e-06,
"loss": 0.9183,
"step": 1082
},
{
"epoch": 0.3895333153493391,
"grad_norm": 26.766407550188198,
"learning_rate": 1.39556452908148e-06,
"loss": 0.8562,
"step": 1083
},
{
"epoch": 0.38989299523424154,
"grad_norm": 18.759620530870333,
"learning_rate": 1.3944940238679381e-06,
"loss": 0.7709,
"step": 1084
},
{
"epoch": 0.39025267511914397,
"grad_norm": 9.30940685054903,
"learning_rate": 1.3934229829806657e-06,
"loss": 0.8328,
"step": 1085
},
{
"epoch": 0.3906123550040464,
"grad_norm": 14.569851121933846,
"learning_rate": 1.3923514078740031e-06,
"loss": 0.8178,
"step": 1086
},
{
"epoch": 0.3909720348889488,
"grad_norm": 9.050830910649225,
"learning_rate": 1.3912793000030152e-06,
"loss": 0.7815,
"step": 1087
},
{
"epoch": 0.39133171477385126,
"grad_norm": 20.182619801572873,
"learning_rate": 1.3902066608234916e-06,
"loss": 0.8243,
"step": 1088
},
{
"epoch": 0.3916913946587537,
"grad_norm": 17.198218422594294,
"learning_rate": 1.389133491791942e-06,
"loss": 0.859,
"step": 1089
},
{
"epoch": 0.3920510745436561,
"grad_norm": 11.92497597568398,
"learning_rate": 1.388059794365597e-06,
"loss": 0.8262,
"step": 1090
},
{
"epoch": 0.3924107544285586,
"grad_norm": 14.691891278167247,
"learning_rate": 1.3869855700024028e-06,
"loss": 0.872,
"step": 1091
},
{
"epoch": 0.39277043431346104,
"grad_norm": 58.65616037540032,
"learning_rate": 1.3859108201610235e-06,
"loss": 0.8695,
"step": 1092
},
{
"epoch": 0.39313011419836347,
"grad_norm": 17.730256377111736,
"learning_rate": 1.3848355463008344e-06,
"loss": 0.7734,
"step": 1093
},
{
"epoch": 0.3934897940832659,
"grad_norm": 8.898215862601006,
"learning_rate": 1.383759749881924e-06,
"loss": 0.8358,
"step": 1094
},
{
"epoch": 0.39384947396816833,
"grad_norm": 9.273330467024028,
"learning_rate": 1.3826834323650898e-06,
"loss": 0.8524,
"step": 1095
},
{
"epoch": 0.39420915385307076,
"grad_norm": 8.076849244230383,
"learning_rate": 1.3816065952118365e-06,
"loss": 0.7429,
"step": 1096
},
{
"epoch": 0.3945688337379732,
"grad_norm": 9.3288544839028,
"learning_rate": 1.3805292398843753e-06,
"loss": 0.8314,
"step": 1097
},
{
"epoch": 0.3949285136228756,
"grad_norm": 110.01055869569176,
"learning_rate": 1.37945136784562e-06,
"loss": 0.7984,
"step": 1098
},
{
"epoch": 0.39528819350777805,
"grad_norm": 12.333689127918134,
"learning_rate": 1.3783729805591873e-06,
"loss": 0.8953,
"step": 1099
},
{
"epoch": 0.39564787339268054,
"grad_norm": 19.067730795891276,
"learning_rate": 1.3772940794893914e-06,
"loss": 0.785,
"step": 1100
},
{
"epoch": 0.39600755327758297,
"grad_norm": 24.615349848830494,
"learning_rate": 1.376214666101247e-06,
"loss": 0.7983,
"step": 1101
},
{
"epoch": 0.3963672331624854,
"grad_norm": 13.380900687883715,
"learning_rate": 1.3751347418604621e-06,
"loss": 0.884,
"step": 1102
},
{
"epoch": 0.39672691304738783,
"grad_norm": 8.981897254003437,
"learning_rate": 1.3740543082334397e-06,
"loss": 0.7893,
"step": 1103
},
{
"epoch": 0.39708659293229026,
"grad_norm": 15.905987338375768,
"learning_rate": 1.3729733666872734e-06,
"loss": 0.8062,
"step": 1104
},
{
"epoch": 0.3974462728171927,
"grad_norm": 12.35174296620484,
"learning_rate": 1.3718919186897479e-06,
"loss": 0.8566,
"step": 1105
},
{
"epoch": 0.3978059527020951,
"grad_norm": 13.415687900596817,
"learning_rate": 1.3708099657093345e-06,
"loss": 0.8429,
"step": 1106
},
{
"epoch": 0.39816563258699755,
"grad_norm": 7.651709359237927,
"learning_rate": 1.3697275092151906e-06,
"loss": 0.8384,
"step": 1107
},
{
"epoch": 0.3985253124719,
"grad_norm": 11.375541052279686,
"learning_rate": 1.3686445506771568e-06,
"loss": 0.8101,
"step": 1108
},
{
"epoch": 0.39888499235680247,
"grad_norm": 29.087280176558043,
"learning_rate": 1.3675610915657566e-06,
"loss": 0.7717,
"step": 1109
},
{
"epoch": 0.3992446722417049,
"grad_norm": 8.957288712416425,
"learning_rate": 1.366477133352192e-06,
"loss": 0.8606,
"step": 1110
},
{
"epoch": 0.39960435212660733,
"grad_norm": 26.20743774800868,
"learning_rate": 1.3653926775083435e-06,
"loss": 0.8882,
"step": 1111
},
{
"epoch": 0.39996403201150976,
"grad_norm": 18.21422500140279,
"learning_rate": 1.3643077255067664e-06,
"loss": 0.8443,
"step": 1112
},
{
"epoch": 0.4003237118964122,
"grad_norm": 13.865360018920079,
"learning_rate": 1.3632222788206913e-06,
"loss": 0.7962,
"step": 1113
},
{
"epoch": 0.4006833917813146,
"grad_norm": 20.948730649362815,
"learning_rate": 1.3621363389240187e-06,
"loss": 0.9649,
"step": 1114
},
{
"epoch": 0.40104307166621705,
"grad_norm": 8.86259566115294,
"learning_rate": 1.36104990729132e-06,
"loss": 0.84,
"step": 1115
},
{
"epoch": 0.4014027515511195,
"grad_norm": 16.948794281131754,
"learning_rate": 1.359962985397834e-06,
"loss": 0.7539,
"step": 1116
},
{
"epoch": 0.4017624314360219,
"grad_norm": 12.958441105299196,
"learning_rate": 1.3588755747194653e-06,
"loss": 0.7699,
"step": 1117
},
{
"epoch": 0.4021221113209244,
"grad_norm": 8.103833208902573,
"learning_rate": 1.3577876767327819e-06,
"loss": 0.8126,
"step": 1118
},
{
"epoch": 0.40248179120582683,
"grad_norm": 10.616654719018031,
"learning_rate": 1.3566992929150135e-06,
"loss": 0.8136,
"step": 1119
},
{
"epoch": 0.40284147109072926,
"grad_norm": 11.982269199251334,
"learning_rate": 1.3556104247440504e-06,
"loss": 0.7557,
"step": 1120
},
{
"epoch": 0.4032011509756317,
"grad_norm": 12.61067259437186,
"learning_rate": 1.3545210736984392e-06,
"loss": 0.816,
"step": 1121
},
{
"epoch": 0.4035608308605341,
"grad_norm": 30.616245149026604,
"learning_rate": 1.3534312412573834e-06,
"loss": 0.8658,
"step": 1122
},
{
"epoch": 0.40392051074543656,
"grad_norm": 33.15211026998674,
"learning_rate": 1.3523409289007397e-06,
"loss": 0.8177,
"step": 1123
},
{
"epoch": 0.404280190630339,
"grad_norm": 18.671486206489256,
"learning_rate": 1.3512501381090155e-06,
"loss": 0.773,
"step": 1124
},
{
"epoch": 0.4046398705152414,
"grad_norm": 15.838979634447169,
"learning_rate": 1.3501588703633702e-06,
"loss": 0.7824,
"step": 1125
},
{
"epoch": 0.40499955040014385,
"grad_norm": 15.728426784086825,
"learning_rate": 1.3490671271456081e-06,
"loss": 0.8208,
"step": 1126
},
{
"epoch": 0.40535923028504633,
"grad_norm": 13.296285622727124,
"learning_rate": 1.3479749099381817e-06,
"loss": 0.874,
"step": 1127
},
{
"epoch": 0.40571891016994877,
"grad_norm": 16.681213486243337,
"learning_rate": 1.3468822202241847e-06,
"loss": 0.862,
"step": 1128
},
{
"epoch": 0.4060785900548512,
"grad_norm": 16.518023213124945,
"learning_rate": 1.3457890594873545e-06,
"loss": 0.7991,
"step": 1129
},
{
"epoch": 0.4064382699397536,
"grad_norm": 7.661635968834577,
"learning_rate": 1.3446954292120664e-06,
"loss": 0.8425,
"step": 1130
},
{
"epoch": 0.40679794982465606,
"grad_norm": 24.024008499482644,
"learning_rate": 1.3436013308833348e-06,
"loss": 0.8415,
"step": 1131
},
{
"epoch": 0.4071576297095585,
"grad_norm": 20.253189055266766,
"learning_rate": 1.3425067659868084e-06,
"loss": 0.847,
"step": 1132
},
{
"epoch": 0.4075173095944609,
"grad_norm": 30.131942993580697,
"learning_rate": 1.3414117360087697e-06,
"loss": 0.877,
"step": 1133
},
{
"epoch": 0.40787698947936335,
"grad_norm": 7.759127811054072,
"learning_rate": 1.340316242436134e-06,
"loss": 0.7446,
"step": 1134
},
{
"epoch": 0.4082366693642658,
"grad_norm": 10.095478497154414,
"learning_rate": 1.339220286756444e-06,
"loss": 0.8167,
"step": 1135
},
{
"epoch": 0.40859634924916827,
"grad_norm": 7.703546068052134,
"learning_rate": 1.3381238704578716e-06,
"loss": 0.7909,
"step": 1136
},
{
"epoch": 0.4089560291340707,
"grad_norm": 9.605978827864863,
"learning_rate": 1.3370269950292132e-06,
"loss": 0.8524,
"step": 1137
},
{
"epoch": 0.40931570901897313,
"grad_norm": 14.056068815384645,
"learning_rate": 1.3359296619598892e-06,
"loss": 0.8288,
"step": 1138
},
{
"epoch": 0.40967538890387556,
"grad_norm": 10.631086756096403,
"learning_rate": 1.334831872739941e-06,
"loss": 0.7538,
"step": 1139
},
{
"epoch": 0.410035068788778,
"grad_norm": 8.684603876787211,
"learning_rate": 1.3337336288600297e-06,
"loss": 0.8113,
"step": 1140
},
{
"epoch": 0.4103947486736804,
"grad_norm": 37.72680423919033,
"learning_rate": 1.3326349318114334e-06,
"loss": 0.829,
"step": 1141
},
{
"epoch": 0.41075442855858285,
"grad_norm": 15.359788775736618,
"learning_rate": 1.3315357830860458e-06,
"loss": 0.7683,
"step": 1142
},
{
"epoch": 0.4111141084434853,
"grad_norm": 9.887705149920857,
"learning_rate": 1.3304361841763745e-06,
"loss": 0.7601,
"step": 1143
},
{
"epoch": 0.4114737883283877,
"grad_norm": 11.806306747173277,
"learning_rate": 1.3293361365755372e-06,
"loss": 0.8496,
"step": 1144
},
{
"epoch": 0.4118334682132902,
"grad_norm": 8.680706338600322,
"learning_rate": 1.3282356417772616e-06,
"loss": 0.7706,
"step": 1145
},
{
"epoch": 0.41219314809819263,
"grad_norm": 16.094061475463267,
"learning_rate": 1.3271347012758828e-06,
"loss": 0.7889,
"step": 1146
},
{
"epoch": 0.41255282798309506,
"grad_norm": 8.12872960363513,
"learning_rate": 1.3260333165663405e-06,
"loss": 0.8137,
"step": 1147
},
{
"epoch": 0.4129125078679975,
"grad_norm": 22.447001029360134,
"learning_rate": 1.324931489144178e-06,
"loss": 0.8666,
"step": 1148
},
{
"epoch": 0.4132721877528999,
"grad_norm": 10.386632819287223,
"learning_rate": 1.3238292205055394e-06,
"loss": 0.7832,
"step": 1149
},
{
"epoch": 0.41363186763780235,
"grad_norm": 12.107129360878472,
"learning_rate": 1.3227265121471689e-06,
"loss": 0.8319,
"step": 1150
},
{
"epoch": 0.4139915475227048,
"grad_norm": 11.661602549688038,
"learning_rate": 1.3216233655664064e-06,
"loss": 0.8509,
"step": 1151
},
{
"epoch": 0.4143512274076072,
"grad_norm": 7.598324574193326,
"learning_rate": 1.3205197822611876e-06,
"loss": 0.7538,
"step": 1152
},
{
"epoch": 0.41471090729250965,
"grad_norm": 10.398107676973735,
"learning_rate": 1.3194157637300413e-06,
"loss": 0.7999,
"step": 1153
},
{
"epoch": 0.4150705871774121,
"grad_norm": 14.88430692067026,
"learning_rate": 1.318311311472087e-06,
"loss": 0.7972,
"step": 1154
},
{
"epoch": 0.41543026706231456,
"grad_norm": 9.035578815704389,
"learning_rate": 1.3172064269870334e-06,
"loss": 0.7613,
"step": 1155
},
{
"epoch": 0.415789946947217,
"grad_norm": 9.698803746884536,
"learning_rate": 1.3161011117751754e-06,
"loss": 0.9181,
"step": 1156
},
{
"epoch": 0.4161496268321194,
"grad_norm": 8.563695782621274,
"learning_rate": 1.3149953673373943e-06,
"loss": 0.8396,
"step": 1157
},
{
"epoch": 0.41650930671702185,
"grad_norm": 12.592925677092781,
"learning_rate": 1.3138891951751526e-06,
"loss": 0.7965,
"step": 1158
},
{
"epoch": 0.4168689866019243,
"grad_norm": 29.84248534355901,
"learning_rate": 1.3127825967904943e-06,
"loss": 0.7692,
"step": 1159
},
{
"epoch": 0.4172286664868267,
"grad_norm": 16.973817623923992,
"learning_rate": 1.3116755736860421e-06,
"loss": 0.8561,
"step": 1160
},
{
"epoch": 0.41758834637172915,
"grad_norm": 30.96181954207072,
"learning_rate": 1.3105681273649957e-06,
"loss": 0.8723,
"step": 1161
},
{
"epoch": 0.4179480262566316,
"grad_norm": 8.705331458524364,
"learning_rate": 1.3094602593311292e-06,
"loss": 0.8276,
"step": 1162
},
{
"epoch": 0.418307706141534,
"grad_norm": 11.948946543596383,
"learning_rate": 1.3083519710887894e-06,
"loss": 0.7731,
"step": 1163
},
{
"epoch": 0.4186673860264365,
"grad_norm": 14.658515304037246,
"learning_rate": 1.307243264142893e-06,
"loss": 0.7962,
"step": 1164
},
{
"epoch": 0.4190270659113389,
"grad_norm": 9.97210830939083,
"learning_rate": 1.3061341399989266e-06,
"loss": 0.8541,
"step": 1165
},
{
"epoch": 0.41938674579624136,
"grad_norm": 11.08794321794277,
"learning_rate": 1.3050246001629423e-06,
"loss": 0.8469,
"step": 1166
},
{
"epoch": 0.4197464256811438,
"grad_norm": 31.824399831222916,
"learning_rate": 1.3039146461415573e-06,
"loss": 0.8517,
"step": 1167
},
{
"epoch": 0.4201061055660462,
"grad_norm": 15.681118811218097,
"learning_rate": 1.30280427944195e-06,
"loss": 0.8312,
"step": 1168
},
{
"epoch": 0.42046578545094865,
"grad_norm": 10.889552931272489,
"learning_rate": 1.3016935015718612e-06,
"loss": 0.8185,
"step": 1169
},
{
"epoch": 0.4208254653358511,
"grad_norm": 23.96497199753361,
"learning_rate": 1.3005823140395877e-06,
"loss": 0.8774,
"step": 1170
},
{
"epoch": 0.4211851452207535,
"grad_norm": 8.076426939251284,
"learning_rate": 1.2994707183539847e-06,
"loss": 0.8003,
"step": 1171
},
{
"epoch": 0.42154482510565594,
"grad_norm": 13.089536429771677,
"learning_rate": 1.29835871602446e-06,
"loss": 0.794,
"step": 1172
},
{
"epoch": 0.4219045049905584,
"grad_norm": 11.486572894743636,
"learning_rate": 1.2972463085609741e-06,
"loss": 0.7904,
"step": 1173
},
{
"epoch": 0.42226418487546086,
"grad_norm": 27.854878543369725,
"learning_rate": 1.2961334974740386e-06,
"loss": 0.875,
"step": 1174
},
{
"epoch": 0.4226238647603633,
"grad_norm": 14.369805685256482,
"learning_rate": 1.2950202842747114e-06,
"loss": 0.8853,
"step": 1175
},
{
"epoch": 0.4229835446452657,
"grad_norm": 82.9946121363335,
"learning_rate": 1.2939066704745977e-06,
"loss": 0.7587,
"step": 1176
},
{
"epoch": 0.42334322453016815,
"grad_norm": 25.80508621463069,
"learning_rate": 1.2927926575858462e-06,
"loss": 0.7773,
"step": 1177
},
{
"epoch": 0.4237029044150706,
"grad_norm": 15.569812419461757,
"learning_rate": 1.2916782471211476e-06,
"loss": 0.7987,
"step": 1178
},
{
"epoch": 0.424062584299973,
"grad_norm": 10.82278896220999,
"learning_rate": 1.2905634405937325e-06,
"loss": 0.8372,
"step": 1179
},
{
"epoch": 0.42442226418487544,
"grad_norm": 9.49168857907145,
"learning_rate": 1.2894482395173693e-06,
"loss": 0.7634,
"step": 1180
},
{
"epoch": 0.4247819440697779,
"grad_norm": 13.358456853547283,
"learning_rate": 1.2883326454063621e-06,
"loss": 0.8061,
"step": 1181
},
{
"epoch": 0.42514162395468036,
"grad_norm": 10.116587204702688,
"learning_rate": 1.2872166597755488e-06,
"loss": 0.7762,
"step": 1182
},
{
"epoch": 0.4255013038395828,
"grad_norm": 15.816590836728992,
"learning_rate": 1.2861002841402981e-06,
"loss": 0.8105,
"step": 1183
},
{
"epoch": 0.4258609837244852,
"grad_norm": 15.057231555218781,
"learning_rate": 1.2849835200165103e-06,
"loss": 0.8121,
"step": 1184
},
{
"epoch": 0.42622066360938765,
"grad_norm": 10.617264268821197,
"learning_rate": 1.2838663689206105e-06,
"loss": 0.7983,
"step": 1185
},
{
"epoch": 0.4265803434942901,
"grad_norm": 25.285208379786997,
"learning_rate": 1.2827488323695521e-06,
"loss": 0.8107,
"step": 1186
},
{
"epoch": 0.4269400233791925,
"grad_norm": 11.791095382152786,
"learning_rate": 1.2816309118808094e-06,
"loss": 0.7905,
"step": 1187
},
{
"epoch": 0.42729970326409494,
"grad_norm": 20.31416399255396,
"learning_rate": 1.2805126089723797e-06,
"loss": 0.8098,
"step": 1188
},
{
"epoch": 0.4276593831489974,
"grad_norm": 13.500661045780163,
"learning_rate": 1.2793939251627786e-06,
"loss": 0.8793,
"step": 1189
},
{
"epoch": 0.4280190630338998,
"grad_norm": 12.351809166340395,
"learning_rate": 1.27827486197104e-06,
"loss": 0.8212,
"step": 1190
},
{
"epoch": 0.4283787429188023,
"grad_norm": 9.00943104462971,
"learning_rate": 1.2771554209167115e-06,
"loss": 0.7854,
"step": 1191
},
{
"epoch": 0.4287384228037047,
"grad_norm": 12.66360659572935,
"learning_rate": 1.2760356035198553e-06,
"loss": 0.8775,
"step": 1192
},
{
"epoch": 0.42909810268860715,
"grad_norm": 7.212969656334713,
"learning_rate": 1.2749154113010429e-06,
"loss": 0.8348,
"step": 1193
},
{
"epoch": 0.4294577825735096,
"grad_norm": 17.116970732423972,
"learning_rate": 1.273794845781357e-06,
"loss": 0.8068,
"step": 1194
},
{
"epoch": 0.429817462458412,
"grad_norm": 19.640202487139703,
"learning_rate": 1.272673908482385e-06,
"loss": 0.8116,
"step": 1195
},
{
"epoch": 0.43017714234331444,
"grad_norm": 17.183855064124938,
"learning_rate": 1.2715526009262208e-06,
"loss": 0.8086,
"step": 1196
},
{
"epoch": 0.4305368222282169,
"grad_norm": 15.824711313247183,
"learning_rate": 1.2704309246354597e-06,
"loss": 0.7469,
"step": 1197
},
{
"epoch": 0.4308965021131193,
"grad_norm": 101.31063472576798,
"learning_rate": 1.2693088811331985e-06,
"loss": 0.7757,
"step": 1198
},
{
"epoch": 0.43125618199802174,
"grad_norm": 13.437342130247243,
"learning_rate": 1.2681864719430326e-06,
"loss": 0.8051,
"step": 1199
},
{
"epoch": 0.4316158618829242,
"grad_norm": 11.560463329221609,
"learning_rate": 1.267063698589054e-06,
"loss": 0.8188,
"step": 1200
},
{
"epoch": 0.43197554176782665,
"grad_norm": 15.9775143215431,
"learning_rate": 1.2659405625958485e-06,
"loss": 0.7999,
"step": 1201
},
{
"epoch": 0.4323352216527291,
"grad_norm": 11.061264786476832,
"learning_rate": 1.2648170654884952e-06,
"loss": 0.7652,
"step": 1202
},
{
"epoch": 0.4326949015376315,
"grad_norm": 9.301545465701375,
"learning_rate": 1.2636932087925636e-06,
"loss": 0.8292,
"step": 1203
},
{
"epoch": 0.43305458142253395,
"grad_norm": 29.793784920402466,
"learning_rate": 1.26256899403411e-06,
"loss": 0.8507,
"step": 1204
},
{
"epoch": 0.4334142613074364,
"grad_norm": 14.99437408721803,
"learning_rate": 1.261444422739679e-06,
"loss": 0.8628,
"step": 1205
},
{
"epoch": 0.4337739411923388,
"grad_norm": 8.171720154254778,
"learning_rate": 1.2603194964362978e-06,
"loss": 0.8719,
"step": 1206
},
{
"epoch": 0.43413362107724124,
"grad_norm": 15.461314713557933,
"learning_rate": 1.2591942166514763e-06,
"loss": 0.7838,
"step": 1207
},
{
"epoch": 0.43449330096214367,
"grad_norm": 10.039494905553514,
"learning_rate": 1.2580685849132038e-06,
"loss": 0.8216,
"step": 1208
},
{
"epoch": 0.43485298084704616,
"grad_norm": 27.279568189454807,
"learning_rate": 1.2569426027499483e-06,
"loss": 0.8054,
"step": 1209
},
{
"epoch": 0.4352126607319486,
"grad_norm": 17.918777685269298,
"learning_rate": 1.2558162716906535e-06,
"loss": 0.7491,
"step": 1210
},
{
"epoch": 0.435572340616851,
"grad_norm": 15.32934599202498,
"learning_rate": 1.2546895932647364e-06,
"loss": 0.8193,
"step": 1211
},
{
"epoch": 0.43593202050175345,
"grad_norm": 9.127247215540397,
"learning_rate": 1.2535625690020858e-06,
"loss": 0.8449,
"step": 1212
},
{
"epoch": 0.4362917003866559,
"grad_norm": 14.029464558108746,
"learning_rate": 1.2524352004330605e-06,
"loss": 0.8291,
"step": 1213
},
{
"epoch": 0.4366513802715583,
"grad_norm": 10.089731825442499,
"learning_rate": 1.2513074890884863e-06,
"loss": 0.8157,
"step": 1214
},
{
"epoch": 0.43701106015646074,
"grad_norm": 17.512841039384675,
"learning_rate": 1.2501794364996553e-06,
"loss": 0.7185,
"step": 1215
},
{
"epoch": 0.43737074004136317,
"grad_norm": 10.710023680718834,
"learning_rate": 1.249051044198321e-06,
"loss": 0.8398,
"step": 1216
},
{
"epoch": 0.4377304199262656,
"grad_norm": 15.138685298177357,
"learning_rate": 1.247922313716701e-06,
"loss": 0.7848,
"step": 1217
},
{
"epoch": 0.4380900998111681,
"grad_norm": 11.154967860168135,
"learning_rate": 1.2467932465874698e-06,
"loss": 0.7925,
"step": 1218
},
{
"epoch": 0.4384497796960705,
"grad_norm": 28.175644182641264,
"learning_rate": 1.2456638443437604e-06,
"loss": 0.8244,
"step": 1219
},
{
"epoch": 0.43880945958097295,
"grad_norm": 13.515139584672358,
"learning_rate": 1.2445341085191598e-06,
"loss": 0.8696,
"step": 1220
},
{
"epoch": 0.4391691394658754,
"grad_norm": 7.60111989861678,
"learning_rate": 1.243404040647709e-06,
"loss": 0.8147,
"step": 1221
},
{
"epoch": 0.4395288193507778,
"grad_norm": 11.874802985111746,
"learning_rate": 1.2422736422638989e-06,
"loss": 0.821,
"step": 1222
},
{
"epoch": 0.43988849923568024,
"grad_norm": 10.265771757817534,
"learning_rate": 1.24114291490267e-06,
"loss": 0.7894,
"step": 1223
},
{
"epoch": 0.44024817912058267,
"grad_norm": 9.305337328289518,
"learning_rate": 1.2400118600994089e-06,
"loss": 0.8681,
"step": 1224
},
{
"epoch": 0.4406078590054851,
"grad_norm": 15.078958442910196,
"learning_rate": 1.2388804793899472e-06,
"loss": 0.8619,
"step": 1225
},
{
"epoch": 0.44096753889038753,
"grad_norm": 10.629870712533126,
"learning_rate": 1.2377487743105593e-06,
"loss": 0.8269,
"step": 1226
},
{
"epoch": 0.44132721877528996,
"grad_norm": 13.607965360091349,
"learning_rate": 1.2366167463979589e-06,
"loss": 0.8167,
"step": 1227
},
{
"epoch": 0.44168689866019245,
"grad_norm": 62.836568572867826,
"learning_rate": 1.2354843971892997e-06,
"loss": 0.8038,
"step": 1228
},
{
"epoch": 0.4420465785450949,
"grad_norm": 24.402575205654706,
"learning_rate": 1.2343517282221702e-06,
"loss": 0.7943,
"step": 1229
},
{
"epoch": 0.4424062584299973,
"grad_norm": 39.445338745066515,
"learning_rate": 1.233218741034594e-06,
"loss": 0.7956,
"step": 1230
},
{
"epoch": 0.44276593831489974,
"grad_norm": 12.306449046969655,
"learning_rate": 1.2320854371650266e-06,
"loss": 0.7592,
"step": 1231
},
{
"epoch": 0.4431256181998022,
"grad_norm": 9.349786884584788,
"learning_rate": 1.2309518181523536e-06,
"loss": 0.836,
"step": 1232
},
{
"epoch": 0.4434852980847046,
"grad_norm": 14.046011861194604,
"learning_rate": 1.2298178855358873e-06,
"loss": 0.8548,
"step": 1233
},
{
"epoch": 0.44384497796960704,
"grad_norm": 11.757374318577009,
"learning_rate": 1.2286836408553685e-06,
"loss": 0.8351,
"step": 1234
},
{
"epoch": 0.44420465785450947,
"grad_norm": 8.04805363719931,
"learning_rate": 1.227549085650959e-06,
"loss": 0.8048,
"step": 1235
},
{
"epoch": 0.4445643377394119,
"grad_norm": 12.275919702845814,
"learning_rate": 1.226414221463244e-06,
"loss": 0.7916,
"step": 1236
},
{
"epoch": 0.4449240176243144,
"grad_norm": 12.570661828913073,
"learning_rate": 1.2252790498332272e-06,
"loss": 0.849,
"step": 1237
},
{
"epoch": 0.4452836975092168,
"grad_norm": 10.488860725402276,
"learning_rate": 1.2241435723023308e-06,
"loss": 0.7694,
"step": 1238
},
{
"epoch": 0.44564337739411924,
"grad_norm": 7.622957083352856,
"learning_rate": 1.2230077904123912e-06,
"loss": 0.8339,
"step": 1239
},
{
"epoch": 0.4460030572790217,
"grad_norm": 39.197658683374236,
"learning_rate": 1.2218717057056592e-06,
"loss": 0.8131,
"step": 1240
},
{
"epoch": 0.4463627371639241,
"grad_norm": 10.548348927119244,
"learning_rate": 1.2207353197247956e-06,
"loss": 0.7641,
"step": 1241
},
{
"epoch": 0.44672241704882654,
"grad_norm": 7.259534641391028,
"learning_rate": 1.2195986340128718e-06,
"loss": 0.786,
"step": 1242
},
{
"epoch": 0.44708209693372897,
"grad_norm": 24.7303333263242,
"learning_rate": 1.2184616501133646e-06,
"loss": 0.8738,
"step": 1243
},
{
"epoch": 0.4474417768186314,
"grad_norm": 13.900637558705428,
"learning_rate": 1.2173243695701573e-06,
"loss": 0.8664,
"step": 1244
},
{
"epoch": 0.44780145670353383,
"grad_norm": 11.298140909025424,
"learning_rate": 1.2161867939275343e-06,
"loss": 0.8792,
"step": 1245
},
{
"epoch": 0.4481611365884363,
"grad_norm": 14.50133406883414,
"learning_rate": 1.2150489247301825e-06,
"loss": 0.8256,
"step": 1246
},
{
"epoch": 0.44852081647333875,
"grad_norm": 31.132010892010488,
"learning_rate": 1.2139107635231855e-06,
"loss": 0.8271,
"step": 1247
},
{
"epoch": 0.4488804963582412,
"grad_norm": 12.569487198202452,
"learning_rate": 1.2127723118520252e-06,
"loss": 0.8615,
"step": 1248
},
{
"epoch": 0.4492401762431436,
"grad_norm": 10.949709264797026,
"learning_rate": 1.2116335712625765e-06,
"loss": 0.8454,
"step": 1249
},
{
"epoch": 0.44959985612804604,
"grad_norm": 10.332432325251993,
"learning_rate": 1.2104945433011078e-06,
"loss": 0.797,
"step": 1250
},
{
"epoch": 0.44995953601294847,
"grad_norm": 10.14572559356846,
"learning_rate": 1.2093552295142768e-06,
"loss": 0.907,
"step": 1251
},
{
"epoch": 0.4503192158978509,
"grad_norm": 18.970962703868434,
"learning_rate": 1.2082156314491297e-06,
"loss": 0.7562,
"step": 1252
},
{
"epoch": 0.45067889578275333,
"grad_norm": 11.616910049291329,
"learning_rate": 1.2070757506530988e-06,
"loss": 0.8113,
"step": 1253
},
{
"epoch": 0.45103857566765576,
"grad_norm": 10.081351483948243,
"learning_rate": 1.205935588674e-06,
"loss": 0.7622,
"step": 1254
},
{
"epoch": 0.45139825555255825,
"grad_norm": 20.255342800605845,
"learning_rate": 1.2047951470600317e-06,
"loss": 0.8975,
"step": 1255
},
{
"epoch": 0.4517579354374607,
"grad_norm": 13.878665803083992,
"learning_rate": 1.2036544273597707e-06,
"loss": 0.8028,
"step": 1256
},
{
"epoch": 0.4521176153223631,
"grad_norm": 16.323478583232987,
"learning_rate": 1.202513431122173e-06,
"loss": 0.9193,
"step": 1257
},
{
"epoch": 0.45247729520726554,
"grad_norm": 12.595777252246911,
"learning_rate": 1.2013721598965687e-06,
"loss": 0.9226,
"step": 1258
},
{
"epoch": 0.45283697509216797,
"grad_norm": 29.133036085665644,
"learning_rate": 1.2002306152326625e-06,
"loss": 0.7382,
"step": 1259
},
{
"epoch": 0.4531966549770704,
"grad_norm": 13.19818067106045,
"learning_rate": 1.1990887986805295e-06,
"loss": 0.7563,
"step": 1260
},
{
"epoch": 0.45355633486197283,
"grad_norm": 59.41462666030352,
"learning_rate": 1.1979467117906141e-06,
"loss": 0.8029,
"step": 1261
},
{
"epoch": 0.45391601474687526,
"grad_norm": 9.281685333565427,
"learning_rate": 1.1968043561137284e-06,
"loss": 0.8307,
"step": 1262
},
{
"epoch": 0.4542756946317777,
"grad_norm": 10.829792460540387,
"learning_rate": 1.1956617332010486e-06,
"loss": 0.7897,
"step": 1263
},
{
"epoch": 0.4546353745166802,
"grad_norm": 9.87199135934417,
"learning_rate": 1.194518844604115e-06,
"loss": 0.8053,
"step": 1264
},
{
"epoch": 0.4549950544015826,
"grad_norm": 18.600256673133423,
"learning_rate": 1.193375691874827e-06,
"loss": 0.8669,
"step": 1265
},
{
"epoch": 0.45535473428648504,
"grad_norm": 15.82984920467322,
"learning_rate": 1.1922322765654444e-06,
"loss": 0.7869,
"step": 1266
},
{
"epoch": 0.45571441417138747,
"grad_norm": 29.131611437423444,
"learning_rate": 1.191088600228582e-06,
"loss": 0.7925,
"step": 1267
},
{
"epoch": 0.4560740940562899,
"grad_norm": 42.60802444905089,
"learning_rate": 1.1899446644172104e-06,
"loss": 0.7573,
"step": 1268
},
{
"epoch": 0.45643377394119233,
"grad_norm": 10.628041432812774,
"learning_rate": 1.1888004706846518e-06,
"loss": 0.799,
"step": 1269
},
{
"epoch": 0.45679345382609476,
"grad_norm": 18.94420908670456,
"learning_rate": 1.187656020584578e-06,
"loss": 0.7464,
"step": 1270
},
{
"epoch": 0.4571531337109972,
"grad_norm": 9.262181828568094,
"learning_rate": 1.1865113156710105e-06,
"loss": 0.7261,
"step": 1271
},
{
"epoch": 0.4575128135958996,
"grad_norm": 10.497722925087944,
"learning_rate": 1.1853663574983154e-06,
"loss": 0.8084,
"step": 1272
},
{
"epoch": 0.4578724934808021,
"grad_norm": 24.43866104784505,
"learning_rate": 1.1842211476212036e-06,
"loss": 0.8101,
"step": 1273
},
{
"epoch": 0.45823217336570454,
"grad_norm": 7.289370864106929,
"learning_rate": 1.183075687594727e-06,
"loss": 0.7586,
"step": 1274
},
{
"epoch": 0.458591853250607,
"grad_norm": 11.556599457183667,
"learning_rate": 1.181929978974278e-06,
"loss": 0.7747,
"step": 1275
},
{
"epoch": 0.4589515331355094,
"grad_norm": 12.053272756336359,
"learning_rate": 1.1807840233155862e-06,
"loss": 0.8078,
"step": 1276
},
{
"epoch": 0.45931121302041183,
"grad_norm": 10.666529440001169,
"learning_rate": 1.179637822174716e-06,
"loss": 0.901,
"step": 1277
},
{
"epoch": 0.45967089290531427,
"grad_norm": 13.548560706866235,
"learning_rate": 1.1784913771080663e-06,
"loss": 0.8418,
"step": 1278
},
{
"epoch": 0.4600305727902167,
"grad_norm": 48.48568465151353,
"learning_rate": 1.1773446896723666e-06,
"loss": 0.8083,
"step": 1279
},
{
"epoch": 0.4603902526751191,
"grad_norm": 11.366387829012211,
"learning_rate": 1.1761977614246757e-06,
"loss": 0.855,
"step": 1280
},
{
"epoch": 0.46074993256002156,
"grad_norm": 12.01127680637438,
"learning_rate": 1.1750505939223784e-06,
"loss": 0.8486,
"step": 1281
},
{
"epoch": 0.46110961244492404,
"grad_norm": 7.978281297381684,
"learning_rate": 1.1739031887231864e-06,
"loss": 0.8512,
"step": 1282
},
{
"epoch": 0.4614692923298265,
"grad_norm": 11.795596679560251,
"learning_rate": 1.1727555473851321e-06,
"loss": 0.8486,
"step": 1283
},
{
"epoch": 0.4618289722147289,
"grad_norm": 15.49662766160169,
"learning_rate": 1.17160767146657e-06,
"loss": 0.841,
"step": 1284
},
{
"epoch": 0.46218865209963134,
"grad_norm": 16.986608292313473,
"learning_rate": 1.170459562526172e-06,
"loss": 0.8212,
"step": 1285
},
{
"epoch": 0.46254833198453377,
"grad_norm": 9.93564733691553,
"learning_rate": 1.1693112221229276e-06,
"loss": 0.7663,
"step": 1286
},
{
"epoch": 0.4629080118694362,
"grad_norm": 54.42918082000089,
"learning_rate": 1.1681626518161396e-06,
"loss": 0.8096,
"step": 1287
},
{
"epoch": 0.46326769175433863,
"grad_norm": 7.922207343459904,
"learning_rate": 1.1670138531654236e-06,
"loss": 0.8284,
"step": 1288
},
{
"epoch": 0.46362737163924106,
"grad_norm": 13.072860874905023,
"learning_rate": 1.1658648277307047e-06,
"loss": 0.7808,
"step": 1289
},
{
"epoch": 0.4639870515241435,
"grad_norm": 16.501809646437952,
"learning_rate": 1.1647155770722169e-06,
"loss": 0.8181,
"step": 1290
},
{
"epoch": 0.464346731409046,
"grad_norm": 14.341375858719926,
"learning_rate": 1.1635661027504983e-06,
"loss": 0.8495,
"step": 1291
},
{
"epoch": 0.4647064112939484,
"grad_norm": 9.923435204329014,
"learning_rate": 1.162416406326393e-06,
"loss": 0.7881,
"step": 1292
},
{
"epoch": 0.46506609117885084,
"grad_norm": 11.687141456686968,
"learning_rate": 1.161266489361045e-06,
"loss": 0.8102,
"step": 1293
},
{
"epoch": 0.46542577106375327,
"grad_norm": 7.318134783993273,
"learning_rate": 1.1601163534158979e-06,
"loss": 0.7568,
"step": 1294
},
{
"epoch": 0.4657854509486557,
"grad_norm": 7.717278544222602,
"learning_rate": 1.1589660000526934e-06,
"loss": 0.8592,
"step": 1295
},
{
"epoch": 0.46614513083355813,
"grad_norm": 9.639364847193715,
"learning_rate": 1.1578154308334682e-06,
"loss": 0.8454,
"step": 1296
},
{
"epoch": 0.46650481071846056,
"grad_norm": 7.546318024475833,
"learning_rate": 1.1566646473205516e-06,
"loss": 0.8949,
"step": 1297
},
{
"epoch": 0.466864490603363,
"grad_norm": 20.974931918438077,
"learning_rate": 1.1555136510765644e-06,
"loss": 0.8186,
"step": 1298
},
{
"epoch": 0.4672241704882654,
"grad_norm": 16.749368758432176,
"learning_rate": 1.154362443664416e-06,
"loss": 0.8559,
"step": 1299
},
{
"epoch": 0.46758385037316785,
"grad_norm": 14.427679951488976,
"learning_rate": 1.1532110266473026e-06,
"loss": 0.7989,
"step": 1300
},
{
"epoch": 0.46794353025807034,
"grad_norm": 8.46357969121419,
"learning_rate": 1.1520594015887048e-06,
"loss": 0.7918,
"step": 1301
},
{
"epoch": 0.46830321014297277,
"grad_norm": 21.35180893075654,
"learning_rate": 1.1509075700523867e-06,
"loss": 0.837,
"step": 1302
},
{
"epoch": 0.4686628900278752,
"grad_norm": 13.21873278578323,
"learning_rate": 1.1497555336023913e-06,
"loss": 0.838,
"step": 1303
},
{
"epoch": 0.46902256991277763,
"grad_norm": 8.170795276298515,
"learning_rate": 1.1486032938030407e-06,
"loss": 0.8402,
"step": 1304
},
{
"epoch": 0.46938224979768006,
"grad_norm": 32.83502426851321,
"learning_rate": 1.1474508522189333e-06,
"loss": 0.761,
"step": 1305
},
{
"epoch": 0.4697419296825825,
"grad_norm": 15.1106562782342,
"learning_rate": 1.1462982104149407e-06,
"loss": 0.9193,
"step": 1306
},
{
"epoch": 0.4701016095674849,
"grad_norm": 38.89986928464333,
"learning_rate": 1.1451453699562074e-06,
"loss": 0.7628,
"step": 1307
},
{
"epoch": 0.47046128945238735,
"grad_norm": 11.047045316832111,
"learning_rate": 1.1439923324081463e-06,
"loss": 0.846,
"step": 1308
},
{
"epoch": 0.4708209693372898,
"grad_norm": 17.16310657966234,
"learning_rate": 1.14283909933644e-06,
"loss": 0.8376,
"step": 1309
},
{
"epoch": 0.47118064922219227,
"grad_norm": 11.43598914484691,
"learning_rate": 1.141685672307034e-06,
"loss": 0.814,
"step": 1310
},
{
"epoch": 0.4715403291070947,
"grad_norm": 28.209360740494237,
"learning_rate": 1.1405320528861391e-06,
"loss": 0.7522,
"step": 1311
},
{
"epoch": 0.47190000899199713,
"grad_norm": 8.049434855095333,
"learning_rate": 1.1393782426402266e-06,
"loss": 0.7578,
"step": 1312
},
{
"epoch": 0.47225968887689956,
"grad_norm": 9.649229554253916,
"learning_rate": 1.1382242431360273e-06,
"loss": 0.8434,
"step": 1313
},
{
"epoch": 0.472619368761802,
"grad_norm": 13.371114231283347,
"learning_rate": 1.1370700559405282e-06,
"loss": 0.8427,
"step": 1314
},
{
"epoch": 0.4729790486467044,
"grad_norm": 17.018058939594745,
"learning_rate": 1.1359156826209723e-06,
"loss": 0.8202,
"step": 1315
},
{
"epoch": 0.47333872853160686,
"grad_norm": 16.720685610019604,
"learning_rate": 1.1347611247448542e-06,
"loss": 0.8163,
"step": 1316
},
{
"epoch": 0.4736984084165093,
"grad_norm": 11.349509462719638,
"learning_rate": 1.1336063838799203e-06,
"loss": 0.8061,
"step": 1317
},
{
"epoch": 0.4740580883014117,
"grad_norm": 52.62375682555366,
"learning_rate": 1.1324514615941643e-06,
"loss": 0.8094,
"step": 1318
},
{
"epoch": 0.4744177681863142,
"grad_norm": 8.373983084050154,
"learning_rate": 1.1312963594558269e-06,
"loss": 0.8,
"step": 1319
},
{
"epoch": 0.47477744807121663,
"grad_norm": 21.3569404939675,
"learning_rate": 1.1301410790333928e-06,
"loss": 0.8031,
"step": 1320
},
{
"epoch": 0.47513712795611907,
"grad_norm": 7.938213921263037,
"learning_rate": 1.128985621895589e-06,
"loss": 0.8187,
"step": 1321
},
{
"epoch": 0.4754968078410215,
"grad_norm": 10.108542668174916,
"learning_rate": 1.127829989611382e-06,
"loss": 0.7671,
"step": 1322
},
{
"epoch": 0.4758564877259239,
"grad_norm": 26.649203021223208,
"learning_rate": 1.1266741837499772e-06,
"loss": 0.877,
"step": 1323
},
{
"epoch": 0.47621616761082636,
"grad_norm": 10.835403456104503,
"learning_rate": 1.1255182058808142e-06,
"loss": 0.8013,
"step": 1324
},
{
"epoch": 0.4765758474957288,
"grad_norm": 16.69247054672645,
"learning_rate": 1.124362057573567e-06,
"loss": 0.7688,
"step": 1325
},
{
"epoch": 0.4769355273806312,
"grad_norm": 14.321681582885438,
"learning_rate": 1.1232057403981414e-06,
"loss": 0.7841,
"step": 1326
},
{
"epoch": 0.47729520726553365,
"grad_norm": 9.321991160951313,
"learning_rate": 1.1220492559246716e-06,
"loss": 0.8265,
"step": 1327
},
{
"epoch": 0.47765488715043614,
"grad_norm": 21.250382371137924,
"learning_rate": 1.1208926057235196e-06,
"loss": 0.8528,
"step": 1328
},
{
"epoch": 0.47801456703533857,
"grad_norm": 10.41683671396487,
"learning_rate": 1.1197357913652723e-06,
"loss": 0.8448,
"step": 1329
},
{
"epoch": 0.478374246920241,
"grad_norm": 9.696248340882471,
"learning_rate": 1.1185788144207393e-06,
"loss": 0.8706,
"step": 1330
},
{
"epoch": 0.47873392680514343,
"grad_norm": 8.773555354216427,
"learning_rate": 1.1174216764609513e-06,
"loss": 0.8033,
"step": 1331
},
{
"epoch": 0.47909360669004586,
"grad_norm": 15.713430787734534,
"learning_rate": 1.1162643790571572e-06,
"loss": 0.8287,
"step": 1332
},
{
"epoch": 0.4794532865749483,
"grad_norm": 21.999837055541647,
"learning_rate": 1.115106923780823e-06,
"loss": 0.7078,
"step": 1333
},
{
"epoch": 0.4798129664598507,
"grad_norm": 29.56044026061468,
"learning_rate": 1.1139493122036288e-06,
"loss": 0.8464,
"step": 1334
},
{
"epoch": 0.48017264634475315,
"grad_norm": 15.644608725288286,
"learning_rate": 1.1127915458974664e-06,
"loss": 0.8285,
"step": 1335
},
{
"epoch": 0.4805323262296556,
"grad_norm": 17.631169257133948,
"learning_rate": 1.1116336264344388e-06,
"loss": 0.8656,
"step": 1336
},
{
"epoch": 0.48089200611455807,
"grad_norm": 16.153840889427563,
"learning_rate": 1.1104755553868556e-06,
"loss": 0.7716,
"step": 1337
},
{
"epoch": 0.4812516859994605,
"grad_norm": 12.574642214183019,
"learning_rate": 1.109317334327234e-06,
"loss": 0.7618,
"step": 1338
},
{
"epoch": 0.48161136588436293,
"grad_norm": 9.725647080860101,
"learning_rate": 1.1081589648282928e-06,
"loss": 0.8191,
"step": 1339
},
{
"epoch": 0.48197104576926536,
"grad_norm": 16.049163537310257,
"learning_rate": 1.1070004484629542e-06,
"loss": 0.7816,
"step": 1340
},
{
"epoch": 0.4823307256541678,
"grad_norm": 12.70889920815928,
"learning_rate": 1.1058417868043385e-06,
"loss": 0.7665,
"step": 1341
},
{
"epoch": 0.4826904055390702,
"grad_norm": 12.747243830870412,
"learning_rate": 1.1046829814257647e-06,
"loss": 0.7383,
"step": 1342
},
{
"epoch": 0.48305008542397265,
"grad_norm": 18.477751772361778,
"learning_rate": 1.1035240339007451e-06,
"loss": 0.8569,
"step": 1343
},
{
"epoch": 0.4834097653088751,
"grad_norm": 10.187595645045208,
"learning_rate": 1.102364945802987e-06,
"loss": 0.8328,
"step": 1344
},
{
"epoch": 0.4837694451937775,
"grad_norm": 17.366969367388446,
"learning_rate": 1.101205718706387e-06,
"loss": 0.7632,
"step": 1345
},
{
"epoch": 0.48412912507868,
"grad_norm": 20.525432589105105,
"learning_rate": 1.1000463541850312e-06,
"loss": 0.8232,
"step": 1346
},
{
"epoch": 0.48448880496358243,
"grad_norm": 9.98374707321502,
"learning_rate": 1.0988868538131921e-06,
"loss": 0.8608,
"step": 1347
},
{
"epoch": 0.48484848484848486,
"grad_norm": 16.896881644874085,
"learning_rate": 1.0977272191653271e-06,
"loss": 0.8108,
"step": 1348
},
{
"epoch": 0.4852081647333873,
"grad_norm": 23.16193027472863,
"learning_rate": 1.0965674518160747e-06,
"loss": 0.7395,
"step": 1349
},
{
"epoch": 0.4855678446182897,
"grad_norm": 13.68948597753657,
"learning_rate": 1.0954075533402556e-06,
"loss": 0.7869,
"step": 1350
},
{
"epoch": 0.48592752450319215,
"grad_norm": 26.143039351841896,
"learning_rate": 1.0942475253128664e-06,
"loss": 0.7691,
"step": 1351
},
{
"epoch": 0.4862872043880946,
"grad_norm": 19.19169239426908,
"learning_rate": 1.0930873693090815e-06,
"loss": 0.7822,
"step": 1352
},
{
"epoch": 0.486646884272997,
"grad_norm": 40.92276343320756,
"learning_rate": 1.0919270869042474e-06,
"loss": 0.7735,
"step": 1353
},
{
"epoch": 0.48700656415789945,
"grad_norm": 11.678223770091648,
"learning_rate": 1.0907666796738837e-06,
"loss": 0.789,
"step": 1354
},
{
"epoch": 0.48736624404280193,
"grad_norm": 10.509971370373918,
"learning_rate": 1.0896061491936782e-06,
"loss": 0.8999,
"step": 1355
},
{
"epoch": 0.48772592392770436,
"grad_norm": 10.172828871993636,
"learning_rate": 1.088445497039487e-06,
"loss": 0.7966,
"step": 1356
},
{
"epoch": 0.4880856038126068,
"grad_norm": 11.52124986705594,
"learning_rate": 1.0872847247873313e-06,
"loss": 0.7998,
"step": 1357
},
{
"epoch": 0.4884452836975092,
"grad_norm": 32.04553875479848,
"learning_rate": 1.086123834013395e-06,
"loss": 0.8151,
"step": 1358
},
{
"epoch": 0.48880496358241166,
"grad_norm": 10.22603073573459,
"learning_rate": 1.084962826294023e-06,
"loss": 0.72,
"step": 1359
},
{
"epoch": 0.4891646434673141,
"grad_norm": 9.240702419200277,
"learning_rate": 1.0838017032057191e-06,
"loss": 0.8467,
"step": 1360
},
{
"epoch": 0.4895243233522165,
"grad_norm": 8.945883592984766,
"learning_rate": 1.0826404663251445e-06,
"loss": 0.7825,
"step": 1361
},
{
"epoch": 0.48988400323711895,
"grad_norm": 10.060625848398129,
"learning_rate": 1.0814791172291132e-06,
"loss": 0.8255,
"step": 1362
},
{
"epoch": 0.4902436831220214,
"grad_norm": 19.05127417333616,
"learning_rate": 1.0803176574945932e-06,
"loss": 0.7784,
"step": 1363
},
{
"epoch": 0.49060336300692386,
"grad_norm": 11.93794686525435,
"learning_rate": 1.0791560886987015e-06,
"loss": 0.8153,
"step": 1364
},
{
"epoch": 0.4909630428918263,
"grad_norm": 13.854128210349783,
"learning_rate": 1.0779944124187046e-06,
"loss": 0.8223,
"step": 1365
},
{
"epoch": 0.4913227227767287,
"grad_norm": 20.4817390782388,
"learning_rate": 1.0768326302320133e-06,
"loss": 0.7273,
"step": 1366
},
{
"epoch": 0.49168240266163116,
"grad_norm": 13.400956287747531,
"learning_rate": 1.0756707437161841e-06,
"loss": 0.773,
"step": 1367
},
{
"epoch": 0.4920420825465336,
"grad_norm": 10.538271198806664,
"learning_rate": 1.074508754448913e-06,
"loss": 0.835,
"step": 1368
},
{
"epoch": 0.492401762431436,
"grad_norm": 12.436747695465828,
"learning_rate": 1.0733466640080373e-06,
"loss": 0.8027,
"step": 1369
},
{
"epoch": 0.49276144231633845,
"grad_norm": 11.051007256517842,
"learning_rate": 1.0721844739715309e-06,
"loss": 0.7913,
"step": 1370
},
{
"epoch": 0.4931211222012409,
"grad_norm": 8.332551161729398,
"learning_rate": 1.071022185917503e-06,
"loss": 0.7725,
"step": 1371
},
{
"epoch": 0.4934808020861433,
"grad_norm": 9.823582866338528,
"learning_rate": 1.0698598014241959e-06,
"loss": 0.7831,
"step": 1372
},
{
"epoch": 0.49384048197104574,
"grad_norm": 18.00426132442602,
"learning_rate": 1.0686973220699834e-06,
"loss": 0.8159,
"step": 1373
},
{
"epoch": 0.49420016185594823,
"grad_norm": 7.140595216851977,
"learning_rate": 1.0675347494333667e-06,
"loss": 0.7904,
"step": 1374
},
{
"epoch": 0.49455984174085066,
"grad_norm": 21.6065240185961,
"learning_rate": 1.0663720850929751e-06,
"loss": 0.8269,
"step": 1375
},
{
"epoch": 0.4949195216257531,
"grad_norm": 44.46686647900651,
"learning_rate": 1.065209330627562e-06,
"loss": 0.8579,
"step": 1376
},
{
"epoch": 0.4952792015106555,
"grad_norm": 8.205664225193278,
"learning_rate": 1.064046487616003e-06,
"loss": 0.7465,
"step": 1377
},
{
"epoch": 0.49563888139555795,
"grad_norm": 7.058285244781892,
"learning_rate": 1.062883557637294e-06,
"loss": 0.8295,
"step": 1378
},
{
"epoch": 0.4959985612804604,
"grad_norm": 11.023738749472114,
"learning_rate": 1.0617205422705492e-06,
"loss": 0.8333,
"step": 1379
},
{
"epoch": 0.4963582411653628,
"grad_norm": 19.382091374578597,
"learning_rate": 1.060557443094998e-06,
"loss": 0.7928,
"step": 1380
},
{
"epoch": 0.49671792105026524,
"grad_norm": 21.399481545241475,
"learning_rate": 1.059394261689985e-06,
"loss": 0.7789,
"step": 1381
},
{
"epoch": 0.4970776009351677,
"grad_norm": 34.560384015302354,
"learning_rate": 1.0582309996349647e-06,
"loss": 0.8029,
"step": 1382
},
{
"epoch": 0.49743728082007016,
"grad_norm": 13.318672546302027,
"learning_rate": 1.0570676585095026e-06,
"loss": 0.8695,
"step": 1383
},
{
"epoch": 0.4977969607049726,
"grad_norm": 10.822353934734869,
"learning_rate": 1.0559042398932711e-06,
"loss": 0.719,
"step": 1384
},
{
"epoch": 0.498156640589875,
"grad_norm": 9.328541550684049,
"learning_rate": 1.0547407453660471e-06,
"loss": 0.8812,
"step": 1385
},
{
"epoch": 0.49851632047477745,
"grad_norm": 10.228891897402235,
"learning_rate": 1.053577176507712e-06,
"loss": 0.774,
"step": 1386
},
{
"epoch": 0.4988760003596799,
"grad_norm": 16.094006547659376,
"learning_rate": 1.0524135348982465e-06,
"loss": 0.7738,
"step": 1387
},
{
"epoch": 0.4992356802445823,
"grad_norm": 11.13873927702271,
"learning_rate": 1.0512498221177317e-06,
"loss": 0.7719,
"step": 1388
},
{
"epoch": 0.49959536012948474,
"grad_norm": 13.20489245236757,
"learning_rate": 1.0500860397463438e-06,
"loss": 0.7941,
"step": 1389
},
{
"epoch": 0.4999550400143872,
"grad_norm": 46.87310262944863,
"learning_rate": 1.0489221893643552e-06,
"loss": 0.8437,
"step": 1390
},
{
"epoch": 0.5003147198992897,
"grad_norm": 12.47578093303396,
"learning_rate": 1.0477582725521285e-06,
"loss": 0.8346,
"step": 1391
},
{
"epoch": 0.500674399784192,
"grad_norm": 11.642973996885482,
"learning_rate": 1.0465942908901189e-06,
"loss": 0.8496,
"step": 1392
},
{
"epoch": 0.5010340796690945,
"grad_norm": 14.487060735985708,
"learning_rate": 1.0454302459588674e-06,
"loss": 0.7931,
"step": 1393
},
{
"epoch": 0.5013937595539969,
"grad_norm": 21.194495689068134,
"learning_rate": 1.044266139339003e-06,
"loss": 0.7935,
"step": 1394
},
{
"epoch": 0.5017534394388994,
"grad_norm": 19.22494025894822,
"learning_rate": 1.0431019726112365e-06,
"loss": 0.795,
"step": 1395
},
{
"epoch": 0.5021131193238019,
"grad_norm": 12.664414830532246,
"learning_rate": 1.041937747356362e-06,
"loss": 0.8353,
"step": 1396
},
{
"epoch": 0.5024727992087042,
"grad_norm": 78.46427391405251,
"learning_rate": 1.040773465155252e-06,
"loss": 0.7851,
"step": 1397
},
{
"epoch": 0.5028324790936067,
"grad_norm": 8.50956585993304,
"learning_rate": 1.0396091275888566e-06,
"loss": 0.8766,
"step": 1398
},
{
"epoch": 0.5031921589785091,
"grad_norm": 22.599333988844542,
"learning_rate": 1.038444736238201e-06,
"loss": 0.8871,
"step": 1399
},
{
"epoch": 0.5035518388634116,
"grad_norm": 20.845736915187427,
"learning_rate": 1.0372802926843843e-06,
"loss": 0.8241,
"step": 1400
},
{
"epoch": 0.503911518748314,
"grad_norm": 9.166515327483395,
"learning_rate": 1.036115798508575e-06,
"loss": 0.813,
"step": 1401
},
{
"epoch": 0.5042711986332165,
"grad_norm": 11.81132719359345,
"learning_rate": 1.0349512552920112e-06,
"loss": 0.8076,
"step": 1402
},
{
"epoch": 0.5046308785181188,
"grad_norm": 8.350875162883664,
"learning_rate": 1.0337866646159978e-06,
"loss": 0.8202,
"step": 1403
},
{
"epoch": 0.5049905584030213,
"grad_norm": 17.291666992097348,
"learning_rate": 1.0326220280619036e-06,
"loss": 0.8115,
"step": 1404
},
{
"epoch": 0.5053502382879238,
"grad_norm": 12.20286423286577,
"learning_rate": 1.03145734721116e-06,
"loss": 0.8031,
"step": 1405
},
{
"epoch": 0.5057099181728262,
"grad_norm": 10.0654211351684,
"learning_rate": 1.0302926236452586e-06,
"loss": 0.7799,
"step": 1406
},
{
"epoch": 0.5060695980577287,
"grad_norm": 11.282504147389032,
"learning_rate": 1.0291278589457486e-06,
"loss": 0.731,
"step": 1407
},
{
"epoch": 0.506429277942631,
"grad_norm": 12.519439675020324,
"learning_rate": 1.0279630546942354e-06,
"loss": 0.8127,
"step": 1408
},
{
"epoch": 0.5067889578275335,
"grad_norm": 10.836142356182036,
"learning_rate": 1.0267982124723781e-06,
"loss": 0.8929,
"step": 1409
},
{
"epoch": 0.5071486377124359,
"grad_norm": 13.070906754805897,
"learning_rate": 1.0256333338618874e-06,
"loss": 0.8247,
"step": 1410
},
{
"epoch": 0.5075083175973384,
"grad_norm": 19.769708659412878,
"learning_rate": 1.0244684204445236e-06,
"loss": 0.8108,
"step": 1411
},
{
"epoch": 0.5078679974822408,
"grad_norm": 9.992181293854209,
"learning_rate": 1.0233034738020932e-06,
"loss": 0.8022,
"step": 1412
},
{
"epoch": 0.5082276773671432,
"grad_norm": 19.53932795176293,
"learning_rate": 1.022138495516449e-06,
"loss": 0.8203,
"step": 1413
},
{
"epoch": 0.5085873572520456,
"grad_norm": 20.04206472752638,
"learning_rate": 1.0209734871694863e-06,
"loss": 0.8613,
"step": 1414
},
{
"epoch": 0.5089470371369481,
"grad_norm": 12.528200450149379,
"learning_rate": 1.0198084503431414e-06,
"loss": 0.8216,
"step": 1415
},
{
"epoch": 0.5093067170218506,
"grad_norm": 8.533127280227811,
"learning_rate": 1.0186433866193892e-06,
"loss": 0.9119,
"step": 1416
},
{
"epoch": 0.509666396906753,
"grad_norm": 11.118502235354354,
"learning_rate": 1.0174782975802408e-06,
"loss": 0.8355,
"step": 1417
},
{
"epoch": 0.5100260767916555,
"grad_norm": 59.59495590275687,
"learning_rate": 1.016313184807742e-06,
"loss": 0.8441,
"step": 1418
},
{
"epoch": 0.5103857566765578,
"grad_norm": 10.357110554360238,
"learning_rate": 1.0151480498839712e-06,
"loss": 0.8593,
"step": 1419
},
{
"epoch": 0.5107454365614603,
"grad_norm": 18.11079726858921,
"learning_rate": 1.0139828943910357e-06,
"loss": 0.8574,
"step": 1420
},
{
"epoch": 0.5111051164463627,
"grad_norm": 8.670492965431258,
"learning_rate": 1.012817719911072e-06,
"loss": 0.7924,
"step": 1421
},
{
"epoch": 0.5114647963312652,
"grad_norm": 15.467203175658279,
"learning_rate": 1.0116525280262419e-06,
"loss": 0.7738,
"step": 1422
},
{
"epoch": 0.5118244762161676,
"grad_norm": 7.324319828884616,
"learning_rate": 1.0104873203187305e-06,
"loss": 0.8766,
"step": 1423
},
{
"epoch": 0.51218415610107,
"grad_norm": 9.15166688944549,
"learning_rate": 1.0093220983707448e-06,
"loss": 0.7628,
"step": 1424
},
{
"epoch": 0.5125438359859725,
"grad_norm": 10.055659156945493,
"learning_rate": 1.008156863764511e-06,
"loss": 0.819,
"step": 1425
},
{
"epoch": 0.5129035158708749,
"grad_norm": 9.109683293235477,
"learning_rate": 1.0069916180822727e-06,
"loss": 0.7693,
"step": 1426
},
{
"epoch": 0.5132631957557774,
"grad_norm": 433.16603237625316,
"learning_rate": 1.0058263629062883e-06,
"loss": 0.8065,
"step": 1427
},
{
"epoch": 0.5136228756406798,
"grad_norm": 79.67214769742216,
"learning_rate": 1.0046610998188288e-06,
"loss": 0.7972,
"step": 1428
},
{
"epoch": 0.5139825555255823,
"grad_norm": 11.928414979810299,
"learning_rate": 1.0034958304021767e-06,
"loss": 0.8611,
"step": 1429
},
{
"epoch": 0.5143422354104846,
"grad_norm": 13.517459801013043,
"learning_rate": 1.0023305562386221e-06,
"loss": 0.78,
"step": 1430
},
{
"epoch": 0.5147019152953871,
"grad_norm": 11.23971683250357,
"learning_rate": 1.0011652789104629e-06,
"loss": 0.7937,
"step": 1431
},
{
"epoch": 0.5150615951802895,
"grad_norm": 106.71880860032363,
"learning_rate": 1e-06,
"loss": 0.8431,
"step": 1432
},
{
"epoch": 0.515421275065192,
"grad_norm": 9.547930827763363,
"learning_rate": 9.98834721089537e-07,
"loss": 0.8941,
"step": 1433
},
{
"epoch": 0.5157809549500945,
"grad_norm": 7.9713663876106935,
"learning_rate": 9.976694437613776e-07,
"loss": 0.7785,
"step": 1434
},
{
"epoch": 0.5161406348349968,
"grad_norm": 17.969196382796778,
"learning_rate": 9.965041695978237e-07,
"loss": 0.7539,
"step": 1435
},
{
"epoch": 0.5165003147198993,
"grad_norm": 14.849388441928985,
"learning_rate": 9.953389001811713e-07,
"loss": 0.822,
"step": 1436
},
{
"epoch": 0.5168599946048017,
"grad_norm": 11.618250156707834,
"learning_rate": 9.941736370937118e-07,
"loss": 0.8114,
"step": 1437
},
{
"epoch": 0.5172196744897042,
"grad_norm": 30.362467421486922,
"learning_rate": 9.930083819177272e-07,
"loss": 0.8269,
"step": 1438
},
{
"epoch": 0.5175793543746066,
"grad_norm": 10.711150851738822,
"learning_rate": 9.91843136235489e-07,
"loss": 0.8406,
"step": 1439
},
{
"epoch": 0.517939034259509,
"grad_norm": 21.966707029951465,
"learning_rate": 9.906779016292554e-07,
"loss": 0.8269,
"step": 1440
},
{
"epoch": 0.5182987141444114,
"grad_norm": 6.889870704423818,
"learning_rate": 9.895126796812696e-07,
"loss": 0.8528,
"step": 1441
},
{
"epoch": 0.5186583940293139,
"grad_norm": 10.036096239883863,
"learning_rate": 9.88347471973758e-07,
"loss": 0.8566,
"step": 1442
},
{
"epoch": 0.5190180739142164,
"grad_norm": 12.626659221449701,
"learning_rate": 9.871822800889282e-07,
"loss": 0.8295,
"step": 1443
},
{
"epoch": 0.5193777537991188,
"grad_norm": 13.160401226184998,
"learning_rate": 9.860171056089645e-07,
"loss": 0.8984,
"step": 1444
},
{
"epoch": 0.5197374336840213,
"grad_norm": 81.19250120234352,
"learning_rate": 9.84851950116029e-07,
"loss": 0.8519,
"step": 1445
},
{
"epoch": 0.5200971135689236,
"grad_norm": 17.169130559487115,
"learning_rate": 9.836868151922578e-07,
"loss": 0.8276,
"step": 1446
},
{
"epoch": 0.5204567934538261,
"grad_norm": 12.062677567952813,
"learning_rate": 9.825217024197593e-07,
"loss": 0.8482,
"step": 1447
},
{
"epoch": 0.5208164733387285,
"grad_norm": 13.841853689575174,
"learning_rate": 9.81356613380611e-07,
"loss": 0.8572,
"step": 1448
},
{
"epoch": 0.521176153223631,
"grad_norm": 14.375266001914962,
"learning_rate": 9.801915496568585e-07,
"loss": 0.7885,
"step": 1449
},
{
"epoch": 0.5215358331085334,
"grad_norm": 12.844866158220983,
"learning_rate": 9.790265128305136e-07,
"loss": 0.8304,
"step": 1450
},
{
"epoch": 0.5218955129934358,
"grad_norm": 13.736766260260909,
"learning_rate": 9.77861504483551e-07,
"loss": 0.7662,
"step": 1451
},
{
"epoch": 0.5222551928783383,
"grad_norm": 10.887638718572212,
"learning_rate": 9.76696526197907e-07,
"loss": 0.8089,
"step": 1452
},
{
"epoch": 0.5226148727632407,
"grad_norm": 14.662475373623883,
"learning_rate": 9.755315795554765e-07,
"loss": 0.7953,
"step": 1453
},
{
"epoch": 0.5229745526481432,
"grad_norm": 19.29409847751306,
"learning_rate": 9.743666661381123e-07,
"loss": 0.7501,
"step": 1454
},
{
"epoch": 0.5233342325330456,
"grad_norm": 12.944227049303985,
"learning_rate": 9.73201787527622e-07,
"loss": 0.8094,
"step": 1455
},
{
"epoch": 0.523693912417948,
"grad_norm": 20.84274658229437,
"learning_rate": 9.720369453057648e-07,
"loss": 0.8358,
"step": 1456
},
{
"epoch": 0.5240535923028504,
"grad_norm": 12.187489960033687,
"learning_rate": 9.708721410542516e-07,
"loss": 0.8417,
"step": 1457
},
{
"epoch": 0.5244132721877529,
"grad_norm": 15.544458243079116,
"learning_rate": 9.697073763547415e-07,
"loss": 0.771,
"step": 1458
},
{
"epoch": 0.5247729520726553,
"grad_norm": 10.343413968696762,
"learning_rate": 9.6854265278884e-07,
"loss": 0.8581,
"step": 1459
},
{
"epoch": 0.5251326319575578,
"grad_norm": 11.403801343493468,
"learning_rate": 9.673779719380965e-07,
"loss": 0.8037,
"step": 1460
},
{
"epoch": 0.5254923118424603,
"grad_norm": 10.809633080377072,
"learning_rate": 9.662133353840023e-07,
"loss": 0.791,
"step": 1461
},
{
"epoch": 0.5258519917273626,
"grad_norm": 11.58855078217236,
"learning_rate": 9.65048744707989e-07,
"loss": 0.8823,
"step": 1462
},
{
"epoch": 0.5262116716122651,
"grad_norm": 15.640967862665777,
"learning_rate": 9.638842014914252e-07,
"loss": 0.8181,
"step": 1463
},
{
"epoch": 0.5265713514971675,
"grad_norm": 10.193297185297975,
"learning_rate": 9.627197073156158e-07,
"loss": 0.7847,
"step": 1464
},
{
"epoch": 0.52693103138207,
"grad_norm": 10.429545287628065,
"learning_rate": 9.615552637617988e-07,
"loss": 0.7273,
"step": 1465
},
{
"epoch": 0.5272907112669724,
"grad_norm": 20.54073989969678,
"learning_rate": 9.603908724111435e-07,
"loss": 0.8451,
"step": 1466
},
{
"epoch": 0.5276503911518748,
"grad_norm": 9.495552383487167,
"learning_rate": 9.59226534844748e-07,
"loss": 0.8064,
"step": 1467
},
{
"epoch": 0.5280100710367772,
"grad_norm": 11.400319429575987,
"learning_rate": 9.58062252643638e-07,
"loss": 0.8652,
"step": 1468
},
{
"epoch": 0.5283697509216797,
"grad_norm": 32.94938027219996,
"learning_rate": 9.568980273887636e-07,
"loss": 0.7586,
"step": 1469
},
{
"epoch": 0.5287294308065822,
"grad_norm": 28.295421524856383,
"learning_rate": 9.557338606609972e-07,
"loss": 0.769,
"step": 1470
},
{
"epoch": 0.5290891106914846,
"grad_norm": 68.60370187564824,
"learning_rate": 9.545697540411325e-07,
"loss": 0.8393,
"step": 1471
},
{
"epoch": 0.529448790576387,
"grad_norm": 7.431586021514424,
"learning_rate": 9.534057091098813e-07,
"loss": 0.8348,
"step": 1472
},
{
"epoch": 0.5298084704612894,
"grad_norm": 29.972602708832238,
"learning_rate": 9.522417274478715e-07,
"loss": 0.7477,
"step": 1473
},
{
"epoch": 0.5301681503461919,
"grad_norm": 18.61238653204131,
"learning_rate": 9.510778106356449e-07,
"loss": 0.793,
"step": 1474
},
{
"epoch": 0.5305278302310943,
"grad_norm": 10.815377021249557,
"learning_rate": 9.499139602536559e-07,
"loss": 0.8629,
"step": 1475
},
{
"epoch": 0.5308875101159968,
"grad_norm": 9.614312682859984,
"learning_rate": 9.487501778822683e-07,
"loss": 0.8357,
"step": 1476
},
{
"epoch": 0.5312471900008991,
"grad_norm": 17.47808247149681,
"learning_rate": 9.475864651017534e-07,
"loss": 0.8385,
"step": 1477
},
{
"epoch": 0.5316068698858016,
"grad_norm": 10.662664118752236,
"learning_rate": 9.464228234922881e-07,
"loss": 0.7612,
"step": 1478
},
{
"epoch": 0.5319665497707041,
"grad_norm": 16.040971042085854,
"learning_rate": 9.452592546339526e-07,
"loss": 0.8207,
"step": 1479
},
{
"epoch": 0.5323262296556065,
"grad_norm": 10.19304995373299,
"learning_rate": 9.440957601067292e-07,
"loss": 0.822,
"step": 1480
},
{
"epoch": 0.532685909540509,
"grad_norm": 77.96987404339865,
"learning_rate": 9.429323414904974e-07,
"loss": 0.8543,
"step": 1481
},
{
"epoch": 0.5330455894254114,
"grad_norm": 12.961172981124303,
"learning_rate": 9.417690003650352e-07,
"loss": 0.8026,
"step": 1482
},
{
"epoch": 0.5334052693103138,
"grad_norm": 10.970260110165201,
"learning_rate": 9.40605738310015e-07,
"loss": 0.8781,
"step": 1483
},
{
"epoch": 0.5337649491952162,
"grad_norm": 11.726910688626658,
"learning_rate": 9.394425569050016e-07,
"loss": 0.8684,
"step": 1484
},
{
"epoch": 0.5341246290801187,
"grad_norm": 18.136468552117464,
"learning_rate": 9.382794577294509e-07,
"loss": 0.8169,
"step": 1485
},
{
"epoch": 0.5344843089650211,
"grad_norm": 37.14349559707124,
"learning_rate": 9.371164423627059e-07,
"loss": 0.7818,
"step": 1486
},
{
"epoch": 0.5348439888499236,
"grad_norm": 11.69014220736439,
"learning_rate": 9.359535123839969e-07,
"loss": 0.8351,
"step": 1487
},
{
"epoch": 0.535203668734826,
"grad_norm": 11.223978301119892,
"learning_rate": 9.347906693724378e-07,
"loss": 0.8401,
"step": 1488
},
{
"epoch": 0.5355633486197284,
"grad_norm": 7.540815914834626,
"learning_rate": 9.336279149070251e-07,
"loss": 0.7378,
"step": 1489
},
{
"epoch": 0.5359230285046309,
"grad_norm": 60.946790599090555,
"learning_rate": 9.324652505666335e-07,
"loss": 0.8744,
"step": 1490
},
{
"epoch": 0.5362827083895333,
"grad_norm": 9.41396235262222,
"learning_rate": 9.313026779300168e-07,
"loss": 0.7885,
"step": 1491
},
{
"epoch": 0.5366423882744358,
"grad_norm": 8.27483048687943,
"learning_rate": 9.301401985758038e-07,
"loss": 0.851,
"step": 1492
},
{
"epoch": 0.5370020681593382,
"grad_norm": 8.97709051839709,
"learning_rate": 9.289778140824972e-07,
"loss": 0.7918,
"step": 1493
},
{
"epoch": 0.5373617480442406,
"grad_norm": 12.26427197135262,
"learning_rate": 9.278155260284691e-07,
"loss": 0.8106,
"step": 1494
},
{
"epoch": 0.537721427929143,
"grad_norm": 10.671233564758419,
"learning_rate": 9.266533359919626e-07,
"loss": 0.7956,
"step": 1495
},
{
"epoch": 0.5380811078140455,
"grad_norm": 7.350928819737693,
"learning_rate": 9.254912455510868e-07,
"loss": 0.7588,
"step": 1496
},
{
"epoch": 0.538440787698948,
"grad_norm": 9.552539809403818,
"learning_rate": 9.243292562838162e-07,
"loss": 0.8061,
"step": 1497
},
{
"epoch": 0.5388004675838504,
"grad_norm": 17.642432824835815,
"learning_rate": 9.231673697679866e-07,
"loss": 0.8017,
"step": 1498
},
{
"epoch": 0.5391601474687528,
"grad_norm": 9.334809921162734,
"learning_rate": 9.220055875812954e-07,
"loss": 0.7367,
"step": 1499
},
{
"epoch": 0.5395198273536552,
"grad_norm": 8.715324114387212,
"learning_rate": 9.208439113012983e-07,
"loss": 0.8062,
"step": 1500
},
{
"epoch": 0.5398795072385577,
"grad_norm": 13.031352351676288,
"learning_rate": 9.196823425054073e-07,
"loss": 0.7777,
"step": 1501
},
{
"epoch": 0.5402391871234601,
"grad_norm": 8.442707719950755,
"learning_rate": 9.185208827708869e-07,
"loss": 0.7946,
"step": 1502
},
{
"epoch": 0.5405988670083626,
"grad_norm": 13.126914120443349,
"learning_rate": 9.173595336748557e-07,
"loss": 0.8134,
"step": 1503
},
{
"epoch": 0.540958546893265,
"grad_norm": 23.512187948367245,
"learning_rate": 9.161982967942806e-07,
"loss": 0.7985,
"step": 1504
},
{
"epoch": 0.5413182267781674,
"grad_norm": 10.761145031843663,
"learning_rate": 9.150371737059772e-07,
"loss": 0.8286,
"step": 1505
},
{
"epoch": 0.5416779066630699,
"grad_norm": 10.184059228494528,
"learning_rate": 9.138761659866052e-07,
"loss": 0.8173,
"step": 1506
},
{
"epoch": 0.5420375865479723,
"grad_norm": 27.90335595700793,
"learning_rate": 9.127152752126688e-07,
"loss": 0.8092,
"step": 1507
},
{
"epoch": 0.5423972664328748,
"grad_norm": 43.52143495960643,
"learning_rate": 9.115545029605128e-07,
"loss": 0.771,
"step": 1508
},
{
"epoch": 0.5427569463177772,
"grad_norm": 10.801115688015788,
"learning_rate": 9.103938508063221e-07,
"loss": 0.7346,
"step": 1509
},
{
"epoch": 0.5431166262026796,
"grad_norm": 10.60432526070125,
"learning_rate": 9.092333203261167e-07,
"loss": 0.8135,
"step": 1510
},
{
"epoch": 0.543476306087582,
"grad_norm": 11.596061165601153,
"learning_rate": 9.080729130957527e-07,
"loss": 0.8025,
"step": 1511
},
{
"epoch": 0.5438359859724845,
"grad_norm": 8.561347390062522,
"learning_rate": 9.069126306909186e-07,
"loss": 0.7888,
"step": 1512
},
{
"epoch": 0.5441956658573869,
"grad_norm": 29.250079789481337,
"learning_rate": 9.057524746871333e-07,
"loss": 0.79,
"step": 1513
},
{
"epoch": 0.5445553457422894,
"grad_norm": 11.968239449874433,
"learning_rate": 9.045924466597447e-07,
"loss": 0.8773,
"step": 1514
},
{
"epoch": 0.5449150256271919,
"grad_norm": 9.934235153048508,
"learning_rate": 9.034325481839252e-07,
"loss": 0.8296,
"step": 1515
},
{
"epoch": 0.5452747055120942,
"grad_norm": 15.402183864430881,
"learning_rate": 9.022727808346731e-07,
"loss": 0.8458,
"step": 1516
},
{
"epoch": 0.5456343853969967,
"grad_norm": 18.35032544724908,
"learning_rate": 9.011131461868077e-07,
"loss": 0.769,
"step": 1517
},
{
"epoch": 0.5459940652818991,
"grad_norm": 12.860782881222937,
"learning_rate": 8.99953645814969e-07,
"loss": 0.8618,
"step": 1518
},
{
"epoch": 0.5463537451668016,
"grad_norm": 9.549990369811512,
"learning_rate": 8.987942812936132e-07,
"loss": 0.7778,
"step": 1519
},
{
"epoch": 0.546713425051704,
"grad_norm": 15.989159785555255,
"learning_rate": 8.976350541970129e-07,
"loss": 0.8149,
"step": 1520
},
{
"epoch": 0.5470731049366064,
"grad_norm": 10.664324645088254,
"learning_rate": 8.964759660992545e-07,
"loss": 0.7395,
"step": 1521
},
{
"epoch": 0.5474327848215088,
"grad_norm": 9.098071536124595,
"learning_rate": 8.953170185742355e-07,
"loss": 0.7646,
"step": 1522
},
{
"epoch": 0.5477924647064113,
"grad_norm": 425.6877435366872,
"learning_rate": 8.941582131956614e-07,
"loss": 0.8052,
"step": 1523
},
{
"epoch": 0.5481521445913138,
"grad_norm": 17.728284471453698,
"learning_rate": 8.929995515370459e-07,
"loss": 0.8455,
"step": 1524
},
{
"epoch": 0.5485118244762162,
"grad_norm": 10.354466220926385,
"learning_rate": 8.918410351717073e-07,
"loss": 0.7448,
"step": 1525
},
{
"epoch": 0.5488715043611186,
"grad_norm": 7.833962450955127,
"learning_rate": 8.906826656727664e-07,
"loss": 0.837,
"step": 1526
},
{
"epoch": 0.549231184246021,
"grad_norm": 10.557262707031633,
"learning_rate": 8.895244446131443e-07,
"loss": 0.7661,
"step": 1527
},
{
"epoch": 0.5495908641309235,
"grad_norm": 24.680259140974954,
"learning_rate": 8.883663735655612e-07,
"loss": 0.8053,
"step": 1528
},
{
"epoch": 0.5499505440158259,
"grad_norm": 10.64492012360957,
"learning_rate": 8.872084541025336e-07,
"loss": 0.8016,
"step": 1529
},
{
"epoch": 0.5503102239007284,
"grad_norm": 19.601472181141222,
"learning_rate": 8.860506877963714e-07,
"loss": 0.7641,
"step": 1530
},
{
"epoch": 0.5506699037856307,
"grad_norm": 9.671158817227013,
"learning_rate": 8.848930762191768e-07,
"loss": 0.8009,
"step": 1531
},
{
"epoch": 0.5510295836705332,
"grad_norm": 14.7316121621697,
"learning_rate": 8.837356209428426e-07,
"loss": 0.9208,
"step": 1532
},
{
"epoch": 0.5513892635554357,
"grad_norm": 10.997978454337733,
"learning_rate": 8.825783235390488e-07,
"loss": 0.8159,
"step": 1533
},
{
"epoch": 0.5517489434403381,
"grad_norm": 8.272077025049713,
"learning_rate": 8.814211855792609e-07,
"loss": 0.7496,
"step": 1534
},
{
"epoch": 0.5521086233252406,
"grad_norm": 13.40204884222874,
"learning_rate": 8.802642086347277e-07,
"loss": 0.7705,
"step": 1535
},
{
"epoch": 0.552468303210143,
"grad_norm": 9.13911646763841,
"learning_rate": 8.791073942764805e-07,
"loss": 0.8221,
"step": 1536
},
{
"epoch": 0.5528279830950454,
"grad_norm": 95.37842495705642,
"learning_rate": 8.779507440753284e-07,
"loss": 0.8346,
"step": 1537
},
{
"epoch": 0.5531876629799478,
"grad_norm": 12.253181880734747,
"learning_rate": 8.767942596018585e-07,
"loss": 0.8203,
"step": 1538
},
{
"epoch": 0.5535473428648503,
"grad_norm": 8.814195351830474,
"learning_rate": 8.756379424264328e-07,
"loss": 0.7809,
"step": 1539
},
{
"epoch": 0.5539070227497527,
"grad_norm": 11.992274017200714,
"learning_rate": 8.74481794119186e-07,
"loss": 0.7505,
"step": 1540
},
{
"epoch": 0.5542667026346552,
"grad_norm": 11.097112573647356,
"learning_rate": 8.733258162500228e-07,
"loss": 0.8214,
"step": 1541
},
{
"epoch": 0.5546263825195576,
"grad_norm": 15.971347238479705,
"learning_rate": 8.721700103886176e-07,
"loss": 0.827,
"step": 1542
},
{
"epoch": 0.55498606240446,
"grad_norm": 30.014919253725115,
"learning_rate": 8.710143781044113e-07,
"loss": 0.8519,
"step": 1543
},
{
"epoch": 0.5553457422893625,
"grad_norm": 26.81170112824856,
"learning_rate": 8.698589209666073e-07,
"loss": 0.8627,
"step": 1544
},
{
"epoch": 0.5557054221742649,
"grad_norm": 18.33628542089601,
"learning_rate": 8.687036405441732e-07,
"loss": 0.7444,
"step": 1545
},
{
"epoch": 0.5560651020591674,
"grad_norm": 10.040499270398149,
"learning_rate": 8.675485384058356e-07,
"loss": 0.8195,
"step": 1546
},
{
"epoch": 0.5564247819440697,
"grad_norm": 17.268711898463536,
"learning_rate": 8.663936161200798e-07,
"loss": 0.7472,
"step": 1547
},
{
"epoch": 0.5567844618289722,
"grad_norm": 7.149729583504873,
"learning_rate": 8.652388752551457e-07,
"loss": 0.7998,
"step": 1548
},
{
"epoch": 0.5571441417138746,
"grad_norm": 27.709180822307594,
"learning_rate": 8.640843173790277e-07,
"loss": 0.7689,
"step": 1549
},
{
"epoch": 0.5575038215987771,
"grad_norm": 18.26212825646285,
"learning_rate": 8.629299440594717e-07,
"loss": 0.7806,
"step": 1550
},
{
"epoch": 0.5578635014836796,
"grad_norm": 8.704360397256853,
"learning_rate": 8.617757568639731e-07,
"loss": 0.8493,
"step": 1551
},
{
"epoch": 0.558223181368582,
"grad_norm": 15.388803067466043,
"learning_rate": 8.606217573597737e-07,
"loss": 0.7594,
"step": 1552
},
{
"epoch": 0.5585828612534844,
"grad_norm": 13.75311500960052,
"learning_rate": 8.594679471138611e-07,
"loss": 0.799,
"step": 1553
},
{
"epoch": 0.5589425411383868,
"grad_norm": 9.725642219860728,
"learning_rate": 8.58314327692966e-07,
"loss": 0.8048,
"step": 1554
},
{
"epoch": 0.5593022210232893,
"grad_norm": 15.927126719512732,
"learning_rate": 8.571609006635604e-07,
"loss": 0.8141,
"step": 1555
},
{
"epoch": 0.5596619009081917,
"grad_norm": 8.05123488042932,
"learning_rate": 8.560076675918535e-07,
"loss": 0.8883,
"step": 1556
},
{
"epoch": 0.5600215807930942,
"grad_norm": 8.732920737327605,
"learning_rate": 8.548546300437927e-07,
"loss": 0.7587,
"step": 1557
},
{
"epoch": 0.5603812606779965,
"grad_norm": 38.797439762016744,
"learning_rate": 8.537017895850591e-07,
"loss": 0.8558,
"step": 1558
},
{
"epoch": 0.560740940562899,
"grad_norm": 21.26469174794471,
"learning_rate": 8.525491477810669e-07,
"loss": 0.8658,
"step": 1559
},
{
"epoch": 0.5611006204478014,
"grad_norm": 13.346270839636258,
"learning_rate": 8.513967061969593e-07,
"loss": 0.7985,
"step": 1560
},
{
"epoch": 0.5614603003327039,
"grad_norm": 9.342793989203471,
"learning_rate": 8.502444663976087e-07,
"loss": 0.8314,
"step": 1561
},
{
"epoch": 0.5618199802176064,
"grad_norm": 16.37141319073614,
"learning_rate": 8.490924299476133e-07,
"loss": 0.737,
"step": 1562
},
{
"epoch": 0.5621796601025087,
"grad_norm": 15.262379384490513,
"learning_rate": 8.479405984112948e-07,
"loss": 0.7485,
"step": 1563
},
{
"epoch": 0.5625393399874112,
"grad_norm": 14.432524222304044,
"learning_rate": 8.467889733526976e-07,
"loss": 0.8049,
"step": 1564
},
{
"epoch": 0.5628990198723136,
"grad_norm": 8.35035684372924,
"learning_rate": 8.456375563355842e-07,
"loss": 0.7843,
"step": 1565
},
{
"epoch": 0.5632586997572161,
"grad_norm": 9.03293272220025,
"learning_rate": 8.444863489234356e-07,
"loss": 0.8045,
"step": 1566
},
{
"epoch": 0.5636183796421185,
"grad_norm": 28.474405239707348,
"learning_rate": 8.433353526794482e-07,
"loss": 0.8365,
"step": 1567
},
{
"epoch": 0.563978059527021,
"grad_norm": 13.94415385830813,
"learning_rate": 8.42184569166532e-07,
"loss": 0.8721,
"step": 1568
},
{
"epoch": 0.5643377394119233,
"grad_norm": 9.51307347896263,
"learning_rate": 8.410339999473065e-07,
"loss": 0.7889,
"step": 1569
},
{
"epoch": 0.5646974192968258,
"grad_norm": 8.301031918905753,
"learning_rate": 8.398836465841019e-07,
"loss": 0.7683,
"step": 1570
},
{
"epoch": 0.5650570991817283,
"grad_norm": 8.592527188963984,
"learning_rate": 8.387335106389549e-07,
"loss": 0.784,
"step": 1571
},
{
"epoch": 0.5654167790666307,
"grad_norm": 19.199075553424677,
"learning_rate": 8.375835936736071e-07,
"loss": 0.816,
"step": 1572
},
{
"epoch": 0.5657764589515332,
"grad_norm": 18.549140323686558,
"learning_rate": 8.364338972495016e-07,
"loss": 0.8246,
"step": 1573
},
{
"epoch": 0.5661361388364355,
"grad_norm": 9.371172026712124,
"learning_rate": 8.352844229277832e-07,
"loss": 0.8167,
"step": 1574
},
{
"epoch": 0.566495818721338,
"grad_norm": 14.200981394455908,
"learning_rate": 8.341351722692951e-07,
"loss": 0.785,
"step": 1575
},
{
"epoch": 0.5668554986062404,
"grad_norm": 12.554963101861757,
"learning_rate": 8.329861468345767e-07,
"loss": 0.8083,
"step": 1576
},
{
"epoch": 0.5672151784911429,
"grad_norm": 23.556173884147064,
"learning_rate": 8.318373481838604e-07,
"loss": 0.7853,
"step": 1577
},
{
"epoch": 0.5675748583760453,
"grad_norm": 11.931308569332678,
"learning_rate": 8.306887778770723e-07,
"loss": 0.7943,
"step": 1578
},
{
"epoch": 0.5679345382609478,
"grad_norm": 15.530610503935467,
"learning_rate": 8.295404374738277e-07,
"loss": 0.8688,
"step": 1579
},
{
"epoch": 0.5682942181458502,
"grad_norm": 10.763767906174117,
"learning_rate": 8.283923285334303e-07,
"loss": 0.8201,
"step": 1580
},
{
"epoch": 0.5686538980307526,
"grad_norm": 15.974553059167864,
"learning_rate": 8.27244452614868e-07,
"loss": 0.771,
"step": 1581
},
{
"epoch": 0.5690135779156551,
"grad_norm": 12.632225729750193,
"learning_rate": 8.260968112768136e-07,
"loss": 0.8878,
"step": 1582
},
{
"epoch": 0.5693732578005575,
"grad_norm": 29.514389708254264,
"learning_rate": 8.249494060776214e-07,
"loss": 0.8457,
"step": 1583
},
{
"epoch": 0.56973293768546,
"grad_norm": 75.55606752285239,
"learning_rate": 8.238022385753247e-07,
"loss": 0.8524,
"step": 1584
},
{
"epoch": 0.5700926175703623,
"grad_norm": 16.74577979702134,
"learning_rate": 8.226553103276334e-07,
"loss": 0.8064,
"step": 1585
},
{
"epoch": 0.5704522974552648,
"grad_norm": 10.292321500664308,
"learning_rate": 8.215086228919336e-07,
"loss": 0.7388,
"step": 1586
},
{
"epoch": 0.5708119773401672,
"grad_norm": 21.035453250353576,
"learning_rate": 8.203621778252838e-07,
"loss": 0.817,
"step": 1587
},
{
"epoch": 0.5711716572250697,
"grad_norm": 6.894357013489585,
"learning_rate": 8.19215976684414e-07,
"loss": 0.7624,
"step": 1588
},
{
"epoch": 0.5715313371099722,
"grad_norm": 13.671745224088038,
"learning_rate": 8.180700210257221e-07,
"loss": 0.7699,
"step": 1589
},
{
"epoch": 0.5718910169948745,
"grad_norm": 10.76508931416682,
"learning_rate": 8.16924312405273e-07,
"loss": 0.8594,
"step": 1590
},
{
"epoch": 0.572250696879777,
"grad_norm": 8.52246032699738,
"learning_rate": 8.157788523787966e-07,
"loss": 0.7449,
"step": 1591
},
{
"epoch": 0.5726103767646794,
"grad_norm": 10.420745032826296,
"learning_rate": 8.146336425016848e-07,
"loss": 0.8031,
"step": 1592
},
{
"epoch": 0.5729700566495819,
"grad_norm": 18.16963817231503,
"learning_rate": 8.134886843289899e-07,
"loss": 0.8334,
"step": 1593
},
{
"epoch": 0.5733297365344843,
"grad_norm": 14.348144219376929,
"learning_rate": 8.123439794154221e-07,
"loss": 0.8811,
"step": 1594
},
{
"epoch": 0.5736894164193868,
"grad_norm": 11.625933704619902,
"learning_rate": 8.111995293153484e-07,
"loss": 0.809,
"step": 1595
},
{
"epoch": 0.5740490963042891,
"grad_norm": 11.567273100902918,
"learning_rate": 8.100553355827896e-07,
"loss": 0.7572,
"step": 1596
},
{
"epoch": 0.5744087761891916,
"grad_norm": 26.793760953721204,
"learning_rate": 8.089113997714179e-07,
"loss": 0.8495,
"step": 1597
},
{
"epoch": 0.5747684560740941,
"grad_norm": 17.518932515308045,
"learning_rate": 8.077677234345557e-07,
"loss": 0.8459,
"step": 1598
},
{
"epoch": 0.5751281359589965,
"grad_norm": 11.232300984378966,
"learning_rate": 8.066243081251729e-07,
"loss": 0.8607,
"step": 1599
},
{
"epoch": 0.575487815843899,
"grad_norm": 11.014541018417304,
"learning_rate": 8.054811553958851e-07,
"loss": 0.8606,
"step": 1600
},
{
"epoch": 0.5758474957288013,
"grad_norm": 10.043805051702309,
"learning_rate": 8.043382667989513e-07,
"loss": 0.7684,
"step": 1601
},
{
"epoch": 0.5762071756137038,
"grad_norm": 10.829708717271632,
"learning_rate": 8.031956438862717e-07,
"loss": 0.845,
"step": 1602
},
{
"epoch": 0.5765668554986062,
"grad_norm": 7.900714257136215,
"learning_rate": 8.02053288209386e-07,
"loss": 0.826,
"step": 1603
},
{
"epoch": 0.5769265353835087,
"grad_norm": 7.701628007687078,
"learning_rate": 8.009112013194706e-07,
"loss": 0.7989,
"step": 1604
},
{
"epoch": 0.5772862152684111,
"grad_norm": 13.014171738803286,
"learning_rate": 7.997693847673376e-07,
"loss": 0.8044,
"step": 1605
},
{
"epoch": 0.5776458951533135,
"grad_norm": 12.034836978503431,
"learning_rate": 7.986278401034314e-07,
"loss": 0.7526,
"step": 1606
},
{
"epoch": 0.578005575038216,
"grad_norm": 8.76378610955473,
"learning_rate": 7.97486568877827e-07,
"loss": 0.8735,
"step": 1607
},
{
"epoch": 0.5783652549231184,
"grad_norm": 8.083337469431553,
"learning_rate": 7.96345572640229e-07,
"loss": 0.8618,
"step": 1608
},
{
"epoch": 0.5787249348080209,
"grad_norm": 17.5439931020745,
"learning_rate": 7.952048529399684e-07,
"loss": 0.8366,
"step": 1609
},
{
"epoch": 0.5790846146929233,
"grad_norm": 24.84467945535168,
"learning_rate": 7.94064411326e-07,
"loss": 0.8344,
"step": 1610
},
{
"epoch": 0.5794442945778258,
"grad_norm": 12.652147596563642,
"learning_rate": 7.929242493469011e-07,
"loss": 0.7845,
"step": 1611
},
{
"epoch": 0.5798039744627281,
"grad_norm": 13.230941718882002,
"learning_rate": 7.917843685508701e-07,
"loss": 0.7772,
"step": 1612
},
{
"epoch": 0.5801636543476306,
"grad_norm": 7.4947545373566955,
"learning_rate": 7.906447704857232e-07,
"loss": 0.7963,
"step": 1613
},
{
"epoch": 0.580523334232533,
"grad_norm": 13.049017429706673,
"learning_rate": 7.895054566988923e-07,
"loss": 0.7467,
"step": 1614
},
{
"epoch": 0.5808830141174355,
"grad_norm": 12.331575353775431,
"learning_rate": 7.883664287374234e-07,
"loss": 0.9336,
"step": 1615
},
{
"epoch": 0.581242694002338,
"grad_norm": 8.464406782261062,
"learning_rate": 7.872276881479748e-07,
"loss": 0.8211,
"step": 1616
},
{
"epoch": 0.5816023738872403,
"grad_norm": 11.254172352508087,
"learning_rate": 7.860892364768143e-07,
"loss": 0.9262,
"step": 1617
},
{
"epoch": 0.5819620537721428,
"grad_norm": 11.331168954777441,
"learning_rate": 7.849510752698179e-07,
"loss": 0.755,
"step": 1618
},
{
"epoch": 0.5823217336570452,
"grad_norm": 18.84593878696583,
"learning_rate": 7.838132060724656e-07,
"loss": 0.7542,
"step": 1619
},
{
"epoch": 0.5826814135419477,
"grad_norm": 9.630203661435184,
"learning_rate": 7.826756304298428e-07,
"loss": 0.8442,
"step": 1620
},
{
"epoch": 0.5830410934268501,
"grad_norm": 12.235166401525152,
"learning_rate": 7.815383498866351e-07,
"loss": 0.8065,
"step": 1621
},
{
"epoch": 0.5834007733117526,
"grad_norm": 37.672315833888156,
"learning_rate": 7.804013659871284e-07,
"loss": 0.7702,
"step": 1622
},
{
"epoch": 0.5837604531966549,
"grad_norm": 13.29452157979337,
"learning_rate": 7.792646802752044e-07,
"loss": 0.7965,
"step": 1623
},
{
"epoch": 0.5841201330815574,
"grad_norm": 10.764627403580235,
"learning_rate": 7.78128294294341e-07,
"loss": 0.7735,
"step": 1624
},
{
"epoch": 0.5844798129664599,
"grad_norm": 8.737810342888794,
"learning_rate": 7.769922095876087e-07,
"loss": 0.7802,
"step": 1625
},
{
"epoch": 0.5848394928513623,
"grad_norm": 14.381254651476704,
"learning_rate": 7.758564276976695e-07,
"loss": 0.7713,
"step": 1626
},
{
"epoch": 0.5851991727362648,
"grad_norm": 10.22112868832261,
"learning_rate": 7.747209501667728e-07,
"loss": 0.7799,
"step": 1627
},
{
"epoch": 0.5855588526211671,
"grad_norm": 10.624653396163458,
"learning_rate": 7.73585778536756e-07,
"loss": 0.7532,
"step": 1628
},
{
"epoch": 0.5859185325060696,
"grad_norm": 12.149799716586122,
"learning_rate": 7.724509143490407e-07,
"loss": 0.8116,
"step": 1629
},
{
"epoch": 0.586278212390972,
"grad_norm": 10.637991127225355,
"learning_rate": 7.713163591446317e-07,
"loss": 0.7916,
"step": 1630
},
{
"epoch": 0.5866378922758745,
"grad_norm": 7.961233529096206,
"learning_rate": 7.701821144641125e-07,
"loss": 0.834,
"step": 1631
},
{
"epoch": 0.5869975721607769,
"grad_norm": 15.480251438295925,
"learning_rate": 7.690481818476467e-07,
"loss": 0.8796,
"step": 1632
},
{
"epoch": 0.5873572520456793,
"grad_norm": 22.493955035772476,
"learning_rate": 7.679145628349733e-07,
"loss": 0.8464,
"step": 1633
},
{
"epoch": 0.5877169319305818,
"grad_norm": 8.238857603348297,
"learning_rate": 7.667812589654061e-07,
"loss": 0.7564,
"step": 1634
},
{
"epoch": 0.5880766118154842,
"grad_norm": 11.141054739419813,
"learning_rate": 7.656482717778298e-07,
"loss": 0.7308,
"step": 1635
},
{
"epoch": 0.5884362917003867,
"grad_norm": 13.536432967707173,
"learning_rate": 7.645156028107004e-07,
"loss": 0.7676,
"step": 1636
},
{
"epoch": 0.5887959715852891,
"grad_norm": 17.565506542196687,
"learning_rate": 7.633832536020409e-07,
"loss": 0.8364,
"step": 1637
},
{
"epoch": 0.5891556514701916,
"grad_norm": 12.592416432890023,
"learning_rate": 7.622512256894411e-07,
"loss": 0.8126,
"step": 1638
},
{
"epoch": 0.5895153313550939,
"grad_norm": 8.757685937856875,
"learning_rate": 7.611195206100528e-07,
"loss": 0.803,
"step": 1639
},
{
"epoch": 0.5898750112399964,
"grad_norm": 25.36742880137935,
"learning_rate": 7.599881399005911e-07,
"loss": 0.8508,
"step": 1640
},
{
"epoch": 0.5902346911248988,
"grad_norm": 10.397222307031699,
"learning_rate": 7.5885708509733e-07,
"loss": 0.7614,
"step": 1641
},
{
"epoch": 0.5905943710098013,
"grad_norm": 9.026116126538364,
"learning_rate": 7.577263577361009e-07,
"loss": 0.8438,
"step": 1642
},
{
"epoch": 0.5909540508947038,
"grad_norm": 13.858732556753937,
"learning_rate": 7.565959593522912e-07,
"loss": 0.8125,
"step": 1643
},
{
"epoch": 0.5913137307796061,
"grad_norm": 12.255674448639857,
"learning_rate": 7.554658914808403e-07,
"loss": 0.7885,
"step": 1644
},
{
"epoch": 0.5916734106645086,
"grad_norm": 16.936034511555206,
"learning_rate": 7.543361556562396e-07,
"loss": 0.8317,
"step": 1645
},
{
"epoch": 0.592033090549411,
"grad_norm": 38.265449794638506,
"learning_rate": 7.532067534125299e-07,
"loss": 0.8391,
"step": 1646
},
{
"epoch": 0.5923927704343135,
"grad_norm": 11.01071430615483,
"learning_rate": 7.520776862832992e-07,
"loss": 0.8272,
"step": 1647
},
{
"epoch": 0.5927524503192159,
"grad_norm": 16.703534635338457,
"learning_rate": 7.509489558016789e-07,
"loss": 0.7588,
"step": 1648
},
{
"epoch": 0.5931121302041183,
"grad_norm": 13.918339297662166,
"learning_rate": 7.49820563500345e-07,
"loss": 0.7804,
"step": 1649
},
{
"epoch": 0.5934718100890207,
"grad_norm": 11.949416105380944,
"learning_rate": 7.486925109115134e-07,
"loss": 0.8599,
"step": 1650
},
{
"epoch": 0.5938314899739232,
"grad_norm": 39.123143987842894,
"learning_rate": 7.475647995669396e-07,
"loss": 0.793,
"step": 1651
},
{
"epoch": 0.5941911698588257,
"grad_norm": 12.33935596524586,
"learning_rate": 7.464374309979142e-07,
"loss": 0.7959,
"step": 1652
},
{
"epoch": 0.5945508497437281,
"grad_norm": 34.48309402454981,
"learning_rate": 7.453104067352636e-07,
"loss": 0.7531,
"step": 1653
},
{
"epoch": 0.5949105296286306,
"grad_norm": 10.537791080208066,
"learning_rate": 7.441837283093463e-07,
"loss": 0.8255,
"step": 1654
},
{
"epoch": 0.5952702095135329,
"grad_norm": 9.4365930426022,
"learning_rate": 7.430573972500518e-07,
"loss": 0.8213,
"step": 1655
},
{
"epoch": 0.5956298893984354,
"grad_norm": 14.142693442308168,
"learning_rate": 7.419314150867964e-07,
"loss": 0.8534,
"step": 1656
},
{
"epoch": 0.5959895692833378,
"grad_norm": 12.728732783659234,
"learning_rate": 7.40805783348524e-07,
"loss": 0.8539,
"step": 1657
},
{
"epoch": 0.5963492491682403,
"grad_norm": 10.607627014324681,
"learning_rate": 7.396805035637021e-07,
"loss": 0.7968,
"step": 1658
},
{
"epoch": 0.5967089290531427,
"grad_norm": 15.922722762925876,
"learning_rate": 7.385555772603212e-07,
"loss": 0.7998,
"step": 1659
},
{
"epoch": 0.5970686089380451,
"grad_norm": 9.371180648602238,
"learning_rate": 7.374310059658899e-07,
"loss": 0.8754,
"step": 1660
},
{
"epoch": 0.5974282888229476,
"grad_norm": 21.04428397163054,
"learning_rate": 7.363067912074366e-07,
"loss": 0.8101,
"step": 1661
},
{
"epoch": 0.59778796870785,
"grad_norm": 8.686895121328693,
"learning_rate": 7.351829345115046e-07,
"loss": 0.7323,
"step": 1662
},
{
"epoch": 0.5981476485927525,
"grad_norm": 11.025227706986179,
"learning_rate": 7.340594374041515e-07,
"loss": 0.8196,
"step": 1663
},
{
"epoch": 0.5985073284776549,
"grad_norm": 8.66746116218025,
"learning_rate": 7.329363014109462e-07,
"loss": 0.8179,
"step": 1664
},
{
"epoch": 0.5988670083625574,
"grad_norm": 13.456402927781026,
"learning_rate": 7.318135280569673e-07,
"loss": 0.7836,
"step": 1665
},
{
"epoch": 0.5992266882474597,
"grad_norm": 8.996043879179224,
"learning_rate": 7.306911188668016e-07,
"loss": 0.8255,
"step": 1666
},
{
"epoch": 0.5995863681323622,
"grad_norm": 14.971979355282974,
"learning_rate": 7.295690753645403e-07,
"loss": 0.7884,
"step": 1667
},
{
"epoch": 0.5999460480172646,
"grad_norm": 13.741062159581267,
"learning_rate": 7.284473990737794e-07,
"loss": 0.7873,
"step": 1668
},
{
"epoch": 0.6003057279021671,
"grad_norm": 13.18784983193868,
"learning_rate": 7.27326091517615e-07,
"loss": 0.8479,
"step": 1669
},
{
"epoch": 0.6006654077870696,
"grad_norm": 14.518311097413894,
"learning_rate": 7.262051542186429e-07,
"loss": 0.8235,
"step": 1670
},
{
"epoch": 0.6010250876719719,
"grad_norm": 13.527841789567097,
"learning_rate": 7.250845886989567e-07,
"loss": 0.8892,
"step": 1671
},
{
"epoch": 0.6013847675568744,
"grad_norm": 11.632315458958761,
"learning_rate": 7.239643964801449e-07,
"loss": 0.8006,
"step": 1672
},
{
"epoch": 0.6017444474417768,
"grad_norm": 17.626581694580985,
"learning_rate": 7.228445790832885e-07,
"loss": 0.8434,
"step": 1673
},
{
"epoch": 0.6021041273266793,
"grad_norm": 28.75211959641394,
"learning_rate": 7.217251380289601e-07,
"loss": 0.8482,
"step": 1674
},
{
"epoch": 0.6024638072115817,
"grad_norm": 12.345644933002072,
"learning_rate": 7.206060748372212e-07,
"loss": 0.873,
"step": 1675
},
{
"epoch": 0.6028234870964841,
"grad_norm": 8.424235857182305,
"learning_rate": 7.194873910276203e-07,
"loss": 0.792,
"step": 1676
},
{
"epoch": 0.6031831669813865,
"grad_norm": 16.950896602803002,
"learning_rate": 7.183690881191907e-07,
"loss": 0.7963,
"step": 1677
},
{
"epoch": 0.603542846866289,
"grad_norm": 11.204109864484545,
"learning_rate": 7.17251167630448e-07,
"loss": 0.8174,
"step": 1678
},
{
"epoch": 0.6039025267511915,
"grad_norm": 18.393413798144334,
"learning_rate": 7.161336310793893e-07,
"loss": 0.7951,
"step": 1679
},
{
"epoch": 0.6042622066360939,
"grad_norm": 14.165209694169475,
"learning_rate": 7.150164799834902e-07,
"loss": 0.7865,
"step": 1680
},
{
"epoch": 0.6046218865209964,
"grad_norm": 21.166700297544274,
"learning_rate": 7.138997158597019e-07,
"loss": 0.8377,
"step": 1681
},
{
"epoch": 0.6049815664058987,
"grad_norm": 15.084184088563942,
"learning_rate": 7.127833402244514e-07,
"loss": 0.8096,
"step": 1682
},
{
"epoch": 0.6053412462908012,
"grad_norm": 14.157352525865917,
"learning_rate": 7.116673545936378e-07,
"loss": 0.8438,
"step": 1683
},
{
"epoch": 0.6057009261757036,
"grad_norm": 11.058120466139862,
"learning_rate": 7.105517604826307e-07,
"loss": 0.8251,
"step": 1684
},
{
"epoch": 0.6060606060606061,
"grad_norm": 10.01805659251896,
"learning_rate": 7.094365594062675e-07,
"loss": 0.7872,
"step": 1685
},
{
"epoch": 0.6064202859455085,
"grad_norm": 14.417351252084616,
"learning_rate": 7.083217528788524e-07,
"loss": 0.7586,
"step": 1686
},
{
"epoch": 0.6067799658304109,
"grad_norm": 23.018726266905283,
"learning_rate": 7.072073424141537e-07,
"loss": 0.8036,
"step": 1687
},
{
"epoch": 0.6071396457153134,
"grad_norm": 23.108894385648515,
"learning_rate": 7.060933295254025e-07,
"loss": 0.859,
"step": 1688
},
{
"epoch": 0.6074993256002158,
"grad_norm": 14.950461839381,
"learning_rate": 7.049797157252888e-07,
"loss": 0.8105,
"step": 1689
},
{
"epoch": 0.6078590054851183,
"grad_norm": 8.919361344653822,
"learning_rate": 7.038665025259615e-07,
"loss": 0.8807,
"step": 1690
},
{
"epoch": 0.6082186853700207,
"grad_norm": 14.925105391763122,
"learning_rate": 7.027536914390257e-07,
"loss": 0.8696,
"step": 1691
},
{
"epoch": 0.6085783652549231,
"grad_norm": 69.98923360282737,
"learning_rate": 7.016412839755399e-07,
"loss": 0.784,
"step": 1692
},
{
"epoch": 0.6089380451398255,
"grad_norm": 35.44843074757706,
"learning_rate": 7.005292816460155e-07,
"loss": 0.8002,
"step": 1693
},
{
"epoch": 0.609297725024728,
"grad_norm": 9.939970667136594,
"learning_rate": 6.994176859604121e-07,
"loss": 0.8157,
"step": 1694
},
{
"epoch": 0.6096574049096304,
"grad_norm": 7.863375520704018,
"learning_rate": 6.983064984281389e-07,
"loss": 0.7949,
"step": 1695
},
{
"epoch": 0.6100170847945329,
"grad_norm": 97.29261826984394,
"learning_rate": 6.971957205580497e-07,
"loss": 0.7726,
"step": 1696
},
{
"epoch": 0.6103767646794354,
"grad_norm": 9.81021368261749,
"learning_rate": 6.96085353858443e-07,
"loss": 0.7813,
"step": 1697
},
{
"epoch": 0.6107364445643377,
"grad_norm": 16.02010761133172,
"learning_rate": 6.949753998370578e-07,
"loss": 0.7399,
"step": 1698
},
{
"epoch": 0.6110961244492402,
"grad_norm": 18.792860754940573,
"learning_rate": 6.938658600010734e-07,
"loss": 0.825,
"step": 1699
},
{
"epoch": 0.6114558043341426,
"grad_norm": 31.606223339116642,
"learning_rate": 6.92756735857107e-07,
"loss": 0.7607,
"step": 1700
},
{
"epoch": 0.6118154842190451,
"grad_norm": 24.82977511564448,
"learning_rate": 6.91648028911211e-07,
"loss": 0.8375,
"step": 1701
},
{
"epoch": 0.6121751641039475,
"grad_norm": 12.601085318467119,
"learning_rate": 6.905397406688708e-07,
"loss": 0.8364,
"step": 1702
},
{
"epoch": 0.6125348439888499,
"grad_norm": 12.705958637702684,
"learning_rate": 6.894318726350041e-07,
"loss": 0.8558,
"step": 1703
},
{
"epoch": 0.6128945238737523,
"grad_norm": 12.086710100708398,
"learning_rate": 6.883244263139577e-07,
"loss": 0.8178,
"step": 1704
},
{
"epoch": 0.6132542037586548,
"grad_norm": 7.7903843239514785,
"learning_rate": 6.87217403209506e-07,
"loss": 0.8087,
"step": 1705
},
{
"epoch": 0.6136138836435572,
"grad_norm": 12.198803272759948,
"learning_rate": 6.861108048248477e-07,
"loss": 0.7609,
"step": 1706
},
{
"epoch": 0.6139735635284597,
"grad_norm": 9.528152912375319,
"learning_rate": 6.850046326626058e-07,
"loss": 0.7557,
"step": 1707
},
{
"epoch": 0.6143332434133622,
"grad_norm": 10.049261688513136,
"learning_rate": 6.838988882248243e-07,
"loss": 0.8031,
"step": 1708
},
{
"epoch": 0.6146929232982645,
"grad_norm": 8.447514196519638,
"learning_rate": 6.827935730129669e-07,
"loss": 0.7961,
"step": 1709
},
{
"epoch": 0.615052603183167,
"grad_norm": 23.11292378241723,
"learning_rate": 6.816886885279131e-07,
"loss": 0.8443,
"step": 1710
},
{
"epoch": 0.6154122830680694,
"grad_norm": 11.119142446850603,
"learning_rate": 6.805842362699588e-07,
"loss": 0.7363,
"step": 1711
},
{
"epoch": 0.6157719629529719,
"grad_norm": 11.202186712862288,
"learning_rate": 6.794802177388122e-07,
"loss": 0.8158,
"step": 1712
},
{
"epoch": 0.6161316428378742,
"grad_norm": 13.296179979250441,
"learning_rate": 6.783766344335939e-07,
"loss": 0.8386,
"step": 1713
},
{
"epoch": 0.6164913227227767,
"grad_norm": 21.518111938157613,
"learning_rate": 6.772734878528312e-07,
"loss": 0.8087,
"step": 1714
},
{
"epoch": 0.6168510026076791,
"grad_norm": 46.66561116718014,
"learning_rate": 6.761707794944604e-07,
"loss": 0.8192,
"step": 1715
},
{
"epoch": 0.6172106824925816,
"grad_norm": 12.296866343279195,
"learning_rate": 6.750685108558221e-07,
"loss": 0.863,
"step": 1716
},
{
"epoch": 0.6175703623774841,
"grad_norm": 11.297803831567172,
"learning_rate": 6.739666834336598e-07,
"loss": 0.8261,
"step": 1717
},
{
"epoch": 0.6179300422623865,
"grad_norm": 13.288465766136282,
"learning_rate": 6.728652987241174e-07,
"loss": 0.8536,
"step": 1718
},
{
"epoch": 0.618289722147289,
"grad_norm": 14.012393699858668,
"learning_rate": 6.717643582227384e-07,
"loss": 0.8657,
"step": 1719
},
{
"epoch": 0.6186494020321913,
"grad_norm": 17.641059860172508,
"learning_rate": 6.706638634244628e-07,
"loss": 0.7291,
"step": 1720
},
{
"epoch": 0.6190090819170938,
"grad_norm": 21.798931499322553,
"learning_rate": 6.695638158236254e-07,
"loss": 0.8835,
"step": 1721
},
{
"epoch": 0.6193687618019962,
"grad_norm": 9.725572593880349,
"learning_rate": 6.684642169139543e-07,
"loss": 0.8005,
"step": 1722
},
{
"epoch": 0.6197284416868987,
"grad_norm": 14.195318412043791,
"learning_rate": 6.673650681885668e-07,
"loss": 0.7608,
"step": 1723
},
{
"epoch": 0.620088121571801,
"grad_norm": 12.923746404615972,
"learning_rate": 6.662663711399705e-07,
"loss": 0.7936,
"step": 1724
},
{
"epoch": 0.6204478014567035,
"grad_norm": 9.857065771465349,
"learning_rate": 6.651681272600591e-07,
"loss": 0.8411,
"step": 1725
},
{
"epoch": 0.620807481341606,
"grad_norm": 14.888609976537028,
"learning_rate": 6.64070338040111e-07,
"loss": 0.8305,
"step": 1726
},
{
"epoch": 0.6211671612265084,
"grad_norm": 11.79121313028303,
"learning_rate": 6.629730049707868e-07,
"loss": 0.7806,
"step": 1727
},
{
"epoch": 0.6215268411114109,
"grad_norm": 9.388306130381386,
"learning_rate": 6.618761295421284e-07,
"loss": 0.8264,
"step": 1728
},
{
"epoch": 0.6218865209963133,
"grad_norm": 10.896893029733993,
"learning_rate": 6.607797132435559e-07,
"loss": 0.7853,
"step": 1729
},
{
"epoch": 0.6222462008812157,
"grad_norm": 24.33571401714642,
"learning_rate": 6.596837575638663e-07,
"loss": 0.8379,
"step": 1730
},
{
"epoch": 0.6226058807661181,
"grad_norm": 14.737447005737831,
"learning_rate": 6.585882639912302e-07,
"loss": 0.7539,
"step": 1731
},
{
"epoch": 0.6229655606510206,
"grad_norm": 11.369836569855831,
"learning_rate": 6.574932340131917e-07,
"loss": 0.8058,
"step": 1732
},
{
"epoch": 0.623325240535923,
"grad_norm": 10.305482256804485,
"learning_rate": 6.563986691166655e-07,
"loss": 0.7784,
"step": 1733
},
{
"epoch": 0.6236849204208255,
"grad_norm": 22.814135768221593,
"learning_rate": 6.553045707879336e-07,
"loss": 0.7798,
"step": 1734
},
{
"epoch": 0.624044600305728,
"grad_norm": 28.116517448330193,
"learning_rate": 6.542109405126457e-07,
"loss": 0.7473,
"step": 1735
},
{
"epoch": 0.6244042801906303,
"grad_norm": 14.580162755655769,
"learning_rate": 6.531177797758154e-07,
"loss": 0.8007,
"step": 1736
},
{
"epoch": 0.6247639600755328,
"grad_norm": 16.04566880088962,
"learning_rate": 6.520250900618185e-07,
"loss": 0.9144,
"step": 1737
},
{
"epoch": 0.6251236399604352,
"grad_norm": 15.673671179728919,
"learning_rate": 6.509328728543917e-07,
"loss": 0.8013,
"step": 1738
},
{
"epoch": 0.6254833198453377,
"grad_norm": 35.50996042236339,
"learning_rate": 6.498411296366299e-07,
"loss": 0.8097,
"step": 1739
},
{
"epoch": 0.62584299973024,
"grad_norm": 16.880957362909395,
"learning_rate": 6.487498618909844e-07,
"loss": 0.756,
"step": 1740
},
{
"epoch": 0.6262026796151425,
"grad_norm": 14.035214514055044,
"learning_rate": 6.476590710992604e-07,
"loss": 0.8141,
"step": 1741
},
{
"epoch": 0.6265623595000449,
"grad_norm": 15.814422206787214,
"learning_rate": 6.465687587426165e-07,
"loss": 0.8343,
"step": 1742
},
{
"epoch": 0.6269220393849474,
"grad_norm": 15.188642929960775,
"learning_rate": 6.454789263015609e-07,
"loss": 0.8491,
"step": 1743
},
{
"epoch": 0.6272817192698499,
"grad_norm": 14.583983954449385,
"learning_rate": 6.443895752559498e-07,
"loss": 0.7419,
"step": 1744
},
{
"epoch": 0.6276413991547523,
"grad_norm": 133.0666371031153,
"learning_rate": 6.433007070849863e-07,
"loss": 0.9221,
"step": 1745
},
{
"epoch": 0.6280010790396547,
"grad_norm": 8.003593212036993,
"learning_rate": 6.422123232672181e-07,
"loss": 0.7631,
"step": 1746
},
{
"epoch": 0.6283607589245571,
"grad_norm": 7.922901645887125,
"learning_rate": 6.411244252805351e-07,
"loss": 0.8314,
"step": 1747
},
{
"epoch": 0.6287204388094596,
"grad_norm": 14.548918361862677,
"learning_rate": 6.400370146021661e-07,
"loss": 0.7949,
"step": 1748
},
{
"epoch": 0.629080118694362,
"grad_norm": 10.892622626019605,
"learning_rate": 6.389500927086799e-07,
"loss": 0.7548,
"step": 1749
},
{
"epoch": 0.6294397985792645,
"grad_norm": 8.588043511139587,
"learning_rate": 6.378636610759811e-07,
"loss": 0.8308,
"step": 1750
},
{
"epoch": 0.6297994784641668,
"grad_norm": 12.340274835878352,
"learning_rate": 6.367777211793089e-07,
"loss": 0.8127,
"step": 1751
},
{
"epoch": 0.6301591583490693,
"grad_norm": 15.78505028084241,
"learning_rate": 6.356922744932334e-07,
"loss": 0.815,
"step": 1752
},
{
"epoch": 0.6305188382339718,
"grad_norm": 11.773994533558056,
"learning_rate": 6.346073224916565e-07,
"loss": 0.9125,
"step": 1753
},
{
"epoch": 0.6308785181188742,
"grad_norm": 9.971471855959267,
"learning_rate": 6.335228666478077e-07,
"loss": 0.8055,
"step": 1754
},
{
"epoch": 0.6312381980037767,
"grad_norm": 23.62925258069257,
"learning_rate": 6.324389084342434e-07,
"loss": 0.8056,
"step": 1755
},
{
"epoch": 0.631597877888679,
"grad_norm": 25.21027748771099,
"learning_rate": 6.31355449322843e-07,
"loss": 0.8109,
"step": 1756
},
{
"epoch": 0.6319575577735815,
"grad_norm": 27.015234963332013,
"learning_rate": 6.302724907848095e-07,
"loss": 0.876,
"step": 1757
},
{
"epoch": 0.6323172376584839,
"grad_norm": 9.541397387947747,
"learning_rate": 6.291900342906653e-07,
"loss": 0.7522,
"step": 1758
},
{
"epoch": 0.6326769175433864,
"grad_norm": 15.225320262719087,
"learning_rate": 6.281080813102521e-07,
"loss": 0.8016,
"step": 1759
},
{
"epoch": 0.6330365974282888,
"grad_norm": 22.474412580868492,
"learning_rate": 6.270266333127265e-07,
"loss": 0.8175,
"step": 1760
},
{
"epoch": 0.6333962773131913,
"grad_norm": 29.12108471492303,
"learning_rate": 6.259456917665604e-07,
"loss": 0.8182,
"step": 1761
},
{
"epoch": 0.6337559571980937,
"grad_norm": 12.204041386576975,
"learning_rate": 6.248652581395377e-07,
"loss": 0.7633,
"step": 1762
},
{
"epoch": 0.6341156370829961,
"grad_norm": 20.134854518828618,
"learning_rate": 6.237853338987531e-07,
"loss": 0.8276,
"step": 1763
},
{
"epoch": 0.6344753169678986,
"grad_norm": 12.916402082316186,
"learning_rate": 6.227059205106085e-07,
"loss": 0.7856,
"step": 1764
},
{
"epoch": 0.634834996852801,
"grad_norm": 11.036413506251774,
"learning_rate": 6.216270194408129e-07,
"loss": 0.9049,
"step": 1765
},
{
"epoch": 0.6351946767377035,
"grad_norm": 10.062422641931368,
"learning_rate": 6.205486321543797e-07,
"loss": 0.8437,
"step": 1766
},
{
"epoch": 0.6355543566226058,
"grad_norm": 51.73009803827442,
"learning_rate": 6.194707601156248e-07,
"loss": 0.7957,
"step": 1767
},
{
"epoch": 0.6359140365075083,
"grad_norm": 11.334317202740843,
"learning_rate": 6.183934047881635e-07,
"loss": 0.7942,
"step": 1768
},
{
"epoch": 0.6362737163924107,
"grad_norm": 99.22508216639132,
"learning_rate": 6.173165676349102e-07,
"loss": 0.933,
"step": 1769
},
{
"epoch": 0.6366333962773132,
"grad_norm": 11.198191543243793,
"learning_rate": 6.162402501180759e-07,
"loss": 0.795,
"step": 1770
},
{
"epoch": 0.6369930761622157,
"grad_norm": 17.03016020066533,
"learning_rate": 6.151644536991655e-07,
"loss": 0.798,
"step": 1771
},
{
"epoch": 0.637352756047118,
"grad_norm": 10.164294215006395,
"learning_rate": 6.140891798389769e-07,
"loss": 0.8287,
"step": 1772
},
{
"epoch": 0.6377124359320205,
"grad_norm": 8.807026779685549,
"learning_rate": 6.130144299975972e-07,
"loss": 0.8174,
"step": 1773
},
{
"epoch": 0.6380721158169229,
"grad_norm": 21.14645986610551,
"learning_rate": 6.119402056344032e-07,
"loss": 0.8481,
"step": 1774
},
{
"epoch": 0.6384317957018254,
"grad_norm": 7.879847914566813,
"learning_rate": 6.108665082080578e-07,
"loss": 0.8275,
"step": 1775
},
{
"epoch": 0.6387914755867278,
"grad_norm": 12.434974102674712,
"learning_rate": 6.097933391765087e-07,
"loss": 0.7707,
"step": 1776
},
{
"epoch": 0.6391511554716303,
"grad_norm": 8.505561474001542,
"learning_rate": 6.087206999969847e-07,
"loss": 0.7402,
"step": 1777
},
{
"epoch": 0.6395108353565326,
"grad_norm": 56.64536593663647,
"learning_rate": 6.07648592125997e-07,
"loss": 0.7865,
"step": 1778
},
{
"epoch": 0.6398705152414351,
"grad_norm": 12.507722360780223,
"learning_rate": 6.065770170193341e-07,
"loss": 0.8205,
"step": 1779
},
{
"epoch": 0.6402301951263376,
"grad_norm": 12.722603618717091,
"learning_rate": 6.05505976132062e-07,
"loss": 0.7469,
"step": 1780
},
{
"epoch": 0.64058987501124,
"grad_norm": 10.269738579037984,
"learning_rate": 6.044354709185202e-07,
"loss": 0.7875,
"step": 1781
},
{
"epoch": 0.6409495548961425,
"grad_norm": 30.758909873658617,
"learning_rate": 6.033655028323215e-07,
"loss": 0.7216,
"step": 1782
},
{
"epoch": 0.6413092347810448,
"grad_norm": 14.316956448393078,
"learning_rate": 6.022960733263493e-07,
"loss": 0.8473,
"step": 1783
},
{
"epoch": 0.6416689146659473,
"grad_norm": 12.431060394684565,
"learning_rate": 6.01227183852756e-07,
"loss": 0.7882,
"step": 1784
},
{
"epoch": 0.6420285945508497,
"grad_norm": 9.500973513559808,
"learning_rate": 6.001588358629597e-07,
"loss": 0.8482,
"step": 1785
},
{
"epoch": 0.6423882744357522,
"grad_norm": 20.92781239812433,
"learning_rate": 5.990910308076442e-07,
"loss": 0.7918,
"step": 1786
},
{
"epoch": 0.6427479543206546,
"grad_norm": 8.664992551100244,
"learning_rate": 5.980237701367556e-07,
"loss": 0.7638,
"step": 1787
},
{
"epoch": 0.643107634205557,
"grad_norm": 11.090661919182084,
"learning_rate": 5.969570552995014e-07,
"loss": 0.8059,
"step": 1788
},
{
"epoch": 0.6434673140904595,
"grad_norm": 18.287091314618046,
"learning_rate": 5.958908877443465e-07,
"loss": 0.786,
"step": 1789
},
{
"epoch": 0.6438269939753619,
"grad_norm": 48.024587013915905,
"learning_rate": 5.948252689190141e-07,
"loss": 0.7355,
"step": 1790
},
{
"epoch": 0.6441866738602644,
"grad_norm": 12.859201948345953,
"learning_rate": 5.937602002704818e-07,
"loss": 0.8528,
"step": 1791
},
{
"epoch": 0.6445463537451668,
"grad_norm": 10.955287621995861,
"learning_rate": 5.926956832449805e-07,
"loss": 0.8152,
"step": 1792
},
{
"epoch": 0.6449060336300693,
"grad_norm": 15.435275641349563,
"learning_rate": 5.916317192879909e-07,
"loss": 0.852,
"step": 1793
},
{
"epoch": 0.6452657135149716,
"grad_norm": 76.3314449574763,
"learning_rate": 5.90568309844244e-07,
"loss": 0.7824,
"step": 1794
},
{
"epoch": 0.6456253933998741,
"grad_norm": 14.837934574547674,
"learning_rate": 5.895054563577171e-07,
"loss": 0.7831,
"step": 1795
},
{
"epoch": 0.6459850732847765,
"grad_norm": 24.168391521465132,
"learning_rate": 5.884431602716331e-07,
"loss": 0.782,
"step": 1796
},
{
"epoch": 0.646344753169679,
"grad_norm": 10.7570153448703,
"learning_rate": 5.873814230284575e-07,
"loss": 0.7747,
"step": 1797
},
{
"epoch": 0.6467044330545815,
"grad_norm": 90.55006051473893,
"learning_rate": 5.86320246069897e-07,
"loss": 0.7818,
"step": 1798
},
{
"epoch": 0.6470641129394838,
"grad_norm": 19.438214260499464,
"learning_rate": 5.852596308368981e-07,
"loss": 0.866,
"step": 1799
},
{
"epoch": 0.6474237928243863,
"grad_norm": 13.657916190310873,
"learning_rate": 5.841995787696438e-07,
"loss": 0.7081,
"step": 1800
},
{
"epoch": 0.6477834727092887,
"grad_norm": 14.665885552199574,
"learning_rate": 5.831400913075529e-07,
"loss": 0.856,
"step": 1801
},
{
"epoch": 0.6481431525941912,
"grad_norm": 10.162631162678629,
"learning_rate": 5.820811698892774e-07,
"loss": 0.7576,
"step": 1802
},
{
"epoch": 0.6485028324790936,
"grad_norm": 11.323067223912123,
"learning_rate": 5.810228159527002e-07,
"loss": 0.8584,
"step": 1803
},
{
"epoch": 0.6488625123639961,
"grad_norm": 11.27372613935998,
"learning_rate": 5.799650309349348e-07,
"loss": 0.8396,
"step": 1804
},
{
"epoch": 0.6492221922488984,
"grad_norm": 17.460419964750038,
"learning_rate": 5.789078162723212e-07,
"loss": 0.7775,
"step": 1805
},
{
"epoch": 0.6495818721338009,
"grad_norm": 14.508483865670446,
"learning_rate": 5.778511734004248e-07,
"loss": 0.8165,
"step": 1806
},
{
"epoch": 0.6499415520187034,
"grad_norm": 9.676751789233563,
"learning_rate": 5.767951037540349e-07,
"loss": 0.7913,
"step": 1807
},
{
"epoch": 0.6503012319036058,
"grad_norm": 11.971497636489813,
"learning_rate": 5.757396087671633e-07,
"loss": 0.8127,
"step": 1808
},
{
"epoch": 0.6506609117885083,
"grad_norm": 10.770337299108396,
"learning_rate": 5.746846898730402e-07,
"loss": 0.8241,
"step": 1809
},
{
"epoch": 0.6510205916734106,
"grad_norm": 12.045985601282291,
"learning_rate": 5.736303485041141e-07,
"loss": 0.8244,
"step": 1810
},
{
"epoch": 0.6513802715583131,
"grad_norm": 16.15883927758613,
"learning_rate": 5.725765860920487e-07,
"loss": 0.7476,
"step": 1811
},
{
"epoch": 0.6517399514432155,
"grad_norm": 16.66474408815995,
"learning_rate": 5.715234040677229e-07,
"loss": 0.8377,
"step": 1812
},
{
"epoch": 0.652099631328118,
"grad_norm": 21.888182978506073,
"learning_rate": 5.70470803861226e-07,
"loss": 0.7885,
"step": 1813
},
{
"epoch": 0.6524593112130204,
"grad_norm": 27.342810272479124,
"learning_rate": 5.694187869018583e-07,
"loss": 0.767,
"step": 1814
},
{
"epoch": 0.6528189910979229,
"grad_norm": 8.161359323030767,
"learning_rate": 5.683673546181274e-07,
"loss": 0.8278,
"step": 1815
},
{
"epoch": 0.6531786709828253,
"grad_norm": 15.661535313928974,
"learning_rate": 5.673165084377478e-07,
"loss": 0.7923,
"step": 1816
},
{
"epoch": 0.6535383508677277,
"grad_norm": 7.689055843441256,
"learning_rate": 5.662662497876374e-07,
"loss": 0.7484,
"step": 1817
},
{
"epoch": 0.6538980307526302,
"grad_norm": 24.838247028824966,
"learning_rate": 5.652165800939167e-07,
"loss": 0.7913,
"step": 1818
},
{
"epoch": 0.6542577106375326,
"grad_norm": 27.729926198991425,
"learning_rate": 5.641675007819057e-07,
"loss": 0.7404,
"step": 1819
},
{
"epoch": 0.6546173905224351,
"grad_norm": 63.23334053232722,
"learning_rate": 5.631190132761247e-07,
"loss": 0.7521,
"step": 1820
},
{
"epoch": 0.6549770704073374,
"grad_norm": 15.542551429874342,
"learning_rate": 5.620711190002878e-07,
"loss": 0.8185,
"step": 1821
},
{
"epoch": 0.6553367502922399,
"grad_norm": 9.809043926468073,
"learning_rate": 5.610238193773061e-07,
"loss": 0.7819,
"step": 1822
},
{
"epoch": 0.6556964301771423,
"grad_norm": 8.58369213799892,
"learning_rate": 5.599771158292805e-07,
"loss": 0.7355,
"step": 1823
},
{
"epoch": 0.6560561100620448,
"grad_norm": 13.820619552056009,
"learning_rate": 5.589310097775054e-07,
"loss": 0.8129,
"step": 1824
},
{
"epoch": 0.6564157899469473,
"grad_norm": 13.067661393809157,
"learning_rate": 5.578855026424618e-07,
"loss": 0.8095,
"step": 1825
},
{
"epoch": 0.6567754698318496,
"grad_norm": 10.305852999489563,
"learning_rate": 5.568405958438181e-07,
"loss": 0.8123,
"step": 1826
},
{
"epoch": 0.6571351497167521,
"grad_norm": 8.673385969068832,
"learning_rate": 5.557962908004274e-07,
"loss": 0.7976,
"step": 1827
},
{
"epoch": 0.6574948296016545,
"grad_norm": 15.45928638878636,
"learning_rate": 5.547525889303264e-07,
"loss": 0.7952,
"step": 1828
},
{
"epoch": 0.657854509486557,
"grad_norm": 10.10341774407125,
"learning_rate": 5.537094916507319e-07,
"loss": 0.7889,
"step": 1829
},
{
"epoch": 0.6582141893714594,
"grad_norm": 8.556881114275807,
"learning_rate": 5.526670003780399e-07,
"loss": 0.7618,
"step": 1830
},
{
"epoch": 0.6585738692563619,
"grad_norm": 19.342068228599718,
"learning_rate": 5.516251165278234e-07,
"loss": 0.7546,
"step": 1831
},
{
"epoch": 0.6589335491412642,
"grad_norm": 59.54816995344833,
"learning_rate": 5.505838415148316e-07,
"loss": 0.7827,
"step": 1832
},
{
"epoch": 0.6592932290261667,
"grad_norm": 10.14681148147005,
"learning_rate": 5.495431767529857e-07,
"loss": 0.7988,
"step": 1833
},
{
"epoch": 0.6596529089110692,
"grad_norm": 13.135031573911437,
"learning_rate": 5.485031236553791e-07,
"loss": 0.7537,
"step": 1834
},
{
"epoch": 0.6600125887959716,
"grad_norm": 6.967976646359824,
"learning_rate": 5.474636836342736e-07,
"loss": 0.7402,
"step": 1835
},
{
"epoch": 0.6603722686808741,
"grad_norm": 9.216758380737184,
"learning_rate": 5.464248581011002e-07,
"loss": 0.8323,
"step": 1836
},
{
"epoch": 0.6607319485657764,
"grad_norm": 11.600107990635108,
"learning_rate": 5.453866484664542e-07,
"loss": 0.7451,
"step": 1837
},
{
"epoch": 0.6610916284506789,
"grad_norm": 10.892374294450866,
"learning_rate": 5.443490561400948e-07,
"loss": 0.7621,
"step": 1838
},
{
"epoch": 0.6614513083355813,
"grad_norm": 11.45842615630558,
"learning_rate": 5.433120825309425e-07,
"loss": 0.7832,
"step": 1839
},
{
"epoch": 0.6618109882204838,
"grad_norm": 16.153448433055885,
"learning_rate": 5.422757290470794e-07,
"loss": 0.788,
"step": 1840
},
{
"epoch": 0.6621706681053862,
"grad_norm": 12.577386597072442,
"learning_rate": 5.412399970957439e-07,
"loss": 0.8579,
"step": 1841
},
{
"epoch": 0.6625303479902886,
"grad_norm": 12.782695711759963,
"learning_rate": 5.402048880833308e-07,
"loss": 0.7313,
"step": 1842
},
{
"epoch": 0.6628900278751911,
"grad_norm": 12.47174858281745,
"learning_rate": 5.391704034153894e-07,
"loss": 0.8876,
"step": 1843
},
{
"epoch": 0.6632497077600935,
"grad_norm": 18.66655631801789,
"learning_rate": 5.381365444966204e-07,
"loss": 0.7878,
"step": 1844
},
{
"epoch": 0.663609387644996,
"grad_norm": 16.319352082499176,
"learning_rate": 5.371033127308762e-07,
"loss": 0.7858,
"step": 1845
},
{
"epoch": 0.6639690675298984,
"grad_norm": 20.719394774267887,
"learning_rate": 5.360707095211565e-07,
"loss": 0.8364,
"step": 1846
},
{
"epoch": 0.6643287474148009,
"grad_norm": 14.478872964211194,
"learning_rate": 5.350387362696076e-07,
"loss": 0.7589,
"step": 1847
},
{
"epoch": 0.6646884272997032,
"grad_norm": 8.595109081067351,
"learning_rate": 5.340073943775205e-07,
"loss": 0.8464,
"step": 1848
},
{
"epoch": 0.6650481071846057,
"grad_norm": 11.04673137932079,
"learning_rate": 5.329766852453296e-07,
"loss": 0.7933,
"step": 1849
},
{
"epoch": 0.6654077870695081,
"grad_norm": 10.53038761065107,
"learning_rate": 5.319466102726087e-07,
"loss": 0.7025,
"step": 1850
},
{
"epoch": 0.6657674669544106,
"grad_norm": 15.143280900133385,
"learning_rate": 5.309171708580723e-07,
"loss": 0.8126,
"step": 1851
},
{
"epoch": 0.6661271468393131,
"grad_norm": 26.58920303139226,
"learning_rate": 5.298883683995696e-07,
"loss": 0.789,
"step": 1852
},
{
"epoch": 0.6664868267242154,
"grad_norm": 19.284983239901692,
"learning_rate": 5.288602042940871e-07,
"loss": 0.8039,
"step": 1853
},
{
"epoch": 0.6668465066091179,
"grad_norm": 12.145551006460884,
"learning_rate": 5.278326799377427e-07,
"loss": 0.7906,
"step": 1854
},
{
"epoch": 0.6672061864940203,
"grad_norm": 7.405085830831934,
"learning_rate": 5.26805796725788e-07,
"loss": 0.7586,
"step": 1855
},
{
"epoch": 0.6675658663789228,
"grad_norm": 28.002207391036826,
"learning_rate": 5.257795560526004e-07,
"loss": 0.8145,
"step": 1856
},
{
"epoch": 0.6679255462638252,
"grad_norm": 16.62199670397666,
"learning_rate": 5.247539593116883e-07,
"loss": 0.7662,
"step": 1857
},
{
"epoch": 0.6682852261487277,
"grad_norm": 16.954919581500018,
"learning_rate": 5.237290078956835e-07,
"loss": 0.8472,
"step": 1858
},
{
"epoch": 0.66864490603363,
"grad_norm": 18.123531709541997,
"learning_rate": 5.227047031963434e-07,
"loss": 0.7853,
"step": 1859
},
{
"epoch": 0.6690045859185325,
"grad_norm": 10.415660732269528,
"learning_rate": 5.216810466045448e-07,
"loss": 0.7737,
"step": 1860
},
{
"epoch": 0.6693642658034349,
"grad_norm": 56.09425474270922,
"learning_rate": 5.206580395102866e-07,
"loss": 0.9381,
"step": 1861
},
{
"epoch": 0.6697239456883374,
"grad_norm": 13.543162717953841,
"learning_rate": 5.196356833026845e-07,
"loss": 0.7916,
"step": 1862
},
{
"epoch": 0.6700836255732399,
"grad_norm": 64.23400514552281,
"learning_rate": 5.18613979369972e-07,
"loss": 0.7931,
"step": 1863
},
{
"epoch": 0.6704433054581422,
"grad_norm": 11.403829262924216,
"learning_rate": 5.175929290994941e-07,
"loss": 0.8338,
"step": 1864
},
{
"epoch": 0.6708029853430447,
"grad_norm": 42.81872528268903,
"learning_rate": 5.16572533877711e-07,
"loss": 0.8484,
"step": 1865
},
{
"epoch": 0.6711626652279471,
"grad_norm": 39.066022443972905,
"learning_rate": 5.155527950901914e-07,
"loss": 0.8624,
"step": 1866
},
{
"epoch": 0.6715223451128496,
"grad_norm": 41.603319141147985,
"learning_rate": 5.145337141216149e-07,
"loss": 0.7927,
"step": 1867
},
{
"epoch": 0.671882024997752,
"grad_norm": 10.974669475457675,
"learning_rate": 5.135152923557647e-07,
"loss": 0.8326,
"step": 1868
},
{
"epoch": 0.6722417048826544,
"grad_norm": 20.415936770591237,
"learning_rate": 5.124975311755319e-07,
"loss": 0.809,
"step": 1869
},
{
"epoch": 0.6726013847675568,
"grad_norm": 23.862286254886733,
"learning_rate": 5.114804319629087e-07,
"loss": 0.8581,
"step": 1870
},
{
"epoch": 0.6729610646524593,
"grad_norm": 18.29927024843112,
"learning_rate": 5.104639960989903e-07,
"loss": 0.847,
"step": 1871
},
{
"epoch": 0.6733207445373618,
"grad_norm": 14.9425225110357,
"learning_rate": 5.094482249639682e-07,
"loss": 0.8231,
"step": 1872
},
{
"epoch": 0.6736804244222642,
"grad_norm": 7.204493801894027,
"learning_rate": 5.084331199371342e-07,
"loss": 0.8022,
"step": 1873
},
{
"epoch": 0.6740401043071667,
"grad_norm": 14.905346487920383,
"learning_rate": 5.074186823968739e-07,
"loss": 0.8486,
"step": 1874
},
{
"epoch": 0.674399784192069,
"grad_norm": 9.676339560933618,
"learning_rate": 5.064049137206677e-07,
"loss": 0.7858,
"step": 1875
},
{
"epoch": 0.6747594640769715,
"grad_norm": 9.49562244506686,
"learning_rate": 5.053918152850867e-07,
"loss": 0.7997,
"step": 1876
},
{
"epoch": 0.6751191439618739,
"grad_norm": 9.780546884507606,
"learning_rate": 5.043793884657925e-07,
"loss": 0.7547,
"step": 1877
},
{
"epoch": 0.6754788238467764,
"grad_norm": 9.470322325518556,
"learning_rate": 5.033676346375342e-07,
"loss": 0.8269,
"step": 1878
},
{
"epoch": 0.6758385037316788,
"grad_norm": 11.78272180033259,
"learning_rate": 5.02356555174148e-07,
"loss": 0.7977,
"step": 1879
},
{
"epoch": 0.6761981836165812,
"grad_norm": 10.724813017397079,
"learning_rate": 5.013461514485535e-07,
"loss": 0.8066,
"step": 1880
},
{
"epoch": 0.6765578635014837,
"grad_norm": 10.658619259472248,
"learning_rate": 5.003364248327533e-07,
"loss": 0.7963,
"step": 1881
},
{
"epoch": 0.6769175433863861,
"grad_norm": 17.35152082996863,
"learning_rate": 4.993273766978296e-07,
"loss": 0.8972,
"step": 1882
},
{
"epoch": 0.6772772232712886,
"grad_norm": 11.019779966265489,
"learning_rate": 4.983190084139452e-07,
"loss": 0.7863,
"step": 1883
},
{
"epoch": 0.677636903156191,
"grad_norm": 8.55292885917213,
"learning_rate": 4.973113213503378e-07,
"loss": 0.8869,
"step": 1884
},
{
"epoch": 0.6779965830410934,
"grad_norm": 26.74748899643954,
"learning_rate": 4.963043168753211e-07,
"loss": 0.7416,
"step": 1885
},
{
"epoch": 0.6783562629259958,
"grad_norm": 12.517286521411632,
"learning_rate": 4.952979963562813e-07,
"loss": 0.7884,
"step": 1886
},
{
"epoch": 0.6787159428108983,
"grad_norm": 12.144101674093983,
"learning_rate": 4.942923611596771e-07,
"loss": 0.7733,
"step": 1887
},
{
"epoch": 0.6790756226958007,
"grad_norm": 16.766125673571228,
"learning_rate": 4.932874126510352e-07,
"loss": 0.7858,
"step": 1888
},
{
"epoch": 0.6794353025807032,
"grad_norm": 7.743675635326617,
"learning_rate": 4.922831521949507e-07,
"loss": 0.8114,
"step": 1889
},
{
"epoch": 0.6797949824656057,
"grad_norm": 9.264045520369624,
"learning_rate": 4.912795811550836e-07,
"loss": 0.7946,
"step": 1890
},
{
"epoch": 0.680154662350508,
"grad_norm": 14.583110690880584,
"learning_rate": 4.902767008941593e-07,
"loss": 0.8318,
"step": 1891
},
{
"epoch": 0.6805143422354105,
"grad_norm": 9.560335815158966,
"learning_rate": 4.892745127739635e-07,
"loss": 0.8546,
"step": 1892
},
{
"epoch": 0.6808740221203129,
"grad_norm": 9.64660352252174,
"learning_rate": 4.882730181553433e-07,
"loss": 0.7828,
"step": 1893
},
{
"epoch": 0.6812337020052154,
"grad_norm": 17.208502485149108,
"learning_rate": 4.872722183982028e-07,
"loss": 0.7728,
"step": 1894
},
{
"epoch": 0.6815933818901178,
"grad_norm": 9.362234657825736,
"learning_rate": 4.862721148615043e-07,
"loss": 0.827,
"step": 1895
},
{
"epoch": 0.6819530617750202,
"grad_norm": 12.453547390878855,
"learning_rate": 4.852727089032634e-07,
"loss": 0.8151,
"step": 1896
},
{
"epoch": 0.6823127416599226,
"grad_norm": 15.795587125698905,
"learning_rate": 4.842740018805488e-07,
"loss": 0.7693,
"step": 1897
},
{
"epoch": 0.6826724215448251,
"grad_norm": 18.90914749581996,
"learning_rate": 4.832759951494798e-07,
"loss": 0.8,
"step": 1898
},
{
"epoch": 0.6830321014297276,
"grad_norm": 10.919574407657244,
"learning_rate": 4.822786900652261e-07,
"loss": 0.7599,
"step": 1899
},
{
"epoch": 0.68339178131463,
"grad_norm": 13.333825107133132,
"learning_rate": 4.812820879820033e-07,
"loss": 0.8187,
"step": 1900
},
{
"epoch": 0.6837514611995325,
"grad_norm": 17.048361445562644,
"learning_rate": 4.80286190253073e-07,
"loss": 0.775,
"step": 1901
},
{
"epoch": 0.6841111410844348,
"grad_norm": 7.944052113757398,
"learning_rate": 4.792909982307394e-07,
"loss": 0.8115,
"step": 1902
},
{
"epoch": 0.6844708209693373,
"grad_norm": 14.828276861307213,
"learning_rate": 4.782965132663505e-07,
"loss": 0.7556,
"step": 1903
},
{
"epoch": 0.6848305008542397,
"grad_norm": 18.213914896580356,
"learning_rate": 4.773027367102923e-07,
"loss": 0.8387,
"step": 1904
},
{
"epoch": 0.6851901807391422,
"grad_norm": 20.98108938653177,
"learning_rate": 4.763096699119896e-07,
"loss": 0.8085,
"step": 1905
},
{
"epoch": 0.6855498606240445,
"grad_norm": 16.73875927605446,
"learning_rate": 4.753173142199035e-07,
"loss": 0.7929,
"step": 1906
},
{
"epoch": 0.685909540508947,
"grad_norm": 54.077453505313734,
"learning_rate": 4.7432567098152886e-07,
"loss": 0.8155,
"step": 1907
},
{
"epoch": 0.6862692203938495,
"grad_norm": 19.07631987950139,
"learning_rate": 4.7333474154339446e-07,
"loss": 0.7632,
"step": 1908
},
{
"epoch": 0.6866289002787519,
"grad_norm": 13.156347839106504,
"learning_rate": 4.723445272510587e-07,
"loss": 0.7859,
"step": 1909
},
{
"epoch": 0.6869885801636544,
"grad_norm": 10.73868733548354,
"learning_rate": 4.7135502944910897e-07,
"loss": 0.8162,
"step": 1910
},
{
"epoch": 0.6873482600485568,
"grad_norm": 14.177996454482498,
"learning_rate": 4.7036624948115987e-07,
"loss": 0.815,
"step": 1911
},
{
"epoch": 0.6877079399334592,
"grad_norm": 7.439216214586587,
"learning_rate": 4.6937818868985204e-07,
"loss": 0.7929,
"step": 1912
},
{
"epoch": 0.6880676198183616,
"grad_norm": 10.92147774408899,
"learning_rate": 4.683908484168486e-07,
"loss": 0.8003,
"step": 1913
},
{
"epoch": 0.6884272997032641,
"grad_norm": 9.626214440116033,
"learning_rate": 4.6740423000283445e-07,
"loss": 0.7699,
"step": 1914
},
{
"epoch": 0.6887869795881665,
"grad_norm": 20.19775053928553,
"learning_rate": 4.6641833478751433e-07,
"loss": 0.7459,
"step": 1915
},
{
"epoch": 0.689146659473069,
"grad_norm": 27.79660959589243,
"learning_rate": 4.654331641096118e-07,
"loss": 0.8494,
"step": 1916
},
{
"epoch": 0.6895063393579715,
"grad_norm": 11.552197064577827,
"learning_rate": 4.6444871930686523e-07,
"loss": 0.8517,
"step": 1917
},
{
"epoch": 0.6898660192428738,
"grad_norm": 13.096752705587633,
"learning_rate": 4.6346500171602843e-07,
"loss": 0.8142,
"step": 1918
},
{
"epoch": 0.6902256991277763,
"grad_norm": 59.88184273645699,
"learning_rate": 4.6248201267286655e-07,
"loss": 0.775,
"step": 1919
},
{
"epoch": 0.6905853790126787,
"grad_norm": 20.949835354690556,
"learning_rate": 4.614997535121573e-07,
"loss": 0.789,
"step": 1920
},
{
"epoch": 0.6909450588975812,
"grad_norm": 21.181926495259585,
"learning_rate": 4.6051822556768573e-07,
"loss": 0.7091,
"step": 1921
},
{
"epoch": 0.6913047387824836,
"grad_norm": 16.283509132556304,
"learning_rate": 4.5953743017224446e-07,
"loss": 0.7371,
"step": 1922
},
{
"epoch": 0.691664418667386,
"grad_norm": 9.597201305512248,
"learning_rate": 4.5855736865763096e-07,
"loss": 0.7847,
"step": 1923
},
{
"epoch": 0.6920240985522884,
"grad_norm": 10.068854727813337,
"learning_rate": 4.575780423546476e-07,
"loss": 0.786,
"step": 1924
},
{
"epoch": 0.6923837784371909,
"grad_norm": 24.80676142205765,
"learning_rate": 4.565994525930966e-07,
"loss": 0.829,
"step": 1925
},
{
"epoch": 0.6927434583220934,
"grad_norm": 15.267556902740353,
"learning_rate": 4.5562160070178213e-07,
"loss": 0.8247,
"step": 1926
},
{
"epoch": 0.6931031382069958,
"grad_norm": 14.712025615536657,
"learning_rate": 4.5464448800850366e-07,
"loss": 0.776,
"step": 1927
},
{
"epoch": 0.6934628180918982,
"grad_norm": 9.762214400361668,
"learning_rate": 4.536681158400597e-07,
"loss": 0.7797,
"step": 1928
},
{
"epoch": 0.6938224979768006,
"grad_norm": 43.29180425471539,
"learning_rate": 4.5269248552224105e-07,
"loss": 0.7862,
"step": 1929
},
{
"epoch": 0.6941821778617031,
"grad_norm": 9.770913691162873,
"learning_rate": 4.517175983798334e-07,
"loss": 0.8091,
"step": 1930
},
{
"epoch": 0.6945418577466055,
"grad_norm": 7.634925217920101,
"learning_rate": 4.5074345573661057e-07,
"loss": 0.8002,
"step": 1931
},
{
"epoch": 0.694901537631508,
"grad_norm": 8.682744882544698,
"learning_rate": 4.497700589153378e-07,
"loss": 0.7941,
"step": 1932
},
{
"epoch": 0.6952612175164103,
"grad_norm": 15.118180922821043,
"learning_rate": 4.487974092377661e-07,
"loss": 0.8083,
"step": 1933
},
{
"epoch": 0.6956208974013128,
"grad_norm": 9.826564546122903,
"learning_rate": 4.478255080246337e-07,
"loss": 0.8315,
"step": 1934
},
{
"epoch": 0.6959805772862153,
"grad_norm": 17.69865416850283,
"learning_rate": 4.4685435659565975e-07,
"loss": 0.8013,
"step": 1935
},
{
"epoch": 0.6963402571711177,
"grad_norm": 9.554872577224419,
"learning_rate": 4.45883956269548e-07,
"loss": 0.8306,
"step": 1936
},
{
"epoch": 0.6966999370560202,
"grad_norm": 22.472663863651814,
"learning_rate": 4.449143083639805e-07,
"loss": 0.7858,
"step": 1937
},
{
"epoch": 0.6970596169409226,
"grad_norm": 16.04948422993258,
"learning_rate": 4.439454141956194e-07,
"loss": 0.8231,
"step": 1938
},
{
"epoch": 0.697419296825825,
"grad_norm": 12.693051165018526,
"learning_rate": 4.4297727508010065e-07,
"loss": 0.8049,
"step": 1939
},
{
"epoch": 0.6977789767107274,
"grad_norm": 13.998451344856502,
"learning_rate": 4.4200989233203777e-07,
"loss": 0.9358,
"step": 1940
},
{
"epoch": 0.6981386565956299,
"grad_norm": 15.132969994592672,
"learning_rate": 4.410432672650153e-07,
"loss": 0.8,
"step": 1941
},
{
"epoch": 0.6984983364805323,
"grad_norm": 28.32415329498456,
"learning_rate": 4.4007740119159065e-07,
"loss": 0.7684,
"step": 1942
},
{
"epoch": 0.6988580163654348,
"grad_norm": 7.444797986384278,
"learning_rate": 4.391122954232882e-07,
"loss": 0.7511,
"step": 1943
},
{
"epoch": 0.6992176962503373,
"grad_norm": 10.877249177667075,
"learning_rate": 4.3814795127060243e-07,
"loss": 0.7996,
"step": 1944
},
{
"epoch": 0.6995773761352396,
"grad_norm": 8.69990505795878,
"learning_rate": 4.371843700429917e-07,
"loss": 0.8351,
"step": 1945
},
{
"epoch": 0.6999370560201421,
"grad_norm": 9.371009111740484,
"learning_rate": 4.362215530488804e-07,
"loss": 0.7712,
"step": 1946
},
{
"epoch": 0.7002967359050445,
"grad_norm": 9.552032151923505,
"learning_rate": 4.352595015956527e-07,
"loss": 0.7947,
"step": 1947
},
{
"epoch": 0.700656415789947,
"grad_norm": 28.270828787029203,
"learning_rate": 4.342982169896555e-07,
"loss": 0.8124,
"step": 1948
},
{
"epoch": 0.7010160956748493,
"grad_norm": 9.330800789772027,
"learning_rate": 4.33337700536193e-07,
"loss": 0.8077,
"step": 1949
},
{
"epoch": 0.7013757755597518,
"grad_norm": 13.526816107359819,
"learning_rate": 4.323779535395278e-07,
"loss": 0.8185,
"step": 1950
},
{
"epoch": 0.7017354554446542,
"grad_norm": 13.426541469641524,
"learning_rate": 4.3141897730287535e-07,
"loss": 0.7832,
"step": 1951
},
{
"epoch": 0.7020951353295567,
"grad_norm": 12.99847244436818,
"learning_rate": 4.304607731284069e-07,
"loss": 0.734,
"step": 1952
},
{
"epoch": 0.7024548152144592,
"grad_norm": 22.515219398290462,
"learning_rate": 4.295033423172437e-07,
"loss": 0.8445,
"step": 1953
},
{
"epoch": 0.7028144950993616,
"grad_norm": 26.97147625270981,
"learning_rate": 4.285466861694582e-07,
"loss": 0.8348,
"step": 1954
},
{
"epoch": 0.703174174984264,
"grad_norm": 16.924420570758134,
"learning_rate": 4.2759080598406984e-07,
"loss": 0.8136,
"step": 1955
},
{
"epoch": 0.7035338548691664,
"grad_norm": 10.888168178794485,
"learning_rate": 4.2663570305904486e-07,
"loss": 0.809,
"step": 1956
},
{
"epoch": 0.7038935347540689,
"grad_norm": 33.89340294150417,
"learning_rate": 4.256813786912936e-07,
"loss": 0.8494,
"step": 1957
},
{
"epoch": 0.7042532146389713,
"grad_norm": 9.418302266803023,
"learning_rate": 4.247278341766705e-07,
"loss": 0.7757,
"step": 1958
},
{
"epoch": 0.7046128945238738,
"grad_norm": 16.829275622088073,
"learning_rate": 4.2377507080996965e-07,
"loss": 0.7698,
"step": 1959
},
{
"epoch": 0.7049725744087761,
"grad_norm": 16.20127709968279,
"learning_rate": 4.2282308988492524e-07,
"loss": 0.8087,
"step": 1960
},
{
"epoch": 0.7053322542936786,
"grad_norm": 13.413985764534917,
"learning_rate": 4.2187189269420807e-07,
"loss": 0.7703,
"step": 1961
},
{
"epoch": 0.7056919341785811,
"grad_norm": 9.345601704137358,
"learning_rate": 4.209214805294263e-07,
"loss": 0.8064,
"step": 1962
},
{
"epoch": 0.7060516140634835,
"grad_norm": 9.650003757320542,
"learning_rate": 4.19971854681121e-07,
"loss": 0.7875,
"step": 1963
},
{
"epoch": 0.706411293948386,
"grad_norm": 10.834910211171145,
"learning_rate": 4.190230164387655e-07,
"loss": 0.8344,
"step": 1964
},
{
"epoch": 0.7067709738332884,
"grad_norm": 27.01328289264853,
"learning_rate": 4.180749670907637e-07,
"loss": 0.794,
"step": 1965
},
{
"epoch": 0.7071306537181908,
"grad_norm": 14.513481418662815,
"learning_rate": 4.171277079244492e-07,
"loss": 0.843,
"step": 1966
},
{
"epoch": 0.7074903336030932,
"grad_norm": 13.34443731739716,
"learning_rate": 4.1618124022608136e-07,
"loss": 0.7949,
"step": 1967
},
{
"epoch": 0.7078500134879957,
"grad_norm": 14.408048881960235,
"learning_rate": 4.152355652808457e-07,
"loss": 0.8316,
"step": 1968
},
{
"epoch": 0.7082096933728981,
"grad_norm": 10.413228637565929,
"learning_rate": 4.1429068437285044e-07,
"loss": 0.7707,
"step": 1969
},
{
"epoch": 0.7085693732578006,
"grad_norm": 30.08608479735217,
"learning_rate": 4.133465987851268e-07,
"loss": 0.7963,
"step": 1970
},
{
"epoch": 0.708929053142703,
"grad_norm": 10.085831691124902,
"learning_rate": 4.124033097996251e-07,
"loss": 0.7691,
"step": 1971
},
{
"epoch": 0.7092887330276054,
"grad_norm": 15.398408177770955,
"learning_rate": 4.1146081869721427e-07,
"loss": 0.8193,
"step": 1972
},
{
"epoch": 0.7096484129125079,
"grad_norm": 11.82948307422115,
"learning_rate": 4.1051912675767966e-07,
"loss": 0.7734,
"step": 1973
},
{
"epoch": 0.7100080927974103,
"grad_norm": 15.811539804068559,
"learning_rate": 4.0957823525972137e-07,
"loss": 0.7838,
"step": 1974
},
{
"epoch": 0.7103677726823128,
"grad_norm": 22.68444207026732,
"learning_rate": 4.0863814548095344e-07,
"loss": 0.7359,
"step": 1975
},
{
"epoch": 0.7107274525672151,
"grad_norm": 46.43690895790918,
"learning_rate": 4.076988586979003e-07,
"loss": 0.8163,
"step": 1976
},
{
"epoch": 0.7110871324521176,
"grad_norm": 10.07946020957789,
"learning_rate": 4.067603761859965e-07,
"loss": 0.7408,
"step": 1977
},
{
"epoch": 0.71144681233702,
"grad_norm": 12.451771689573075,
"learning_rate": 4.058226992195838e-07,
"loss": 0.811,
"step": 1978
},
{
"epoch": 0.7118064922219225,
"grad_norm": 19.989042214097164,
"learning_rate": 4.048858290719115e-07,
"loss": 0.8138,
"step": 1979
},
{
"epoch": 0.712166172106825,
"grad_norm": 24.291023591576753,
"learning_rate": 4.0394976701513226e-07,
"loss": 0.8115,
"step": 1980
},
{
"epoch": 0.7125258519917274,
"grad_norm": 8.736651935734901,
"learning_rate": 4.0301451432030156e-07,
"loss": 0.7856,
"step": 1981
},
{
"epoch": 0.7128855318766298,
"grad_norm": 22.569035241318087,
"learning_rate": 4.0208007225737573e-07,
"loss": 0.8013,
"step": 1982
},
{
"epoch": 0.7132452117615322,
"grad_norm": 16.647520932154134,
"learning_rate": 4.011464420952114e-07,
"loss": 0.7989,
"step": 1983
},
{
"epoch": 0.7136048916464347,
"grad_norm": 73.6227989347106,
"learning_rate": 4.0021362510156166e-07,
"loss": 0.7922,
"step": 1984
},
{
"epoch": 0.7139645715313371,
"grad_norm": 11.824501604075454,
"learning_rate": 3.992816225430757e-07,
"loss": 0.7911,
"step": 1985
},
{
"epoch": 0.7143242514162396,
"grad_norm": 48.20878397409259,
"learning_rate": 3.9835043568529657e-07,
"loss": 0.7653,
"step": 1986
},
{
"epoch": 0.7146839313011419,
"grad_norm": 9.706882923552332,
"learning_rate": 3.974200657926606e-07,
"loss": 0.7461,
"step": 1987
},
{
"epoch": 0.7150436111860444,
"grad_norm": 16.316800895159567,
"learning_rate": 3.96490514128494e-07,
"loss": 0.8397,
"step": 1988
},
{
"epoch": 0.7154032910709469,
"grad_norm": 16.864508545034322,
"learning_rate": 3.95561781955012e-07,
"loss": 0.8423,
"step": 1989
},
{
"epoch": 0.7157629709558493,
"grad_norm": 10.59809473648667,
"learning_rate": 3.9463387053331676e-07,
"loss": 0.7803,
"step": 1990
},
{
"epoch": 0.7161226508407518,
"grad_norm": 10.822493210906448,
"learning_rate": 3.9370678112339716e-07,
"loss": 0.7719,
"step": 1991
},
{
"epoch": 0.7164823307256541,
"grad_norm": 8.335943511398538,
"learning_rate": 3.9278051498412466e-07,
"loss": 0.8375,
"step": 1992
},
{
"epoch": 0.7168420106105566,
"grad_norm": 14.68225712271285,
"learning_rate": 3.918550733732535e-07,
"loss": 0.85,
"step": 1993
},
{
"epoch": 0.717201690495459,
"grad_norm": 9.854404405078158,
"learning_rate": 3.9093045754741747e-07,
"loss": 0.8618,
"step": 1994
},
{
"epoch": 0.7175613703803615,
"grad_norm": 18.36600366376659,
"learning_rate": 3.900066687621305e-07,
"loss": 0.7804,
"step": 1995
},
{
"epoch": 0.7179210502652639,
"grad_norm": 12.991887734967218,
"learning_rate": 3.8908370827178216e-07,
"loss": 0.7598,
"step": 1996
},
{
"epoch": 0.7182807301501664,
"grad_norm": 22.804948845431067,
"learning_rate": 3.8816157732963807e-07,
"loss": 0.8132,
"step": 1997
},
{
"epoch": 0.7186404100350688,
"grad_norm": 214.02017904335946,
"learning_rate": 3.8724027718783646e-07,
"loss": 0.822,
"step": 1998
},
{
"epoch": 0.7190000899199712,
"grad_norm": 7.9949123256405645,
"learning_rate": 3.86319809097389e-07,
"loss": 0.7757,
"step": 1999
},
{
"epoch": 0.7193597698048737,
"grad_norm": 11.197490836725933,
"learning_rate": 3.854001743081764e-07,
"loss": 0.8376,
"step": 2000
},
{
"epoch": 0.7197194496897761,
"grad_norm": 12.522094083330089,
"learning_rate": 3.8448137406894797e-07,
"loss": 0.8051,
"step": 2001
},
{
"epoch": 0.7200791295746786,
"grad_norm": 10.149491054895638,
"learning_rate": 3.835634096273197e-07,
"loss": 0.7669,
"step": 2002
},
{
"epoch": 0.7204388094595809,
"grad_norm": 15.598336809557521,
"learning_rate": 3.826462822297736e-07,
"loss": 0.848,
"step": 2003
},
{
"epoch": 0.7207984893444834,
"grad_norm": 17.354478744327977,
"learning_rate": 3.8172999312165367e-07,
"loss": 0.7534,
"step": 2004
},
{
"epoch": 0.7211581692293858,
"grad_norm": 16.184795703538978,
"learning_rate": 3.8081454354716734e-07,
"loss": 0.7438,
"step": 2005
},
{
"epoch": 0.7215178491142883,
"grad_norm": 12.173927326851151,
"learning_rate": 3.798999347493799e-07,
"loss": 0.8225,
"step": 2006
},
{
"epoch": 0.7218775289991907,
"grad_norm": 171.9320034029074,
"learning_rate": 3.789861679702169e-07,
"loss": 0.7795,
"step": 2007
},
{
"epoch": 0.7222372088840932,
"grad_norm": 7.948351057381663,
"learning_rate": 3.780732444504592e-07,
"loss": 0.8606,
"step": 2008
},
{
"epoch": 0.7225968887689956,
"grad_norm": 22.116562347064672,
"learning_rate": 3.771611654297443e-07,
"loss": 0.903,
"step": 2009
},
{
"epoch": 0.722956568653898,
"grad_norm": 10.512462282058872,
"learning_rate": 3.7624993214656043e-07,
"loss": 0.8358,
"step": 2010
},
{
"epoch": 0.7233162485388005,
"grad_norm": 23.075159073089754,
"learning_rate": 3.7533954583824976e-07,
"loss": 0.8763,
"step": 2011
},
{
"epoch": 0.7236759284237029,
"grad_norm": 8.953974994412894,
"learning_rate": 3.74430007741003e-07,
"loss": 0.7403,
"step": 2012
},
{
"epoch": 0.7240356083086054,
"grad_norm": 12.80624775766204,
"learning_rate": 3.735213190898604e-07,
"loss": 0.8046,
"step": 2013
},
{
"epoch": 0.7243952881935077,
"grad_norm": 8.476228291194738,
"learning_rate": 3.726134811187066e-07,
"loss": 0.788,
"step": 2014
},
{
"epoch": 0.7247549680784102,
"grad_norm": 16.157490955268806,
"learning_rate": 3.717064950602736e-07,
"loss": 0.8756,
"step": 2015
},
{
"epoch": 0.7251146479633126,
"grad_norm": 11.908040661691748,
"learning_rate": 3.708003621461346e-07,
"loss": 0.8009,
"step": 2016
},
{
"epoch": 0.7254743278482151,
"grad_norm": 14.59810362091746,
"learning_rate": 3.698950836067064e-07,
"loss": 0.8144,
"step": 2017
},
{
"epoch": 0.7258340077331176,
"grad_norm": 17.554682865657345,
"learning_rate": 3.6899066067124284e-07,
"loss": 0.8152,
"step": 2018
},
{
"epoch": 0.72619368761802,
"grad_norm": 21.24521103903718,
"learning_rate": 3.680870945678388e-07,
"loss": 0.8427,
"step": 2019
},
{
"epoch": 0.7265533675029224,
"grad_norm": 12.355234140259519,
"learning_rate": 3.6718438652342376e-07,
"loss": 0.8953,
"step": 2020
},
{
"epoch": 0.7269130473878248,
"grad_norm": 9.99757087967759,
"learning_rate": 3.6628253776376375e-07,
"loss": 0.8058,
"step": 2021
},
{
"epoch": 0.7272727272727273,
"grad_norm": 9.422636518937265,
"learning_rate": 3.6538154951345566e-07,
"loss": 0.754,
"step": 2022
},
{
"epoch": 0.7276324071576297,
"grad_norm": 15.509519030816742,
"learning_rate": 3.644814229959302e-07,
"loss": 0.8216,
"step": 2023
},
{
"epoch": 0.7279920870425322,
"grad_norm": 8.274006258533728,
"learning_rate": 3.635821594334466e-07,
"loss": 0.7768,
"step": 2024
},
{
"epoch": 0.7283517669274345,
"grad_norm": 13.558731403121147,
"learning_rate": 3.6268376004709344e-07,
"loss": 0.7722,
"step": 2025
},
{
"epoch": 0.728711446812337,
"grad_norm": 7.813752032299304,
"learning_rate": 3.61786226056784e-07,
"loss": 0.8221,
"step": 2026
},
{
"epoch": 0.7290711266972395,
"grad_norm": 15.095957653688293,
"learning_rate": 3.608895586812586e-07,
"loss": 0.8057,
"step": 2027
},
{
"epoch": 0.7294308065821419,
"grad_norm": 17.28853822691677,
"learning_rate": 3.5999375913807904e-07,
"loss": 0.8369,
"step": 2028
},
{
"epoch": 0.7297904864670444,
"grad_norm": 15.788250879298607,
"learning_rate": 3.590988286436302e-07,
"loss": 0.8234,
"step": 2029
},
{
"epoch": 0.7301501663519467,
"grad_norm": 59.285793453033286,
"learning_rate": 3.5820476841311586e-07,
"loss": 0.8256,
"step": 2030
},
{
"epoch": 0.7305098462368492,
"grad_norm": 9.799300739333693,
"learning_rate": 3.5731157966055835e-07,
"loss": 0.8144,
"step": 2031
},
{
"epoch": 0.7308695261217516,
"grad_norm": 14.02110369680676,
"learning_rate": 3.564192635987966e-07,
"loss": 0.8119,
"step": 2032
},
{
"epoch": 0.7312292060066541,
"grad_norm": 11.044167830136333,
"learning_rate": 3.55527821439485e-07,
"loss": 0.784,
"step": 2033
},
{
"epoch": 0.7315888858915565,
"grad_norm": 11.212578833923999,
"learning_rate": 3.546372543930908e-07,
"loss": 0.703,
"step": 2034
},
{
"epoch": 0.731948565776459,
"grad_norm": 10.179725377907866,
"learning_rate": 3.537475636688929e-07,
"loss": 0.8103,
"step": 2035
},
{
"epoch": 0.7323082456613614,
"grad_norm": 9.140277108662909,
"learning_rate": 3.5285875047498073e-07,
"loss": 0.8338,
"step": 2036
},
{
"epoch": 0.7326679255462638,
"grad_norm": 15.783836317379054,
"learning_rate": 3.519708160182513e-07,
"loss": 0.7391,
"step": 2037
},
{
"epoch": 0.7330276054311663,
"grad_norm": 12.170788311536235,
"learning_rate": 3.510837615044097e-07,
"loss": 0.7683,
"step": 2038
},
{
"epoch": 0.7333872853160687,
"grad_norm": 10.937526810183094,
"learning_rate": 3.501975881379651e-07,
"loss": 0.7547,
"step": 2039
},
{
"epoch": 0.7337469652009712,
"grad_norm": 19.054117416753936,
"learning_rate": 3.493122971222304e-07,
"loss": 0.7536,
"step": 2040
},
{
"epoch": 0.7341066450858735,
"grad_norm": 24.43819899317763,
"learning_rate": 3.4842788965932036e-07,
"loss": 0.8195,
"step": 2041
},
{
"epoch": 0.734466324970776,
"grad_norm": 13.830521845361822,
"learning_rate": 3.4754436695015075e-07,
"loss": 0.8489,
"step": 2042
},
{
"epoch": 0.7348260048556784,
"grad_norm": 7.155680001752106,
"learning_rate": 3.466617301944348e-07,
"loss": 0.8186,
"step": 2043
},
{
"epoch": 0.7351856847405809,
"grad_norm": 7.494449154337195,
"learning_rate": 3.4577998059068345e-07,
"loss": 0.8351,
"step": 2044
},
{
"epoch": 0.7355453646254834,
"grad_norm": 9.533522287478453,
"learning_rate": 3.448991193362024e-07,
"loss": 0.8353,
"step": 2045
},
{
"epoch": 0.7359050445103857,
"grad_norm": 31.22646722400908,
"learning_rate": 3.4401914762709217e-07,
"loss": 0.6916,
"step": 2046
},
{
"epoch": 0.7362647243952882,
"grad_norm": 14.097405364781384,
"learning_rate": 3.4314006665824425e-07,
"loss": 0.811,
"step": 2047
},
{
"epoch": 0.7366244042801906,
"grad_norm": 40.90354140997975,
"learning_rate": 3.4226187762334126e-07,
"loss": 0.8321,
"step": 2048
},
{
"epoch": 0.7369840841650931,
"grad_norm": 20.388688863695087,
"learning_rate": 3.41384581714854e-07,
"loss": 0.8219,
"step": 2049
},
{
"epoch": 0.7373437640499955,
"grad_norm": 9.039715730630718,
"learning_rate": 3.405081801240416e-07,
"loss": 0.7441,
"step": 2050
},
{
"epoch": 0.737703443934898,
"grad_norm": 10.662421431616126,
"learning_rate": 3.396326740409481e-07,
"loss": 0.7736,
"step": 2051
},
{
"epoch": 0.7380631238198003,
"grad_norm": 13.947146247653457,
"learning_rate": 3.3875806465440147e-07,
"loss": 0.7789,
"step": 2052
},
{
"epoch": 0.7384228037047028,
"grad_norm": 11.653362956755682,
"learning_rate": 3.3788435315201215e-07,
"loss": 0.6684,
"step": 2053
},
{
"epoch": 0.7387824835896053,
"grad_norm": 10.52905110701649,
"learning_rate": 3.3701154072017235e-07,
"loss": 0.877,
"step": 2054
},
{
"epoch": 0.7391421634745077,
"grad_norm": 16.581261902849825,
"learning_rate": 3.361396285440513e-07,
"loss": 0.7918,
"step": 2055
},
{
"epoch": 0.7395018433594102,
"grad_norm": 16.753947709914218,
"learning_rate": 3.352686178075981e-07,
"loss": 0.8152,
"step": 2056
},
{
"epoch": 0.7398615232443125,
"grad_norm": 18.821758509696345,
"learning_rate": 3.343985096935361e-07,
"loss": 0.8248,
"step": 2057
},
{
"epoch": 0.740221203129215,
"grad_norm": 12.276146780111988,
"learning_rate": 3.3352930538336443e-07,
"loss": 0.7722,
"step": 2058
},
{
"epoch": 0.7405808830141174,
"grad_norm": 19.90004147321965,
"learning_rate": 3.3266100605735394e-07,
"loss": 0.8282,
"step": 2059
},
{
"epoch": 0.7409405628990199,
"grad_norm": 9.834270848478702,
"learning_rate": 3.317936128945469e-07,
"loss": 0.8405,
"step": 2060
},
{
"epoch": 0.7413002427839223,
"grad_norm": 17.338370497758397,
"learning_rate": 3.309271270727546e-07,
"loss": 0.7359,
"step": 2061
},
{
"epoch": 0.7416599226688247,
"grad_norm": 9.741758752743277,
"learning_rate": 3.300615497685578e-07,
"loss": 0.7996,
"step": 2062
},
{
"epoch": 0.7420196025537272,
"grad_norm": 31.504247815988247,
"learning_rate": 3.2919688215730225e-07,
"loss": 0.7738,
"step": 2063
},
{
"epoch": 0.7423792824386296,
"grad_norm": 9.4546055919221,
"learning_rate": 3.2833312541309864e-07,
"loss": 0.8385,
"step": 2064
},
{
"epoch": 0.7427389623235321,
"grad_norm": 9.80680314535367,
"learning_rate": 3.2747028070882074e-07,
"loss": 0.7487,
"step": 2065
},
{
"epoch": 0.7430986422084345,
"grad_norm": 9.542150367120579,
"learning_rate": 3.266083492161049e-07,
"loss": 0.7829,
"step": 2066
},
{
"epoch": 0.743458322093337,
"grad_norm": 20.31567157031523,
"learning_rate": 3.257473321053463e-07,
"loss": 0.7968,
"step": 2067
},
{
"epoch": 0.7438180019782393,
"grad_norm": 13.837825732503578,
"learning_rate": 3.24887230545699e-07,
"loss": 0.9057,
"step": 2068
},
{
"epoch": 0.7441776818631418,
"grad_norm": 25.850371390444145,
"learning_rate": 3.2402804570507316e-07,
"loss": 0.7896,
"step": 2069
},
{
"epoch": 0.7445373617480442,
"grad_norm": 12.56587736916352,
"learning_rate": 3.2316977875013565e-07,
"loss": 0.7503,
"step": 2070
},
{
"epoch": 0.7448970416329467,
"grad_norm": 23.9209488111018,
"learning_rate": 3.2231243084630567e-07,
"loss": 0.8051,
"step": 2071
},
{
"epoch": 0.7452567215178492,
"grad_norm": 15.065538782610378,
"learning_rate": 3.214560031577548e-07,
"loss": 0.8623,
"step": 2072
},
{
"epoch": 0.7456164014027515,
"grad_norm": 94.90024186443381,
"learning_rate": 3.2060049684740474e-07,
"loss": 0.7944,
"step": 2073
},
{
"epoch": 0.745976081287654,
"grad_norm": 8.949350221122438,
"learning_rate": 3.197459130769272e-07,
"loss": 0.9138,
"step": 2074
},
{
"epoch": 0.7463357611725564,
"grad_norm": 7.665554311410304,
"learning_rate": 3.1889225300674014e-07,
"loss": 0.8285,
"step": 2075
},
{
"epoch": 0.7466954410574589,
"grad_norm": 8.039592238769972,
"learning_rate": 3.180395177960077e-07,
"loss": 0.8628,
"step": 2076
},
{
"epoch": 0.7470551209423613,
"grad_norm": 46.5255094528435,
"learning_rate": 3.171877086026374e-07,
"loss": 0.8002,
"step": 2077
},
{
"epoch": 0.7474148008272637,
"grad_norm": 11.212560508497486,
"learning_rate": 3.163368265832809e-07,
"loss": 0.8214,
"step": 2078
},
{
"epoch": 0.7477744807121661,
"grad_norm": 25.645983032084697,
"learning_rate": 3.1548687289332955e-07,
"loss": 0.7532,
"step": 2079
},
{
"epoch": 0.7481341605970686,
"grad_norm": 19.823630319451564,
"learning_rate": 3.1463784868691454e-07,
"loss": 0.7899,
"step": 2080
},
{
"epoch": 0.7484938404819711,
"grad_norm": 14.78470146418017,
"learning_rate": 3.1378975511690465e-07,
"loss": 0.7984,
"step": 2081
},
{
"epoch": 0.7488535203668735,
"grad_norm": 34.461545612303475,
"learning_rate": 3.129425933349059e-07,
"loss": 0.8304,
"step": 2082
},
{
"epoch": 0.749213200251776,
"grad_norm": 11.61532578962588,
"learning_rate": 3.1209636449125787e-07,
"loss": 0.8576,
"step": 2083
},
{
"epoch": 0.7495728801366783,
"grad_norm": 12.4044011444979,
"learning_rate": 3.112510697350348e-07,
"loss": 0.7523,
"step": 2084
},
{
"epoch": 0.7499325600215808,
"grad_norm": 9.357511728174131,
"learning_rate": 3.104067102140404e-07,
"loss": 0.8337,
"step": 2085
},
{
"epoch": 0.7502922399064832,
"grad_norm": 12.16077722373795,
"learning_rate": 3.095632870748105e-07,
"loss": 0.7571,
"step": 2086
},
{
"epoch": 0.7506519197913857,
"grad_norm": 13.638246920777654,
"learning_rate": 3.087208014626081e-07,
"loss": 0.7929,
"step": 2087
},
{
"epoch": 0.751011599676288,
"grad_norm": 17.680869612047026,
"learning_rate": 3.078792545214247e-07,
"loss": 0.7412,
"step": 2088
},
{
"epoch": 0.7513712795611905,
"grad_norm": 14.24665972454809,
"learning_rate": 3.0703864739397487e-07,
"loss": 0.8377,
"step": 2089
},
{
"epoch": 0.751730959446093,
"grad_norm": 45.522628663774555,
"learning_rate": 3.061989812216994e-07,
"loss": 0.8219,
"step": 2090
},
{
"epoch": 0.7520906393309954,
"grad_norm": 13.222103675700293,
"learning_rate": 3.053602571447594e-07,
"loss": 0.8223,
"step": 2091
},
{
"epoch": 0.7524503192158979,
"grad_norm": 15.175365572068145,
"learning_rate": 3.04522476302039e-07,
"loss": 0.7151,
"step": 2092
},
{
"epoch": 0.7528099991008003,
"grad_norm": 17.72918727027556,
"learning_rate": 3.036856398311386e-07,
"loss": 0.7974,
"step": 2093
},
{
"epoch": 0.7531696789857028,
"grad_norm": 15.427939599509612,
"learning_rate": 3.02849748868379e-07,
"loss": 0.7558,
"step": 2094
},
{
"epoch": 0.7535293588706051,
"grad_norm": 12.664430894009977,
"learning_rate": 3.0201480454879524e-07,
"loss": 0.8245,
"step": 2095
},
{
"epoch": 0.7538890387555076,
"grad_norm": 11.062996843287921,
"learning_rate": 3.011808080061387e-07,
"loss": 0.8098,
"step": 2096
},
{
"epoch": 0.75424871864041,
"grad_norm": 18.255148000865006,
"learning_rate": 3.0034776037287146e-07,
"loss": 0.7799,
"step": 2097
},
{
"epoch": 0.7546083985253125,
"grad_norm": 8.101439238334796,
"learning_rate": 2.995156627801694e-07,
"loss": 0.8066,
"step": 2098
},
{
"epoch": 0.754968078410215,
"grad_norm": 13.523471148927442,
"learning_rate": 2.9868451635791705e-07,
"loss": 0.768,
"step": 2099
},
{
"epoch": 0.7553277582951173,
"grad_norm": 13.530099368833888,
"learning_rate": 2.9785432223470753e-07,
"loss": 0.8117,
"step": 2100
},
{
"epoch": 0.7556874381800198,
"grad_norm": 19.13140506976365,
"learning_rate": 2.970250815378409e-07,
"loss": 0.87,
"step": 2101
},
{
"epoch": 0.7560471180649222,
"grad_norm": 18.17155476521602,
"learning_rate": 2.9619679539332334e-07,
"loss": 0.7897,
"step": 2102
},
{
"epoch": 0.7564067979498247,
"grad_norm": 12.123078364836363,
"learning_rate": 2.953694649258638e-07,
"loss": 0.7877,
"step": 2103
},
{
"epoch": 0.7567664778347271,
"grad_norm": 7.608292682104856,
"learning_rate": 2.94543091258874e-07,
"loss": 0.7465,
"step": 2104
},
{
"epoch": 0.7571261577196295,
"grad_norm": 12.975406694520702,
"learning_rate": 2.9371767551446616e-07,
"loss": 0.7985,
"step": 2105
},
{
"epoch": 0.7574858376045319,
"grad_norm": 19.27946366676412,
"learning_rate": 2.9289321881345254e-07,
"loss": 0.811,
"step": 2106
},
{
"epoch": 0.7578455174894344,
"grad_norm": 46.025272332147345,
"learning_rate": 2.9206972227534234e-07,
"loss": 0.8429,
"step": 2107
},
{
"epoch": 0.7582051973743369,
"grad_norm": 7.682745343898326,
"learning_rate": 2.9124718701834105e-07,
"loss": 0.8021,
"step": 2108
},
{
"epoch": 0.7585648772592393,
"grad_norm": 10.627958729450935,
"learning_rate": 2.904256141593495e-07,
"loss": 0.8382,
"step": 2109
},
{
"epoch": 0.7589245571441418,
"grad_norm": 9.795786264443564,
"learning_rate": 2.896050048139611e-07,
"loss": 0.8071,
"step": 2110
},
{
"epoch": 0.7592842370290441,
"grad_norm": 18.768381546725443,
"learning_rate": 2.8878536009646105e-07,
"loss": 0.8058,
"step": 2111
},
{
"epoch": 0.7596439169139466,
"grad_norm": 18.423993695523897,
"learning_rate": 2.879666811198244e-07,
"loss": 0.8213,
"step": 2112
},
{
"epoch": 0.760003596798849,
"grad_norm": 11.02320646000752,
"learning_rate": 2.871489689957157e-07,
"loss": 0.8092,
"step": 2113
},
{
"epoch": 0.7603632766837515,
"grad_norm": 10.25284067769936,
"learning_rate": 2.863322248344862e-07,
"loss": 0.8196,
"step": 2114
},
{
"epoch": 0.7607229565686539,
"grad_norm": 8.574507521038404,
"learning_rate": 2.8551644974517233e-07,
"loss": 0.7875,
"step": 2115
},
{
"epoch": 0.7610826364535563,
"grad_norm": 69.39568915406147,
"learning_rate": 2.8470164483549475e-07,
"loss": 0.8059,
"step": 2116
},
{
"epoch": 0.7614423163384588,
"grad_norm": 9.438779925806173,
"learning_rate": 2.838878112118581e-07,
"loss": 0.7672,
"step": 2117
},
{
"epoch": 0.7618019962233612,
"grad_norm": 9.601180518139573,
"learning_rate": 2.8307494997934575e-07,
"loss": 0.8064,
"step": 2118
},
{
"epoch": 0.7621616761082637,
"grad_norm": 15.191257494193449,
"learning_rate": 2.822630622417228e-07,
"loss": 0.7851,
"step": 2119
},
{
"epoch": 0.7625213559931661,
"grad_norm": 10.74841922116242,
"learning_rate": 2.814521491014312e-07,
"loss": 0.837,
"step": 2120
},
{
"epoch": 0.7628810358780685,
"grad_norm": 11.650209232181394,
"learning_rate": 2.806422116595907e-07,
"loss": 0.8205,
"step": 2121
},
{
"epoch": 0.7632407157629709,
"grad_norm": 31.29876607629954,
"learning_rate": 2.798332510159942e-07,
"loss": 0.7754,
"step": 2122
},
{
"epoch": 0.7636003956478734,
"grad_norm": 12.216849267684562,
"learning_rate": 2.7902526826911054e-07,
"loss": 0.8268,
"step": 2123
},
{
"epoch": 0.7639600755327758,
"grad_norm": 8.36986123454882,
"learning_rate": 2.7821826451607887e-07,
"loss": 0.82,
"step": 2124
},
{
"epoch": 0.7643197554176783,
"grad_norm": 9.4368882957973,
"learning_rate": 2.7741224085271063e-07,
"loss": 0.7585,
"step": 2125
},
{
"epoch": 0.7646794353025808,
"grad_norm": 35.56391519578202,
"learning_rate": 2.7660719837348445e-07,
"loss": 0.7725,
"step": 2126
},
{
"epoch": 0.7650391151874831,
"grad_norm": 26.46363386044735,
"learning_rate": 2.7580313817154844e-07,
"loss": 0.8388,
"step": 2127
},
{
"epoch": 0.7653987950723856,
"grad_norm": 8.769898467051506,
"learning_rate": 2.750000613387157e-07,
"loss": 0.7511,
"step": 2128
},
{
"epoch": 0.765758474957288,
"grad_norm": 11.442060171791937,
"learning_rate": 2.741979689654653e-07,
"loss": 0.7838,
"step": 2129
},
{
"epoch": 0.7661181548421905,
"grad_norm": 45.76135894576074,
"learning_rate": 2.733968621409377e-07,
"loss": 0.8819,
"step": 2130
},
{
"epoch": 0.7664778347270929,
"grad_norm": 19.029392095367616,
"learning_rate": 2.725967419529369e-07,
"loss": 0.8424,
"step": 2131
},
{
"epoch": 0.7668375146119953,
"grad_norm": 37.74042828997578,
"learning_rate": 2.7179760948792596e-07,
"loss": 0.8091,
"step": 2132
},
{
"epoch": 0.7671971944968977,
"grad_norm": 24.68003304528056,
"learning_rate": 2.709994658310276e-07,
"loss": 0.8521,
"step": 2133
},
{
"epoch": 0.7675568743818002,
"grad_norm": 60.917685417016344,
"learning_rate": 2.702023120660213e-07,
"loss": 0.8373,
"step": 2134
},
{
"epoch": 0.7679165542667027,
"grad_norm": 13.285572185374493,
"learning_rate": 2.6940614927534255e-07,
"loss": 0.8285,
"step": 2135
},
{
"epoch": 0.7682762341516051,
"grad_norm": 13.95750492177659,
"learning_rate": 2.686109785400809e-07,
"loss": 0.8398,
"step": 2136
},
{
"epoch": 0.7686359140365076,
"grad_norm": 28.79794073125316,
"learning_rate": 2.678168009399796e-07,
"loss": 0.7885,
"step": 2137
},
{
"epoch": 0.7689955939214099,
"grad_norm": 12.483715319878124,
"learning_rate": 2.670236175534327e-07,
"loss": 0.8527,
"step": 2138
},
{
"epoch": 0.7693552738063124,
"grad_norm": 27.28468354299244,
"learning_rate": 2.6623142945748447e-07,
"loss": 0.8472,
"step": 2139
},
{
"epoch": 0.7697149536912148,
"grad_norm": 8.837428554019429,
"learning_rate": 2.654402377278273e-07,
"loss": 0.7795,
"step": 2140
},
{
"epoch": 0.7700746335761173,
"grad_norm": 13.568315862204315,
"learning_rate": 2.646500434388015e-07,
"loss": 0.7786,
"step": 2141
},
{
"epoch": 0.7704343134610196,
"grad_norm": 11.54364558142577,
"learning_rate": 2.638608476633921e-07,
"loss": 0.7903,
"step": 2142
},
{
"epoch": 0.7707939933459221,
"grad_norm": 15.35010777649344,
"learning_rate": 2.6307265147322886e-07,
"loss": 0.7506,
"step": 2143
},
{
"epoch": 0.7711536732308246,
"grad_norm": 12.44341166425653,
"learning_rate": 2.6228545593858353e-07,
"loss": 0.7809,
"step": 2144
},
{
"epoch": 0.771513353115727,
"grad_norm": 9.849527159029504,
"learning_rate": 2.6149926212837015e-07,
"loss": 0.8182,
"step": 2145
},
{
"epoch": 0.7718730330006295,
"grad_norm": 17.311250410971674,
"learning_rate": 2.6071407111014177e-07,
"loss": 0.7556,
"step": 2146
},
{
"epoch": 0.7722327128855319,
"grad_norm": 7.433177146386238,
"learning_rate": 2.599298839500899e-07,
"loss": 0.7906,
"step": 2147
},
{
"epoch": 0.7725923927704343,
"grad_norm": 9.368131404282552,
"learning_rate": 2.5914670171304254e-07,
"loss": 0.846,
"step": 2148
},
{
"epoch": 0.7729520726553367,
"grad_norm": 27.941836286050815,
"learning_rate": 2.5836452546246447e-07,
"loss": 0.7627,
"step": 2149
},
{
"epoch": 0.7733117525402392,
"grad_norm": 9.126302607974916,
"learning_rate": 2.57583356260453e-07,
"loss": 0.7827,
"step": 2150
},
{
"epoch": 0.7736714324251416,
"grad_norm": 8.83382897893599,
"learning_rate": 2.5680319516773885e-07,
"loss": 0.8658,
"step": 2151
},
{
"epoch": 0.7740311123100441,
"grad_norm": 7.5971246576903395,
"learning_rate": 2.560240432436831e-07,
"loss": 0.8016,
"step": 2152
},
{
"epoch": 0.7743907921949464,
"grad_norm": 22.060343200505887,
"learning_rate": 2.5524590154627756e-07,
"loss": 0.8256,
"step": 2153
},
{
"epoch": 0.7747504720798489,
"grad_norm": 12.317280851375296,
"learning_rate": 2.544687711321415e-07,
"loss": 0.7787,
"step": 2154
},
{
"epoch": 0.7751101519647514,
"grad_norm": 97.31961999779651,
"learning_rate": 2.536926530565211e-07,
"loss": 0.8061,
"step": 2155
},
{
"epoch": 0.7754698318496538,
"grad_norm": 15.478890567396736,
"learning_rate": 2.529175483732878e-07,
"loss": 0.7684,
"step": 2156
},
{
"epoch": 0.7758295117345563,
"grad_norm": 30.21366282134613,
"learning_rate": 2.521434581349378e-07,
"loss": 0.8361,
"step": 2157
},
{
"epoch": 0.7761891916194587,
"grad_norm": 10.59332738328557,
"learning_rate": 2.513703833925883e-07,
"loss": 0.7736,
"step": 2158
},
{
"epoch": 0.7765488715043611,
"grad_norm": 17.7406623566783,
"learning_rate": 2.505983251959798e-07,
"loss": 0.8598,
"step": 2159
},
{
"epoch": 0.7769085513892635,
"grad_norm": 13.111518092783921,
"learning_rate": 2.4982728459346967e-07,
"loss": 0.7786,
"step": 2160
},
{
"epoch": 0.777268231274166,
"grad_norm": 15.285683355007304,
"learning_rate": 2.4905726263203587e-07,
"loss": 0.7385,
"step": 2161
},
{
"epoch": 0.7776279111590684,
"grad_norm": 11.992210244588739,
"learning_rate": 2.4828826035727214e-07,
"loss": 0.7321,
"step": 2162
},
{
"epoch": 0.7779875910439709,
"grad_norm": 16.229451043839767,
"learning_rate": 2.4752027881338757e-07,
"loss": 0.798,
"step": 2163
},
{
"epoch": 0.7783472709288733,
"grad_norm": 13.289808587733184,
"learning_rate": 2.467533190432053e-07,
"loss": 0.8036,
"step": 2164
},
{
"epoch": 0.7787069508137757,
"grad_norm": 8.035681458730258,
"learning_rate": 2.459873820881615e-07,
"loss": 0.8259,
"step": 2165
},
{
"epoch": 0.7790666306986782,
"grad_norm": 16.319641097250493,
"learning_rate": 2.4522246898830304e-07,
"loss": 0.8338,
"step": 2166
},
{
"epoch": 0.7794263105835806,
"grad_norm": 11.834868911534926,
"learning_rate": 2.4445858078228643e-07,
"loss": 0.7462,
"step": 2167
},
{
"epoch": 0.7797859904684831,
"grad_norm": 12.69739034024986,
"learning_rate": 2.4369571850737657e-07,
"loss": 0.8008,
"step": 2168
},
{
"epoch": 0.7801456703533854,
"grad_norm": 16.92832348270919,
"learning_rate": 2.4293388319944574e-07,
"loss": 0.8546,
"step": 2169
},
{
"epoch": 0.7805053502382879,
"grad_norm": 15.34914146252241,
"learning_rate": 2.421730758929713e-07,
"loss": 0.8025,
"step": 2170
},
{
"epoch": 0.7808650301231903,
"grad_norm": 13.15048232145177,
"learning_rate": 2.414132976210346e-07,
"loss": 0.8324,
"step": 2171
},
{
"epoch": 0.7812247100080928,
"grad_norm": 15.702733431224983,
"learning_rate": 2.406545494153196e-07,
"loss": 0.8452,
"step": 2172
},
{
"epoch": 0.7815843898929953,
"grad_norm": 27.901819642019326,
"learning_rate": 2.398968323061125e-07,
"loss": 0.8131,
"step": 2173
},
{
"epoch": 0.7819440697778977,
"grad_norm": 12.340345284800485,
"learning_rate": 2.3914014732229827e-07,
"loss": 0.8028,
"step": 2174
},
{
"epoch": 0.7823037496628001,
"grad_norm": 11.59919363746485,
"learning_rate": 2.3838449549136098e-07,
"loss": 0.8401,
"step": 2175
},
{
"epoch": 0.7826634295477025,
"grad_norm": 14.074748636264571,
"learning_rate": 2.3762987783938138e-07,
"loss": 0.8651,
"step": 2176
},
{
"epoch": 0.783023109432605,
"grad_norm": 23.2984563671798,
"learning_rate": 2.368762953910367e-07,
"loss": 0.8034,
"step": 2177
},
{
"epoch": 0.7833827893175074,
"grad_norm": 12.530144218369257,
"learning_rate": 2.3612374916959777e-07,
"loss": 0.8701,
"step": 2178
},
{
"epoch": 0.7837424692024099,
"grad_norm": 19.979093313956696,
"learning_rate": 2.353722401969286e-07,
"loss": 0.7643,
"step": 2179
},
{
"epoch": 0.7841021490873122,
"grad_norm": 11.678206397949163,
"learning_rate": 2.3462176949348465e-07,
"loss": 0.8756,
"step": 2180
},
{
"epoch": 0.7844618289722147,
"grad_norm": 10.938030186767781,
"learning_rate": 2.3387233807831143e-07,
"loss": 0.7688,
"step": 2181
},
{
"epoch": 0.7848215088571172,
"grad_norm": 8.402466597228221,
"learning_rate": 2.3312394696904403e-07,
"loss": 0.825,
"step": 2182
},
{
"epoch": 0.7851811887420196,
"grad_norm": 8.984508957322987,
"learning_rate": 2.3237659718190394e-07,
"loss": 0.7859,
"step": 2183
},
{
"epoch": 0.7855408686269221,
"grad_norm": 15.217116220541765,
"learning_rate": 2.3163028973169917e-07,
"loss": 0.7741,
"step": 2184
},
{
"epoch": 0.7859005485118244,
"grad_norm": 11.432027827847797,
"learning_rate": 2.3088502563182178e-07,
"loss": 0.8224,
"step": 2185
},
{
"epoch": 0.7862602283967269,
"grad_norm": 34.900853193772434,
"learning_rate": 2.3014080589424834e-07,
"loss": 0.8385,
"step": 2186
},
{
"epoch": 0.7866199082816293,
"grad_norm": 8.633212140752239,
"learning_rate": 2.2939763152953573e-07,
"loss": 0.8747,
"step": 2187
},
{
"epoch": 0.7869795881665318,
"grad_norm": 33.25082323243209,
"learning_rate": 2.2865550354682327e-07,
"loss": 0.794,
"step": 2188
},
{
"epoch": 0.7873392680514342,
"grad_norm": 12.807958889113452,
"learning_rate": 2.279144229538269e-07,
"loss": 0.8289,
"step": 2189
},
{
"epoch": 0.7876989479363367,
"grad_norm": 17.16475182339757,
"learning_rate": 2.2717439075684263e-07,
"loss": 0.7846,
"step": 2190
},
{
"epoch": 0.7880586278212391,
"grad_norm": 20.462281814135242,
"learning_rate": 2.2643540796074156e-07,
"loss": 0.8003,
"step": 2191
},
{
"epoch": 0.7884183077061415,
"grad_norm": 9.673004562271768,
"learning_rate": 2.25697475568971e-07,
"loss": 0.7275,
"step": 2192
},
{
"epoch": 0.788777987591044,
"grad_norm": 16.26554078587725,
"learning_rate": 2.2496059458355e-07,
"loss": 0.8404,
"step": 2193
},
{
"epoch": 0.7891376674759464,
"grad_norm": 21.94352506788799,
"learning_rate": 2.2422476600507202e-07,
"loss": 0.7741,
"step": 2194
},
{
"epoch": 0.7894973473608489,
"grad_norm": 11.762770147070169,
"learning_rate": 2.2348999083270003e-07,
"loss": 0.7671,
"step": 2195
},
{
"epoch": 0.7898570272457512,
"grad_norm": 12.6430145839643,
"learning_rate": 2.2275627006416797e-07,
"loss": 0.8173,
"step": 2196
},
{
"epoch": 0.7902167071306537,
"grad_norm": 15.746501248599223,
"learning_rate": 2.2202360469577618e-07,
"loss": 0.7956,
"step": 2197
},
{
"epoch": 0.7905763870155561,
"grad_norm": 9.03212636630556,
"learning_rate": 2.2129199572239377e-07,
"loss": 0.7448,
"step": 2198
},
{
"epoch": 0.7909360669004586,
"grad_norm": 11.119608189348266,
"learning_rate": 2.2056144413745392e-07,
"loss": 0.8432,
"step": 2199
},
{
"epoch": 0.7912957467853611,
"grad_norm": 13.153580032958237,
"learning_rate": 2.1983195093295558e-07,
"loss": 0.8131,
"step": 2200
},
{
"epoch": 0.7916554266702635,
"grad_norm": 43.553967757305315,
"learning_rate": 2.191035170994584e-07,
"loss": 0.8152,
"step": 2201
},
{
"epoch": 0.7920151065551659,
"grad_norm": 10.000064039868674,
"learning_rate": 2.1837614362608569e-07,
"loss": 0.8311,
"step": 2202
},
{
"epoch": 0.7923747864400683,
"grad_norm": 20.717835143857574,
"learning_rate": 2.1764983150051951e-07,
"loss": 0.7754,
"step": 2203
},
{
"epoch": 0.7927344663249708,
"grad_norm": 28.512642566065328,
"learning_rate": 2.1692458170900197e-07,
"loss": 0.751,
"step": 2204
},
{
"epoch": 0.7930941462098732,
"grad_norm": 10.923945652982237,
"learning_rate": 2.162003952363307e-07,
"loss": 0.8223,
"step": 2205
},
{
"epoch": 0.7934538260947757,
"grad_norm": 14.368074118286325,
"learning_rate": 2.154772730658617e-07,
"loss": 0.7747,
"step": 2206
},
{
"epoch": 0.793813505979678,
"grad_norm": 18.327110926138854,
"learning_rate": 2.147552161795042e-07,
"loss": 0.7646,
"step": 2207
},
{
"epoch": 0.7941731858645805,
"grad_norm": 15.2718923928323,
"learning_rate": 2.1403422555772222e-07,
"loss": 0.8747,
"step": 2208
},
{
"epoch": 0.794532865749483,
"grad_norm": 9.7450328168099,
"learning_rate": 2.1331430217953018e-07,
"loss": 0.84,
"step": 2209
},
{
"epoch": 0.7948925456343854,
"grad_norm": 12.472111364195849,
"learning_rate": 2.125954470224951e-07,
"loss": 0.7651,
"step": 2210
},
{
"epoch": 0.7952522255192879,
"grad_norm": 11.409454024321807,
"learning_rate": 2.118776610627322e-07,
"loss": 0.7816,
"step": 2211
},
{
"epoch": 0.7956119054041902,
"grad_norm": 9.683463158955522,
"learning_rate": 2.111609452749059e-07,
"loss": 0.7534,
"step": 2212
},
{
"epoch": 0.7959715852890927,
"grad_norm": 16.518476649797382,
"learning_rate": 2.1044530063222677e-07,
"loss": 0.8282,
"step": 2213
},
{
"epoch": 0.7963312651739951,
"grad_norm": 11.076871284328455,
"learning_rate": 2.0973072810645077e-07,
"loss": 0.8042,
"step": 2214
},
{
"epoch": 0.7966909450588976,
"grad_norm": 43.22107060606698,
"learning_rate": 2.0901722866787842e-07,
"loss": 0.7533,
"step": 2215
},
{
"epoch": 0.7970506249438,
"grad_norm": 9.757499067901705,
"learning_rate": 2.083048032853534e-07,
"loss": 0.7762,
"step": 2216
},
{
"epoch": 0.7974103048287025,
"grad_norm": 16.026769525040617,
"learning_rate": 2.075934529262604e-07,
"loss": 0.8547,
"step": 2217
},
{
"epoch": 0.7977699847136049,
"grad_norm": 9.660745818513375,
"learning_rate": 2.068831785565246e-07,
"loss": 0.8358,
"step": 2218
},
{
"epoch": 0.7981296645985073,
"grad_norm": 9.096098076225008,
"learning_rate": 2.0617398114060979e-07,
"loss": 0.7666,
"step": 2219
},
{
"epoch": 0.7984893444834098,
"grad_norm": 11.264490778875238,
"learning_rate": 2.0546586164151824e-07,
"loss": 0.7986,
"step": 2220
},
{
"epoch": 0.7988490243683122,
"grad_norm": 9.544501329809775,
"learning_rate": 2.0475882102078767e-07,
"loss": 0.8543,
"step": 2221
},
{
"epoch": 0.7992087042532147,
"grad_norm": 12.395444297553139,
"learning_rate": 2.040528602384912e-07,
"loss": 0.8004,
"step": 2222
},
{
"epoch": 0.799568384138117,
"grad_norm": 34.52252165265367,
"learning_rate": 2.033479802532354e-07,
"loss": 0.8051,
"step": 2223
},
{
"epoch": 0.7999280640230195,
"grad_norm": 13.270992324968976,
"learning_rate": 2.0264418202215994e-07,
"loss": 0.822,
"step": 2224
},
{
"epoch": 0.8002877439079219,
"grad_norm": 9.766171438459644,
"learning_rate": 2.019414665009349e-07,
"loss": 0.7732,
"step": 2225
},
{
"epoch": 0.8006474237928244,
"grad_norm": 18.260697135030036,
"learning_rate": 2.0123983464376026e-07,
"loss": 0.8167,
"step": 2226
},
{
"epoch": 0.8010071036777269,
"grad_norm": 57.98811902368256,
"learning_rate": 2.0053928740336456e-07,
"loss": 0.7993,
"step": 2227
},
{
"epoch": 0.8013667835626292,
"grad_norm": 10.95989256001396,
"learning_rate": 1.9983982573100412e-07,
"loss": 0.7865,
"step": 2228
},
{
"epoch": 0.8017264634475317,
"grad_norm": 11.970402475239974,
"learning_rate": 1.991414505764605e-07,
"loss": 0.8536,
"step": 2229
},
{
"epoch": 0.8020861433324341,
"grad_norm": 32.45658865342094,
"learning_rate": 1.9844416288804e-07,
"loss": 0.7624,
"step": 2230
},
{
"epoch": 0.8024458232173366,
"grad_norm": 8.270619979920946,
"learning_rate": 1.977479636125724e-07,
"loss": 0.7601,
"step": 2231
},
{
"epoch": 0.802805503102239,
"grad_norm": 13.656390055544307,
"learning_rate": 1.9705285369540993e-07,
"loss": 0.7493,
"step": 2232
},
{
"epoch": 0.8031651829871415,
"grad_norm": 13.140181265672696,
"learning_rate": 1.963588340804251e-07,
"loss": 0.8749,
"step": 2233
},
{
"epoch": 0.8035248628720438,
"grad_norm": 18.307093635487576,
"learning_rate": 1.9566590571000996e-07,
"loss": 0.8121,
"step": 2234
},
{
"epoch": 0.8038845427569463,
"grad_norm": 11.080598749167935,
"learning_rate": 1.9497406952507455e-07,
"loss": 0.8414,
"step": 2235
},
{
"epoch": 0.8042442226418488,
"grad_norm": 10.283915598070877,
"learning_rate": 1.9428332646504696e-07,
"loss": 0.8315,
"step": 2236
},
{
"epoch": 0.8046039025267512,
"grad_norm": 10.114121338459718,
"learning_rate": 1.9359367746786992e-07,
"loss": 0.7474,
"step": 2237
},
{
"epoch": 0.8049635824116537,
"grad_norm": 11.161215023066037,
"learning_rate": 1.9290512347000065e-07,
"loss": 0.787,
"step": 2238
},
{
"epoch": 0.805323262296556,
"grad_norm": 71.78359928084078,
"learning_rate": 1.922176654064096e-07,
"loss": 0.7856,
"step": 2239
},
{
"epoch": 0.8056829421814585,
"grad_norm": 10.096572266223637,
"learning_rate": 1.915313042105795e-07,
"loss": 0.7992,
"step": 2240
},
{
"epoch": 0.8060426220663609,
"grad_norm": 12.440587005025414,
"learning_rate": 1.9084604081450328e-07,
"loss": 0.7781,
"step": 2241
},
{
"epoch": 0.8064023019512634,
"grad_norm": 7.801681726617352,
"learning_rate": 1.9016187614868306e-07,
"loss": 0.7551,
"step": 2242
},
{
"epoch": 0.8067619818361658,
"grad_norm": 245.08014326423097,
"learning_rate": 1.8947881114212938e-07,
"loss": 0.7905,
"step": 2243
},
{
"epoch": 0.8071216617210683,
"grad_norm": 7.459230321147278,
"learning_rate": 1.8879684672235906e-07,
"loss": 0.808,
"step": 2244
},
{
"epoch": 0.8074813416059707,
"grad_norm": 21.293012577378732,
"learning_rate": 1.881159838153954e-07,
"loss": 0.8194,
"step": 2245
},
{
"epoch": 0.8078410214908731,
"grad_norm": 17.210640953005576,
"learning_rate": 1.874362233457649e-07,
"loss": 0.7831,
"step": 2246
},
{
"epoch": 0.8082007013757756,
"grad_norm": 13.274695128165218,
"learning_rate": 1.8675756623649784e-07,
"loss": 0.8061,
"step": 2247
},
{
"epoch": 0.808560381260678,
"grad_norm": 109.63537116604127,
"learning_rate": 1.860800134091257e-07,
"loss": 0.7985,
"step": 2248
},
{
"epoch": 0.8089200611455805,
"grad_norm": 12.499922719400626,
"learning_rate": 1.8540356578368134e-07,
"loss": 0.7765,
"step": 2249
},
{
"epoch": 0.8092797410304828,
"grad_norm": 9.143999427644584,
"learning_rate": 1.8472822427869595e-07,
"loss": 0.8028,
"step": 2250
},
{
"epoch": 0.8096394209153853,
"grad_norm": 11.49632452562713,
"learning_rate": 1.8405398981119925e-07,
"loss": 0.8331,
"step": 2251
},
{
"epoch": 0.8099991008002877,
"grad_norm": 13.131820032853435,
"learning_rate": 1.833808632967173e-07,
"loss": 0.8637,
"step": 2252
},
{
"epoch": 0.8103587806851902,
"grad_norm": 28.848155079319856,
"learning_rate": 1.827088456492727e-07,
"loss": 0.7767,
"step": 2253
},
{
"epoch": 0.8107184605700927,
"grad_norm": 15.045863398114395,
"learning_rate": 1.820379377813812e-07,
"loss": 0.7856,
"step": 2254
},
{
"epoch": 0.811078140454995,
"grad_norm": 14.563166145473161,
"learning_rate": 1.8136814060405238e-07,
"loss": 0.8327,
"step": 2255
},
{
"epoch": 0.8114378203398975,
"grad_norm": 13.067979080382221,
"learning_rate": 1.8069945502678684e-07,
"loss": 0.8341,
"step": 2256
},
{
"epoch": 0.8117975002247999,
"grad_norm": 7.3393645572314705,
"learning_rate": 1.800318819575769e-07,
"loss": 0.8122,
"step": 2257
},
{
"epoch": 0.8121571801097024,
"grad_norm": 8.341294685695992,
"learning_rate": 1.793654223029033e-07,
"loss": 0.7387,
"step": 2258
},
{
"epoch": 0.8125168599946048,
"grad_norm": 9.953164368399987,
"learning_rate": 1.7870007696773537e-07,
"loss": 0.7421,
"step": 2259
},
{
"epoch": 0.8128765398795073,
"grad_norm": 17.530753665127847,
"learning_rate": 1.7803584685552876e-07,
"loss": 0.7636,
"step": 2260
},
{
"epoch": 0.8132362197644096,
"grad_norm": 12.159844887847953,
"learning_rate": 1.7737273286822562e-07,
"loss": 0.8127,
"step": 2261
},
{
"epoch": 0.8135958996493121,
"grad_norm": 10.656349633773116,
"learning_rate": 1.7671073590625184e-07,
"loss": 0.8071,
"step": 2262
},
{
"epoch": 0.8139555795342146,
"grad_norm": 13.921647802255146,
"learning_rate": 1.7604985686851749e-07,
"loss": 0.7979,
"step": 2263
},
{
"epoch": 0.814315259419117,
"grad_norm": 8.126821450958387,
"learning_rate": 1.753900966524129e-07,
"loss": 0.7862,
"step": 2264
},
{
"epoch": 0.8146749393040195,
"grad_norm": 18.1747354401724,
"learning_rate": 1.747314561538109e-07,
"loss": 0.8089,
"step": 2265
},
{
"epoch": 0.8150346191889218,
"grad_norm": 7.509916073561728,
"learning_rate": 1.74073936267063e-07,
"loss": 0.6772,
"step": 2266
},
{
"epoch": 0.8153942990738243,
"grad_norm": 22.82208884868346,
"learning_rate": 1.734175378849998e-07,
"loss": 0.8215,
"step": 2267
},
{
"epoch": 0.8157539789587267,
"grad_norm": 9.366109026277558,
"learning_rate": 1.7276226189892763e-07,
"loss": 0.8726,
"step": 2268
},
{
"epoch": 0.8161136588436292,
"grad_norm": 11.54468674035775,
"learning_rate": 1.7210810919863028e-07,
"loss": 0.7652,
"step": 2269
},
{
"epoch": 0.8164733387285316,
"grad_norm": 20.218143587638693,
"learning_rate": 1.7145508067236514e-07,
"loss": 0.7877,
"step": 2270
},
{
"epoch": 0.816833018613434,
"grad_norm": 12.569015763923973,
"learning_rate": 1.7080317720686432e-07,
"loss": 0.7718,
"step": 2271
},
{
"epoch": 0.8171926984983365,
"grad_norm": 13.661390149624827,
"learning_rate": 1.7015239968733065e-07,
"loss": 0.8318,
"step": 2272
},
{
"epoch": 0.8175523783832389,
"grad_norm": 23.004313653824514,
"learning_rate": 1.6950274899743944e-07,
"loss": 0.7868,
"step": 2273
},
{
"epoch": 0.8179120582681414,
"grad_norm": 9.115356687709024,
"learning_rate": 1.6885422601933507e-07,
"loss": 0.8094,
"step": 2274
},
{
"epoch": 0.8182717381530438,
"grad_norm": 11.49471905807452,
"learning_rate": 1.6820683163363159e-07,
"loss": 0.794,
"step": 2275
},
{
"epoch": 0.8186314180379463,
"grad_norm": 14.167061691113378,
"learning_rate": 1.67560566719409e-07,
"loss": 0.755,
"step": 2276
},
{
"epoch": 0.8189910979228486,
"grad_norm": 11.387631631432074,
"learning_rate": 1.669154321542151e-07,
"loss": 0.7853,
"step": 2277
},
{
"epoch": 0.8193507778077511,
"grad_norm": 37.87639215970217,
"learning_rate": 1.6627142881406186e-07,
"loss": 0.8561,
"step": 2278
},
{
"epoch": 0.8197104576926535,
"grad_norm": 27.221934678319457,
"learning_rate": 1.656285575734263e-07,
"loss": 0.8201,
"step": 2279
},
{
"epoch": 0.820070137577556,
"grad_norm": 16.209203561546843,
"learning_rate": 1.649868193052465e-07,
"loss": 0.8523,
"step": 2280
},
{
"epoch": 0.8204298174624585,
"grad_norm": 581.721183790373,
"learning_rate": 1.643462148809238e-07,
"loss": 0.8274,
"step": 2281
},
{
"epoch": 0.8207894973473608,
"grad_norm": 11.914813702261155,
"learning_rate": 1.6370674517031868e-07,
"loss": 0.802,
"step": 2282
},
{
"epoch": 0.8211491772322633,
"grad_norm": 12.227694797112598,
"learning_rate": 1.6306841104175218e-07,
"loss": 0.8508,
"step": 2283
},
{
"epoch": 0.8215088571171657,
"grad_norm": 20.330985201805433,
"learning_rate": 1.6243121336200127e-07,
"loss": 0.7792,
"step": 2284
},
{
"epoch": 0.8218685370020682,
"grad_norm": 20.298174086052803,
"learning_rate": 1.6179515299630199e-07,
"loss": 0.876,
"step": 2285
},
{
"epoch": 0.8222282168869706,
"grad_norm": 15.942233863267372,
"learning_rate": 1.611602308083444e-07,
"loss": 0.7673,
"step": 2286
},
{
"epoch": 0.822587896771873,
"grad_norm": 10.927464046082013,
"learning_rate": 1.6052644766027467e-07,
"loss": 0.8461,
"step": 2287
},
{
"epoch": 0.8229475766567754,
"grad_norm": 8.077305958641276,
"learning_rate": 1.5989380441269006e-07,
"loss": 0.7749,
"step": 2288
},
{
"epoch": 0.8233072565416779,
"grad_norm": 11.827162514740742,
"learning_rate": 1.5926230192464228e-07,
"loss": 0.8414,
"step": 2289
},
{
"epoch": 0.8236669364265804,
"grad_norm": 10.2629240946057,
"learning_rate": 1.5863194105363242e-07,
"loss": 0.734,
"step": 2290
},
{
"epoch": 0.8240266163114828,
"grad_norm": 13.148047468292425,
"learning_rate": 1.5800272265561254e-07,
"loss": 0.8053,
"step": 2291
},
{
"epoch": 0.8243862961963853,
"grad_norm": 15.035973238675208,
"learning_rate": 1.5737464758498243e-07,
"loss": 0.8434,
"step": 2292
},
{
"epoch": 0.8247459760812876,
"grad_norm": 10.933368632795961,
"learning_rate": 1.5674771669458996e-07,
"loss": 0.8182,
"step": 2293
},
{
"epoch": 0.8251056559661901,
"grad_norm": 12.115690506938334,
"learning_rate": 1.5612193083572877e-07,
"loss": 0.8067,
"step": 2294
},
{
"epoch": 0.8254653358510925,
"grad_norm": 11.569037443632995,
"learning_rate": 1.554972908581388e-07,
"loss": 0.8071,
"step": 2295
},
{
"epoch": 0.825825015735995,
"grad_norm": 49.8101801470549,
"learning_rate": 1.5487379761000273e-07,
"loss": 0.8369,
"step": 2296
},
{
"epoch": 0.8261846956208974,
"grad_norm": 10.746606837227537,
"learning_rate": 1.5425145193794697e-07,
"loss": 0.8167,
"step": 2297
},
{
"epoch": 0.8265443755057998,
"grad_norm": 18.159478189765707,
"learning_rate": 1.5363025468703917e-07,
"loss": 0.7757,
"step": 2298
},
{
"epoch": 0.8269040553907022,
"grad_norm": 10.381707934132592,
"learning_rate": 1.5301020670078802e-07,
"loss": 0.8095,
"step": 2299
},
{
"epoch": 0.8272637352756047,
"grad_norm": 17.8844332800837,
"learning_rate": 1.523913088211415e-07,
"loss": 0.784,
"step": 2300
},
{
"epoch": 0.8276234151605072,
"grad_norm": 13.426469758773418,
"learning_rate": 1.5177356188848556e-07,
"loss": 0.687,
"step": 2301
},
{
"epoch": 0.8279830950454096,
"grad_norm": 39.28947042539643,
"learning_rate": 1.5115696674164346e-07,
"loss": 0.8498,
"step": 2302
},
{
"epoch": 0.828342774930312,
"grad_norm": 8.23500161318244,
"learning_rate": 1.5054152421787503e-07,
"loss": 0.7089,
"step": 2303
},
{
"epoch": 0.8287024548152144,
"grad_norm": 13.77245256670465,
"learning_rate": 1.499272351528742e-07,
"loss": 0.7693,
"step": 2304
},
{
"epoch": 0.8290621347001169,
"grad_norm": 102.03568022021852,
"learning_rate": 1.4931410038076918e-07,
"loss": 0.8191,
"step": 2305
},
{
"epoch": 0.8294218145850193,
"grad_norm": 25.26898915553537,
"learning_rate": 1.4870212073412024e-07,
"loss": 0.7804,
"step": 2306
},
{
"epoch": 0.8297814944699218,
"grad_norm": 10.865074298124393,
"learning_rate": 1.4809129704391997e-07,
"loss": 0.7229,
"step": 2307
},
{
"epoch": 0.8301411743548242,
"grad_norm": 10.384806850095348,
"learning_rate": 1.4748163013959058e-07,
"loss": 0.7937,
"step": 2308
},
{
"epoch": 0.8305008542397266,
"grad_norm": 15.475422809640303,
"learning_rate": 1.4687312084898386e-07,
"loss": 0.8122,
"step": 2309
},
{
"epoch": 0.8308605341246291,
"grad_norm": 60.90412866491603,
"learning_rate": 1.4626576999837937e-07,
"loss": 0.8524,
"step": 2310
},
{
"epoch": 0.8312202140095315,
"grad_norm": 11.62715155651753,
"learning_rate": 1.456595784124839e-07,
"loss": 0.7943,
"step": 2311
},
{
"epoch": 0.831579893894434,
"grad_norm": 14.894094173204277,
"learning_rate": 1.4505454691443042e-07,
"loss": 0.7828,
"step": 2312
},
{
"epoch": 0.8319395737793364,
"grad_norm": 15.846119117751503,
"learning_rate": 1.4445067632577622e-07,
"loss": 0.8351,
"step": 2313
},
{
"epoch": 0.8322992536642388,
"grad_norm": 11.851981736041145,
"learning_rate": 1.438479674665022e-07,
"loss": 0.8186,
"step": 2314
},
{
"epoch": 0.8326589335491412,
"grad_norm": 18.741996160363115,
"learning_rate": 1.432464211550116e-07,
"loss": 0.8106,
"step": 2315
},
{
"epoch": 0.8330186134340437,
"grad_norm": 10.821171540681902,
"learning_rate": 1.4264603820813005e-07,
"loss": 0.8973,
"step": 2316
},
{
"epoch": 0.8333782933189461,
"grad_norm": 9.795264408170965,
"learning_rate": 1.4204681944110242e-07,
"loss": 0.7635,
"step": 2317
},
{
"epoch": 0.8337379732038486,
"grad_norm": 15.708568592847959,
"learning_rate": 1.41448765667593e-07,
"loss": 0.8359,
"step": 2318
},
{
"epoch": 0.8340976530887511,
"grad_norm": 9.194444315770358,
"learning_rate": 1.4085187769968431e-07,
"loss": 0.8341,
"step": 2319
},
{
"epoch": 0.8344573329736534,
"grad_norm": 11.930235028959371,
"learning_rate": 1.4025615634787613e-07,
"loss": 0.7688,
"step": 2320
},
{
"epoch": 0.8348170128585559,
"grad_norm": 14.356223028981091,
"learning_rate": 1.3966160242108372e-07,
"loss": 0.8027,
"step": 2321
},
{
"epoch": 0.8351766927434583,
"grad_norm": 9.562412213997689,
"learning_rate": 1.3906821672663704e-07,
"loss": 0.7349,
"step": 2322
},
{
"epoch": 0.8355363726283608,
"grad_norm": 10.43683433112123,
"learning_rate": 1.3847600007027994e-07,
"loss": 0.8652,
"step": 2323
},
{
"epoch": 0.8358960525132632,
"grad_norm": 7.776153454746316,
"learning_rate": 1.378849532561691e-07,
"loss": 0.7701,
"step": 2324
},
{
"epoch": 0.8362557323981656,
"grad_norm": 24.332429727924744,
"learning_rate": 1.372950770868724e-07,
"loss": 0.7697,
"step": 2325
},
{
"epoch": 0.836615412283068,
"grad_norm": 9.926742645416553,
"learning_rate": 1.3670637236336814e-07,
"loss": 0.7948,
"step": 2326
},
{
"epoch": 0.8369750921679705,
"grad_norm": 8.679025247195929,
"learning_rate": 1.3611883988504358e-07,
"loss": 0.8182,
"step": 2327
},
{
"epoch": 0.837334772052873,
"grad_norm": 14.838497631869352,
"learning_rate": 1.3553248044969524e-07,
"loss": 0.8189,
"step": 2328
},
{
"epoch": 0.8376944519377754,
"grad_norm": 10.051187806732115,
"learning_rate": 1.3494729485352586e-07,
"loss": 0.7508,
"step": 2329
},
{
"epoch": 0.8380541318226779,
"grad_norm": 11.298601823584674,
"learning_rate": 1.3436328389114472e-07,
"loss": 0.7925,
"step": 2330
},
{
"epoch": 0.8384138117075802,
"grad_norm": 10.243660862667488,
"learning_rate": 1.3378044835556557e-07,
"loss": 0.8215,
"step": 2331
},
{
"epoch": 0.8387734915924827,
"grad_norm": 11.258117329842404,
"learning_rate": 1.331987890382068e-07,
"loss": 0.806,
"step": 2332
},
{
"epoch": 0.8391331714773851,
"grad_norm": 12.887297854681147,
"learning_rate": 1.326183067288893e-07,
"loss": 0.7611,
"step": 2333
},
{
"epoch": 0.8394928513622876,
"grad_norm": 8.685018023842622,
"learning_rate": 1.3203900221583563e-07,
"loss": 0.7333,
"step": 2334
},
{
"epoch": 0.83985253124719,
"grad_norm": 12.059613850313461,
"learning_rate": 1.3146087628566894e-07,
"loss": 0.7111,
"step": 2335
},
{
"epoch": 0.8402122111320924,
"grad_norm": 15.434102439416437,
"learning_rate": 1.3088392972341257e-07,
"loss": 0.7703,
"step": 2336
},
{
"epoch": 0.8405718910169949,
"grad_norm": 7.988816915844384,
"learning_rate": 1.3030816331248783e-07,
"loss": 0.7412,
"step": 2337
},
{
"epoch": 0.8409315709018973,
"grad_norm": 17.8684095934584,
"learning_rate": 1.2973357783471385e-07,
"loss": 0.7891,
"step": 2338
},
{
"epoch": 0.8412912507867998,
"grad_norm": 10.605578795961502,
"learning_rate": 1.2916017407030587e-07,
"loss": 0.7966,
"step": 2339
},
{
"epoch": 0.8416509306717022,
"grad_norm": 7.979151580364497,
"learning_rate": 1.2858795279787515e-07,
"loss": 0.8085,
"step": 2340
},
{
"epoch": 0.8420106105566046,
"grad_norm": 15.045629674435435,
"learning_rate": 1.2801691479442654e-07,
"loss": 0.7548,
"step": 2341
},
{
"epoch": 0.842370290441507,
"grad_norm": 22.460005334784544,
"learning_rate": 1.2744706083535906e-07,
"loss": 0.783,
"step": 2342
},
{
"epoch": 0.8427299703264095,
"grad_norm": 25.059742983395793,
"learning_rate": 1.2687839169446256e-07,
"loss": 0.8405,
"step": 2343
},
{
"epoch": 0.8430896502113119,
"grad_norm": 9.113176144604909,
"learning_rate": 1.2631090814391943e-07,
"loss": 0.7647,
"step": 2344
},
{
"epoch": 0.8434493300962144,
"grad_norm": 12.555101725316918,
"learning_rate": 1.2574461095430144e-07,
"loss": 0.7997,
"step": 2345
},
{
"epoch": 0.8438090099811169,
"grad_norm": 7.54974258001931,
"learning_rate": 1.2517950089456997e-07,
"loss": 0.7705,
"step": 2346
},
{
"epoch": 0.8441686898660192,
"grad_norm": 27.249421233014953,
"learning_rate": 1.2461557873207328e-07,
"loss": 0.7757,
"step": 2347
},
{
"epoch": 0.8445283697509217,
"grad_norm": 9.405443108338877,
"learning_rate": 1.240528452325482e-07,
"loss": 0.7469,
"step": 2348
},
{
"epoch": 0.8448880496358241,
"grad_norm": 12.970887430421074,
"learning_rate": 1.2349130116011618e-07,
"loss": 0.8021,
"step": 2349
},
{
"epoch": 0.8452477295207266,
"grad_norm": 12.890285997253583,
"learning_rate": 1.229309472772847e-07,
"loss": 0.8027,
"step": 2350
},
{
"epoch": 0.845607409405629,
"grad_norm": 14.74162801700718,
"learning_rate": 1.2237178434494378e-07,
"loss": 0.8477,
"step": 2351
},
{
"epoch": 0.8459670892905314,
"grad_norm": 167.2267924054436,
"learning_rate": 1.2181381312236748e-07,
"loss": 0.7981,
"step": 2352
},
{
"epoch": 0.8463267691754338,
"grad_norm": 12.960623291109226,
"learning_rate": 1.2125703436721091e-07,
"loss": 0.7431,
"step": 2353
},
{
"epoch": 0.8466864490603363,
"grad_norm": 8.664210286860856,
"learning_rate": 1.207014488355107e-07,
"loss": 0.7999,
"step": 2354
},
{
"epoch": 0.8470461289452388,
"grad_norm": 28.279223774903546,
"learning_rate": 1.2014705728168218e-07,
"loss": 0.775,
"step": 2355
},
{
"epoch": 0.8474058088301412,
"grad_norm": 38.82554154366558,
"learning_rate": 1.1959386045852048e-07,
"loss": 0.8309,
"step": 2356
},
{
"epoch": 0.8477654887150436,
"grad_norm": 13.895528440091361,
"learning_rate": 1.1904185911719767e-07,
"loss": 0.7915,
"step": 2357
},
{
"epoch": 0.848125168599946,
"grad_norm": 12.125042321937775,
"learning_rate": 1.1849105400726322e-07,
"loss": 0.823,
"step": 2358
},
{
"epoch": 0.8484848484848485,
"grad_norm": 9.97026854760588,
"learning_rate": 1.1794144587664113e-07,
"loss": 0.8106,
"step": 2359
},
{
"epoch": 0.8488445283697509,
"grad_norm": 26.051019497196574,
"learning_rate": 1.1739303547163138e-07,
"loss": 0.7895,
"step": 2360
},
{
"epoch": 0.8492042082546534,
"grad_norm": 11.852463625231042,
"learning_rate": 1.1684582353690642e-07,
"loss": 0.8865,
"step": 2361
},
{
"epoch": 0.8495638881395557,
"grad_norm": 20.853977821016382,
"learning_rate": 1.1629981081551276e-07,
"loss": 0.7361,
"step": 2362
},
{
"epoch": 0.8499235680244582,
"grad_norm": 8.79126880257627,
"learning_rate": 1.1575499804886657e-07,
"loss": 0.773,
"step": 2363
},
{
"epoch": 0.8502832479093607,
"grad_norm": 13.126326647726362,
"learning_rate": 1.152113859767565e-07,
"loss": 0.8399,
"step": 2364
},
{
"epoch": 0.8506429277942631,
"grad_norm": 9.79130876826058,
"learning_rate": 1.1466897533733943e-07,
"loss": 0.7747,
"step": 2365
},
{
"epoch": 0.8510026076791656,
"grad_norm": 9.629331238536345,
"learning_rate": 1.1412776686714199e-07,
"loss": 0.7794,
"step": 2366
},
{
"epoch": 0.851362287564068,
"grad_norm": 12.657470224794194,
"learning_rate": 1.1358776130105763e-07,
"loss": 0.796,
"step": 2367
},
{
"epoch": 0.8517219674489704,
"grad_norm": 14.083863594684702,
"learning_rate": 1.1304895937234649e-07,
"loss": 0.7918,
"step": 2368
},
{
"epoch": 0.8520816473338728,
"grad_norm": 9.738032938074392,
"learning_rate": 1.1251136181263432e-07,
"loss": 0.8091,
"step": 2369
},
{
"epoch": 0.8524413272187753,
"grad_norm": 9.538609823673212,
"learning_rate": 1.1197496935191208e-07,
"loss": 0.8568,
"step": 2370
},
{
"epoch": 0.8528010071036777,
"grad_norm": 9.082270731085444,
"learning_rate": 1.114397827185336e-07,
"loss": 0.8054,
"step": 2371
},
{
"epoch": 0.8531606869885802,
"grad_norm": 35.09383378696445,
"learning_rate": 1.1090580263921578e-07,
"loss": 0.7911,
"step": 2372
},
{
"epoch": 0.8535203668734826,
"grad_norm": 1250.82091100558,
"learning_rate": 1.1037302983903684e-07,
"loss": 0.8286,
"step": 2373
},
{
"epoch": 0.853880046758385,
"grad_norm": 8.04664795906332,
"learning_rate": 1.0984146504143588e-07,
"loss": 0.743,
"step": 2374
},
{
"epoch": 0.8542397266432875,
"grad_norm": 13.908120492868619,
"learning_rate": 1.0931110896821182e-07,
"loss": 0.8826,
"step": 2375
},
{
"epoch": 0.8545994065281899,
"grad_norm": 8.293378871159591,
"learning_rate": 1.087819623395222e-07,
"loss": 0.82,
"step": 2376
},
{
"epoch": 0.8549590864130924,
"grad_norm": 12.821152267637753,
"learning_rate": 1.08254025873882e-07,
"loss": 0.8965,
"step": 2377
},
{
"epoch": 0.8553187662979947,
"grad_norm": 25.32307007964656,
"learning_rate": 1.0772730028816302e-07,
"loss": 0.891,
"step": 2378
},
{
"epoch": 0.8556784461828972,
"grad_norm": 24.227306921120448,
"learning_rate": 1.0720178629759347e-07,
"loss": 0.8235,
"step": 2379
},
{
"epoch": 0.8560381260677996,
"grad_norm": 49.12110666155429,
"learning_rate": 1.0667748461575544e-07,
"loss": 0.784,
"step": 2380
},
{
"epoch": 0.8563978059527021,
"grad_norm": 12.865030237798612,
"learning_rate": 1.0615439595458553e-07,
"loss": 0.7789,
"step": 2381
},
{
"epoch": 0.8567574858376046,
"grad_norm": 27.76612472619855,
"learning_rate": 1.0563252102437259e-07,
"loss": 0.7404,
"step": 2382
},
{
"epoch": 0.857117165722507,
"grad_norm": 11.219391329197183,
"learning_rate": 1.0511186053375832e-07,
"loss": 0.7762,
"step": 2383
},
{
"epoch": 0.8574768456074094,
"grad_norm": 12.787439446751387,
"learning_rate": 1.0459241518973439e-07,
"loss": 0.7834,
"step": 2384
},
{
"epoch": 0.8578365254923118,
"grad_norm": 14.73054580968944,
"learning_rate": 1.0407418569764304e-07,
"loss": 0.8216,
"step": 2385
},
{
"epoch": 0.8581962053772143,
"grad_norm": 11.97325876203116,
"learning_rate": 1.0355717276117503e-07,
"loss": 0.7907,
"step": 2386
},
{
"epoch": 0.8585558852621167,
"grad_norm": 9.302593646290104,
"learning_rate": 1.0304137708236992e-07,
"loss": 0.7809,
"step": 2387
},
{
"epoch": 0.8589155651470192,
"grad_norm": 12.586732395421038,
"learning_rate": 1.025267993616139e-07,
"loss": 0.7515,
"step": 2388
},
{
"epoch": 0.8592752450319215,
"grad_norm": 12.060204409506623,
"learning_rate": 1.0201344029763925e-07,
"loss": 0.7799,
"step": 2389
},
{
"epoch": 0.859634924916824,
"grad_norm": 11.687658910496713,
"learning_rate": 1.0150130058752349e-07,
"loss": 0.7901,
"step": 2390
},
{
"epoch": 0.8599946048017265,
"grad_norm": 18.111372110913653,
"learning_rate": 1.0099038092668899e-07,
"loss": 0.8045,
"step": 2391
},
{
"epoch": 0.8603542846866289,
"grad_norm": 16.779825294723597,
"learning_rate": 1.0048068200890037e-07,
"loss": 0.7497,
"step": 2392
},
{
"epoch": 0.8607139645715314,
"grad_norm": 10.189539353170648,
"learning_rate": 9.997220452626587e-08,
"loss": 0.7694,
"step": 2393
},
{
"epoch": 0.8610736444564338,
"grad_norm": 10.508841460655566,
"learning_rate": 9.946494916923398e-08,
"loss": 0.775,
"step": 2394
},
{
"epoch": 0.8614333243413362,
"grad_norm": 28.910622544604255,
"learning_rate": 9.895891662659484e-08,
"loss": 0.743,
"step": 2395
},
{
"epoch": 0.8617930042262386,
"grad_norm": 8.110628199281892,
"learning_rate": 9.845410758547723e-08,
"loss": 0.8655,
"step": 2396
},
{
"epoch": 0.8621526841111411,
"grad_norm": 11.794263357334533,
"learning_rate": 9.795052273134907e-08,
"loss": 0.7456,
"step": 2397
},
{
"epoch": 0.8625123639960435,
"grad_norm": 29.382340644289357,
"learning_rate": 9.74481627480156e-08,
"loss": 0.8409,
"step": 2398
},
{
"epoch": 0.862872043880946,
"grad_norm": 13.60620815681596,
"learning_rate": 9.694702831761937e-08,
"loss": 0.7681,
"step": 2399
},
{
"epoch": 0.8632317237658484,
"grad_norm": 22.600202809252284,
"learning_rate": 9.64471201206385e-08,
"loss": 0.8101,
"step": 2400
},
{
"epoch": 0.8635914036507508,
"grad_norm": 13.70468540326132,
"learning_rate": 9.594843883588588e-08,
"loss": 0.7787,
"step": 2401
},
{
"epoch": 0.8639510835356533,
"grad_norm": 17.287845281656523,
"learning_rate": 9.545098514050842e-08,
"loss": 0.84,
"step": 2402
},
{
"epoch": 0.8643107634205557,
"grad_norm": 28.82379473225906,
"learning_rate": 9.495475970998667e-08,
"loss": 0.8419,
"step": 2403
},
{
"epoch": 0.8646704433054582,
"grad_norm": 18.80326675692738,
"learning_rate": 9.445976321813276e-08,
"loss": 0.8139,
"step": 2404
},
{
"epoch": 0.8650301231903605,
"grad_norm": 8.873151323104636,
"learning_rate": 9.396599633709012e-08,
"loss": 0.8906,
"step": 2405
},
{
"epoch": 0.865389803075263,
"grad_norm": 29.108317139778812,
"learning_rate": 9.347345973733256e-08,
"loss": 0.7823,
"step": 2406
},
{
"epoch": 0.8657494829601654,
"grad_norm": 57.376392562292516,
"learning_rate": 9.298215408766375e-08,
"loss": 0.8272,
"step": 2407
},
{
"epoch": 0.8661091628450679,
"grad_norm": 12.507098112586599,
"learning_rate": 9.249208005521536e-08,
"loss": 0.8554,
"step": 2408
},
{
"epoch": 0.8664688427299704,
"grad_norm": 19.933083580678844,
"learning_rate": 9.200323830544699e-08,
"loss": 0.7296,
"step": 2409
},
{
"epoch": 0.8668285226148728,
"grad_norm": 11.540300634818104,
"learning_rate": 9.151562950214443e-08,
"loss": 0.8011,
"step": 2410
},
{
"epoch": 0.8671882024997752,
"grad_norm": 9.122997702763547,
"learning_rate": 9.102925430742015e-08,
"loss": 0.7509,
"step": 2411
},
{
"epoch": 0.8675478823846776,
"grad_norm": 9.276170308686988,
"learning_rate": 9.054411338171097e-08,
"loss": 0.7616,
"step": 2412
},
{
"epoch": 0.8679075622695801,
"grad_norm": 14.80827922183617,
"learning_rate": 9.006020738377762e-08,
"loss": 0.8061,
"step": 2413
},
{
"epoch": 0.8682672421544825,
"grad_norm": 15.06547846126099,
"learning_rate": 8.957753697070413e-08,
"loss": 0.8666,
"step": 2414
},
{
"epoch": 0.868626922039385,
"grad_norm": 8.648973227689028,
"learning_rate": 8.909610279789714e-08,
"loss": 0.7728,
"step": 2415
},
{
"epoch": 0.8689866019242873,
"grad_norm": 15.863682113126744,
"learning_rate": 8.861590551908404e-08,
"loss": 0.7556,
"step": 2416
},
{
"epoch": 0.8693462818091898,
"grad_norm": 28.09939290308373,
"learning_rate": 8.813694578631281e-08,
"loss": 0.7793,
"step": 2417
},
{
"epoch": 0.8697059616940923,
"grad_norm": 28.819946477351102,
"learning_rate": 8.76592242499511e-08,
"loss": 0.7464,
"step": 2418
},
{
"epoch": 0.8700656415789947,
"grad_norm": 106.5794495887269,
"learning_rate": 8.718274155868543e-08,
"loss": 0.7709,
"step": 2419
},
{
"epoch": 0.8704253214638972,
"grad_norm": 17.350613213171766,
"learning_rate": 8.670749835951963e-08,
"loss": 0.8158,
"step": 2420
},
{
"epoch": 0.8707850013487995,
"grad_norm": 8.004833697151915,
"learning_rate": 8.623349529777524e-08,
"loss": 0.7737,
"step": 2421
},
{
"epoch": 0.871144681233702,
"grad_norm": 10.6819927235279,
"learning_rate": 8.576073301708875e-08,
"loss": 0.8069,
"step": 2422
},
{
"epoch": 0.8715043611186044,
"grad_norm": 15.605449994443598,
"learning_rate": 8.528921215941298e-08,
"loss": 0.8134,
"step": 2423
},
{
"epoch": 0.8718640410035069,
"grad_norm": 16.08474338996225,
"learning_rate": 8.481893336501389e-08,
"loss": 0.9273,
"step": 2424
},
{
"epoch": 0.8722237208884093,
"grad_norm": 15.130247859775276,
"learning_rate": 8.434989727247232e-08,
"loss": 0.8169,
"step": 2425
},
{
"epoch": 0.8725834007733118,
"grad_norm": 18.686906062019553,
"learning_rate": 8.388210451868005e-08,
"loss": 0.8607,
"step": 2426
},
{
"epoch": 0.8729430806582142,
"grad_norm": 29.476551071939696,
"learning_rate": 8.341555573884173e-08,
"loss": 0.7818,
"step": 2427
},
{
"epoch": 0.8733027605431166,
"grad_norm": 13.2768876743159,
"learning_rate": 8.295025156647228e-08,
"loss": 0.8134,
"step": 2428
},
{
"epoch": 0.8736624404280191,
"grad_norm": 22.00954931797058,
"learning_rate": 8.248619263339728e-08,
"loss": 0.8381,
"step": 2429
},
{
"epoch": 0.8740221203129215,
"grad_norm": 13.944033996554646,
"learning_rate": 8.202337956975024e-08,
"loss": 0.846,
"step": 2430
},
{
"epoch": 0.874381800197824,
"grad_norm": 11.527076317610028,
"learning_rate": 8.156181300397413e-08,
"loss": 0.8129,
"step": 2431
},
{
"epoch": 0.8747414800827263,
"grad_norm": 12.669302449767654,
"learning_rate": 8.110149356281847e-08,
"loss": 0.8022,
"step": 2432
},
{
"epoch": 0.8751011599676288,
"grad_norm": 8.284644891082207,
"learning_rate": 8.06424218713403e-08,
"loss": 0.7859,
"step": 2433
},
{
"epoch": 0.8754608398525312,
"grad_norm": 8.656145459043579,
"learning_rate": 8.018459855290104e-08,
"loss": 0.8375,
"step": 2434
},
{
"epoch": 0.8758205197374337,
"grad_norm": 18.107629198048162,
"learning_rate": 7.972802422916825e-08,
"loss": 0.8271,
"step": 2435
},
{
"epoch": 0.8761801996223362,
"grad_norm": 13.099365057109353,
"learning_rate": 7.927269952011284e-08,
"loss": 0.8131,
"step": 2436
},
{
"epoch": 0.8765398795072386,
"grad_norm": 10.251909754431694,
"learning_rate": 7.881862504400883e-08,
"loss": 0.7417,
"step": 2437
},
{
"epoch": 0.876899559392141,
"grad_norm": 17.19512422107007,
"learning_rate": 7.836580141743288e-08,
"loss": 0.9009,
"step": 2438
},
{
"epoch": 0.8772592392770434,
"grad_norm": 89.25206201161427,
"learning_rate": 7.791422925526325e-08,
"loss": 0.8347,
"step": 2439
},
{
"epoch": 0.8776189191619459,
"grad_norm": 16.113169300626446,
"learning_rate": 7.746390917067846e-08,
"loss": 0.8061,
"step": 2440
},
{
"epoch": 0.8779785990468483,
"grad_norm": 23.32434417648339,
"learning_rate": 7.701484177515716e-08,
"loss": 0.7796,
"step": 2441
},
{
"epoch": 0.8783382789317508,
"grad_norm": 23.50007900752126,
"learning_rate": 7.656702767847678e-08,
"loss": 0.7934,
"step": 2442
},
{
"epoch": 0.8786979588166531,
"grad_norm": 10.184777637570136,
"learning_rate": 7.612046748871326e-08,
"loss": 0.7884,
"step": 2443
},
{
"epoch": 0.8790576387015556,
"grad_norm": 10.329105062305402,
"learning_rate": 7.567516181223965e-08,
"loss": 0.8039,
"step": 2444
},
{
"epoch": 0.8794173185864581,
"grad_norm": 168.65893464914373,
"learning_rate": 7.523111125372538e-08,
"loss": 0.7646,
"step": 2445
},
{
"epoch": 0.8797769984713605,
"grad_norm": 8.57131518316382,
"learning_rate": 7.478831641613615e-08,
"loss": 0.7408,
"step": 2446
},
{
"epoch": 0.880136678356263,
"grad_norm": 53.17870646724811,
"learning_rate": 7.434677790073196e-08,
"loss": 0.7859,
"step": 2447
},
{
"epoch": 0.8804963582411653,
"grad_norm": 9.39752643043822,
"learning_rate": 7.390649630706702e-08,
"loss": 0.7826,
"step": 2448
},
{
"epoch": 0.8808560381260678,
"grad_norm": 17.039867436299417,
"learning_rate": 7.346747223298888e-08,
"loss": 0.8614,
"step": 2449
},
{
"epoch": 0.8812157180109702,
"grad_norm": 24.478200274372707,
"learning_rate": 7.302970627463779e-08,
"loss": 0.8557,
"step": 2450
},
{
"epoch": 0.8815753978958727,
"grad_norm": 10.200742151291859,
"learning_rate": 7.259319902644512e-08,
"loss": 0.794,
"step": 2451
},
{
"epoch": 0.8819350777807751,
"grad_norm": 12.04275285618893,
"learning_rate": 7.215795108113343e-08,
"loss": 0.7997,
"step": 2452
},
{
"epoch": 0.8822947576656776,
"grad_norm": 19.699950092626096,
"learning_rate": 7.172396302971507e-08,
"loss": 0.8055,
"step": 2453
},
{
"epoch": 0.8826544375505799,
"grad_norm": 7.572428124589908,
"learning_rate": 7.129123546149208e-08,
"loss": 0.7693,
"step": 2454
},
{
"epoch": 0.8830141174354824,
"grad_norm": 8.84951609822255,
"learning_rate": 7.0859768964054e-08,
"loss": 0.8012,
"step": 2455
},
{
"epoch": 0.8833737973203849,
"grad_norm": 26.091676112292046,
"learning_rate": 7.042956412327916e-08,
"loss": 0.7914,
"step": 2456
},
{
"epoch": 0.8837334772052873,
"grad_norm": 21.259823000357553,
"learning_rate": 7.000062152333164e-08,
"loss": 0.8482,
"step": 2457
},
{
"epoch": 0.8840931570901898,
"grad_norm": 17.576420557735393,
"learning_rate": 6.957294174666262e-08,
"loss": 0.8336,
"step": 2458
},
{
"epoch": 0.8844528369750921,
"grad_norm": 13.527958380566336,
"learning_rate": 6.914652537400733e-08,
"loss": 0.8384,
"step": 2459
},
{
"epoch": 0.8848125168599946,
"grad_norm": 22.67826064074168,
"learning_rate": 6.872137298438652e-08,
"loss": 0.8449,
"step": 2460
},
{
"epoch": 0.885172196744897,
"grad_norm": 14.872871176877165,
"learning_rate": 6.829748515510381e-08,
"loss": 0.7367,
"step": 2461
},
{
"epoch": 0.8855318766297995,
"grad_norm": 25.033817472057137,
"learning_rate": 6.787486246174656e-08,
"loss": 0.858,
"step": 2462
},
{
"epoch": 0.8858915565147019,
"grad_norm": 68.62693486748991,
"learning_rate": 6.745350547818307e-08,
"loss": 0.7359,
"step": 2463
},
{
"epoch": 0.8862512363996043,
"grad_norm": 14.400838286048678,
"learning_rate": 6.703341477656421e-08,
"loss": 0.7596,
"step": 2464
},
{
"epoch": 0.8866109162845068,
"grad_norm": 11.111972047827287,
"learning_rate": 6.661459092732035e-08,
"loss": 0.7342,
"step": 2465
},
{
"epoch": 0.8869705961694092,
"grad_norm": 17.456689961196204,
"learning_rate": 6.619703449916259e-08,
"loss": 0.7515,
"step": 2466
},
{
"epoch": 0.8873302760543117,
"grad_norm": 16.8347650742012,
"learning_rate": 6.578074605908002e-08,
"loss": 0.7949,
"step": 2467
},
{
"epoch": 0.8876899559392141,
"grad_norm": 23.662246677536164,
"learning_rate": 6.536572617234082e-08,
"loss": 0.7869,
"step": 2468
},
{
"epoch": 0.8880496358241166,
"grad_norm": 15.299606346328554,
"learning_rate": 6.495197540248998e-08,
"loss": 0.7176,
"step": 2469
},
{
"epoch": 0.8884093157090189,
"grad_norm": 8.941773527942491,
"learning_rate": 6.453949431134987e-08,
"loss": 0.7632,
"step": 2470
},
{
"epoch": 0.8887689955939214,
"grad_norm": 13.760823992932636,
"learning_rate": 6.412828345901811e-08,
"loss": 0.8206,
"step": 2471
},
{
"epoch": 0.8891286754788238,
"grad_norm": 29.754042300263702,
"learning_rate": 6.371834340386806e-08,
"loss": 0.8171,
"step": 2472
},
{
"epoch": 0.8894883553637263,
"grad_norm": 8.639639912360048,
"learning_rate": 6.330967470254689e-08,
"loss": 0.7482,
"step": 2473
},
{
"epoch": 0.8898480352486288,
"grad_norm": 23.380442945006624,
"learning_rate": 6.290227790997605e-08,
"loss": 0.7601,
"step": 2474
},
{
"epoch": 0.8902077151335311,
"grad_norm": 14.740993896157672,
"learning_rate": 6.249615357934967e-08,
"loss": 0.7157,
"step": 2475
},
{
"epoch": 0.8905673950184336,
"grad_norm": 14.583092846147524,
"learning_rate": 6.209130226213377e-08,
"loss": 0.8589,
"step": 2476
},
{
"epoch": 0.890927074903336,
"grad_norm": 8.402288272443613,
"learning_rate": 6.168772450806603e-08,
"loss": 0.8275,
"step": 2477
},
{
"epoch": 0.8912867547882385,
"grad_norm": 10.4688747751167,
"learning_rate": 6.128542086515498e-08,
"loss": 0.8381,
"step": 2478
},
{
"epoch": 0.8916464346731409,
"grad_norm": 15.43687344409759,
"learning_rate": 6.088439187967865e-08,
"loss": 0.7961,
"step": 2479
},
{
"epoch": 0.8920061145580434,
"grad_norm": 23.699575211475587,
"learning_rate": 6.048463809618443e-08,
"loss": 0.8376,
"step": 2480
},
{
"epoch": 0.8923657944429457,
"grad_norm": 10.61473470795305,
"learning_rate": 6.0086160057488e-08,
"loss": 0.7562,
"step": 2481
},
{
"epoch": 0.8927254743278482,
"grad_norm": 9.472523365558738,
"learning_rate": 5.968895830467324e-08,
"loss": 0.8776,
"step": 2482
},
{
"epoch": 0.8930851542127507,
"grad_norm": 11.49518915052341,
"learning_rate": 5.929303337709046e-08,
"loss": 0.7815,
"step": 2483
},
{
"epoch": 0.8934448340976531,
"grad_norm": 8.884363641553396,
"learning_rate": 5.88983858123564e-08,
"loss": 0.8092,
"step": 2484
},
{
"epoch": 0.8938045139825556,
"grad_norm": 24.567263812985427,
"learning_rate": 5.850501614635317e-08,
"loss": 0.8345,
"step": 2485
},
{
"epoch": 0.8941641938674579,
"grad_norm": 20.815878420590963,
"learning_rate": 5.8112924913227945e-08,
"loss": 0.7904,
"step": 2486
},
{
"epoch": 0.8945238737523604,
"grad_norm": 15.641619948346964,
"learning_rate": 5.772211264539162e-08,
"loss": 0.8015,
"step": 2487
},
{
"epoch": 0.8948835536372628,
"grad_norm": 10.234778936016037,
"learning_rate": 5.733257987351869e-08,
"loss": 0.8038,
"step": 2488
},
{
"epoch": 0.8952432335221653,
"grad_norm": 36.35850049341599,
"learning_rate": 5.694432712654595e-08,
"loss": 0.7995,
"step": 2489
},
{
"epoch": 0.8956029134070677,
"grad_norm": 7.360898163803578,
"learning_rate": 5.6557354931672465e-08,
"loss": 0.8698,
"step": 2490
},
{
"epoch": 0.8959625932919701,
"grad_norm": 10.859256879240384,
"learning_rate": 5.617166381435812e-08,
"loss": 0.728,
"step": 2491
},
{
"epoch": 0.8963222731768726,
"grad_norm": 12.035500160936694,
"learning_rate": 5.5787254298323426e-08,
"loss": 0.8241,
"step": 2492
},
{
"epoch": 0.896681953061775,
"grad_norm": 7.175535588198203,
"learning_rate": 5.540412690554841e-08,
"loss": 0.8192,
"step": 2493
},
{
"epoch": 0.8970416329466775,
"grad_norm": 9.941413830807381,
"learning_rate": 5.5022282156272806e-08,
"loss": 0.7613,
"step": 2494
},
{
"epoch": 0.8974013128315799,
"grad_norm": 17.16769052805302,
"learning_rate": 5.464172056899363e-08,
"loss": 0.7722,
"step": 2495
},
{
"epoch": 0.8977609927164824,
"grad_norm": 12.120269744481789,
"learning_rate": 5.4262442660466756e-08,
"loss": 0.7857,
"step": 2496
},
{
"epoch": 0.8981206726013847,
"grad_norm": 75.32373857015753,
"learning_rate": 5.388444894570377e-08,
"loss": 0.8388,
"step": 2497
},
{
"epoch": 0.8984803524862872,
"grad_norm": 14.802277158407454,
"learning_rate": 5.350773993797331e-08,
"loss": 0.841,
"step": 2498
},
{
"epoch": 0.8988400323711896,
"grad_norm": 13.58398817417819,
"learning_rate": 5.3132316148799095e-08,
"loss": 0.7859,
"step": 2499
},
{
"epoch": 0.8991997122560921,
"grad_norm": 9.139002292783852,
"learning_rate": 5.275817808796013e-08,
"loss": 0.7882,
"step": 2500
},
{
"epoch": 0.8995593921409946,
"grad_norm": 11.111461052429972,
"learning_rate": 5.23853262634889e-08,
"loss": 0.8377,
"step": 2501
},
{
"epoch": 0.8999190720258969,
"grad_norm": 8.200434480095561,
"learning_rate": 5.2013761181672e-08,
"loss": 0.7919,
"step": 2502
},
{
"epoch": 0.9002787519107994,
"grad_norm": 13.89542235334582,
"learning_rate": 5.164348334704849e-08,
"loss": 0.8154,
"step": 2503
},
{
"epoch": 0.9006384317957018,
"grad_norm": 8.047659453419161,
"learning_rate": 5.127449326240951e-08,
"loss": 0.7924,
"step": 2504
},
{
"epoch": 0.9009981116806043,
"grad_norm": 20.904000094620358,
"learning_rate": 5.09067914287975e-08,
"loss": 0.7235,
"step": 2505
},
{
"epoch": 0.9013577915655067,
"grad_norm": 9.98471380828884,
"learning_rate": 5.054037834550595e-08,
"loss": 0.799,
"step": 2506
},
{
"epoch": 0.9017174714504091,
"grad_norm": 8.296611008202895,
"learning_rate": 5.017525451007809e-08,
"loss": 0.779,
"step": 2507
},
{
"epoch": 0.9020771513353115,
"grad_norm": 10.39266899999923,
"learning_rate": 4.981142041830644e-08,
"loss": 0.8545,
"step": 2508
},
{
"epoch": 0.902436831220214,
"grad_norm": 7.858549897953844,
"learning_rate": 4.9448876564232464e-08,
"loss": 0.8008,
"step": 2509
},
{
"epoch": 0.9027965111051165,
"grad_norm": 23.344213960087007,
"learning_rate": 4.908762344014572e-08,
"loss": 0.7701,
"step": 2510
},
{
"epoch": 0.9031561909900189,
"grad_norm": 49.67751895688126,
"learning_rate": 4.87276615365827e-08,
"loss": 0.7661,
"step": 2511
},
{
"epoch": 0.9035158708749214,
"grad_norm": 9.000851802072338,
"learning_rate": 4.836899134232686e-08,
"loss": 0.8127,
"step": 2512
},
{
"epoch": 0.9038755507598237,
"grad_norm": 13.131390273242742,
"learning_rate": 4.801161334440762e-08,
"loss": 0.8028,
"step": 2513
},
{
"epoch": 0.9042352306447262,
"grad_norm": 11.852376589318085,
"learning_rate": 4.765552802809991e-08,
"loss": 0.791,
"step": 2514
},
{
"epoch": 0.9045949105296286,
"grad_norm": 10.068709792910127,
"learning_rate": 4.7300735876923184e-08,
"loss": 0.8158,
"step": 2515
},
{
"epoch": 0.9049545904145311,
"grad_norm": 15.319789847910473,
"learning_rate": 4.6947237372640945e-08,
"loss": 0.8258,
"step": 2516
},
{
"epoch": 0.9053142702994335,
"grad_norm": 17.361132961110712,
"learning_rate": 4.659503299526013e-08,
"loss": 0.8196,
"step": 2517
},
{
"epoch": 0.9056739501843359,
"grad_norm": 11.683250687452635,
"learning_rate": 4.624412322303017e-08,
"loss": 0.8444,
"step": 2518
},
{
"epoch": 0.9060336300692384,
"grad_norm": 14.754742107720682,
"learning_rate": 4.589450853244314e-08,
"loss": 0.7155,
"step": 2519
},
{
"epoch": 0.9063933099541408,
"grad_norm": 8.867267420272865,
"learning_rate": 4.554618939823207e-08,
"loss": 0.8608,
"step": 2520
},
{
"epoch": 0.9067529898390433,
"grad_norm": 10.106898952777634,
"learning_rate": 4.519916629337106e-08,
"loss": 0.8043,
"step": 2521
},
{
"epoch": 0.9071126697239457,
"grad_norm": 10.523182003142079,
"learning_rate": 4.485343968907396e-08,
"loss": 0.7779,
"step": 2522
},
{
"epoch": 0.9074723496088482,
"grad_norm": 22.917096510725628,
"learning_rate": 4.450901005479468e-08,
"loss": 0.7474,
"step": 2523
},
{
"epoch": 0.9078320294937505,
"grad_norm": 13.072147811346369,
"learning_rate": 4.416587785822568e-08,
"loss": 0.778,
"step": 2524
},
{
"epoch": 0.908191709378653,
"grad_norm": 11.485613445553305,
"learning_rate": 4.382404356529801e-08,
"loss": 0.7712,
"step": 2525
},
{
"epoch": 0.9085513892635554,
"grad_norm": 10.877208036313718,
"learning_rate": 4.3483507640179494e-08,
"loss": 0.8222,
"step": 2526
},
{
"epoch": 0.9089110691484579,
"grad_norm": 32.74604399494894,
"learning_rate": 4.3144270545275805e-08,
"loss": 0.8116,
"step": 2527
},
{
"epoch": 0.9092707490333604,
"grad_norm": 34.911386803795274,
"learning_rate": 4.280633274122858e-08,
"loss": 0.7851,
"step": 2528
},
{
"epoch": 0.9096304289182627,
"grad_norm": 23.12238563090395,
"learning_rate": 4.246969468691553e-08,
"loss": 0.8234,
"step": 2529
},
{
"epoch": 0.9099901088031652,
"grad_norm": 29.191829829834525,
"learning_rate": 4.213435683944866e-08,
"loss": 0.7927,
"step": 2530
},
{
"epoch": 0.9103497886880676,
"grad_norm": 24.646806193615582,
"learning_rate": 4.1800319654175406e-08,
"loss": 0.8315,
"step": 2531
},
{
"epoch": 0.9107094685729701,
"grad_norm": 22.859981327872294,
"learning_rate": 4.146758358467639e-08,
"loss": 0.8253,
"step": 2532
},
{
"epoch": 0.9110691484578725,
"grad_norm": 12.996291640216834,
"learning_rate": 4.113614908276608e-08,
"loss": 0.6909,
"step": 2533
},
{
"epoch": 0.9114288283427749,
"grad_norm": 79.01114401298018,
"learning_rate": 4.0806016598490703e-08,
"loss": 0.8135,
"step": 2534
},
{
"epoch": 0.9117885082276773,
"grad_norm": 19.90454201196664,
"learning_rate": 4.047718658012944e-08,
"loss": 0.7807,
"step": 2535
},
{
"epoch": 0.9121481881125798,
"grad_norm": 8.167197207827675,
"learning_rate": 4.0149659474192356e-08,
"loss": 0.7933,
"step": 2536
},
{
"epoch": 0.9125078679974823,
"grad_norm": 12.86960868138441,
"learning_rate": 3.982343572542068e-08,
"loss": 0.8294,
"step": 2537
},
{
"epoch": 0.9128675478823847,
"grad_norm": 21.000756246385816,
"learning_rate": 3.94985157767852e-08,
"loss": 0.8321,
"step": 2538
},
{
"epoch": 0.9132272277672872,
"grad_norm": 8.592256301355716,
"learning_rate": 3.917490006948698e-08,
"loss": 0.825,
"step": 2539
},
{
"epoch": 0.9135869076521895,
"grad_norm": 20.33574361050648,
"learning_rate": 3.885258904295574e-08,
"loss": 0.7844,
"step": 2540
},
{
"epoch": 0.913946587537092,
"grad_norm": 16.092620216955467,
"learning_rate": 3.853158313484994e-08,
"loss": 0.747,
"step": 2541
},
{
"epoch": 0.9143062674219944,
"grad_norm": 12.022595522312857,
"learning_rate": 3.8211882781055136e-08,
"loss": 0.8149,
"step": 2542
},
{
"epoch": 0.9146659473068969,
"grad_norm": 10.921442456182392,
"learning_rate": 3.789348841568496e-08,
"loss": 0.7515,
"step": 2543
},
{
"epoch": 0.9150256271917993,
"grad_norm": 9.541539259640993,
"learning_rate": 3.7576400471079015e-08,
"loss": 0.8507,
"step": 2544
},
{
"epoch": 0.9153853070767017,
"grad_norm": 11.742185837327472,
"learning_rate": 3.7260619377803673e-08,
"loss": 0.8257,
"step": 2545
},
{
"epoch": 0.9157449869616042,
"grad_norm": 10.38470288875268,
"learning_rate": 3.6946145564649813e-08,
"loss": 0.8143,
"step": 2546
},
{
"epoch": 0.9161046668465066,
"grad_norm": 52.44317807546952,
"learning_rate": 3.663297945863386e-08,
"loss": 0.8216,
"step": 2547
},
{
"epoch": 0.9164643467314091,
"grad_norm": 16.112270463265936,
"learning_rate": 3.632112148499644e-08,
"loss": 0.8474,
"step": 2548
},
{
"epoch": 0.9168240266163115,
"grad_norm": 17.832343118912398,
"learning_rate": 3.6010572067201814e-08,
"loss": 0.8092,
"step": 2549
},
{
"epoch": 0.917183706501214,
"grad_norm": 10.2969972546708,
"learning_rate": 3.570133162693734e-08,
"loss": 0.8921,
"step": 2550
},
{
"epoch": 0.9175433863861163,
"grad_norm": 15.341033816609158,
"learning_rate": 3.5393400584113e-08,
"loss": 0.8019,
"step": 2551
},
{
"epoch": 0.9179030662710188,
"grad_norm": 28.508299636133415,
"learning_rate": 3.508677935686077e-08,
"loss": 0.8667,
"step": 2552
},
{
"epoch": 0.9182627461559212,
"grad_norm": 12.249861652869686,
"learning_rate": 3.478146836153417e-08,
"loss": 0.7562,
"step": 2553
},
{
"epoch": 0.9186224260408237,
"grad_norm": 9.324071352603683,
"learning_rate": 3.4477468012707455e-08,
"loss": 0.7514,
"step": 2554
},
{
"epoch": 0.9189821059257262,
"grad_norm": 9.837763647360559,
"learning_rate": 3.41747787231752e-08,
"loss": 0.8081,
"step": 2555
},
{
"epoch": 0.9193417858106285,
"grad_norm": 11.02043622469786,
"learning_rate": 3.387340090395163e-08,
"loss": 0.7744,
"step": 2556
},
{
"epoch": 0.919701465695531,
"grad_norm": 22.970139968632665,
"learning_rate": 3.357333496427039e-08,
"loss": 0.7736,
"step": 2557
},
{
"epoch": 0.9200611455804334,
"grad_norm": 13.114721774192239,
"learning_rate": 3.3274581311583783e-08,
"loss": 0.7807,
"step": 2558
},
{
"epoch": 0.9204208254653359,
"grad_norm": 10.921941898637904,
"learning_rate": 3.2977140351561736e-08,
"loss": 0.7888,
"step": 2559
},
{
"epoch": 0.9207805053502383,
"grad_norm": 9.11820697290462,
"learning_rate": 3.2681012488092185e-08,
"loss": 0.8308,
"step": 2560
},
{
"epoch": 0.9211401852351407,
"grad_norm": 9.553285009017845,
"learning_rate": 3.2386198123279915e-08,
"loss": 0.7925,
"step": 2561
},
{
"epoch": 0.9214998651200431,
"grad_norm": 12.302523221993972,
"learning_rate": 3.2092697657446044e-08,
"loss": 0.821,
"step": 2562
},
{
"epoch": 0.9218595450049456,
"grad_norm": 14.912768413919956,
"learning_rate": 3.180051148912755e-08,
"loss": 0.7751,
"step": 2563
},
{
"epoch": 0.9222192248898481,
"grad_norm": 8.69058806479011,
"learning_rate": 3.150964001507694e-08,
"loss": 0.74,
"step": 2564
},
{
"epoch": 0.9225789047747505,
"grad_norm": 8.300323424451845,
"learning_rate": 3.122008363026141e-08,
"loss": 0.7701,
"step": 2565
},
{
"epoch": 0.922938584659653,
"grad_norm": 18.17822427408239,
"learning_rate": 3.0931842727862536e-08,
"loss": 0.7585,
"step": 2566
},
{
"epoch": 0.9232982645445553,
"grad_norm": 15.417981907475573,
"learning_rate": 3.064491769927535e-08,
"loss": 0.8259,
"step": 2567
},
{
"epoch": 0.9236579444294578,
"grad_norm": 7.498723352819035,
"learning_rate": 3.035930893410843e-08,
"loss": 0.7875,
"step": 2568
},
{
"epoch": 0.9240176243143602,
"grad_norm": 7.869261716229639,
"learning_rate": 3.007501682018288e-08,
"loss": 0.7703,
"step": 2569
},
{
"epoch": 0.9243773041992627,
"grad_norm": 39.539952172595655,
"learning_rate": 2.9792041743532002e-08,
"loss": 0.8358,
"step": 2570
},
{
"epoch": 0.924736984084165,
"grad_norm": 9.922858274516205,
"learning_rate": 2.9510384088400674e-08,
"loss": 0.8146,
"step": 2571
},
{
"epoch": 0.9250966639690675,
"grad_norm": 18.170136855272634,
"learning_rate": 2.9230044237244732e-08,
"loss": 0.8725,
"step": 2572
},
{
"epoch": 0.92545634385397,
"grad_norm": 10.762704827491863,
"learning_rate": 2.895102257073101e-08,
"loss": 0.7705,
"step": 2573
},
{
"epoch": 0.9258160237388724,
"grad_norm": 22.28881961766892,
"learning_rate": 2.86733194677361e-08,
"loss": 0.8354,
"step": 2574
},
{
"epoch": 0.9261757036237749,
"grad_norm": 12.67808461853552,
"learning_rate": 2.8396935305346036e-08,
"loss": 0.8081,
"step": 2575
},
{
"epoch": 0.9265353835086773,
"grad_norm": 10.51996998799239,
"learning_rate": 2.812187045885628e-08,
"loss": 0.8434,
"step": 2576
},
{
"epoch": 0.9268950633935797,
"grad_norm": 16.685006494100037,
"learning_rate": 2.7848125301770498e-08,
"loss": 0.8404,
"step": 2577
},
{
"epoch": 0.9272547432784821,
"grad_norm": 17.391189679151648,
"learning_rate": 2.757570020580069e-08,
"loss": 0.8016,
"step": 2578
},
{
"epoch": 0.9276144231633846,
"grad_norm": 12.200773941293791,
"learning_rate": 2.730459554086595e-08,
"loss": 0.8153,
"step": 2579
},
{
"epoch": 0.927974103048287,
"grad_norm": 9.017422390363933,
"learning_rate": 2.7034811675092806e-08,
"loss": 0.8482,
"step": 2580
},
{
"epoch": 0.9283337829331895,
"grad_norm": 14.335525486714841,
"learning_rate": 2.676634897481389e-08,
"loss": 0.7806,
"step": 2581
},
{
"epoch": 0.928693462818092,
"grad_norm": 10.074582781397503,
"learning_rate": 2.649920780456849e-08,
"loss": 0.8156,
"step": 2582
},
{
"epoch": 0.9290531427029943,
"grad_norm": 48.25905128199228,
"learning_rate": 2.6233388527100773e-08,
"loss": 0.8383,
"step": 2583
},
{
"epoch": 0.9294128225878968,
"grad_norm": 13.313452109675913,
"learning_rate": 2.5968891503360235e-08,
"loss": 0.7692,
"step": 2584
},
{
"epoch": 0.9297725024727992,
"grad_norm": 21.51215656478663,
"learning_rate": 2.570571709250069e-08,
"loss": 0.8186,
"step": 2585
},
{
"epoch": 0.9301321823577017,
"grad_norm": 10.877961635645919,
"learning_rate": 2.5443865651880615e-08,
"loss": 0.76,
"step": 2586
},
{
"epoch": 0.930491862242604,
"grad_norm": 9.051919932920649,
"learning_rate": 2.5183337537061365e-08,
"loss": 0.7774,
"step": 2587
},
{
"epoch": 0.9308515421275065,
"grad_norm": 15.429443230815407,
"learning_rate": 2.492413310180763e-08,
"loss": 0.8207,
"step": 2588
},
{
"epoch": 0.9312112220124089,
"grad_norm": 11.971056851995911,
"learning_rate": 2.4666252698086864e-08,
"loss": 0.7847,
"step": 2589
},
{
"epoch": 0.9315709018973114,
"grad_norm": 8.886524433373074,
"learning_rate": 2.4409696676068513e-08,
"loss": 0.7715,
"step": 2590
},
{
"epoch": 0.9319305817822139,
"grad_norm": 8.932491424444477,
"learning_rate": 2.415446538412358e-08,
"loss": 0.7326,
"step": 2591
},
{
"epoch": 0.9322902616671163,
"grad_norm": 24.838760776155556,
"learning_rate": 2.3900559168824498e-08,
"loss": 0.8226,
"step": 2592
},
{
"epoch": 0.9326499415520187,
"grad_norm": 10.184889834390296,
"learning_rate": 2.3647978374944032e-08,
"loss": 0.8067,
"step": 2593
},
{
"epoch": 0.9330096214369211,
"grad_norm": 20.39934160539424,
"learning_rate": 2.3396723345455726e-08,
"loss": 0.7883,
"step": 2594
},
{
"epoch": 0.9333693013218236,
"grad_norm": 34.16356835417824,
"learning_rate": 2.3146794421532556e-08,
"loss": 0.7415,
"step": 2595
},
{
"epoch": 0.933728981206726,
"grad_norm": 8.381718901793608,
"learning_rate": 2.2898191942546608e-08,
"loss": 0.7556,
"step": 2596
},
{
"epoch": 0.9340886610916285,
"grad_norm": 8.969461130065204,
"learning_rate": 2.2650916246069297e-08,
"loss": 0.8207,
"step": 2597
},
{
"epoch": 0.9344483409765308,
"grad_norm": 16.188778153796065,
"learning_rate": 2.2404967667870143e-08,
"loss": 0.7943,
"step": 2598
},
{
"epoch": 0.9348080208614333,
"grad_norm": 17.95530480731631,
"learning_rate": 2.2160346541916674e-08,
"loss": 0.7264,
"step": 2599
},
{
"epoch": 0.9351677007463357,
"grad_norm": 24.190170970831844,
"learning_rate": 2.1917053200374068e-08,
"loss": 0.8199,
"step": 2600
},
{
"epoch": 0.9355273806312382,
"grad_norm": 10.123026970672111,
"learning_rate": 2.1675087973603957e-08,
"loss": 0.8274,
"step": 2601
},
{
"epoch": 0.9358870605161407,
"grad_norm": 14.224219466589894,
"learning_rate": 2.1434451190165292e-08,
"loss": 0.8012,
"step": 2602
},
{
"epoch": 0.936246740401043,
"grad_norm": 10.600627557539825,
"learning_rate": 2.1195143176812812e-08,
"loss": 0.7483,
"step": 2603
},
{
"epoch": 0.9366064202859455,
"grad_norm": 13.072901545139233,
"learning_rate": 2.0957164258497028e-08,
"loss": 0.7783,
"step": 2604
},
{
"epoch": 0.9369661001708479,
"grad_norm": 17.476053436586472,
"learning_rate": 2.072051475836334e-08,
"loss": 0.7654,
"step": 2605
},
{
"epoch": 0.9373257800557504,
"grad_norm": 10.49174050370654,
"learning_rate": 2.0485194997752696e-08,
"loss": 0.8084,
"step": 2606
},
{
"epoch": 0.9376854599406528,
"grad_norm": 12.951335904589302,
"learning_rate": 2.0251205296199613e-08,
"loss": 0.7594,
"step": 2607
},
{
"epoch": 0.9380451398255553,
"grad_norm": 14.401952391066391,
"learning_rate": 2.0018545971433486e-08,
"loss": 0.8313,
"step": 2608
},
{
"epoch": 0.9384048197104576,
"grad_norm": 10.086198937724093,
"learning_rate": 1.978721733937605e-08,
"loss": 0.752,
"step": 2609
},
{
"epoch": 0.9387644995953601,
"grad_norm": 16.618758204083733,
"learning_rate": 1.955721971414326e-08,
"loss": 0.7926,
"step": 2610
},
{
"epoch": 0.9391241794802626,
"grad_norm": 11.248267278277176,
"learning_rate": 1.932855340804296e-08,
"loss": 0.7707,
"step": 2611
},
{
"epoch": 0.939483859365165,
"grad_norm": 27.38329889632666,
"learning_rate": 1.9101218731575776e-08,
"loss": 0.7865,
"step": 2612
},
{
"epoch": 0.9398435392500675,
"grad_norm": 19.212814276037737,
"learning_rate": 1.8875215993433448e-08,
"loss": 0.8443,
"step": 2613
},
{
"epoch": 0.9402032191349698,
"grad_norm": 14.275362212978525,
"learning_rate": 1.8650545500499936e-08,
"loss": 0.8491,
"step": 2614
},
{
"epoch": 0.9405628990198723,
"grad_norm": 14.319193665650705,
"learning_rate": 1.8427207557849434e-08,
"loss": 0.862,
"step": 2615
},
{
"epoch": 0.9409225789047747,
"grad_norm": 9.750968215322466,
"learning_rate": 1.820520246874746e-08,
"loss": 0.7874,
"step": 2616
},
{
"epoch": 0.9412822587896772,
"grad_norm": 10.999793069766183,
"learning_rate": 1.7984530534648878e-08,
"loss": 0.8975,
"step": 2617
},
{
"epoch": 0.9416419386745796,
"grad_norm": 9.934147693276143,
"learning_rate": 1.7765192055198886e-08,
"loss": 0.7585,
"step": 2618
},
{
"epoch": 0.9420016185594821,
"grad_norm": 7.035081749310813,
"learning_rate": 1.7547187328231572e-08,
"loss": 0.8036,
"step": 2619
},
{
"epoch": 0.9423612984443845,
"grad_norm": 14.465406847865054,
"learning_rate": 1.73305166497707e-08,
"loss": 0.7117,
"step": 2620
},
{
"epoch": 0.9427209783292869,
"grad_norm": 13.419580416553304,
"learning_rate": 1.711518031402748e-08,
"loss": 0.846,
"step": 2621
},
{
"epoch": 0.9430806582141894,
"grad_norm": 24.297195597575204,
"learning_rate": 1.6901178613402124e-08,
"loss": 0.7531,
"step": 2622
},
{
"epoch": 0.9434403380990918,
"grad_norm": 14.640786860392414,
"learning_rate": 1.668851183848219e-08,
"loss": 0.7826,
"step": 2623
},
{
"epoch": 0.9438000179839943,
"grad_norm": 17.94888635356105,
"learning_rate": 1.647718027804279e-08,
"loss": 0.7278,
"step": 2624
},
{
"epoch": 0.9441596978688966,
"grad_norm": 59.51196881712969,
"learning_rate": 1.6267184219045604e-08,
"loss": 0.8323,
"step": 2625
},
{
"epoch": 0.9445193777537991,
"grad_norm": 13.566168202037804,
"learning_rate": 1.6058523946639425e-08,
"loss": 0.845,
"step": 2626
},
{
"epoch": 0.9448790576387015,
"grad_norm": 31.437962719597333,
"learning_rate": 1.5851199744158607e-08,
"loss": 0.8412,
"step": 2627
},
{
"epoch": 0.945238737523604,
"grad_norm": 24.101411848318847,
"learning_rate": 1.5645211893123845e-08,
"loss": 0.8179,
"step": 2628
},
{
"epoch": 0.9455984174085065,
"grad_norm": 13.703323167891854,
"learning_rate": 1.5440560673240733e-08,
"loss": 0.7231,
"step": 2629
},
{
"epoch": 0.9459580972934089,
"grad_norm": 18.103807635943653,
"learning_rate": 1.5237246362400312e-08,
"loss": 0.7647,
"step": 2630
},
{
"epoch": 0.9463177771783113,
"grad_norm": 7.46822224629538,
"learning_rate": 1.503526923667797e-08,
"loss": 0.8045,
"step": 2631
},
{
"epoch": 0.9466774570632137,
"grad_norm": 23.408064292569758,
"learning_rate": 1.4834629570333546e-08,
"loss": 0.8662,
"step": 2632
},
{
"epoch": 0.9470371369481162,
"grad_norm": 34.23282671327533,
"learning_rate": 1.4635327635810768e-08,
"loss": 0.6998,
"step": 2633
},
{
"epoch": 0.9473968168330186,
"grad_norm": 45.904877302306865,
"learning_rate": 1.4437363703736716e-08,
"loss": 0.7153,
"step": 2634
},
{
"epoch": 0.9477564967179211,
"grad_norm": 13.096331272320922,
"learning_rate": 1.4240738042921586e-08,
"loss": 0.802,
"step": 2635
},
{
"epoch": 0.9481161766028234,
"grad_norm": 13.455777039167259,
"learning_rate": 1.4045450920358914e-08,
"loss": 0.8362,
"step": 2636
},
{
"epoch": 0.9484758564877259,
"grad_norm": 164.2037460425521,
"learning_rate": 1.385150260122403e-08,
"loss": 0.834,
"step": 2637
},
{
"epoch": 0.9488355363726284,
"grad_norm": 18.680018112786254,
"learning_rate": 1.3658893348874712e-08,
"loss": 0.8363,
"step": 2638
},
{
"epoch": 0.9491952162575308,
"grad_norm": 9.617601101777366,
"learning_rate": 1.3467623424850083e-08,
"loss": 0.7966,
"step": 2639
},
{
"epoch": 0.9495548961424333,
"grad_norm": 16.39249663420055,
"learning_rate": 1.3277693088871166e-08,
"loss": 0.7445,
"step": 2640
},
{
"epoch": 0.9499145760273356,
"grad_norm": 56.34814949802547,
"learning_rate": 1.3089102598839441e-08,
"loss": 0.7713,
"step": 2641
},
{
"epoch": 0.9502742559122381,
"grad_norm": 15.464516964279793,
"learning_rate": 1.2901852210837505e-08,
"loss": 0.7829,
"step": 2642
},
{
"epoch": 0.9506339357971405,
"grad_norm": 8.517435737858888,
"learning_rate": 1.271594217912797e-08,
"loss": 0.7735,
"step": 2643
},
{
"epoch": 0.950993615682043,
"grad_norm": 12.980233129690735,
"learning_rate": 1.2531372756153458e-08,
"loss": 0.7528,
"step": 2644
},
{
"epoch": 0.9513532955669454,
"grad_norm": 8.473487915051534,
"learning_rate": 1.2348144192536269e-08,
"loss": 0.9364,
"step": 2645
},
{
"epoch": 0.9517129754518479,
"grad_norm": 13.7834439599011,
"learning_rate": 1.216625673707794e-08,
"loss": 0.7419,
"step": 2646
},
{
"epoch": 0.9520726553367503,
"grad_norm": 12.823487237000318,
"learning_rate": 1.1985710636759128e-08,
"loss": 0.7881,
"step": 2647
},
{
"epoch": 0.9524323352216527,
"grad_norm": 223.59177883502042,
"learning_rate": 1.1806506136738614e-08,
"loss": 0.7553,
"step": 2648
},
{
"epoch": 0.9527920151065552,
"grad_norm": 7.401975669691004,
"learning_rate": 1.1628643480354083e-08,
"loss": 0.8268,
"step": 2649
},
{
"epoch": 0.9531516949914576,
"grad_norm": 16.516496252813177,
"learning_rate": 1.1452122909120788e-08,
"loss": 0.8058,
"step": 2650
},
{
"epoch": 0.9535113748763601,
"grad_norm": 12.520635742432743,
"learning_rate": 1.1276944662731658e-08,
"loss": 0.8203,
"step": 2651
},
{
"epoch": 0.9538710547612624,
"grad_norm": 7.273339324226177,
"learning_rate": 1.1103108979056862e-08,
"loss": 0.759,
"step": 2652
},
{
"epoch": 0.9542307346461649,
"grad_norm": 12.221006199133758,
"learning_rate": 1.0930616094143696e-08,
"loss": 0.843,
"step": 2653
},
{
"epoch": 0.9545904145310673,
"grad_norm": 10.982164990366092,
"learning_rate": 1.0759466242215909e-08,
"loss": 0.7948,
"step": 2654
},
{
"epoch": 0.9549500944159698,
"grad_norm": 34.0030866637079,
"learning_rate": 1.058965965567371e-08,
"loss": 0.7652,
"step": 2655
},
{
"epoch": 0.9553097743008723,
"grad_norm": 15.747030639242206,
"learning_rate": 1.0421196565093216e-08,
"loss": 0.8198,
"step": 2656
},
{
"epoch": 0.9556694541857746,
"grad_norm": 24.440296459219436,
"learning_rate": 1.0254077199226552e-08,
"loss": 0.8896,
"step": 2657
},
{
"epoch": 0.9560291340706771,
"grad_norm": 43.92761632601037,
"learning_rate": 1.0088301785000753e-08,
"loss": 0.7735,
"step": 2658
},
{
"epoch": 0.9563888139555795,
"grad_norm": 7.338427187913148,
"learning_rate": 9.923870547518309e-09,
"loss": 0.7591,
"step": 2659
},
{
"epoch": 0.956748493840482,
"grad_norm": 10.638046454075525,
"learning_rate": 9.760783710056175e-09,
"loss": 0.8222,
"step": 2660
},
{
"epoch": 0.9571081737253844,
"grad_norm": 8.979052132037536,
"learning_rate": 9.599041494066207e-09,
"loss": 0.8273,
"step": 2661
},
{
"epoch": 0.9574678536102869,
"grad_norm": 23.602876418783797,
"learning_rate": 9.438644119174055e-09,
"loss": 0.8427,
"step": 2662
},
{
"epoch": 0.9578275334951892,
"grad_norm": 19.131944211146887,
"learning_rate": 9.279591803179277e-09,
"loss": 0.8288,
"step": 2663
},
{
"epoch": 0.9581872133800917,
"grad_norm": 9.059579836541714,
"learning_rate": 9.12188476205522e-09,
"loss": 0.8367,
"step": 2664
},
{
"epoch": 0.9585468932649942,
"grad_norm": 11.933115901091556,
"learning_rate": 8.965523209948366e-09,
"loss": 0.7535,
"step": 2665
},
{
"epoch": 0.9589065731498966,
"grad_norm": 14.054611295402403,
"learning_rate": 8.810507359178321e-09,
"loss": 0.8375,
"step": 2666
},
{
"epoch": 0.9592662530347991,
"grad_norm": 8.834709100235033,
"learning_rate": 8.656837420237151e-09,
"loss": 0.8657,
"step": 2667
},
{
"epoch": 0.9596259329197014,
"grad_norm": 10.745709201482285,
"learning_rate": 8.504513601789386e-09,
"loss": 0.7891,
"step": 2668
},
{
"epoch": 0.9599856128046039,
"grad_norm": 7.514508704592103,
"learning_rate": 8.353536110672133e-09,
"loss": 0.7951,
"step": 2669
},
{
"epoch": 0.9603452926895063,
"grad_norm": 10.628735366005465,
"learning_rate": 8.203905151893731e-09,
"loss": 0.8347,
"step": 2670
},
{
"epoch": 0.9607049725744088,
"grad_norm": 9.665634328411453,
"learning_rate": 8.055620928634433e-09,
"loss": 0.836,
"step": 2671
},
{
"epoch": 0.9610646524593112,
"grad_norm": 11.385271385247645,
"learning_rate": 7.90868364224584e-09,
"loss": 0.784,
"step": 2672
},
{
"epoch": 0.9614243323442137,
"grad_norm": 11.77954267090191,
"learning_rate": 7.76309349225035e-09,
"loss": 0.7976,
"step": 2673
},
{
"epoch": 0.9617840122291161,
"grad_norm": 96.72406629090244,
"learning_rate": 7.618850676341381e-09,
"loss": 0.7697,
"step": 2674
},
{
"epoch": 0.9621436921140185,
"grad_norm": 19.978249531416974,
"learning_rate": 7.475955390382483e-09,
"loss": 0.7746,
"step": 2675
},
{
"epoch": 0.962503371998921,
"grad_norm": 17.644446773428314,
"learning_rate": 7.334407828407885e-09,
"loss": 0.824,
"step": 2676
},
{
"epoch": 0.9628630518838234,
"grad_norm": 8.40972118443302,
"learning_rate": 7.1942081826215085e-09,
"loss": 0.6929,
"step": 2677
},
{
"epoch": 0.9632227317687259,
"grad_norm": 17.744908237982273,
"learning_rate": 7.0553566433968485e-09,
"loss": 0.7624,
"step": 2678
},
{
"epoch": 0.9635824116536282,
"grad_norm": 15.543084120458476,
"learning_rate": 6.917853399277196e-09,
"loss": 0.8241,
"step": 2679
},
{
"epoch": 0.9639420915385307,
"grad_norm": 7.202723902595622,
"learning_rate": 6.781698636974531e-09,
"loss": 0.737,
"step": 2680
},
{
"epoch": 0.9643017714234331,
"grad_norm": 15.979553321957102,
"learning_rate": 6.646892541370408e-09,
"loss": 0.7976,
"step": 2681
},
{
"epoch": 0.9646614513083356,
"grad_norm": 45.082630303411385,
"learning_rate": 6.513435295514402e-09,
"loss": 0.774,
"step": 2682
},
{
"epoch": 0.9650211311932381,
"grad_norm": 9.931768652353941,
"learning_rate": 6.381327080625109e-09,
"loss": 0.8188,
"step": 2683
},
{
"epoch": 0.9653808110781404,
"grad_norm": 8.624359023921118,
"learning_rate": 6.250568076088814e-09,
"loss": 0.8211,
"step": 2684
},
{
"epoch": 0.9657404909630429,
"grad_norm": 15.123515096117305,
"learning_rate": 6.121158459460041e-09,
"loss": 0.8049,
"step": 2685
},
{
"epoch": 0.9661001708479453,
"grad_norm": 11.989707609085379,
"learning_rate": 5.9930984064608946e-09,
"loss": 0.7931,
"step": 2686
},
{
"epoch": 0.9664598507328478,
"grad_norm": 8.768179389271808,
"learning_rate": 5.866388090980945e-09,
"loss": 0.7865,
"step": 2687
},
{
"epoch": 0.9668195306177502,
"grad_norm": 18.247534235534676,
"learning_rate": 5.741027685077005e-09,
"loss": 0.716,
"step": 2688
},
{
"epoch": 0.9671792105026527,
"grad_norm": 13.960685103084211,
"learning_rate": 5.61701735897302e-09,
"loss": 0.8079,
"step": 2689
},
{
"epoch": 0.967538890387555,
"grad_norm": 10.893739858824933,
"learning_rate": 5.494357281059403e-09,
"loss": 0.8029,
"step": 2690
},
{
"epoch": 0.9678985702724575,
"grad_norm": 20.614764178998072,
"learning_rate": 5.373047617893478e-09,
"loss": 0.7982,
"step": 2691
},
{
"epoch": 0.96825825015736,
"grad_norm": 11.193102120432794,
"learning_rate": 5.253088534198258e-09,
"loss": 0.772,
"step": 2692
},
{
"epoch": 0.9686179300422624,
"grad_norm": 9.543764352530816,
"learning_rate": 5.134480192863666e-09,
"loss": 0.7926,
"step": 2693
},
{
"epoch": 0.9689776099271649,
"grad_norm": 10.39901130663553,
"learning_rate": 5.01722275494465e-09,
"loss": 0.8168,
"step": 2694
},
{
"epoch": 0.9693372898120672,
"grad_norm": 20.38370112308751,
"learning_rate": 4.9013163796626234e-09,
"loss": 0.8422,
"step": 2695
},
{
"epoch": 0.9696969696969697,
"grad_norm": 16.007176538189192,
"learning_rate": 4.78676122440369e-09,
"loss": 0.8267,
"step": 2696
},
{
"epoch": 0.9700566495818721,
"grad_norm": 10.072443968941924,
"learning_rate": 4.6735574447195335e-09,
"loss": 0.8246,
"step": 2697
},
{
"epoch": 0.9704163294667746,
"grad_norm": 14.511353166731903,
"learning_rate": 4.561705194326748e-09,
"loss": 0.843,
"step": 2698
},
{
"epoch": 0.970776009351677,
"grad_norm": 13.514019727821859,
"learning_rate": 4.4512046251070636e-09,
"loss": 0.803,
"step": 2699
},
{
"epoch": 0.9711356892365794,
"grad_norm": 7.232606061135063,
"learning_rate": 4.342055887106011e-09,
"loss": 0.7693,
"step": 2700
},
{
"epoch": 0.9714953691214819,
"grad_norm": 8.87084975584444,
"learning_rate": 4.234259128534368e-09,
"loss": 0.7394,
"step": 2701
},
{
"epoch": 0.9718550490063843,
"grad_norm": 10.156862890036118,
"learning_rate": 4.12781449576638e-09,
"loss": 0.8209,
"step": 2702
},
{
"epoch": 0.9722147288912868,
"grad_norm": 9.317641827650974,
"learning_rate": 4.022722133340873e-09,
"loss": 0.7536,
"step": 2703
},
{
"epoch": 0.9725744087761892,
"grad_norm": 16.558863139113416,
"learning_rate": 3.918982183960029e-09,
"loss": 0.6953,
"step": 2704
},
{
"epoch": 0.9729340886610917,
"grad_norm": 14.54925389895523,
"learning_rate": 3.816594788489835e-09,
"loss": 0.7382,
"step": 2705
},
{
"epoch": 0.973293768545994,
"grad_norm": 10.300577211253348,
"learning_rate": 3.715560085959524e-09,
"loss": 0.7824,
"step": 2706
},
{
"epoch": 0.9736534484308965,
"grad_norm": 49.51727220703054,
"learning_rate": 3.615878213561796e-09,
"loss": 0.7771,
"step": 2707
},
{
"epoch": 0.9740131283157989,
"grad_norm": 19.386477701053455,
"learning_rate": 3.5175493066521566e-09,
"loss": 0.7306,
"step": 2708
},
{
"epoch": 0.9743728082007014,
"grad_norm": 10.11442472138445,
"learning_rate": 3.4205734987488022e-09,
"loss": 0.8273,
"step": 2709
},
{
"epoch": 0.9747324880856039,
"grad_norm": 9.448581140496838,
"learning_rate": 3.3249509215330653e-09,
"loss": 0.7171,
"step": 2710
},
{
"epoch": 0.9750921679705062,
"grad_norm": 10.047085026998909,
"learning_rate": 3.230681704848415e-09,
"loss": 0.7713,
"step": 2711
},
{
"epoch": 0.9754518478554087,
"grad_norm": 28.940520628953887,
"learning_rate": 3.137765976700679e-09,
"loss": 0.7832,
"step": 2712
},
{
"epoch": 0.9758115277403111,
"grad_norm": 9.672876193912304,
"learning_rate": 3.0462038632577126e-09,
"loss": 0.8147,
"step": 2713
},
{
"epoch": 0.9761712076252136,
"grad_norm": 21.468780176500793,
"learning_rate": 2.9559954888497273e-09,
"loss": 0.8169,
"step": 2714
},
{
"epoch": 0.976530887510116,
"grad_norm": 8.441217808841902,
"learning_rate": 2.867140975968185e-09,
"loss": 0.7538,
"step": 2715
},
{
"epoch": 0.9768905673950184,
"grad_norm": 23.627273043331684,
"learning_rate": 2.7796404452666842e-09,
"loss": 0.8236,
"step": 2716
},
{
"epoch": 0.9772502472799208,
"grad_norm": 14.339270513718278,
"learning_rate": 2.6934940155598497e-09,
"loss": 0.783,
"step": 2717
},
{
"epoch": 0.9776099271648233,
"grad_norm": 42.18713808719393,
"learning_rate": 2.608701803823998e-09,
"loss": 0.8042,
"step": 2718
},
{
"epoch": 0.9779696070497258,
"grad_norm": 58.728909365298485,
"learning_rate": 2.5252639251961415e-09,
"loss": 0.7737,
"step": 2719
},
{
"epoch": 0.9783292869346282,
"grad_norm": 47.30809751767778,
"learning_rate": 2.4431804929746504e-09,
"loss": 0.7958,
"step": 2720
},
{
"epoch": 0.9786889668195307,
"grad_norm": 9.998818970646738,
"learning_rate": 2.3624516186186994e-09,
"loss": 0.7902,
"step": 2721
},
{
"epoch": 0.979048646704433,
"grad_norm": 54.8025412208281,
"learning_rate": 2.2830774117478245e-09,
"loss": 0.7751,
"step": 2722
},
{
"epoch": 0.9794083265893355,
"grad_norm": 9.736425047874677,
"learning_rate": 2.2050579801424772e-09,
"loss": 0.7783,
"step": 2723
},
{
"epoch": 0.9797680064742379,
"grad_norm": 15.104438925217062,
"learning_rate": 2.128393429743247e-09,
"loss": 0.8454,
"step": 2724
},
{
"epoch": 0.9801276863591404,
"grad_norm": 34.20483043624472,
"learning_rate": 2.053083864651084e-09,
"loss": 0.8331,
"step": 2725
},
{
"epoch": 0.9804873662440428,
"grad_norm": 9.107948696828554,
"learning_rate": 1.9791293871269655e-09,
"loss": 0.7865,
"step": 2726
},
{
"epoch": 0.9808470461289452,
"grad_norm": 10.612138564329928,
"learning_rate": 1.9065300975917853e-09,
"loss": 0.7857,
"step": 2727
},
{
"epoch": 0.9812067260138477,
"grad_norm": 12.557314133584349,
"learning_rate": 1.8352860946265758e-09,
"loss": 0.8363,
"step": 2728
},
{
"epoch": 0.9815664058987501,
"grad_norm": 21.292533606395093,
"learning_rate": 1.7653974749715083e-09,
"loss": 0.7834,
"step": 2729
},
{
"epoch": 0.9819260857836526,
"grad_norm": 10.652717987422117,
"learning_rate": 1.696864333526893e-09,
"loss": 0.8064,
"step": 2730
},
{
"epoch": 0.982285765668555,
"grad_norm": 14.96513417679561,
"learning_rate": 1.6296867633519561e-09,
"loss": 0.8403,
"step": 2731
},
{
"epoch": 0.9826454455534575,
"grad_norm": 10.511363408813793,
"learning_rate": 1.5638648556656199e-09,
"loss": 0.8613,
"step": 2732
},
{
"epoch": 0.9830051254383598,
"grad_norm": 16.133962245000316,
"learning_rate": 1.499398699845722e-09,
"loss": 0.8369,
"step": 2733
},
{
"epoch": 0.9833648053232623,
"grad_norm": 12.810716009645525,
"learning_rate": 1.4362883834294625e-09,
"loss": 0.7847,
"step": 2734
},
{
"epoch": 0.9837244852081647,
"grad_norm": 13.472511295730412,
"learning_rate": 1.3745339921126253e-09,
"loss": 0.7475,
"step": 2735
},
{
"epoch": 0.9840841650930672,
"grad_norm": 15.788414497991974,
"learning_rate": 1.3141356097500222e-09,
"loss": 0.8171,
"step": 2736
},
{
"epoch": 0.9844438449779697,
"grad_norm": 110.75185154489911,
"learning_rate": 1.2550933183550494e-09,
"loss": 0.8388,
"step": 2737
},
{
"epoch": 0.984803524862872,
"grad_norm": 20.327737292045065,
"learning_rate": 1.1974071980999089e-09,
"loss": 0.8154,
"step": 2738
},
{
"epoch": 0.9851632047477745,
"grad_norm": 13.453153627263834,
"learning_rate": 1.1410773273151653e-09,
"loss": 0.7984,
"step": 2739
},
{
"epoch": 0.9855228846326769,
"grad_norm": 25.942566003142183,
"learning_rate": 1.0861037824896336e-09,
"loss": 0.8358,
"step": 2740
},
{
"epoch": 0.9858825645175794,
"grad_norm": 10.841796001652206,
"learning_rate": 1.0324866382707131e-09,
"loss": 0.7878,
"step": 2741
},
{
"epoch": 0.9862422444024818,
"grad_norm": 20.25552578544338,
"learning_rate": 9.802259674637214e-10,
"loss": 0.791,
"step": 2742
},
{
"epoch": 0.9866019242873842,
"grad_norm": 9.384407310708053,
"learning_rate": 9.293218410320047e-10,
"loss": 0.8044,
"step": 2743
},
{
"epoch": 0.9869616041722866,
"grad_norm": 10.552914433769093,
"learning_rate": 8.797743280972714e-10,
"loss": 0.7534,
"step": 2744
},
{
"epoch": 0.9873212840571891,
"grad_norm": 9.132184340166688,
"learning_rate": 8.315834959385925e-10,
"loss": 0.7398,
"step": 2745
},
{
"epoch": 0.9876809639420915,
"grad_norm": 10.817060944381932,
"learning_rate": 7.847494099934015e-10,
"loss": 0.7376,
"step": 2746
},
{
"epoch": 0.988040643826994,
"grad_norm": 7.404812161515181,
"learning_rate": 7.392721338563834e-10,
"loss": 0.7866,
"step": 2747
},
{
"epoch": 0.9884003237118965,
"grad_norm": 13.024550676718645,
"learning_rate": 6.951517292800301e-10,
"loss": 0.7846,
"step": 2748
},
{
"epoch": 0.9887600035967988,
"grad_norm": 9.171386738230428,
"learning_rate": 6.523882561744188e-10,
"loss": 0.8258,
"step": 2749
},
{
"epoch": 0.9891196834817013,
"grad_norm": 8.571326039535148,
"learning_rate": 6.109817726068778e-10,
"loss": 0.7831,
"step": 2750
},
{
"epoch": 0.9894793633666037,
"grad_norm": 14.004779110089878,
"learning_rate": 5.70932334802432e-10,
"loss": 0.799,
"step": 2751
},
{
"epoch": 0.9898390432515062,
"grad_norm": 12.068703242749297,
"learning_rate": 5.322399971431357e-10,
"loss": 0.7628,
"step": 2752
},
{
"epoch": 0.9901987231364086,
"grad_norm": 30.981918985778567,
"learning_rate": 4.949048121682953e-10,
"loss": 0.8533,
"step": 2753
},
{
"epoch": 0.990558403021311,
"grad_norm": 11.118356722937763,
"learning_rate": 4.589268305745797e-10,
"loss": 0.7183,
"step": 2754
},
{
"epoch": 0.9909180829062134,
"grad_norm": 10.271196213470246,
"learning_rate": 4.2430610121546586e-10,
"loss": 0.7859,
"step": 2755
},
{
"epoch": 0.9912777627911159,
"grad_norm": 6.525791514753714,
"learning_rate": 3.9104267110168234e-10,
"loss": 0.8111,
"step": 2756
},
{
"epoch": 0.9916374426760184,
"grad_norm": 20.043156103334308,
"learning_rate": 3.5913658540087654e-10,
"loss": 0.853,
"step": 2757
},
{
"epoch": 0.9919971225609208,
"grad_norm": 69.46079333495119,
"learning_rate": 3.285878874373926e-10,
"loss": 0.7862,
"step": 2758
},
{
"epoch": 0.9923568024458232,
"grad_norm": 9.504945412841762,
"learning_rate": 2.9939661869260445e-10,
"loss": 0.8189,
"step": 2759
},
{
"epoch": 0.9927164823307256,
"grad_norm": 13.469627197959296,
"learning_rate": 2.7156281880469366e-10,
"loss": 0.7905,
"step": 2760
},
{
"epoch": 0.9930761622156281,
"grad_norm": 13.210194487635937,
"learning_rate": 2.4508652556842756e-10,
"loss": 0.7695,
"step": 2761
},
{
"epoch": 0.9934358421005305,
"grad_norm": 8.594677550896781,
"learning_rate": 2.199677749352702e-10,
"loss": 0.7899,
"step": 2762
},
{
"epoch": 0.993795521985433,
"grad_norm": 30.38091428233754,
"learning_rate": 1.962066010134933e-10,
"loss": 0.7273,
"step": 2763
},
{
"epoch": 0.9941552018703353,
"grad_norm": 18.508392049584348,
"learning_rate": 1.7380303606773227e-10,
"loss": 0.848,
"step": 2764
},
{
"epoch": 0.9945148817552378,
"grad_norm": 10.467248005948727,
"learning_rate": 1.5275711051909724e-10,
"loss": 0.7759,
"step": 2765
},
{
"epoch": 0.9948745616401403,
"grad_norm": 10.146614594409671,
"learning_rate": 1.3306885294561697e-10,
"loss": 0.7582,
"step": 2766
},
{
"epoch": 0.9952342415250427,
"grad_norm": 8.67161970446411,
"learning_rate": 1.1473829008123992e-10,
"loss": 0.7286,
"step": 2767
},
{
"epoch": 0.9955939214099452,
"grad_norm": 9.589313135228506,
"learning_rate": 9.776544681672216e-11,
"loss": 0.7611,
"step": 2768
},
{
"epoch": 0.9959536012948476,
"grad_norm": 9.980806412038273,
"learning_rate": 8.215034619907247e-11,
"loss": 0.7889,
"step": 2769
},
{
"epoch": 0.99631328117975,
"grad_norm": 21.0842961965178,
"learning_rate": 6.789300943155218e-11,
"loss": 0.8279,
"step": 2770
},
{
"epoch": 0.9966729610646524,
"grad_norm": 28.22526942472408,
"learning_rate": 5.499345587389736e-11,
"loss": 0.8025,
"step": 2771
},
{
"epoch": 0.9970326409495549,
"grad_norm": 41.860819728538175,
"learning_rate": 4.345170304220769e-11,
"loss": 0.818,
"step": 2772
},
{
"epoch": 0.9973923208344573,
"grad_norm": 21.07573778196831,
"learning_rate": 3.326776660850239e-11,
"loss": 0.8747,
"step": 2773
},
{
"epoch": 0.9977520007193598,
"grad_norm": 15.028474676120968,
"learning_rate": 2.4441660401608444e-11,
"loss": 0.8014,
"step": 2774
},
{
"epoch": 0.9981116806042623,
"grad_norm": 15.303493993823228,
"learning_rate": 1.6973396405939312e-11,
"loss": 0.7715,
"step": 2775
},
{
"epoch": 0.9984713604891646,
"grad_norm": 12.09864455306113,
"learning_rate": 1.0862984762716187e-11,
"loss": 0.8467,
"step": 2776
},
{
"epoch": 0.9988310403740671,
"grad_norm": 8.12788452017707,
"learning_rate": 6.110433769079826e-12,
"loss": 0.8188,
"step": 2777
},
{
"epoch": 0.9991907202589695,
"grad_norm": 9.770055403147271,
"learning_rate": 2.715749878312579e-12,
"loss": 0.8117,
"step": 2778
},
{
"epoch": 0.999550400143872,
"grad_norm": 8.763906114496235,
"learning_rate": 6.789377000604446e-13,
"loss": 0.7695,
"step": 2779
},
{
"epoch": 0.9999100800287744,
"grad_norm": 36.349351007850856,
"learning_rate": 0.0,
"loss": 0.7575,
"step": 2780
},
{
"epoch": 0.9999100800287744,
"step": 2780,
"total_flos": 807392659931136.0,
"train_loss": 0.8599558137303633,
"train_runtime": 15742.0767,
"train_samples_per_second": 22.606,
"train_steps_per_second": 0.177
}
],
"logging_steps": 1.0,
"max_steps": 2780,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 807392659931136.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}