DACMini / trainer_state.json
Mattimax's picture
Upload 15 files
18692a5 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 58914,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005092297899427116,
"grad_norm": 3.7509827613830566,
"learning_rate": 4.9500000000000004e-05,
"loss": 3.2642,
"step": 100
},
{
"epoch": 0.010184595798854232,
"grad_norm": 3.509660005569458,
"learning_rate": 4.991583636549121e-05,
"loss": 3.014,
"step": 200
},
{
"epoch": 0.015276893698281349,
"grad_norm": 3.0620908737182617,
"learning_rate": 4.983082259326011e-05,
"loss": 2.901,
"step": 300
},
{
"epoch": 0.020369191597708464,
"grad_norm": 2.637498617172241,
"learning_rate": 4.974580882102901e-05,
"loss": 2.8888,
"step": 400
},
{
"epoch": 0.025461489497135583,
"grad_norm": 2.582336902618408,
"learning_rate": 4.966079504879791e-05,
"loss": 2.8137,
"step": 500
},
{
"epoch": 0.030553787396562698,
"grad_norm": 2.575382709503174,
"learning_rate": 4.957578127656681e-05,
"loss": 2.8131,
"step": 600
},
{
"epoch": 0.03564608529598982,
"grad_norm": 2.2707173824310303,
"learning_rate": 4.94907675043357e-05,
"loss": 2.7753,
"step": 700
},
{
"epoch": 0.04073838319541693,
"grad_norm": 2.1095917224884033,
"learning_rate": 4.94057537321046e-05,
"loss": 2.7512,
"step": 800
},
{
"epoch": 0.04583068109484405,
"grad_norm": 1.7593672275543213,
"learning_rate": 4.93207399598735e-05,
"loss": 2.7556,
"step": 900
},
{
"epoch": 0.050922978994271166,
"grad_norm": 2.2801873683929443,
"learning_rate": 4.92357261876424e-05,
"loss": 2.7417,
"step": 1000
},
{
"epoch": 0.056015276893698285,
"grad_norm": 1.9631321430206299,
"learning_rate": 4.91507124154113e-05,
"loss": 2.7122,
"step": 1100
},
{
"epoch": 0.061107574793125397,
"grad_norm": 1.6080312728881836,
"learning_rate": 4.90656986431802e-05,
"loss": 2.687,
"step": 1200
},
{
"epoch": 0.06619987269255251,
"grad_norm": 2.1147282123565674,
"learning_rate": 4.89806848709491e-05,
"loss": 2.6617,
"step": 1300
},
{
"epoch": 0.07129217059197963,
"grad_norm": 1.905120849609375,
"learning_rate": 4.889567109871799e-05,
"loss": 2.6655,
"step": 1400
},
{
"epoch": 0.07638446849140675,
"grad_norm": 1.6756385564804077,
"learning_rate": 4.881065732648689e-05,
"loss": 2.6286,
"step": 1500
},
{
"epoch": 0.08147676639083386,
"grad_norm": 1.8816139698028564,
"learning_rate": 4.872564355425579e-05,
"loss": 2.6414,
"step": 1600
},
{
"epoch": 0.08656906429026098,
"grad_norm": 1.611456036567688,
"learning_rate": 4.864062978202469e-05,
"loss": 2.635,
"step": 1700
},
{
"epoch": 0.0916613621896881,
"grad_norm": 1.8698660135269165,
"learning_rate": 4.855561600979359e-05,
"loss": 2.6683,
"step": 1800
},
{
"epoch": 0.0967536600891152,
"grad_norm": 1.6007249355316162,
"learning_rate": 4.847060223756249e-05,
"loss": 2.633,
"step": 1900
},
{
"epoch": 0.10184595798854233,
"grad_norm": 1.5520641803741455,
"learning_rate": 4.838558846533139e-05,
"loss": 2.5912,
"step": 2000
},
{
"epoch": 0.10693825588796944,
"grad_norm": 1.522303819656372,
"learning_rate": 4.8300574693100286e-05,
"loss": 2.614,
"step": 2100
},
{
"epoch": 0.11203055378739657,
"grad_norm": 1.752119541168213,
"learning_rate": 4.8215560920869186e-05,
"loss": 2.5984,
"step": 2200
},
{
"epoch": 0.11712285168682368,
"grad_norm": 1.5005803108215332,
"learning_rate": 4.8130547148638085e-05,
"loss": 2.6231,
"step": 2300
},
{
"epoch": 0.12221514958625079,
"grad_norm": 1.3557181358337402,
"learning_rate": 4.8045533376406984e-05,
"loss": 2.6189,
"step": 2400
},
{
"epoch": 0.1273074474856779,
"grad_norm": 1.5560193061828613,
"learning_rate": 4.796051960417588e-05,
"loss": 2.5609,
"step": 2500
},
{
"epoch": 0.13239974538510502,
"grad_norm": 1.4254344701766968,
"learning_rate": 4.7875505831944776e-05,
"loss": 2.5632,
"step": 2600
},
{
"epoch": 0.13749204328453216,
"grad_norm": 1.366593599319458,
"learning_rate": 4.7790492059713675e-05,
"loss": 2.54,
"step": 2700
},
{
"epoch": 0.14258434118395927,
"grad_norm": 1.2629475593566895,
"learning_rate": 4.7705478287482575e-05,
"loss": 2.5679,
"step": 2800
},
{
"epoch": 0.14767663908338638,
"grad_norm": 1.234580159187317,
"learning_rate": 4.7620464515251474e-05,
"loss": 2.5671,
"step": 2900
},
{
"epoch": 0.1527689369828135,
"grad_norm": 1.4017528295516968,
"learning_rate": 4.7535450743020373e-05,
"loss": 2.5859,
"step": 3000
},
{
"epoch": 0.1578612348822406,
"grad_norm": 1.3464558124542236,
"learning_rate": 4.7450436970789266e-05,
"loss": 2.5289,
"step": 3100
},
{
"epoch": 0.16295353278166771,
"grad_norm": 1.3121877908706665,
"learning_rate": 4.7365423198558165e-05,
"loss": 2.548,
"step": 3200
},
{
"epoch": 0.16804583068109485,
"grad_norm": 1.2319351434707642,
"learning_rate": 4.7280409426327065e-05,
"loss": 2.5425,
"step": 3300
},
{
"epoch": 0.17313812858052197,
"grad_norm": 1.243325114250183,
"learning_rate": 4.7195395654095964e-05,
"loss": 2.5798,
"step": 3400
},
{
"epoch": 0.17823042647994908,
"grad_norm": 1.2152389287948608,
"learning_rate": 4.711038188186486e-05,
"loss": 2.5235,
"step": 3500
},
{
"epoch": 0.1833227243793762,
"grad_norm": 1.2546372413635254,
"learning_rate": 4.702536810963376e-05,
"loss": 2.5451,
"step": 3600
},
{
"epoch": 0.1884150222788033,
"grad_norm": 1.2566453218460083,
"learning_rate": 4.694035433740266e-05,
"loss": 2.5031,
"step": 3700
},
{
"epoch": 0.1935073201782304,
"grad_norm": 1.4164502620697021,
"learning_rate": 4.685534056517156e-05,
"loss": 2.5002,
"step": 3800
},
{
"epoch": 0.19859961807765755,
"grad_norm": 1.2575647830963135,
"learning_rate": 4.677032679294046e-05,
"loss": 2.5175,
"step": 3900
},
{
"epoch": 0.20369191597708466,
"grad_norm": 1.2546263933181763,
"learning_rate": 4.668531302070936e-05,
"loss": 2.5374,
"step": 4000
},
{
"epoch": 0.20878421387651178,
"grad_norm": 1.4746454954147339,
"learning_rate": 4.660029924847826e-05,
"loss": 2.5077,
"step": 4100
},
{
"epoch": 0.2138765117759389,
"grad_norm": 1.3161815404891968,
"learning_rate": 4.651528547624716e-05,
"loss": 2.4939,
"step": 4200
},
{
"epoch": 0.218968809675366,
"grad_norm": 1.2247682809829712,
"learning_rate": 4.643027170401605e-05,
"loss": 2.5047,
"step": 4300
},
{
"epoch": 0.22406110757479314,
"grad_norm": 1.024702787399292,
"learning_rate": 4.634525793178495e-05,
"loss": 2.4986,
"step": 4400
},
{
"epoch": 0.22915340547422025,
"grad_norm": 1.2271933555603027,
"learning_rate": 4.626024415955385e-05,
"loss": 2.4815,
"step": 4500
},
{
"epoch": 0.23424570337364736,
"grad_norm": 1.1049838066101074,
"learning_rate": 4.617523038732275e-05,
"loss": 2.5055,
"step": 4600
},
{
"epoch": 0.23933800127307447,
"grad_norm": 1.1865185499191284,
"learning_rate": 4.609021661509165e-05,
"loss": 2.4932,
"step": 4700
},
{
"epoch": 0.24443029917250159,
"grad_norm": 1.2031099796295166,
"learning_rate": 4.600520284286055e-05,
"loss": 2.4857,
"step": 4800
},
{
"epoch": 0.2495225970719287,
"grad_norm": 1.2100847959518433,
"learning_rate": 4.592018907062944e-05,
"loss": 2.4704,
"step": 4900
},
{
"epoch": 0.2546148949713558,
"grad_norm": 1.306518793106079,
"learning_rate": 4.583517529839834e-05,
"loss": 2.4679,
"step": 5000
},
{
"epoch": 0.2597071928707829,
"grad_norm": 1.3596395254135132,
"learning_rate": 4.575016152616724e-05,
"loss": 2.5029,
"step": 5100
},
{
"epoch": 0.26479949077021003,
"grad_norm": 1.1463990211486816,
"learning_rate": 4.566514775393614e-05,
"loss": 2.4678,
"step": 5200
},
{
"epoch": 0.2698917886696372,
"grad_norm": 1.4843939542770386,
"learning_rate": 4.558013398170504e-05,
"loss": 2.4549,
"step": 5300
},
{
"epoch": 0.2749840865690643,
"grad_norm": 1.4119912385940552,
"learning_rate": 4.549512020947394e-05,
"loss": 2.4449,
"step": 5400
},
{
"epoch": 0.2800763844684914,
"grad_norm": 1.1640745401382446,
"learning_rate": 4.5410106437242836e-05,
"loss": 2.4133,
"step": 5500
},
{
"epoch": 0.28516868236791854,
"grad_norm": 1.2901395559310913,
"learning_rate": 4.532509266501173e-05,
"loss": 2.4493,
"step": 5600
},
{
"epoch": 0.29026098026734565,
"grad_norm": 1.3150924444198608,
"learning_rate": 4.5240078892780635e-05,
"loss": 2.4616,
"step": 5700
},
{
"epoch": 0.29535327816677276,
"grad_norm": 1.1391271352767944,
"learning_rate": 4.5155065120549534e-05,
"loss": 2.4491,
"step": 5800
},
{
"epoch": 0.30044557606619987,
"grad_norm": 1.047142505645752,
"learning_rate": 4.5070051348318434e-05,
"loss": 2.4664,
"step": 5900
},
{
"epoch": 0.305537873965627,
"grad_norm": 1.2513772249221802,
"learning_rate": 4.498503757608733e-05,
"loss": 2.4356,
"step": 6000
},
{
"epoch": 0.3106301718650541,
"grad_norm": 1.2248339653015137,
"learning_rate": 4.4900023803856225e-05,
"loss": 2.458,
"step": 6100
},
{
"epoch": 0.3157224697644812,
"grad_norm": 0.9861664772033691,
"learning_rate": 4.4815010031625125e-05,
"loss": 2.4494,
"step": 6200
},
{
"epoch": 0.3208147676639083,
"grad_norm": 1.087272047996521,
"learning_rate": 4.4729996259394024e-05,
"loss": 2.4459,
"step": 6300
},
{
"epoch": 0.32590706556333543,
"grad_norm": 1.0361382961273193,
"learning_rate": 4.464498248716292e-05,
"loss": 2.451,
"step": 6400
},
{
"epoch": 0.3309993634627626,
"grad_norm": 1.0861406326293945,
"learning_rate": 4.455996871493182e-05,
"loss": 2.4426,
"step": 6500
},
{
"epoch": 0.3360916613621897,
"grad_norm": 0.9402614235877991,
"learning_rate": 4.447495494270072e-05,
"loss": 2.4189,
"step": 6600
},
{
"epoch": 0.3411839592616168,
"grad_norm": 0.9866734743118286,
"learning_rate": 4.4389941170469615e-05,
"loss": 2.4521,
"step": 6700
},
{
"epoch": 0.34627625716104393,
"grad_norm": 1.0977962017059326,
"learning_rate": 4.4304927398238514e-05,
"loss": 2.4505,
"step": 6800
},
{
"epoch": 0.35136855506047104,
"grad_norm": 1.1266326904296875,
"learning_rate": 4.421991362600741e-05,
"loss": 2.3999,
"step": 6900
},
{
"epoch": 0.35646085295989816,
"grad_norm": 1.1100637912750244,
"learning_rate": 4.413489985377631e-05,
"loss": 2.4226,
"step": 7000
},
{
"epoch": 0.36155315085932527,
"grad_norm": 1.1532678604125977,
"learning_rate": 4.404988608154521e-05,
"loss": 2.4048,
"step": 7100
},
{
"epoch": 0.3666454487587524,
"grad_norm": 1.02146315574646,
"learning_rate": 4.396487230931411e-05,
"loss": 2.4177,
"step": 7200
},
{
"epoch": 0.3717377466581795,
"grad_norm": 1.1943087577819824,
"learning_rate": 4.387985853708301e-05,
"loss": 2.4276,
"step": 7300
},
{
"epoch": 0.3768300445576066,
"grad_norm": 1.118034839630127,
"learning_rate": 4.37948447648519e-05,
"loss": 2.3933,
"step": 7400
},
{
"epoch": 0.3819223424570337,
"grad_norm": 1.0506726503372192,
"learning_rate": 4.370983099262081e-05,
"loss": 2.4162,
"step": 7500
},
{
"epoch": 0.3870146403564608,
"grad_norm": 1.1072652339935303,
"learning_rate": 4.362481722038971e-05,
"loss": 2.4166,
"step": 7600
},
{
"epoch": 0.392106938255888,
"grad_norm": 0.9805678129196167,
"learning_rate": 4.353980344815861e-05,
"loss": 2.3771,
"step": 7700
},
{
"epoch": 0.3971992361553151,
"grad_norm": 1.0781447887420654,
"learning_rate": 4.345478967592751e-05,
"loss": 2.3971,
"step": 7800
},
{
"epoch": 0.4022915340547422,
"grad_norm": 1.1752007007598877,
"learning_rate": 4.33697759036964e-05,
"loss": 2.3837,
"step": 7900
},
{
"epoch": 0.40738383195416933,
"grad_norm": 1.0886644124984741,
"learning_rate": 4.32847621314653e-05,
"loss": 2.4372,
"step": 8000
},
{
"epoch": 0.41247612985359644,
"grad_norm": 1.01775062084198,
"learning_rate": 4.31997483592342e-05,
"loss": 2.4051,
"step": 8100
},
{
"epoch": 0.41756842775302355,
"grad_norm": 1.0455646514892578,
"learning_rate": 4.31147345870031e-05,
"loss": 2.366,
"step": 8200
},
{
"epoch": 0.42266072565245066,
"grad_norm": 0.9850195646286011,
"learning_rate": 4.3029720814772e-05,
"loss": 2.3816,
"step": 8300
},
{
"epoch": 0.4277530235518778,
"grad_norm": 1.092155933380127,
"learning_rate": 4.2944707042540896e-05,
"loss": 2.396,
"step": 8400
},
{
"epoch": 0.4328453214513049,
"grad_norm": 1.008317232131958,
"learning_rate": 4.285969327030979e-05,
"loss": 2.3976,
"step": 8500
},
{
"epoch": 0.437937619350732,
"grad_norm": 1.1001275777816772,
"learning_rate": 4.277467949807869e-05,
"loss": 2.4009,
"step": 8600
},
{
"epoch": 0.4430299172501591,
"grad_norm": 0.9589524865150452,
"learning_rate": 4.268966572584759e-05,
"loss": 2.3755,
"step": 8700
},
{
"epoch": 0.4481222151495863,
"grad_norm": 0.9529566168785095,
"learning_rate": 4.260465195361649e-05,
"loss": 2.3961,
"step": 8800
},
{
"epoch": 0.4532145130490134,
"grad_norm": 1.0157649517059326,
"learning_rate": 4.2519638181385386e-05,
"loss": 2.3743,
"step": 8900
},
{
"epoch": 0.4583068109484405,
"grad_norm": 1.0096311569213867,
"learning_rate": 4.2434624409154286e-05,
"loss": 2.3702,
"step": 9000
},
{
"epoch": 0.4633991088478676,
"grad_norm": 1.0700254440307617,
"learning_rate": 4.2349610636923185e-05,
"loss": 2.3486,
"step": 9100
},
{
"epoch": 0.4684914067472947,
"grad_norm": 0.9580355286598206,
"learning_rate": 4.226459686469208e-05,
"loss": 2.3686,
"step": 9200
},
{
"epoch": 0.47358370464672184,
"grad_norm": 1.0027587413787842,
"learning_rate": 4.217958309246098e-05,
"loss": 2.4074,
"step": 9300
},
{
"epoch": 0.47867600254614895,
"grad_norm": 0.9647036194801331,
"learning_rate": 4.209456932022988e-05,
"loss": 2.3631,
"step": 9400
},
{
"epoch": 0.48376830044557606,
"grad_norm": 1.0718977451324463,
"learning_rate": 4.200955554799878e-05,
"loss": 2.3613,
"step": 9500
},
{
"epoch": 0.48886059834500317,
"grad_norm": 1.1674007177352905,
"learning_rate": 4.192454177576768e-05,
"loss": 2.3604,
"step": 9600
},
{
"epoch": 0.4939528962444303,
"grad_norm": 0.8964582681655884,
"learning_rate": 4.1839528003536574e-05,
"loss": 2.3517,
"step": 9700
},
{
"epoch": 0.4990451941438574,
"grad_norm": 0.9950689673423767,
"learning_rate": 4.175451423130547e-05,
"loss": 2.3609,
"step": 9800
},
{
"epoch": 0.5041374920432845,
"grad_norm": 1.0391299724578857,
"learning_rate": 4.166950045907437e-05,
"loss": 2.3764,
"step": 9900
},
{
"epoch": 0.5092297899427116,
"grad_norm": 0.9937861561775208,
"learning_rate": 4.158448668684327e-05,
"loss": 2.3439,
"step": 10000
},
{
"epoch": 0.5143220878421387,
"grad_norm": 0.9637438654899597,
"learning_rate": 4.149947291461217e-05,
"loss": 2.3599,
"step": 10100
},
{
"epoch": 0.5194143857415658,
"grad_norm": 0.991791844367981,
"learning_rate": 4.141445914238107e-05,
"loss": 2.3688,
"step": 10200
},
{
"epoch": 0.524506683640993,
"grad_norm": 1.1475801467895508,
"learning_rate": 4.132944537014996e-05,
"loss": 2.351,
"step": 10300
},
{
"epoch": 0.5295989815404201,
"grad_norm": 1.018678069114685,
"learning_rate": 4.124443159791886e-05,
"loss": 2.3381,
"step": 10400
},
{
"epoch": 0.5346912794398472,
"grad_norm": 1.0166884660720825,
"learning_rate": 4.115941782568776e-05,
"loss": 2.3393,
"step": 10500
},
{
"epoch": 0.5397835773392744,
"grad_norm": 0.9590491652488708,
"learning_rate": 4.107440405345666e-05,
"loss": 2.3428,
"step": 10600
},
{
"epoch": 0.5448758752387015,
"grad_norm": 1.0007227659225464,
"learning_rate": 4.098939028122556e-05,
"loss": 2.3388,
"step": 10700
},
{
"epoch": 0.5499681731381286,
"grad_norm": 0.8273807764053345,
"learning_rate": 4.090437650899446e-05,
"loss": 2.3238,
"step": 10800
},
{
"epoch": 0.5550604710375557,
"grad_norm": 0.9188222885131836,
"learning_rate": 4.081936273676335e-05,
"loss": 2.3171,
"step": 10900
},
{
"epoch": 0.5601527689369828,
"grad_norm": 1.2066142559051514,
"learning_rate": 4.073434896453225e-05,
"loss": 2.385,
"step": 11000
},
{
"epoch": 0.56524506683641,
"grad_norm": 1.0904101133346558,
"learning_rate": 4.064933519230115e-05,
"loss": 2.341,
"step": 11100
},
{
"epoch": 0.5703373647358371,
"grad_norm": 1.0374412536621094,
"learning_rate": 4.056432142007005e-05,
"loss": 2.3398,
"step": 11200
},
{
"epoch": 0.5754296626352642,
"grad_norm": 0.9854114055633545,
"learning_rate": 4.0479307647838956e-05,
"loss": 2.3512,
"step": 11300
},
{
"epoch": 0.5805219605346913,
"grad_norm": 1.071382999420166,
"learning_rate": 4.0394293875607856e-05,
"loss": 2.3145,
"step": 11400
},
{
"epoch": 0.5856142584341184,
"grad_norm": 0.9923407435417175,
"learning_rate": 4.030928010337675e-05,
"loss": 2.3475,
"step": 11500
},
{
"epoch": 0.5907065563335455,
"grad_norm": 1.034600019454956,
"learning_rate": 4.022426633114565e-05,
"loss": 2.3196,
"step": 11600
},
{
"epoch": 0.5957988542329726,
"grad_norm": 1.4072537422180176,
"learning_rate": 4.013925255891455e-05,
"loss": 2.3435,
"step": 11700
},
{
"epoch": 0.6008911521323997,
"grad_norm": 1.0498465299606323,
"learning_rate": 4.0054238786683446e-05,
"loss": 2.3488,
"step": 11800
},
{
"epoch": 0.6059834500318269,
"grad_norm": 0.9911717176437378,
"learning_rate": 3.9969225014452346e-05,
"loss": 2.3286,
"step": 11900
},
{
"epoch": 0.611075747931254,
"grad_norm": 0.9431672692298889,
"learning_rate": 3.9884211242221245e-05,
"loss": 2.3502,
"step": 12000
},
{
"epoch": 0.6161680458306811,
"grad_norm": 1.0439810752868652,
"learning_rate": 3.979919746999014e-05,
"loss": 2.3516,
"step": 12100
},
{
"epoch": 0.6212603437301082,
"grad_norm": 0.8762308955192566,
"learning_rate": 3.971418369775904e-05,
"loss": 2.2836,
"step": 12200
},
{
"epoch": 0.6263526416295353,
"grad_norm": 0.8706735372543335,
"learning_rate": 3.9629169925527936e-05,
"loss": 2.349,
"step": 12300
},
{
"epoch": 0.6314449395289624,
"grad_norm": 0.9823511838912964,
"learning_rate": 3.9544156153296836e-05,
"loss": 2.3356,
"step": 12400
},
{
"epoch": 0.6365372374283895,
"grad_norm": 0.939285933971405,
"learning_rate": 3.9459142381065735e-05,
"loss": 2.3435,
"step": 12500
},
{
"epoch": 0.6416295353278166,
"grad_norm": 1.033011555671692,
"learning_rate": 3.9374128608834634e-05,
"loss": 2.3208,
"step": 12600
},
{
"epoch": 0.6467218332272437,
"grad_norm": 0.9835578799247742,
"learning_rate": 3.928911483660353e-05,
"loss": 2.3332,
"step": 12700
},
{
"epoch": 0.6518141311266709,
"grad_norm": 0.9082310795783997,
"learning_rate": 3.9204101064372426e-05,
"loss": 2.3216,
"step": 12800
},
{
"epoch": 0.6569064290260981,
"grad_norm": 0.8588578701019287,
"learning_rate": 3.9119087292141325e-05,
"loss": 2.3114,
"step": 12900
},
{
"epoch": 0.6619987269255252,
"grad_norm": 1.040531873703003,
"learning_rate": 3.9034073519910225e-05,
"loss": 2.3328,
"step": 13000
},
{
"epoch": 0.6670910248249523,
"grad_norm": 1.0225043296813965,
"learning_rate": 3.894905974767913e-05,
"loss": 2.3245,
"step": 13100
},
{
"epoch": 0.6721833227243794,
"grad_norm": 1.0172550678253174,
"learning_rate": 3.886404597544803e-05,
"loss": 2.3056,
"step": 13200
},
{
"epoch": 0.6772756206238065,
"grad_norm": 0.9119499921798706,
"learning_rate": 3.877903220321692e-05,
"loss": 2.317,
"step": 13300
},
{
"epoch": 0.6823679185232336,
"grad_norm": 0.8971495032310486,
"learning_rate": 3.869401843098582e-05,
"loss": 2.3292,
"step": 13400
},
{
"epoch": 0.6874602164226608,
"grad_norm": 0.9643430709838867,
"learning_rate": 3.860900465875472e-05,
"loss": 2.3779,
"step": 13500
},
{
"epoch": 0.6925525143220879,
"grad_norm": 0.919440507888794,
"learning_rate": 3.852399088652362e-05,
"loss": 2.2993,
"step": 13600
},
{
"epoch": 0.697644812221515,
"grad_norm": 0.9949972033500671,
"learning_rate": 3.843897711429252e-05,
"loss": 2.3255,
"step": 13700
},
{
"epoch": 0.7027371101209421,
"grad_norm": 0.9251271486282349,
"learning_rate": 3.835396334206142e-05,
"loss": 2.2997,
"step": 13800
},
{
"epoch": 0.7078294080203692,
"grad_norm": 0.9567040205001831,
"learning_rate": 3.826894956983031e-05,
"loss": 2.3198,
"step": 13900
},
{
"epoch": 0.7129217059197963,
"grad_norm": 1.1165566444396973,
"learning_rate": 3.818393579759921e-05,
"loss": 2.3074,
"step": 14000
},
{
"epoch": 0.7180140038192234,
"grad_norm": 0.9649367928504944,
"learning_rate": 3.809892202536811e-05,
"loss": 2.2916,
"step": 14100
},
{
"epoch": 0.7231063017186505,
"grad_norm": 0.8595756888389587,
"learning_rate": 3.801390825313701e-05,
"loss": 2.3386,
"step": 14200
},
{
"epoch": 0.7281985996180776,
"grad_norm": 0.7877846360206604,
"learning_rate": 3.792889448090591e-05,
"loss": 2.2741,
"step": 14300
},
{
"epoch": 0.7332908975175048,
"grad_norm": 0.9086227416992188,
"learning_rate": 3.784388070867481e-05,
"loss": 2.3186,
"step": 14400
},
{
"epoch": 0.7383831954169319,
"grad_norm": 0.9466003179550171,
"learning_rate": 3.77588669364437e-05,
"loss": 2.2916,
"step": 14500
},
{
"epoch": 0.743475493316359,
"grad_norm": 0.8069922924041748,
"learning_rate": 3.76738531642126e-05,
"loss": 2.3108,
"step": 14600
},
{
"epoch": 0.7485677912157861,
"grad_norm": 1.0324113368988037,
"learning_rate": 3.75888393919815e-05,
"loss": 2.3066,
"step": 14700
},
{
"epoch": 0.7536600891152132,
"grad_norm": 0.892573893070221,
"learning_rate": 3.75038256197504e-05,
"loss": 2.2738,
"step": 14800
},
{
"epoch": 0.7587523870146403,
"grad_norm": 0.7999922037124634,
"learning_rate": 3.74188118475193e-05,
"loss": 2.3195,
"step": 14900
},
{
"epoch": 0.7638446849140674,
"grad_norm": 1.004957914352417,
"learning_rate": 3.73337980752882e-05,
"loss": 2.2935,
"step": 15000
},
{
"epoch": 0.7689369828134945,
"grad_norm": 1.046640157699585,
"learning_rate": 3.72487843030571e-05,
"loss": 2.3109,
"step": 15100
},
{
"epoch": 0.7740292807129217,
"grad_norm": 0.9236047863960266,
"learning_rate": 3.7163770530825996e-05,
"loss": 2.3128,
"step": 15200
},
{
"epoch": 0.7791215786123489,
"grad_norm": 1.0190492868423462,
"learning_rate": 3.7078756758594896e-05,
"loss": 2.3018,
"step": 15300
},
{
"epoch": 0.784213876511776,
"grad_norm": 0.8099306225776672,
"learning_rate": 3.6993742986363795e-05,
"loss": 2.313,
"step": 15400
},
{
"epoch": 0.7893061744112031,
"grad_norm": 0.9618342518806458,
"learning_rate": 3.6908729214132694e-05,
"loss": 2.2864,
"step": 15500
},
{
"epoch": 0.7943984723106302,
"grad_norm": 1.046680212020874,
"learning_rate": 3.6823715441901594e-05,
"loss": 2.2853,
"step": 15600
},
{
"epoch": 0.7994907702100573,
"grad_norm": 0.8486195206642151,
"learning_rate": 3.6738701669670486e-05,
"loss": 2.2854,
"step": 15700
},
{
"epoch": 0.8045830681094844,
"grad_norm": 0.9708773493766785,
"learning_rate": 3.6653687897439386e-05,
"loss": 2.2928,
"step": 15800
},
{
"epoch": 0.8096753660089115,
"grad_norm": 0.8969681262969971,
"learning_rate": 3.6568674125208285e-05,
"loss": 2.2976,
"step": 15900
},
{
"epoch": 0.8147676639083387,
"grad_norm": 0.9385348558425903,
"learning_rate": 3.6483660352977184e-05,
"loss": 2.2847,
"step": 16000
},
{
"epoch": 0.8198599618077658,
"grad_norm": 0.8899937272071838,
"learning_rate": 3.6398646580746083e-05,
"loss": 2.2972,
"step": 16100
},
{
"epoch": 0.8249522597071929,
"grad_norm": 0.8900747299194336,
"learning_rate": 3.631363280851498e-05,
"loss": 2.2952,
"step": 16200
},
{
"epoch": 0.83004455760662,
"grad_norm": 1.026571273803711,
"learning_rate": 3.6228619036283875e-05,
"loss": 2.2842,
"step": 16300
},
{
"epoch": 0.8351368555060471,
"grad_norm": 0.9016963839530945,
"learning_rate": 3.6143605264052775e-05,
"loss": 2.288,
"step": 16400
},
{
"epoch": 0.8402291534054742,
"grad_norm": 0.8101049065589905,
"learning_rate": 3.6058591491821674e-05,
"loss": 2.2486,
"step": 16500
},
{
"epoch": 0.8453214513049013,
"grad_norm": 0.860748827457428,
"learning_rate": 3.597357771959057e-05,
"loss": 2.2911,
"step": 16600
},
{
"epoch": 0.8504137492043284,
"grad_norm": 0.9295821189880371,
"learning_rate": 3.588856394735947e-05,
"loss": 2.2477,
"step": 16700
},
{
"epoch": 0.8555060471037556,
"grad_norm": 0.9582170844078064,
"learning_rate": 3.580355017512837e-05,
"loss": 2.307,
"step": 16800
},
{
"epoch": 0.8605983450031827,
"grad_norm": 0.9199303984642029,
"learning_rate": 3.571853640289727e-05,
"loss": 2.2692,
"step": 16900
},
{
"epoch": 0.8656906429026098,
"grad_norm": 0.8835098743438721,
"learning_rate": 3.563352263066617e-05,
"loss": 2.2681,
"step": 17000
},
{
"epoch": 0.8707829408020369,
"grad_norm": 0.9898850917816162,
"learning_rate": 3.554850885843507e-05,
"loss": 2.2718,
"step": 17100
},
{
"epoch": 0.875875238701464,
"grad_norm": 1.0997586250305176,
"learning_rate": 3.546349508620397e-05,
"loss": 2.2577,
"step": 17200
},
{
"epoch": 0.8809675366008911,
"grad_norm": 0.8374606370925903,
"learning_rate": 3.537848131397287e-05,
"loss": 2.2731,
"step": 17300
},
{
"epoch": 0.8860598345003182,
"grad_norm": 0.9752559065818787,
"learning_rate": 3.529346754174177e-05,
"loss": 2.2776,
"step": 17400
},
{
"epoch": 0.8911521323997453,
"grad_norm": 0.8918510675430298,
"learning_rate": 3.520845376951066e-05,
"loss": 2.2838,
"step": 17500
},
{
"epoch": 0.8962444302991726,
"grad_norm": 0.9751953482627869,
"learning_rate": 3.512343999727956e-05,
"loss": 2.268,
"step": 17600
},
{
"epoch": 0.9013367281985997,
"grad_norm": 0.9787586331367493,
"learning_rate": 3.503842622504846e-05,
"loss": 2.2927,
"step": 17700
},
{
"epoch": 0.9064290260980268,
"grad_norm": 0.9199690222740173,
"learning_rate": 3.495341245281736e-05,
"loss": 2.2785,
"step": 17800
},
{
"epoch": 0.9115213239974539,
"grad_norm": 0.8526634573936462,
"learning_rate": 3.486839868058626e-05,
"loss": 2.2818,
"step": 17900
},
{
"epoch": 0.916613621896881,
"grad_norm": 0.9445266127586365,
"learning_rate": 3.478338490835516e-05,
"loss": 2.3147,
"step": 18000
},
{
"epoch": 0.9217059197963081,
"grad_norm": 0.9607738256454468,
"learning_rate": 3.469837113612405e-05,
"loss": 2.2663,
"step": 18100
},
{
"epoch": 0.9267982176957352,
"grad_norm": 0.8561920523643494,
"learning_rate": 3.461335736389295e-05,
"loss": 2.2355,
"step": 18200
},
{
"epoch": 0.9318905155951623,
"grad_norm": 0.8668131828308105,
"learning_rate": 3.452834359166185e-05,
"loss": 2.2801,
"step": 18300
},
{
"epoch": 0.9369828134945895,
"grad_norm": 0.9161975979804993,
"learning_rate": 3.444332981943075e-05,
"loss": 2.2668,
"step": 18400
},
{
"epoch": 0.9420751113940166,
"grad_norm": 0.9021576046943665,
"learning_rate": 3.435831604719965e-05,
"loss": 2.2887,
"step": 18500
},
{
"epoch": 0.9471674092934437,
"grad_norm": 0.8754701018333435,
"learning_rate": 3.4273302274968546e-05,
"loss": 2.2567,
"step": 18600
},
{
"epoch": 0.9522597071928708,
"grad_norm": 0.9762224555015564,
"learning_rate": 3.4188288502737446e-05,
"loss": 2.2574,
"step": 18700
},
{
"epoch": 0.9573520050922979,
"grad_norm": 0.8961549401283264,
"learning_rate": 3.4103274730506345e-05,
"loss": 2.252,
"step": 18800
},
{
"epoch": 0.962444302991725,
"grad_norm": 0.8942741751670837,
"learning_rate": 3.4018260958275244e-05,
"loss": 2.3098,
"step": 18900
},
{
"epoch": 0.9675366008911521,
"grad_norm": 0.8678953051567078,
"learning_rate": 3.3933247186044144e-05,
"loss": 2.2751,
"step": 19000
},
{
"epoch": 0.9726288987905792,
"grad_norm": 0.9803009629249573,
"learning_rate": 3.384823341381304e-05,
"loss": 2.2329,
"step": 19100
},
{
"epoch": 0.9777211966900063,
"grad_norm": 0.8548142313957214,
"learning_rate": 3.376321964158194e-05,
"loss": 2.2577,
"step": 19200
},
{
"epoch": 0.9828134945894335,
"grad_norm": 0.8247301578521729,
"learning_rate": 3.3678205869350835e-05,
"loss": 2.2776,
"step": 19300
},
{
"epoch": 0.9879057924888606,
"grad_norm": 0.8970145583152771,
"learning_rate": 3.3593192097119734e-05,
"loss": 2.2436,
"step": 19400
},
{
"epoch": 0.9929980903882877,
"grad_norm": 0.9450452923774719,
"learning_rate": 3.3508178324888633e-05,
"loss": 2.274,
"step": 19500
},
{
"epoch": 0.9980903882877148,
"grad_norm": 0.9455347061157227,
"learning_rate": 3.342316455265753e-05,
"loss": 2.2618,
"step": 19600
},
{
"epoch": 1.0031572246976448,
"grad_norm": 0.9727960228919983,
"learning_rate": 3.333815078042643e-05,
"loss": 2.2148,
"step": 19700
},
{
"epoch": 1.008249522597072,
"grad_norm": 1.0244638919830322,
"learning_rate": 3.325313700819533e-05,
"loss": 2.2209,
"step": 19800
},
{
"epoch": 1.013341820496499,
"grad_norm": 1.002837061882019,
"learning_rate": 3.3168123235964224e-05,
"loss": 2.2011,
"step": 19900
},
{
"epoch": 1.0184341183959262,
"grad_norm": 0.8974801898002625,
"learning_rate": 3.308310946373312e-05,
"loss": 2.2186,
"step": 20000
},
{
"epoch": 1.0235264162953532,
"grad_norm": 1.0660030841827393,
"learning_rate": 3.299809569150202e-05,
"loss": 2.2368,
"step": 20100
},
{
"epoch": 1.0286187141947805,
"grad_norm": 0.8874944448471069,
"learning_rate": 3.291308191927092e-05,
"loss": 2.2552,
"step": 20200
},
{
"epoch": 1.0337110120942075,
"grad_norm": 0.9332163333892822,
"learning_rate": 3.282806814703982e-05,
"loss": 2.231,
"step": 20300
},
{
"epoch": 1.0388033099936347,
"grad_norm": 0.8272064328193665,
"learning_rate": 3.274305437480872e-05,
"loss": 2.2287,
"step": 20400
},
{
"epoch": 1.0438956078930617,
"grad_norm": 0.8333924412727356,
"learning_rate": 3.265804060257761e-05,
"loss": 2.2217,
"step": 20500
},
{
"epoch": 1.048987905792489,
"grad_norm": 0.9589939117431641,
"learning_rate": 3.257302683034652e-05,
"loss": 2.2328,
"step": 20600
},
{
"epoch": 1.054080203691916,
"grad_norm": 0.8918903470039368,
"learning_rate": 3.248801305811542e-05,
"loss": 2.2169,
"step": 20700
},
{
"epoch": 1.0591725015913431,
"grad_norm": 0.9166114926338196,
"learning_rate": 3.240299928588432e-05,
"loss": 2.2605,
"step": 20800
},
{
"epoch": 1.0642647994907701,
"grad_norm": 0.8604680895805359,
"learning_rate": 3.231798551365322e-05,
"loss": 2.2591,
"step": 20900
},
{
"epoch": 1.0693570973901974,
"grad_norm": 0.82822185754776,
"learning_rate": 3.2232971741422117e-05,
"loss": 2.2075,
"step": 21000
},
{
"epoch": 1.0744493952896244,
"grad_norm": 0.8195912837982178,
"learning_rate": 3.214795796919101e-05,
"loss": 2.2054,
"step": 21100
},
{
"epoch": 1.0795416931890516,
"grad_norm": 0.9587050080299377,
"learning_rate": 3.206294419695991e-05,
"loss": 2.2558,
"step": 21200
},
{
"epoch": 1.0846339910884786,
"grad_norm": 0.9604052901268005,
"learning_rate": 3.197793042472881e-05,
"loss": 2.2023,
"step": 21300
},
{
"epoch": 1.0897262889879058,
"grad_norm": 0.9480250477790833,
"learning_rate": 3.189291665249771e-05,
"loss": 2.2168,
"step": 21400
},
{
"epoch": 1.094818586887333,
"grad_norm": 0.8999929428100586,
"learning_rate": 3.1807902880266606e-05,
"loss": 2.2089,
"step": 21500
},
{
"epoch": 1.09991088478676,
"grad_norm": 0.9180619716644287,
"learning_rate": 3.1722889108035506e-05,
"loss": 2.2092,
"step": 21600
},
{
"epoch": 1.105003182686187,
"grad_norm": 0.8434627056121826,
"learning_rate": 3.16378753358044e-05,
"loss": 2.2179,
"step": 21700
},
{
"epoch": 1.1100954805856142,
"grad_norm": 0.8810749053955078,
"learning_rate": 3.15528615635733e-05,
"loss": 2.1857,
"step": 21800
},
{
"epoch": 1.1151877784850415,
"grad_norm": 0.9257334470748901,
"learning_rate": 3.14678477913422e-05,
"loss": 2.2205,
"step": 21900
},
{
"epoch": 1.1202800763844685,
"grad_norm": 0.8661274313926697,
"learning_rate": 3.1382834019111096e-05,
"loss": 2.1995,
"step": 22000
},
{
"epoch": 1.1253723742838957,
"grad_norm": 0.8728938698768616,
"learning_rate": 3.1297820246879996e-05,
"loss": 2.2125,
"step": 22100
},
{
"epoch": 1.1304646721833227,
"grad_norm": 0.9176629185676575,
"learning_rate": 3.1212806474648895e-05,
"loss": 2.1908,
"step": 22200
},
{
"epoch": 1.13555697008275,
"grad_norm": 0.9520237445831299,
"learning_rate": 3.112779270241779e-05,
"loss": 2.2345,
"step": 22300
},
{
"epoch": 1.140649267982177,
"grad_norm": 0.8356249928474426,
"learning_rate": 3.1042778930186694e-05,
"loss": 2.2452,
"step": 22400
},
{
"epoch": 1.1457415658816041,
"grad_norm": 1.0978131294250488,
"learning_rate": 3.095776515795559e-05,
"loss": 2.1776,
"step": 22500
},
{
"epoch": 1.1508338637810311,
"grad_norm": 1.1184298992156982,
"learning_rate": 3.087275138572449e-05,
"loss": 2.2174,
"step": 22600
},
{
"epoch": 1.1559261616804584,
"grad_norm": 0.9109058380126953,
"learning_rate": 3.078773761349339e-05,
"loss": 2.2168,
"step": 22700
},
{
"epoch": 1.1610184595798854,
"grad_norm": 0.8274030089378357,
"learning_rate": 3.0702723841262284e-05,
"loss": 2.224,
"step": 22800
},
{
"epoch": 1.1661107574793126,
"grad_norm": 0.8593317270278931,
"learning_rate": 3.0617710069031183e-05,
"loss": 2.2653,
"step": 22900
},
{
"epoch": 1.1712030553787396,
"grad_norm": 1.1305369138717651,
"learning_rate": 3.053269629680008e-05,
"loss": 2.241,
"step": 23000
},
{
"epoch": 1.1762953532781668,
"grad_norm": 1.0249735116958618,
"learning_rate": 3.0447682524568982e-05,
"loss": 2.2044,
"step": 23100
},
{
"epoch": 1.1813876511775938,
"grad_norm": 0.762690007686615,
"learning_rate": 3.036266875233788e-05,
"loss": 2.2057,
"step": 23200
},
{
"epoch": 1.186479949077021,
"grad_norm": 0.7995686531066895,
"learning_rate": 3.0277654980106777e-05,
"loss": 2.2435,
"step": 23300
},
{
"epoch": 1.191572246976448,
"grad_norm": 1.0537996292114258,
"learning_rate": 3.0192641207875677e-05,
"loss": 2.2155,
"step": 23400
},
{
"epoch": 1.1966645448758753,
"grad_norm": 0.8992569446563721,
"learning_rate": 3.0107627435644576e-05,
"loss": 2.217,
"step": 23500
},
{
"epoch": 1.2017568427753023,
"grad_norm": 0.9041591286659241,
"learning_rate": 3.0022613663413472e-05,
"loss": 2.2277,
"step": 23600
},
{
"epoch": 1.2068491406747295,
"grad_norm": 0.9437869787216187,
"learning_rate": 2.993759989118237e-05,
"loss": 2.2151,
"step": 23700
},
{
"epoch": 1.2119414385741565,
"grad_norm": 0.7999377846717834,
"learning_rate": 2.985258611895127e-05,
"loss": 2.2103,
"step": 23800
},
{
"epoch": 1.2170337364735837,
"grad_norm": 0.932995080947876,
"learning_rate": 2.976757234672017e-05,
"loss": 2.1964,
"step": 23900
},
{
"epoch": 1.222126034373011,
"grad_norm": 0.846868097782135,
"learning_rate": 2.9682558574489066e-05,
"loss": 2.1821,
"step": 24000
},
{
"epoch": 1.227218332272438,
"grad_norm": 0.889284610748291,
"learning_rate": 2.9597544802257965e-05,
"loss": 2.2227,
"step": 24100
},
{
"epoch": 1.2323106301718652,
"grad_norm": 0.9376260042190552,
"learning_rate": 2.9512531030026865e-05,
"loss": 2.226,
"step": 24200
},
{
"epoch": 1.2374029280712922,
"grad_norm": 0.8779696226119995,
"learning_rate": 2.9427517257795767e-05,
"loss": 2.2086,
"step": 24300
},
{
"epoch": 1.2424952259707194,
"grad_norm": 0.9524549841880798,
"learning_rate": 2.9342503485564667e-05,
"loss": 2.2026,
"step": 24400
},
{
"epoch": 1.2475875238701464,
"grad_norm": 0.919808030128479,
"learning_rate": 2.9257489713333563e-05,
"loss": 2.192,
"step": 24500
},
{
"epoch": 1.2526798217695736,
"grad_norm": 1.0228092670440674,
"learning_rate": 2.9172475941102462e-05,
"loss": 2.2241,
"step": 24600
},
{
"epoch": 1.2577721196690006,
"grad_norm": 0.8363624811172485,
"learning_rate": 2.908746216887136e-05,
"loss": 2.1808,
"step": 24700
},
{
"epoch": 1.2628644175684278,
"grad_norm": 0.8711551427841187,
"learning_rate": 2.9002448396640257e-05,
"loss": 2.2093,
"step": 24800
},
{
"epoch": 1.2679567154678548,
"grad_norm": 0.9497014284133911,
"learning_rate": 2.8917434624409156e-05,
"loss": 2.1856,
"step": 24900
},
{
"epoch": 1.273049013367282,
"grad_norm": 0.9282352924346924,
"learning_rate": 2.8832420852178056e-05,
"loss": 2.1787,
"step": 25000
},
{
"epoch": 1.278141311266709,
"grad_norm": 0.9017792344093323,
"learning_rate": 2.8747407079946952e-05,
"loss": 2.2054,
"step": 25100
},
{
"epoch": 1.2832336091661363,
"grad_norm": 0.9470519423484802,
"learning_rate": 2.866239330771585e-05,
"loss": 2.1885,
"step": 25200
},
{
"epoch": 1.2883259070655633,
"grad_norm": 0.991397500038147,
"learning_rate": 2.857737953548475e-05,
"loss": 2.1875,
"step": 25300
},
{
"epoch": 1.2934182049649905,
"grad_norm": 0.920644223690033,
"learning_rate": 2.8492365763253646e-05,
"loss": 2.2418,
"step": 25400
},
{
"epoch": 1.2985105028644175,
"grad_norm": 0.8312422037124634,
"learning_rate": 2.8407351991022546e-05,
"loss": 2.1635,
"step": 25500
},
{
"epoch": 1.3036028007638447,
"grad_norm": 0.9457144737243652,
"learning_rate": 2.8322338218791445e-05,
"loss": 2.1945,
"step": 25600
},
{
"epoch": 1.308695098663272,
"grad_norm": 0.8914629220962524,
"learning_rate": 2.8237324446560344e-05,
"loss": 2.2092,
"step": 25700
},
{
"epoch": 1.313787396562699,
"grad_norm": 0.9140703082084656,
"learning_rate": 2.815231067432924e-05,
"loss": 2.2162,
"step": 25800
},
{
"epoch": 1.318879694462126,
"grad_norm": 0.926543116569519,
"learning_rate": 2.806729690209814e-05,
"loss": 2.1906,
"step": 25900
},
{
"epoch": 1.3239719923615532,
"grad_norm": 0.888692319393158,
"learning_rate": 2.798228312986704e-05,
"loss": 2.1866,
"step": 26000
},
{
"epoch": 1.3290642902609804,
"grad_norm": 0.7925876379013062,
"learning_rate": 2.7897269357635935e-05,
"loss": 2.1988,
"step": 26100
},
{
"epoch": 1.3341565881604074,
"grad_norm": 0.8814985752105713,
"learning_rate": 2.781225558540484e-05,
"loss": 2.2072,
"step": 26200
},
{
"epoch": 1.3392488860598344,
"grad_norm": 0.8415858745574951,
"learning_rate": 2.7727241813173737e-05,
"loss": 2.2227,
"step": 26300
},
{
"epoch": 1.3443411839592616,
"grad_norm": 0.9423860907554626,
"learning_rate": 2.7642228040942636e-05,
"loss": 2.2426,
"step": 26400
},
{
"epoch": 1.3494334818586888,
"grad_norm": 0.8816553950309753,
"learning_rate": 2.7557214268711535e-05,
"loss": 2.206,
"step": 26500
},
{
"epoch": 1.3545257797581158,
"grad_norm": 0.8283177018165588,
"learning_rate": 2.747220049648043e-05,
"loss": 2.1859,
"step": 26600
},
{
"epoch": 1.3596180776575428,
"grad_norm": 0.8860555291175842,
"learning_rate": 2.738718672424933e-05,
"loss": 2.178,
"step": 26700
},
{
"epoch": 1.36471037555697,
"grad_norm": 0.8853309154510498,
"learning_rate": 2.730217295201823e-05,
"loss": 2.1844,
"step": 26800
},
{
"epoch": 1.3698026734563973,
"grad_norm": 0.9043028950691223,
"learning_rate": 2.7217159179787126e-05,
"loss": 2.2105,
"step": 26900
},
{
"epoch": 1.3748949713558243,
"grad_norm": 0.8943936824798584,
"learning_rate": 2.7132145407556025e-05,
"loss": 2.1814,
"step": 27000
},
{
"epoch": 1.3799872692552515,
"grad_norm": 0.7901210188865662,
"learning_rate": 2.7047131635324925e-05,
"loss": 2.1819,
"step": 27100
},
{
"epoch": 1.3850795671546785,
"grad_norm": 0.9602735638618469,
"learning_rate": 2.696211786309382e-05,
"loss": 2.2121,
"step": 27200
},
{
"epoch": 1.3901718650541057,
"grad_norm": 0.8327048420906067,
"learning_rate": 2.687710409086272e-05,
"loss": 2.2128,
"step": 27300
},
{
"epoch": 1.3952641629535327,
"grad_norm": 0.8546739220619202,
"learning_rate": 2.679209031863162e-05,
"loss": 2.2035,
"step": 27400
},
{
"epoch": 1.40035646085296,
"grad_norm": 1.585236668586731,
"learning_rate": 2.6707076546400515e-05,
"loss": 2.1845,
"step": 27500
},
{
"epoch": 1.405448758752387,
"grad_norm": 0.9497547745704651,
"learning_rate": 2.6622062774169415e-05,
"loss": 2.1886,
"step": 27600
},
{
"epoch": 1.4105410566518142,
"grad_norm": 0.8747720718383789,
"learning_rate": 2.6537049001938314e-05,
"loss": 2.1735,
"step": 27700
},
{
"epoch": 1.4156333545512412,
"grad_norm": 0.9204273223876953,
"learning_rate": 2.6452035229707213e-05,
"loss": 2.2153,
"step": 27800
},
{
"epoch": 1.4207256524506684,
"grad_norm": 0.868325412273407,
"learning_rate": 2.636702145747611e-05,
"loss": 2.209,
"step": 27900
},
{
"epoch": 1.4258179503500954,
"grad_norm": 0.9367715716362,
"learning_rate": 2.6282007685245015e-05,
"loss": 2.1868,
"step": 28000
},
{
"epoch": 1.4309102482495226,
"grad_norm": 0.9658358693122864,
"learning_rate": 2.619699391301391e-05,
"loss": 2.1757,
"step": 28100
},
{
"epoch": 1.4360025461489498,
"grad_norm": 0.8091734051704407,
"learning_rate": 2.611198014078281e-05,
"loss": 2.1878,
"step": 28200
},
{
"epoch": 1.4410948440483768,
"grad_norm": 0.8200072050094604,
"learning_rate": 2.602696636855171e-05,
"loss": 2.192,
"step": 28300
},
{
"epoch": 1.4461871419478038,
"grad_norm": 0.9280868768692017,
"learning_rate": 2.5941952596320606e-05,
"loss": 2.1829,
"step": 28400
},
{
"epoch": 1.451279439847231,
"grad_norm": 0.9731032252311707,
"learning_rate": 2.5856938824089505e-05,
"loss": 2.156,
"step": 28500
},
{
"epoch": 1.4563717377466583,
"grad_norm": 0.8023040294647217,
"learning_rate": 2.5771925051858404e-05,
"loss": 2.1913,
"step": 28600
},
{
"epoch": 1.4614640356460853,
"grad_norm": 1.003476619720459,
"learning_rate": 2.56869112796273e-05,
"loss": 2.1537,
"step": 28700
},
{
"epoch": 1.4665563335455123,
"grad_norm": 1.0280425548553467,
"learning_rate": 2.56018975073962e-05,
"loss": 2.2106,
"step": 28800
},
{
"epoch": 1.4716486314449395,
"grad_norm": 0.9685016870498657,
"learning_rate": 2.55168837351651e-05,
"loss": 2.1758,
"step": 28900
},
{
"epoch": 1.4767409293443667,
"grad_norm": 0.8572561144828796,
"learning_rate": 2.5431869962933995e-05,
"loss": 2.1647,
"step": 29000
},
{
"epoch": 1.4818332272437937,
"grad_norm": 0.8688543438911438,
"learning_rate": 2.5346856190702894e-05,
"loss": 2.1973,
"step": 29100
},
{
"epoch": 1.486925525143221,
"grad_norm": 1.0197324752807617,
"learning_rate": 2.5261842418471794e-05,
"loss": 2.1649,
"step": 29200
},
{
"epoch": 1.492017823042648,
"grad_norm": 0.8760496377944946,
"learning_rate": 2.517682864624069e-05,
"loss": 2.2024,
"step": 29300
},
{
"epoch": 1.4971101209420752,
"grad_norm": 0.9327671527862549,
"learning_rate": 2.509181487400959e-05,
"loss": 2.2006,
"step": 29400
},
{
"epoch": 1.5022024188415022,
"grad_norm": 0.9184695482254028,
"learning_rate": 2.5006801101778488e-05,
"loss": 2.1616,
"step": 29500
},
{
"epoch": 1.5072947167409292,
"grad_norm": 0.8531858325004578,
"learning_rate": 2.4921787329547387e-05,
"loss": 2.1688,
"step": 29600
},
{
"epoch": 1.5123870146403564,
"grad_norm": 0.8902334570884705,
"learning_rate": 2.4836773557316287e-05,
"loss": 2.1692,
"step": 29700
},
{
"epoch": 1.5174793125397836,
"grad_norm": 0.8231461644172668,
"learning_rate": 2.4751759785085186e-05,
"loss": 2.1855,
"step": 29800
},
{
"epoch": 1.5225716104392109,
"grad_norm": 0.9362125396728516,
"learning_rate": 2.4666746012854082e-05,
"loss": 2.1798,
"step": 29900
},
{
"epoch": 1.5276639083386379,
"grad_norm": 0.8145864009857178,
"learning_rate": 2.458173224062298e-05,
"loss": 2.1655,
"step": 30000
},
{
"epoch": 1.5327562062380649,
"grad_norm": 0.9912553429603577,
"learning_rate": 2.449671846839188e-05,
"loss": 2.2025,
"step": 30100
},
{
"epoch": 1.537848504137492,
"grad_norm": 0.818953275680542,
"learning_rate": 2.4411704696160777e-05,
"loss": 2.1845,
"step": 30200
},
{
"epoch": 1.5429408020369193,
"grad_norm": 0.845649778842926,
"learning_rate": 2.4326690923929676e-05,
"loss": 2.199,
"step": 30300
},
{
"epoch": 1.5480330999363463,
"grad_norm": 1.0135074853897095,
"learning_rate": 2.424167715169858e-05,
"loss": 2.1912,
"step": 30400
},
{
"epoch": 1.5531253978357733,
"grad_norm": 0.9612752199172974,
"learning_rate": 2.4156663379467475e-05,
"loss": 2.159,
"step": 30500
},
{
"epoch": 1.5582176957352005,
"grad_norm": 0.8450791239738464,
"learning_rate": 2.4071649607236374e-05,
"loss": 2.1615,
"step": 30600
},
{
"epoch": 1.5633099936346277,
"grad_norm": 0.9979317784309387,
"learning_rate": 2.3986635835005273e-05,
"loss": 2.1713,
"step": 30700
},
{
"epoch": 1.5684022915340547,
"grad_norm": 0.904403567314148,
"learning_rate": 2.390162206277417e-05,
"loss": 2.2114,
"step": 30800
},
{
"epoch": 1.5734945894334817,
"grad_norm": 0.8977887630462646,
"learning_rate": 2.381660829054307e-05,
"loss": 2.1867,
"step": 30900
},
{
"epoch": 1.578586887332909,
"grad_norm": 0.9076321125030518,
"learning_rate": 2.3731594518311968e-05,
"loss": 2.167,
"step": 31000
},
{
"epoch": 1.5836791852323362,
"grad_norm": 0.9048725962638855,
"learning_rate": 2.3646580746080864e-05,
"loss": 2.1645,
"step": 31100
},
{
"epoch": 1.5887714831317632,
"grad_norm": 0.9547775387763977,
"learning_rate": 2.3561566973849763e-05,
"loss": 2.1849,
"step": 31200
},
{
"epoch": 1.5938637810311902,
"grad_norm": 0.7886509299278259,
"learning_rate": 2.3476553201618666e-05,
"loss": 2.187,
"step": 31300
},
{
"epoch": 1.5989560789306174,
"grad_norm": 0.8473970293998718,
"learning_rate": 2.3391539429387562e-05,
"loss": 2.1722,
"step": 31400
},
{
"epoch": 1.6040483768300446,
"grad_norm": 0.8617937564849854,
"learning_rate": 2.330652565715646e-05,
"loss": 2.2002,
"step": 31500
},
{
"epoch": 1.6091406747294716,
"grad_norm": 0.9672524333000183,
"learning_rate": 2.322151188492536e-05,
"loss": 2.1623,
"step": 31600
},
{
"epoch": 1.6142329726288986,
"grad_norm": 0.8769922852516174,
"learning_rate": 2.3136498112694256e-05,
"loss": 2.1695,
"step": 31700
},
{
"epoch": 1.6193252705283259,
"grad_norm": 0.8249488472938538,
"learning_rate": 2.3051484340463156e-05,
"loss": 2.1647,
"step": 31800
},
{
"epoch": 1.624417568427753,
"grad_norm": 0.9503587484359741,
"learning_rate": 2.2966470568232055e-05,
"loss": 2.2024,
"step": 31900
},
{
"epoch": 1.62950986632718,
"grad_norm": 0.9500870108604431,
"learning_rate": 2.288145679600095e-05,
"loss": 2.1467,
"step": 32000
},
{
"epoch": 1.634602164226607,
"grad_norm": 0.888297975063324,
"learning_rate": 2.279644302376985e-05,
"loss": 2.1586,
"step": 32100
},
{
"epoch": 1.6396944621260343,
"grad_norm": 0.8958535194396973,
"learning_rate": 2.2711429251538753e-05,
"loss": 2.1923,
"step": 32200
},
{
"epoch": 1.6447867600254615,
"grad_norm": 0.7949930429458618,
"learning_rate": 2.262641547930765e-05,
"loss": 2.1925,
"step": 32300
},
{
"epoch": 1.6498790579248888,
"grad_norm": 0.8516358733177185,
"learning_rate": 2.2541401707076548e-05,
"loss": 2.1818,
"step": 32400
},
{
"epoch": 1.6549713558243158,
"grad_norm": 0.9597014784812927,
"learning_rate": 2.2456387934845448e-05,
"loss": 2.1412,
"step": 32500
},
{
"epoch": 1.6600636537237428,
"grad_norm": 0.8643897771835327,
"learning_rate": 2.2371374162614344e-05,
"loss": 2.1645,
"step": 32600
},
{
"epoch": 1.66515595162317,
"grad_norm": 1.069393515586853,
"learning_rate": 2.2286360390383243e-05,
"loss": 2.1468,
"step": 32700
},
{
"epoch": 1.6702482495225972,
"grad_norm": 0.8896872401237488,
"learning_rate": 2.2201346618152142e-05,
"loss": 2.1732,
"step": 32800
},
{
"epoch": 1.6753405474220242,
"grad_norm": 0.8662711381912231,
"learning_rate": 2.2116332845921038e-05,
"loss": 2.1901,
"step": 32900
},
{
"epoch": 1.6804328453214512,
"grad_norm": 0.7606475353240967,
"learning_rate": 2.2031319073689937e-05,
"loss": 2.2045,
"step": 33000
},
{
"epoch": 1.6855251432208784,
"grad_norm": 0.9675360918045044,
"learning_rate": 2.1946305301458837e-05,
"loss": 2.1782,
"step": 33100
},
{
"epoch": 1.6906174411203057,
"grad_norm": 0.8184406757354736,
"learning_rate": 2.1861291529227736e-05,
"loss": 2.1827,
"step": 33200
},
{
"epoch": 1.6957097390197327,
"grad_norm": 0.8774561882019043,
"learning_rate": 2.1776277756996635e-05,
"loss": 2.1592,
"step": 33300
},
{
"epoch": 1.7008020369191597,
"grad_norm": 0.8667624592781067,
"learning_rate": 2.1691263984765535e-05,
"loss": 2.1779,
"step": 33400
},
{
"epoch": 1.7058943348185869,
"grad_norm": 0.9804625511169434,
"learning_rate": 2.160625021253443e-05,
"loss": 2.1985,
"step": 33500
},
{
"epoch": 1.710986632718014,
"grad_norm": 0.9614706039428711,
"learning_rate": 2.152123644030333e-05,
"loss": 2.1687,
"step": 33600
},
{
"epoch": 1.716078930617441,
"grad_norm": 0.8270972967147827,
"learning_rate": 2.143622266807223e-05,
"loss": 2.1539,
"step": 33700
},
{
"epoch": 1.721171228516868,
"grad_norm": 0.9252774119377136,
"learning_rate": 2.1351208895841125e-05,
"loss": 2.16,
"step": 33800
},
{
"epoch": 1.7262635264162953,
"grad_norm": 0.855818510055542,
"learning_rate": 2.1266195123610025e-05,
"loss": 2.1928,
"step": 33900
},
{
"epoch": 1.7313558243157225,
"grad_norm": 0.8505380153656006,
"learning_rate": 2.1181181351378924e-05,
"loss": 2.1748,
"step": 34000
},
{
"epoch": 1.7364481222151495,
"grad_norm": 0.8876926898956299,
"learning_rate": 2.1096167579147823e-05,
"loss": 2.2102,
"step": 34100
},
{
"epoch": 1.7415404201145765,
"grad_norm": 0.8772891163825989,
"learning_rate": 2.1011153806916723e-05,
"loss": 2.1691,
"step": 34200
},
{
"epoch": 1.7466327180140038,
"grad_norm": 0.9799501299858093,
"learning_rate": 2.0926140034685622e-05,
"loss": 2.1858,
"step": 34300
},
{
"epoch": 1.751725015913431,
"grad_norm": 0.8863718509674072,
"learning_rate": 2.0841126262454518e-05,
"loss": 2.1745,
"step": 34400
},
{
"epoch": 1.7568173138128582,
"grad_norm": 0.8394114375114441,
"learning_rate": 2.0756112490223417e-05,
"loss": 2.1629,
"step": 34500
},
{
"epoch": 1.7619096117122852,
"grad_norm": 0.8472095727920532,
"learning_rate": 2.0671098717992317e-05,
"loss": 2.1665,
"step": 34600
},
{
"epoch": 1.7670019096117122,
"grad_norm": 0.9460027813911438,
"learning_rate": 2.0586084945761212e-05,
"loss": 2.1458,
"step": 34700
},
{
"epoch": 1.7720942075111394,
"grad_norm": 0.9211781620979309,
"learning_rate": 2.0501071173530112e-05,
"loss": 2.1922,
"step": 34800
},
{
"epoch": 1.7771865054105667,
"grad_norm": 0.9996361136436462,
"learning_rate": 2.041605740129901e-05,
"loss": 2.1447,
"step": 34900
},
{
"epoch": 1.7822788033099937,
"grad_norm": 0.8266726136207581,
"learning_rate": 2.033104362906791e-05,
"loss": 2.1881,
"step": 35000
},
{
"epoch": 1.7873711012094207,
"grad_norm": 0.8855674862861633,
"learning_rate": 2.024602985683681e-05,
"loss": 2.198,
"step": 35100
},
{
"epoch": 1.7924633991088479,
"grad_norm": 0.9789201021194458,
"learning_rate": 2.016101608460571e-05,
"loss": 2.1685,
"step": 35200
},
{
"epoch": 1.797555697008275,
"grad_norm": 0.8354413509368896,
"learning_rate": 2.0076002312374605e-05,
"loss": 2.1535,
"step": 35300
},
{
"epoch": 1.8026479949077021,
"grad_norm": 0.9418453574180603,
"learning_rate": 1.9990988540143504e-05,
"loss": 2.1671,
"step": 35400
},
{
"epoch": 1.8077402928071291,
"grad_norm": 0.9462503790855408,
"learning_rate": 1.9905974767912404e-05,
"loss": 2.1339,
"step": 35500
},
{
"epoch": 1.8128325907065563,
"grad_norm": 0.8490837216377258,
"learning_rate": 1.98209609956813e-05,
"loss": 2.1528,
"step": 35600
},
{
"epoch": 1.8179248886059836,
"grad_norm": 0.9105218052864075,
"learning_rate": 1.97359472234502e-05,
"loss": 2.1717,
"step": 35700
},
{
"epoch": 1.8230171865054106,
"grad_norm": 0.9058020710945129,
"learning_rate": 1.9650933451219098e-05,
"loss": 2.1535,
"step": 35800
},
{
"epoch": 1.8281094844048376,
"grad_norm": 0.9724037647247314,
"learning_rate": 1.9565919678987994e-05,
"loss": 2.166,
"step": 35900
},
{
"epoch": 1.8332017823042648,
"grad_norm": 0.9018999338150024,
"learning_rate": 1.9480905906756897e-05,
"loss": 2.1528,
"step": 36000
},
{
"epoch": 1.838294080203692,
"grad_norm": 0.9223784804344177,
"learning_rate": 1.9395892134525796e-05,
"loss": 2.1982,
"step": 36100
},
{
"epoch": 1.843386378103119,
"grad_norm": 0.8883550763130188,
"learning_rate": 1.9310878362294692e-05,
"loss": 2.1701,
"step": 36200
},
{
"epoch": 1.848478676002546,
"grad_norm": 0.8294488787651062,
"learning_rate": 1.922586459006359e-05,
"loss": 2.2064,
"step": 36300
},
{
"epoch": 1.8535709739019732,
"grad_norm": 0.8737560510635376,
"learning_rate": 1.914085081783249e-05,
"loss": 2.1529,
"step": 36400
},
{
"epoch": 1.8586632718014005,
"grad_norm": 0.8156319260597229,
"learning_rate": 1.9055837045601387e-05,
"loss": 2.1628,
"step": 36500
},
{
"epoch": 1.8637555697008275,
"grad_norm": 0.8669657111167908,
"learning_rate": 1.8970823273370286e-05,
"loss": 2.2155,
"step": 36600
},
{
"epoch": 1.8688478676002545,
"grad_norm": 0.8657876253128052,
"learning_rate": 1.8885809501139185e-05,
"loss": 2.1506,
"step": 36700
},
{
"epoch": 1.8739401654996817,
"grad_norm": 0.8771129250526428,
"learning_rate": 1.880079572890808e-05,
"loss": 2.1797,
"step": 36800
},
{
"epoch": 1.879032463399109,
"grad_norm": 0.8845404982566833,
"learning_rate": 1.8715781956676984e-05,
"loss": 2.1434,
"step": 36900
},
{
"epoch": 1.8841247612985361,
"grad_norm": 0.9354609251022339,
"learning_rate": 1.8630768184445883e-05,
"loss": 2.1701,
"step": 37000
},
{
"epoch": 1.8892170591979631,
"grad_norm": 0.7781304717063904,
"learning_rate": 1.854575441221478e-05,
"loss": 2.2095,
"step": 37100
},
{
"epoch": 1.8943093570973901,
"grad_norm": 0.9069561958312988,
"learning_rate": 1.846074063998368e-05,
"loss": 2.169,
"step": 37200
},
{
"epoch": 1.8994016549968173,
"grad_norm": 0.9173194766044617,
"learning_rate": 1.8375726867752578e-05,
"loss": 2.121,
"step": 37300
},
{
"epoch": 1.9044939528962446,
"grad_norm": 0.864583432674408,
"learning_rate": 1.8290713095521474e-05,
"loss": 2.1711,
"step": 37400
},
{
"epoch": 1.9095862507956716,
"grad_norm": 0.7620731592178345,
"learning_rate": 1.8205699323290373e-05,
"loss": 2.1967,
"step": 37500
},
{
"epoch": 1.9146785486950986,
"grad_norm": 0.7830232977867126,
"learning_rate": 1.8120685551059273e-05,
"loss": 2.1574,
"step": 37600
},
{
"epoch": 1.9197708465945258,
"grad_norm": 0.8825329542160034,
"learning_rate": 1.803567177882817e-05,
"loss": 2.1432,
"step": 37700
},
{
"epoch": 1.924863144493953,
"grad_norm": 0.9680500030517578,
"learning_rate": 1.795065800659707e-05,
"loss": 2.1808,
"step": 37800
},
{
"epoch": 1.92995544239338,
"grad_norm": 0.9914782047271729,
"learning_rate": 1.786564423436597e-05,
"loss": 2.1945,
"step": 37900
},
{
"epoch": 1.935047740292807,
"grad_norm": 0.882604718208313,
"learning_rate": 1.7780630462134867e-05,
"loss": 2.15,
"step": 38000
},
{
"epoch": 1.9401400381922342,
"grad_norm": 0.8211714625358582,
"learning_rate": 1.7695616689903766e-05,
"loss": 2.178,
"step": 38100
},
{
"epoch": 1.9452323360916615,
"grad_norm": 0.9662156701087952,
"learning_rate": 1.7610602917672665e-05,
"loss": 2.164,
"step": 38200
},
{
"epoch": 1.9503246339910885,
"grad_norm": 0.8627343773841858,
"learning_rate": 1.752558914544156e-05,
"loss": 2.1977,
"step": 38300
},
{
"epoch": 1.9554169318905155,
"grad_norm": 0.8883799910545349,
"learning_rate": 1.744057537321046e-05,
"loss": 2.1813,
"step": 38400
},
{
"epoch": 1.9605092297899427,
"grad_norm": 0.9309747219085693,
"learning_rate": 1.735556160097936e-05,
"loss": 2.1686,
"step": 38500
},
{
"epoch": 1.96560152768937,
"grad_norm": 0.9126595854759216,
"learning_rate": 1.7270547828748256e-05,
"loss": 2.1571,
"step": 38600
},
{
"epoch": 1.970693825588797,
"grad_norm": 0.9490466117858887,
"learning_rate": 1.718553405651716e-05,
"loss": 2.1589,
"step": 38700
},
{
"epoch": 1.975786123488224,
"grad_norm": 0.8641236424446106,
"learning_rate": 1.7100520284286058e-05,
"loss": 2.1728,
"step": 38800
},
{
"epoch": 1.9808784213876511,
"grad_norm": 1.040710210800171,
"learning_rate": 1.7015506512054954e-05,
"loss": 2.1888,
"step": 38900
},
{
"epoch": 1.9859707192870784,
"grad_norm": 0.8207067251205444,
"learning_rate": 1.6930492739823853e-05,
"loss": 2.1504,
"step": 39000
},
{
"epoch": 1.9910630171865054,
"grad_norm": 0.8451477289199829,
"learning_rate": 1.6845478967592752e-05,
"loss": 2.1791,
"step": 39100
},
{
"epoch": 1.9961553150859326,
"grad_norm": 0.9080301523208618,
"learning_rate": 1.6760465195361648e-05,
"loss": 2.181,
"step": 39200
},
{
"epoch": 2.0012221514958624,
"grad_norm": 0.9207277297973633,
"learning_rate": 1.6675451423130548e-05,
"loss": 2.1537,
"step": 39300
},
{
"epoch": 2.0063144493952896,
"grad_norm": 0.8472919464111328,
"learning_rate": 1.6590437650899447e-05,
"loss": 2.1691,
"step": 39400
},
{
"epoch": 2.011406747294717,
"grad_norm": 0.9604014754295349,
"learning_rate": 1.6505423878668343e-05,
"loss": 2.1409,
"step": 39500
},
{
"epoch": 2.016499045194144,
"grad_norm": 0.957785964012146,
"learning_rate": 1.6420410106437242e-05,
"loss": 2.126,
"step": 39600
},
{
"epoch": 2.021591343093571,
"grad_norm": 0.8542806506156921,
"learning_rate": 1.6335396334206145e-05,
"loss": 2.1532,
"step": 39700
},
{
"epoch": 2.026683640992998,
"grad_norm": 0.9949219822883606,
"learning_rate": 1.625038256197504e-05,
"loss": 2.1437,
"step": 39800
},
{
"epoch": 2.0317759388924252,
"grad_norm": 0.8735845685005188,
"learning_rate": 1.616536878974394e-05,
"loss": 2.133,
"step": 39900
},
{
"epoch": 2.0368682367918525,
"grad_norm": 0.9472355842590332,
"learning_rate": 1.608035501751284e-05,
"loss": 2.146,
"step": 40000
},
{
"epoch": 2.0419605346912792,
"grad_norm": 0.9042348861694336,
"learning_rate": 1.5995341245281735e-05,
"loss": 2.1288,
"step": 40100
},
{
"epoch": 2.0470528325907065,
"grad_norm": 0.8667154908180237,
"learning_rate": 1.5910327473050635e-05,
"loss": 2.1182,
"step": 40200
},
{
"epoch": 2.0521451304901337,
"grad_norm": 0.9168582558631897,
"learning_rate": 1.5825313700819534e-05,
"loss": 2.1227,
"step": 40300
},
{
"epoch": 2.057237428389561,
"grad_norm": 0.8843423128128052,
"learning_rate": 1.574029992858843e-05,
"loss": 2.1564,
"step": 40400
},
{
"epoch": 2.0623297262889877,
"grad_norm": 0.8709278106689453,
"learning_rate": 1.565528615635733e-05,
"loss": 2.129,
"step": 40500
},
{
"epoch": 2.067422024188415,
"grad_norm": 1.0448068380355835,
"learning_rate": 1.5570272384126232e-05,
"loss": 2.1259,
"step": 40600
},
{
"epoch": 2.072514322087842,
"grad_norm": 1.014841914176941,
"learning_rate": 1.5485258611895128e-05,
"loss": 2.1526,
"step": 40700
},
{
"epoch": 2.0776066199872694,
"grad_norm": 0.9346544146537781,
"learning_rate": 1.5400244839664027e-05,
"loss": 2.1349,
"step": 40800
},
{
"epoch": 2.082698917886696,
"grad_norm": 1.029351830482483,
"learning_rate": 1.5315231067432927e-05,
"loss": 2.1224,
"step": 40900
},
{
"epoch": 2.0877912157861234,
"grad_norm": 0.8560373783111572,
"learning_rate": 1.5230217295201824e-05,
"loss": 2.0945,
"step": 41000
},
{
"epoch": 2.0928835136855506,
"grad_norm": 0.8771845698356628,
"learning_rate": 1.5145203522970722e-05,
"loss": 2.1215,
"step": 41100
},
{
"epoch": 2.097975811584978,
"grad_norm": 0.7786750197410583,
"learning_rate": 1.506018975073962e-05,
"loss": 2.1119,
"step": 41200
},
{
"epoch": 2.103068109484405,
"grad_norm": 0.8961013555526733,
"learning_rate": 1.4975175978508519e-05,
"loss": 2.1284,
"step": 41300
},
{
"epoch": 2.108160407383832,
"grad_norm": 0.7917054295539856,
"learning_rate": 1.4890162206277417e-05,
"loss": 2.1663,
"step": 41400
},
{
"epoch": 2.113252705283259,
"grad_norm": 0.9229695200920105,
"learning_rate": 1.4805148434046318e-05,
"loss": 2.1255,
"step": 41500
},
{
"epoch": 2.1183450031826863,
"grad_norm": 0.8761498332023621,
"learning_rate": 1.4720134661815215e-05,
"loss": 2.1271,
"step": 41600
},
{
"epoch": 2.1234373010821135,
"grad_norm": 0.8369442820549011,
"learning_rate": 1.4635120889584114e-05,
"loss": 2.1381,
"step": 41700
},
{
"epoch": 2.1285295989815403,
"grad_norm": 1.058815836906433,
"learning_rate": 1.4550107117353012e-05,
"loss": 2.1253,
"step": 41800
},
{
"epoch": 2.1336218968809675,
"grad_norm": 0.8793694972991943,
"learning_rate": 1.4465093345121911e-05,
"loss": 2.1327,
"step": 41900
},
{
"epoch": 2.1387141947803947,
"grad_norm": 0.9903535842895508,
"learning_rate": 1.4380079572890809e-05,
"loss": 2.1585,
"step": 42000
},
{
"epoch": 2.143806492679822,
"grad_norm": 0.8910212516784668,
"learning_rate": 1.4295065800659707e-05,
"loss": 2.1482,
"step": 42100
},
{
"epoch": 2.1488987905792487,
"grad_norm": 0.9088174700737,
"learning_rate": 1.4210052028428606e-05,
"loss": 2.1391,
"step": 42200
},
{
"epoch": 2.153991088478676,
"grad_norm": 0.9213513731956482,
"learning_rate": 1.4125038256197504e-05,
"loss": 2.1447,
"step": 42300
},
{
"epoch": 2.159083386378103,
"grad_norm": 0.9317104816436768,
"learning_rate": 1.4040024483966401e-05,
"loss": 2.1115,
"step": 42400
},
{
"epoch": 2.1641756842775304,
"grad_norm": 0.7989690899848938,
"learning_rate": 1.3955010711735302e-05,
"loss": 2.1385,
"step": 42500
},
{
"epoch": 2.169267982176957,
"grad_norm": 0.8436581492424011,
"learning_rate": 1.3869996939504202e-05,
"loss": 2.1487,
"step": 42600
},
{
"epoch": 2.1743602800763844,
"grad_norm": 0.9113427400588989,
"learning_rate": 1.37849831672731e-05,
"loss": 2.0851,
"step": 42700
},
{
"epoch": 2.1794525779758116,
"grad_norm": 0.8313522338867188,
"learning_rate": 1.3699969395041997e-05,
"loss": 2.1502,
"step": 42800
},
{
"epoch": 2.184544875875239,
"grad_norm": 0.9525701999664307,
"learning_rate": 1.3614955622810896e-05,
"loss": 2.1206,
"step": 42900
},
{
"epoch": 2.189637173774666,
"grad_norm": 0.9474479556083679,
"learning_rate": 1.3529941850579794e-05,
"loss": 2.1117,
"step": 43000
},
{
"epoch": 2.194729471674093,
"grad_norm": 0.8311910629272461,
"learning_rate": 1.3444928078348693e-05,
"loss": 2.1268,
"step": 43100
},
{
"epoch": 2.19982176957352,
"grad_norm": 0.879364013671875,
"learning_rate": 1.335991430611759e-05,
"loss": 2.1426,
"step": 43200
},
{
"epoch": 2.2049140674729473,
"grad_norm": 0.8633144497871399,
"learning_rate": 1.3274900533886488e-05,
"loss": 2.1324,
"step": 43300
},
{
"epoch": 2.210006365372374,
"grad_norm": 0.8333730697631836,
"learning_rate": 1.318988676165539e-05,
"loss": 2.1246,
"step": 43400
},
{
"epoch": 2.2150986632718013,
"grad_norm": 0.8649702072143555,
"learning_rate": 1.3104872989424289e-05,
"loss": 2.122,
"step": 43500
},
{
"epoch": 2.2201909611712285,
"grad_norm": 0.8680943846702576,
"learning_rate": 1.3019859217193186e-05,
"loss": 2.1295,
"step": 43600
},
{
"epoch": 2.2252832590706557,
"grad_norm": 0.9396230578422546,
"learning_rate": 1.2934845444962084e-05,
"loss": 2.1458,
"step": 43700
},
{
"epoch": 2.230375556970083,
"grad_norm": 0.9014144539833069,
"learning_rate": 1.2849831672730983e-05,
"loss": 2.1573,
"step": 43800
},
{
"epoch": 2.2354678548695097,
"grad_norm": 0.9344182014465332,
"learning_rate": 1.2764817900499881e-05,
"loss": 2.1516,
"step": 43900
},
{
"epoch": 2.240560152768937,
"grad_norm": 0.979686439037323,
"learning_rate": 1.267980412826878e-05,
"loss": 2.1307,
"step": 44000
},
{
"epoch": 2.245652450668364,
"grad_norm": 0.8325761556625366,
"learning_rate": 1.2594790356037678e-05,
"loss": 2.1498,
"step": 44100
},
{
"epoch": 2.2507447485677914,
"grad_norm": 0.8997836709022522,
"learning_rate": 1.2509776583806576e-05,
"loss": 2.1494,
"step": 44200
},
{
"epoch": 2.255837046467218,
"grad_norm": 0.8690670132637024,
"learning_rate": 1.2424762811575475e-05,
"loss": 2.1393,
"step": 44300
},
{
"epoch": 2.2609293443666454,
"grad_norm": 0.7817577719688416,
"learning_rate": 1.2339749039344374e-05,
"loss": 2.1341,
"step": 44400
},
{
"epoch": 2.2660216422660726,
"grad_norm": 0.8697742223739624,
"learning_rate": 1.2254735267113272e-05,
"loss": 2.1469,
"step": 44500
},
{
"epoch": 2.2711139401655,
"grad_norm": 0.8965489268302917,
"learning_rate": 1.2169721494882171e-05,
"loss": 2.1257,
"step": 44600
},
{
"epoch": 2.2762062380649266,
"grad_norm": 1.0732325315475464,
"learning_rate": 1.208470772265107e-05,
"loss": 2.1131,
"step": 44700
},
{
"epoch": 2.281298535964354,
"grad_norm": 0.7745924592018127,
"learning_rate": 1.1999693950419968e-05,
"loss": 2.1153,
"step": 44800
},
{
"epoch": 2.286390833863781,
"grad_norm": 0.8988758325576782,
"learning_rate": 1.1914680178188868e-05,
"loss": 2.1545,
"step": 44900
},
{
"epoch": 2.2914831317632083,
"grad_norm": 0.9772248268127441,
"learning_rate": 1.1829666405957767e-05,
"loss": 2.1333,
"step": 45000
},
{
"epoch": 2.296575429662635,
"grad_norm": 0.8579228520393372,
"learning_rate": 1.1744652633726664e-05,
"loss": 2.1122,
"step": 45100
},
{
"epoch": 2.3016677275620623,
"grad_norm": 0.8738901019096375,
"learning_rate": 1.1659638861495562e-05,
"loss": 2.0938,
"step": 45200
},
{
"epoch": 2.3067600254614895,
"grad_norm": 0.8962051868438721,
"learning_rate": 1.1574625089264461e-05,
"loss": 2.1216,
"step": 45300
},
{
"epoch": 2.3118523233609167,
"grad_norm": 0.8730968236923218,
"learning_rate": 1.1489611317033359e-05,
"loss": 2.1067,
"step": 45400
},
{
"epoch": 2.316944621260344,
"grad_norm": 0.9516613483428955,
"learning_rate": 1.1404597544802258e-05,
"loss": 2.1092,
"step": 45500
},
{
"epoch": 2.3220369191597707,
"grad_norm": 1.0411871671676636,
"learning_rate": 1.1319583772571158e-05,
"loss": 2.1199,
"step": 45600
},
{
"epoch": 2.327129217059198,
"grad_norm": 0.9724430441856384,
"learning_rate": 1.1234570000340055e-05,
"loss": 2.1335,
"step": 45700
},
{
"epoch": 2.332221514958625,
"grad_norm": 0.8349046111106873,
"learning_rate": 1.1149556228108955e-05,
"loss": 2.1249,
"step": 45800
},
{
"epoch": 2.337313812858052,
"grad_norm": 0.8713769316673279,
"learning_rate": 1.1064542455877852e-05,
"loss": 2.1054,
"step": 45900
},
{
"epoch": 2.342406110757479,
"grad_norm": 0.8659300208091736,
"learning_rate": 1.0979528683646752e-05,
"loss": 2.1095,
"step": 46000
},
{
"epoch": 2.3474984086569064,
"grad_norm": 1.0436406135559082,
"learning_rate": 1.089451491141565e-05,
"loss": 2.1337,
"step": 46100
},
{
"epoch": 2.3525907065563336,
"grad_norm": 0.8275535106658936,
"learning_rate": 1.0809501139184549e-05,
"loss": 2.1209,
"step": 46200
},
{
"epoch": 2.357683004455761,
"grad_norm": 0.9503908157348633,
"learning_rate": 1.0724487366953446e-05,
"loss": 2.1262,
"step": 46300
},
{
"epoch": 2.3627753023551876,
"grad_norm": 0.8849694728851318,
"learning_rate": 1.0639473594722346e-05,
"loss": 2.121,
"step": 46400
},
{
"epoch": 2.367867600254615,
"grad_norm": 0.8742644786834717,
"learning_rate": 1.0554459822491245e-05,
"loss": 2.1421,
"step": 46500
},
{
"epoch": 2.372959898154042,
"grad_norm": 0.8519076704978943,
"learning_rate": 1.0469446050260143e-05,
"loss": 2.1046,
"step": 46600
},
{
"epoch": 2.3780521960534693,
"grad_norm": 0.8561546206474304,
"learning_rate": 1.038443227802904e-05,
"loss": 2.1262,
"step": 46700
},
{
"epoch": 2.383144493952896,
"grad_norm": 0.8309553265571594,
"learning_rate": 1.029941850579794e-05,
"loss": 2.138,
"step": 46800
},
{
"epoch": 2.3882367918523233,
"grad_norm": 1.0880669355392456,
"learning_rate": 1.0214404733566839e-05,
"loss": 2.0881,
"step": 46900
},
{
"epoch": 2.3933290897517505,
"grad_norm": 0.9982330799102783,
"learning_rate": 1.0129390961335736e-05,
"loss": 2.1086,
"step": 47000
},
{
"epoch": 2.3984213876511777,
"grad_norm": 0.9612807035446167,
"learning_rate": 1.0044377189104636e-05,
"loss": 2.1207,
"step": 47100
},
{
"epoch": 2.4035136855506045,
"grad_norm": 0.848710298538208,
"learning_rate": 9.959363416873533e-06,
"loss": 2.1301,
"step": 47200
},
{
"epoch": 2.4086059834500317,
"grad_norm": 0.8840051889419556,
"learning_rate": 9.874349644642433e-06,
"loss": 2.1118,
"step": 47300
},
{
"epoch": 2.413698281349459,
"grad_norm": 0.916346549987793,
"learning_rate": 9.789335872411332e-06,
"loss": 2.128,
"step": 47400
},
{
"epoch": 2.418790579248886,
"grad_norm": 0.8974706530570984,
"learning_rate": 9.70432210018023e-06,
"loss": 2.1452,
"step": 47500
},
{
"epoch": 2.423882877148313,
"grad_norm": 1.0237131118774414,
"learning_rate": 9.619308327949127e-06,
"loss": 2.121,
"step": 47600
},
{
"epoch": 2.42897517504774,
"grad_norm": 0.9156752228736877,
"learning_rate": 9.534294555718027e-06,
"loss": 2.0985,
"step": 47700
},
{
"epoch": 2.4340674729471674,
"grad_norm": 0.9210427403450012,
"learning_rate": 9.449280783486926e-06,
"loss": 2.0653,
"step": 47800
},
{
"epoch": 2.4391597708465946,
"grad_norm": 0.8185928463935852,
"learning_rate": 9.364267011255824e-06,
"loss": 2.0994,
"step": 47900
},
{
"epoch": 2.444252068746022,
"grad_norm": 0.923605740070343,
"learning_rate": 9.279253239024723e-06,
"loss": 2.1402,
"step": 48000
},
{
"epoch": 2.4493443666454486,
"grad_norm": 0.8515633344650269,
"learning_rate": 9.19423946679362e-06,
"loss": 2.1273,
"step": 48100
},
{
"epoch": 2.454436664544876,
"grad_norm": 0.8325629830360413,
"learning_rate": 9.109225694562518e-06,
"loss": 2.0974,
"step": 48200
},
{
"epoch": 2.459528962444303,
"grad_norm": 0.8125095963478088,
"learning_rate": 9.02421192233142e-06,
"loss": 2.1157,
"step": 48300
},
{
"epoch": 2.4646212603437303,
"grad_norm": 0.8951058387756348,
"learning_rate": 8.939198150100317e-06,
"loss": 2.1111,
"step": 48400
},
{
"epoch": 2.469713558243157,
"grad_norm": 0.8785336017608643,
"learning_rate": 8.854184377869214e-06,
"loss": 2.1412,
"step": 48500
},
{
"epoch": 2.4748058561425843,
"grad_norm": 0.9884998202323914,
"learning_rate": 8.769170605638114e-06,
"loss": 2.1403,
"step": 48600
},
{
"epoch": 2.4798981540420115,
"grad_norm": 0.9092361330986023,
"learning_rate": 8.684156833407011e-06,
"loss": 2.1341,
"step": 48700
},
{
"epoch": 2.4849904519414387,
"grad_norm": 0.9467695951461792,
"learning_rate": 8.59914306117591e-06,
"loss": 2.1098,
"step": 48800
},
{
"epoch": 2.4900827498408655,
"grad_norm": 0.8339031338691711,
"learning_rate": 8.51412928894481e-06,
"loss": 2.1146,
"step": 48900
},
{
"epoch": 2.4951750477402928,
"grad_norm": 0.8132495284080505,
"learning_rate": 8.429115516713708e-06,
"loss": 2.1721,
"step": 49000
},
{
"epoch": 2.50026734563972,
"grad_norm": 0.9209297895431519,
"learning_rate": 8.344101744482605e-06,
"loss": 2.0942,
"step": 49100
},
{
"epoch": 2.505359643539147,
"grad_norm": 0.9470928311347961,
"learning_rate": 8.259087972251506e-06,
"loss": 2.0926,
"step": 49200
},
{
"epoch": 2.510451941438574,
"grad_norm": 0.9337894320487976,
"learning_rate": 8.174074200020404e-06,
"loss": 2.1189,
"step": 49300
},
{
"epoch": 2.515544239338001,
"grad_norm": 0.9764918088912964,
"learning_rate": 8.089060427789302e-06,
"loss": 2.1185,
"step": 49400
},
{
"epoch": 2.5206365372374284,
"grad_norm": 0.894453763961792,
"learning_rate": 8.004046655558201e-06,
"loss": 2.1289,
"step": 49500
},
{
"epoch": 2.5257288351368556,
"grad_norm": 0.8645434379577637,
"learning_rate": 7.919032883327099e-06,
"loss": 2.1025,
"step": 49600
},
{
"epoch": 2.530821133036283,
"grad_norm": 0.8322845101356506,
"learning_rate": 7.834019111095998e-06,
"loss": 2.1128,
"step": 49700
},
{
"epoch": 2.5359134309357096,
"grad_norm": 1.0294426679611206,
"learning_rate": 7.749005338864897e-06,
"loss": 2.1348,
"step": 49800
},
{
"epoch": 2.541005728835137,
"grad_norm": 0.9489388465881348,
"learning_rate": 7.663991566633795e-06,
"loss": 2.1089,
"step": 49900
},
{
"epoch": 2.546098026734564,
"grad_norm": 0.9332979917526245,
"learning_rate": 7.578977794402693e-06,
"loss": 2.1677,
"step": 50000
},
{
"epoch": 2.551190324633991,
"grad_norm": 0.8114882111549377,
"learning_rate": 7.493964022171592e-06,
"loss": 2.1265,
"step": 50100
},
{
"epoch": 2.556282622533418,
"grad_norm": 0.8496439456939697,
"learning_rate": 7.408950249940491e-06,
"loss": 2.1713,
"step": 50200
},
{
"epoch": 2.5613749204328453,
"grad_norm": 1.149905800819397,
"learning_rate": 7.32393647770939e-06,
"loss": 2.1234,
"step": 50300
},
{
"epoch": 2.5664672183322725,
"grad_norm": 1.0552695989608765,
"learning_rate": 7.238922705478287e-06,
"loss": 2.1398,
"step": 50400
},
{
"epoch": 2.5715595162316998,
"grad_norm": 0.9433385133743286,
"learning_rate": 7.153908933247186e-06,
"loss": 2.0986,
"step": 50500
},
{
"epoch": 2.5766518141311265,
"grad_norm": 0.889086127281189,
"learning_rate": 7.068895161016086e-06,
"loss": 2.1338,
"step": 50600
},
{
"epoch": 2.5817441120305538,
"grad_norm": 0.8793154358863831,
"learning_rate": 6.9838813887849835e-06,
"loss": 2.1095,
"step": 50700
},
{
"epoch": 2.586836409929981,
"grad_norm": 0.7565730214118958,
"learning_rate": 6.898867616553882e-06,
"loss": 2.1219,
"step": 50800
},
{
"epoch": 2.5919287078294078,
"grad_norm": 0.8305276036262512,
"learning_rate": 6.8138538443227805e-06,
"loss": 2.099,
"step": 50900
},
{
"epoch": 2.597021005728835,
"grad_norm": 0.9467841386795044,
"learning_rate": 6.728840072091679e-06,
"loss": 2.123,
"step": 51000
},
{
"epoch": 2.602113303628262,
"grad_norm": 0.9913722276687622,
"learning_rate": 6.643826299860578e-06,
"loss": 2.1189,
"step": 51100
},
{
"epoch": 2.6072056015276894,
"grad_norm": 0.9008012413978577,
"learning_rate": 6.558812527629477e-06,
"loss": 2.155,
"step": 51200
},
{
"epoch": 2.6122978994271167,
"grad_norm": 0.9230712056159973,
"learning_rate": 6.473798755398374e-06,
"loss": 2.1333,
"step": 51300
},
{
"epoch": 2.617390197326544,
"grad_norm": 1.0198971033096313,
"learning_rate": 6.388784983167273e-06,
"loss": 2.1374,
"step": 51400
},
{
"epoch": 2.6224824952259707,
"grad_norm": 0.9199273586273193,
"learning_rate": 6.303771210936171e-06,
"loss": 2.1332,
"step": 51500
},
{
"epoch": 2.627574793125398,
"grad_norm": 0.8723760843276978,
"learning_rate": 6.21875743870507e-06,
"loss": 2.1547,
"step": 51600
},
{
"epoch": 2.632667091024825,
"grad_norm": 0.9192347526550293,
"learning_rate": 6.133743666473969e-06,
"loss": 2.1192,
"step": 51700
},
{
"epoch": 2.637759388924252,
"grad_norm": 0.9517456889152527,
"learning_rate": 6.048729894242868e-06,
"loss": 2.1143,
"step": 51800
},
{
"epoch": 2.642851686823679,
"grad_norm": 0.9906876683235168,
"learning_rate": 5.963716122011766e-06,
"loss": 2.1171,
"step": 51900
},
{
"epoch": 2.6479439847231063,
"grad_norm": 0.9755644202232361,
"learning_rate": 5.878702349780665e-06,
"loss": 2.163,
"step": 52000
},
{
"epoch": 2.6530362826225335,
"grad_norm": 0.9300287961959839,
"learning_rate": 5.793688577549564e-06,
"loss": 2.1218,
"step": 52100
},
{
"epoch": 2.6581285805219608,
"grad_norm": 0.8865501284599304,
"learning_rate": 5.7086748053184616e-06,
"loss": 2.1356,
"step": 52200
},
{
"epoch": 2.6632208784213875,
"grad_norm": 0.8156447410583496,
"learning_rate": 5.62366103308736e-06,
"loss": 2.1171,
"step": 52300
},
{
"epoch": 2.6683131763208148,
"grad_norm": 0.8186530470848083,
"learning_rate": 5.538647260856259e-06,
"loss": 2.1052,
"step": 52400
},
{
"epoch": 2.673405474220242,
"grad_norm": 0.790550708770752,
"learning_rate": 5.453633488625157e-06,
"loss": 2.1071,
"step": 52500
},
{
"epoch": 2.6784977721196688,
"grad_norm": 0.8866438865661621,
"learning_rate": 5.368619716394056e-06,
"loss": 2.1354,
"step": 52600
},
{
"epoch": 2.683590070019096,
"grad_norm": 0.9953215718269348,
"learning_rate": 5.283605944162955e-06,
"loss": 2.1383,
"step": 52700
},
{
"epoch": 2.688682367918523,
"grad_norm": 0.9829987287521362,
"learning_rate": 5.198592171931853e-06,
"loss": 2.0919,
"step": 52800
},
{
"epoch": 2.6937746658179504,
"grad_norm": 0.9085790514945984,
"learning_rate": 5.113578399700752e-06,
"loss": 2.1178,
"step": 52900
},
{
"epoch": 2.6988669637173777,
"grad_norm": 0.8004271388053894,
"learning_rate": 5.02856462746965e-06,
"loss": 2.1239,
"step": 53000
},
{
"epoch": 2.7039592616168044,
"grad_norm": 0.9412344098091125,
"learning_rate": 4.943550855238549e-06,
"loss": 2.108,
"step": 53100
},
{
"epoch": 2.7090515595162317,
"grad_norm": 0.9245398640632629,
"learning_rate": 4.858537083007447e-06,
"loss": 2.1241,
"step": 53200
},
{
"epoch": 2.714143857415659,
"grad_norm": 0.9695274233818054,
"learning_rate": 4.7735233107763465e-06,
"loss": 2.1106,
"step": 53300
},
{
"epoch": 2.7192361553150857,
"grad_norm": 0.9269813895225525,
"learning_rate": 4.688509538545244e-06,
"loss": 2.1075,
"step": 53400
},
{
"epoch": 2.724328453214513,
"grad_norm": 0.9783353805541992,
"learning_rate": 4.6034957663141435e-06,
"loss": 2.1127,
"step": 53500
},
{
"epoch": 2.72942075111394,
"grad_norm": 0.9476038813591003,
"learning_rate": 4.518481994083042e-06,
"loss": 2.1284,
"step": 53600
},
{
"epoch": 2.7345130490133673,
"grad_norm": 0.93116295337677,
"learning_rate": 4.43346822185194e-06,
"loss": 2.1004,
"step": 53700
},
{
"epoch": 2.7396053469127946,
"grad_norm": 0.9898892641067505,
"learning_rate": 4.348454449620839e-06,
"loss": 2.1129,
"step": 53800
},
{
"epoch": 2.744697644812222,
"grad_norm": 0.9059526920318604,
"learning_rate": 4.263440677389737e-06,
"loss": 2.1189,
"step": 53900
},
{
"epoch": 2.7497899427116486,
"grad_norm": 0.8806390762329102,
"learning_rate": 4.178426905158636e-06,
"loss": 2.1416,
"step": 54000
},
{
"epoch": 2.754882240611076,
"grad_norm": 0.9231753945350647,
"learning_rate": 4.093413132927534e-06,
"loss": 2.1373,
"step": 54100
},
{
"epoch": 2.759974538510503,
"grad_norm": 0.7574446201324463,
"learning_rate": 4.008399360696434e-06,
"loss": 2.1355,
"step": 54200
},
{
"epoch": 2.76506683640993,
"grad_norm": 0.8553287982940674,
"learning_rate": 3.923385588465331e-06,
"loss": 2.0786,
"step": 54300
},
{
"epoch": 2.770159134309357,
"grad_norm": 0.7898595333099365,
"learning_rate": 3.83837181623423e-06,
"loss": 2.0941,
"step": 54400
},
{
"epoch": 2.7752514322087842,
"grad_norm": 0.8895372748374939,
"learning_rate": 3.7533580440031287e-06,
"loss": 2.1311,
"step": 54500
},
{
"epoch": 2.7803437301082115,
"grad_norm": 0.9352322816848755,
"learning_rate": 3.668344271772027e-06,
"loss": 2.102,
"step": 54600
},
{
"epoch": 2.7854360280076387,
"grad_norm": 1.003927230834961,
"learning_rate": 3.583330499540926e-06,
"loss": 2.1119,
"step": 54700
},
{
"epoch": 2.7905283259070655,
"grad_norm": 0.9228959083557129,
"learning_rate": 3.498316727309824e-06,
"loss": 2.142,
"step": 54800
},
{
"epoch": 2.7956206238064927,
"grad_norm": 0.9431111812591553,
"learning_rate": 3.413302955078723e-06,
"loss": 2.12,
"step": 54900
},
{
"epoch": 2.80071292170592,
"grad_norm": 0.9116231799125671,
"learning_rate": 3.3282891828476215e-06,
"loss": 2.1261,
"step": 55000
},
{
"epoch": 2.8058052196053467,
"grad_norm": 0.9542424082756042,
"learning_rate": 3.2432754106165196e-06,
"loss": 2.1151,
"step": 55100
},
{
"epoch": 2.810897517504774,
"grad_norm": 0.8199505805969238,
"learning_rate": 3.1582616383854185e-06,
"loss": 2.0883,
"step": 55200
},
{
"epoch": 2.815989815404201,
"grad_norm": 0.8526725769042969,
"learning_rate": 3.0732478661543174e-06,
"loss": 2.1094,
"step": 55300
},
{
"epoch": 2.8210821133036283,
"grad_norm": 0.9284189343452454,
"learning_rate": 2.9882340939232155e-06,
"loss": 2.1072,
"step": 55400
},
{
"epoch": 2.8261744112030556,
"grad_norm": 0.9289183616638184,
"learning_rate": 2.9032203216921144e-06,
"loss": 2.1227,
"step": 55500
},
{
"epoch": 2.8312667091024823,
"grad_norm": 1.0548968315124512,
"learning_rate": 2.818206549461013e-06,
"loss": 2.138,
"step": 55600
},
{
"epoch": 2.8363590070019096,
"grad_norm": 0.8402355313301086,
"learning_rate": 2.7331927772299113e-06,
"loss": 2.1394,
"step": 55700
},
{
"epoch": 2.841451304901337,
"grad_norm": 0.9172413349151611,
"learning_rate": 2.64817900499881e-06,
"loss": 2.114,
"step": 55800
},
{
"epoch": 2.8465436028007636,
"grad_norm": 0.8457333445549011,
"learning_rate": 2.5631652327677087e-06,
"loss": 2.1268,
"step": 55900
},
{
"epoch": 2.851635900700191,
"grad_norm": 0.8858858942985535,
"learning_rate": 2.478151460536607e-06,
"loss": 2.0901,
"step": 56000
},
{
"epoch": 2.856728198599618,
"grad_norm": 0.8789589405059814,
"learning_rate": 2.3931376883055057e-06,
"loss": 2.1154,
"step": 56100
},
{
"epoch": 2.8618204964990452,
"grad_norm": 0.9234612584114075,
"learning_rate": 2.308123916074404e-06,
"loss": 2.1106,
"step": 56200
},
{
"epoch": 2.8669127943984725,
"grad_norm": 0.8070857524871826,
"learning_rate": 2.2231101438433026e-06,
"loss": 2.1181,
"step": 56300
},
{
"epoch": 2.8720050922978997,
"grad_norm": 0.9172016978263855,
"learning_rate": 2.138096371612201e-06,
"loss": 2.0832,
"step": 56400
},
{
"epoch": 2.8770973901973265,
"grad_norm": 0.9449873566627502,
"learning_rate": 2.0530825993811e-06,
"loss": 2.126,
"step": 56500
},
{
"epoch": 2.8821896880967537,
"grad_norm": 1.0262093544006348,
"learning_rate": 1.9680688271499985e-06,
"loss": 2.1117,
"step": 56600
},
{
"epoch": 2.887281985996181,
"grad_norm": 0.7934767007827759,
"learning_rate": 1.8830550549188972e-06,
"loss": 2.1256,
"step": 56700
},
{
"epoch": 2.8923742838956077,
"grad_norm": 0.9590465426445007,
"learning_rate": 1.7980412826877954e-06,
"loss": 2.1335,
"step": 56800
},
{
"epoch": 2.897466581795035,
"grad_norm": 1.006219744682312,
"learning_rate": 1.713027510456694e-06,
"loss": 2.0888,
"step": 56900
},
{
"epoch": 2.902558879694462,
"grad_norm": 0.9063106179237366,
"learning_rate": 1.6280137382255926e-06,
"loss": 2.1506,
"step": 57000
},
{
"epoch": 2.9076511775938894,
"grad_norm": 0.8653075695037842,
"learning_rate": 1.542999965994491e-06,
"loss": 2.0845,
"step": 57100
},
{
"epoch": 2.9127434754933166,
"grad_norm": 0.9707706570625305,
"learning_rate": 1.4579861937633898e-06,
"loss": 2.0865,
"step": 57200
},
{
"epoch": 2.9178357733927434,
"grad_norm": 0.9578688740730286,
"learning_rate": 1.3729724215322882e-06,
"loss": 2.1098,
"step": 57300
},
{
"epoch": 2.9229280712921706,
"grad_norm": 0.8037517070770264,
"learning_rate": 1.2879586493011867e-06,
"loss": 2.085,
"step": 57400
},
{
"epoch": 2.928020369191598,
"grad_norm": 0.9694920182228088,
"learning_rate": 1.2029448770700854e-06,
"loss": 2.0926,
"step": 57500
},
{
"epoch": 2.9331126670910246,
"grad_norm": 0.8718476891517639,
"learning_rate": 1.1179311048389841e-06,
"loss": 2.1112,
"step": 57600
},
{
"epoch": 2.938204964990452,
"grad_norm": 0.8940988779067993,
"learning_rate": 1.0329173326078824e-06,
"loss": 2.12,
"step": 57700
},
{
"epoch": 2.943297262889879,
"grad_norm": 0.9514064192771912,
"learning_rate": 9.479035603767811e-07,
"loss": 2.1416,
"step": 57800
},
{
"epoch": 2.9483895607893063,
"grad_norm": 0.9789698719978333,
"learning_rate": 8.628897881456797e-07,
"loss": 2.0913,
"step": 57900
},
{
"epoch": 2.9534818586887335,
"grad_norm": 1.028600811958313,
"learning_rate": 7.778760159145782e-07,
"loss": 2.1142,
"step": 58000
},
{
"epoch": 2.9585741565881603,
"grad_norm": 0.850046694278717,
"learning_rate": 6.928622436834767e-07,
"loss": 2.0929,
"step": 58100
},
{
"epoch": 2.9636664544875875,
"grad_norm": 0.8758450150489807,
"learning_rate": 6.078484714523753e-07,
"loss": 2.0991,
"step": 58200
},
{
"epoch": 2.9687587523870147,
"grad_norm": 0.9652713537216187,
"learning_rate": 5.228346992212739e-07,
"loss": 2.1095,
"step": 58300
},
{
"epoch": 2.973851050286442,
"grad_norm": 1.0260512828826904,
"learning_rate": 4.3782092699017247e-07,
"loss": 2.1069,
"step": 58400
},
{
"epoch": 2.9789433481858687,
"grad_norm": 0.7857241034507751,
"learning_rate": 3.5280715475907095e-07,
"loss": 2.1014,
"step": 58500
},
{
"epoch": 2.984035646085296,
"grad_norm": 0.964096188545227,
"learning_rate": 2.6779338252796954e-07,
"loss": 2.0981,
"step": 58600
},
{
"epoch": 2.989127943984723,
"grad_norm": 0.8568851351737976,
"learning_rate": 1.827796102968681e-07,
"loss": 2.1283,
"step": 58700
},
{
"epoch": 2.9942202418841504,
"grad_norm": 0.9048463702201843,
"learning_rate": 9.776583806576667e-08,
"loss": 2.1011,
"step": 58800
},
{
"epoch": 2.9993125397835776,
"grad_norm": 0.8119781613349915,
"learning_rate": 1.2752065834665216e-08,
"loss": 2.0672,
"step": 58900
}
],
"logging_steps": 100,
"max_steps": 58914,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.462938693632e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}