EncodeRec / last-checkpoint /trainer_state.json
guyhadad01's picture
Training in progress, step 56591, checkpoint
ec296f1 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 56591,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0008835327172165185,
"grad_norm": 5.665971279144287,
"learning_rate": 4.3286219081272084e-07,
"loss": 1.3738,
"step": 50
},
{
"epoch": 0.001767065434433037,
"grad_norm": 5.6161651611328125,
"learning_rate": 8.745583038869259e-07,
"loss": 1.1661,
"step": 100
},
{
"epoch": 0.0026505981516495554,
"grad_norm": 7.866199970245361,
"learning_rate": 1.3162544169611309e-06,
"loss": 1.2107,
"step": 150
},
{
"epoch": 0.003534130868866074,
"grad_norm": 5.07379674911499,
"learning_rate": 1.7579505300353357e-06,
"loss": 0.9855,
"step": 200
},
{
"epoch": 0.004417663586082593,
"grad_norm": 3.2607851028442383,
"learning_rate": 2.199646643109541e-06,
"loss": 0.9431,
"step": 250
},
{
"epoch": 0.005301196303299111,
"grad_norm": 6.517599105834961,
"learning_rate": 2.6413427561837457e-06,
"loss": 0.8566,
"step": 300
},
{
"epoch": 0.00618472902051563,
"grad_norm": 2.8523333072662354,
"learning_rate": 3.0830388692579506e-06,
"loss": 0.8697,
"step": 350
},
{
"epoch": 0.007068261737732148,
"grad_norm": 3.460226058959961,
"learning_rate": 3.5247349823321555e-06,
"loss": 0.8099,
"step": 400
},
{
"epoch": 0.007951794454948667,
"grad_norm": 3.2528891563415527,
"learning_rate": 3.966431095406361e-06,
"loss": 0.766,
"step": 450
},
{
"epoch": 0.008835327172165185,
"grad_norm": 4.1086039543151855,
"learning_rate": 4.408127208480566e-06,
"loss": 0.7402,
"step": 500
},
{
"epoch": 0.009718859889381704,
"grad_norm": 3.8160510063171387,
"learning_rate": 4.849823321554771e-06,
"loss": 0.8769,
"step": 550
},
{
"epoch": 0.010602392606598222,
"grad_norm": 2.901653289794922,
"learning_rate": 5.291519434628975e-06,
"loss": 0.6827,
"step": 600
},
{
"epoch": 0.011485925323814742,
"grad_norm": 2.5824739933013916,
"learning_rate": 5.73321554770318e-06,
"loss": 0.7252,
"step": 650
},
{
"epoch": 0.01236945804103126,
"grad_norm": 2.586138963699341,
"learning_rate": 6.174911660777385e-06,
"loss": 0.7701,
"step": 700
},
{
"epoch": 0.013252990758247778,
"grad_norm": 2.3450210094451904,
"learning_rate": 6.6166077738515904e-06,
"loss": 0.7525,
"step": 750
},
{
"epoch": 0.014136523475464296,
"grad_norm": 2.7902042865753174,
"learning_rate": 7.058303886925795e-06,
"loss": 0.7097,
"step": 800
},
{
"epoch": 0.015020056192680814,
"grad_norm": 3.297929286956787,
"learning_rate": 7.5e-06,
"loss": 0.7575,
"step": 850
},
{
"epoch": 0.015903588909897334,
"grad_norm": 4.028406143188477,
"learning_rate": 7.941696113074205e-06,
"loss": 0.6899,
"step": 900
},
{
"epoch": 0.016787121627113853,
"grad_norm": 2.2513041496276855,
"learning_rate": 8.38339222614841e-06,
"loss": 0.6655,
"step": 950
},
{
"epoch": 0.01767065434433037,
"grad_norm": 2.402355670928955,
"learning_rate": 8.825088339222614e-06,
"loss": 0.6601,
"step": 1000
},
{
"epoch": 0.01855418706154689,
"grad_norm": 4.492621898651123,
"learning_rate": 9.26678445229682e-06,
"loss": 0.6925,
"step": 1050
},
{
"epoch": 0.019437719778763407,
"grad_norm": 3.8099517822265625,
"learning_rate": 9.708480565371025e-06,
"loss": 0.6169,
"step": 1100
},
{
"epoch": 0.020321252495979925,
"grad_norm": 4.58193826675415,
"learning_rate": 1.0150176678445231e-05,
"loss": 0.6367,
"step": 1150
},
{
"epoch": 0.021204785213196443,
"grad_norm": 4.745123863220215,
"learning_rate": 1.0591872791519434e-05,
"loss": 0.615,
"step": 1200
},
{
"epoch": 0.02208831793041296,
"grad_norm": 3.260239601135254,
"learning_rate": 1.103356890459364e-05,
"loss": 0.6869,
"step": 1250
},
{
"epoch": 0.022971850647629483,
"grad_norm": 2.485383987426758,
"learning_rate": 1.1475265017667845e-05,
"loss": 0.7527,
"step": 1300
},
{
"epoch": 0.023855383364846,
"grad_norm": 2.26680326461792,
"learning_rate": 1.191696113074205e-05,
"loss": 0.6124,
"step": 1350
},
{
"epoch": 0.02473891608206252,
"grad_norm": 2.348688840866089,
"learning_rate": 1.2358657243816255e-05,
"loss": 0.6511,
"step": 1400
},
{
"epoch": 0.025622448799279038,
"grad_norm": 2.770859956741333,
"learning_rate": 1.280035335689046e-05,
"loss": 0.7047,
"step": 1450
},
{
"epoch": 0.026505981516495556,
"grad_norm": 3.188656806945801,
"learning_rate": 1.3242049469964666e-05,
"loss": 0.6639,
"step": 1500
},
{
"epoch": 0.027389514233712074,
"grad_norm": 2.7158899307250977,
"learning_rate": 1.368374558303887e-05,
"loss": 0.6795,
"step": 1550
},
{
"epoch": 0.028273046950928592,
"grad_norm": 2.7986080646514893,
"learning_rate": 1.4125441696113076e-05,
"loss": 0.6341,
"step": 1600
},
{
"epoch": 0.02915657966814511,
"grad_norm": 1.9698214530944824,
"learning_rate": 1.456713780918728e-05,
"loss": 0.6031,
"step": 1650
},
{
"epoch": 0.03004011238536163,
"grad_norm": 2.495985507965088,
"learning_rate": 1.5008833922261484e-05,
"loss": 0.5959,
"step": 1700
},
{
"epoch": 0.030923645102578147,
"grad_norm": 2.990360975265503,
"learning_rate": 1.545053003533569e-05,
"loss": 0.6412,
"step": 1750
},
{
"epoch": 0.03180717781979467,
"grad_norm": 3.658212184906006,
"learning_rate": 1.5892226148409894e-05,
"loss": 0.5065,
"step": 1800
},
{
"epoch": 0.03269071053701118,
"grad_norm": 2.010875940322876,
"learning_rate": 1.63339222614841e-05,
"loss": 0.5611,
"step": 1850
},
{
"epoch": 0.033574243254227705,
"grad_norm": 2.408937692642212,
"learning_rate": 1.6775618374558306e-05,
"loss": 0.5298,
"step": 1900
},
{
"epoch": 0.03445777597144422,
"grad_norm": 2.3144407272338867,
"learning_rate": 1.721731448763251e-05,
"loss": 0.5759,
"step": 1950
},
{
"epoch": 0.03534130868866074,
"grad_norm": 2.944115400314331,
"learning_rate": 1.7659010600706715e-05,
"loss": 0.5782,
"step": 2000
},
{
"epoch": 0.03622484140587726,
"grad_norm": 2.3239428997039795,
"learning_rate": 1.810070671378092e-05,
"loss": 0.5221,
"step": 2050
},
{
"epoch": 0.03710837412309378,
"grad_norm": 4.565939426422119,
"learning_rate": 1.8542402826855124e-05,
"loss": 0.5966,
"step": 2100
},
{
"epoch": 0.0379919068403103,
"grad_norm": 2.6089091300964355,
"learning_rate": 1.898409893992933e-05,
"loss": 0.5989,
"step": 2150
},
{
"epoch": 0.038875439557526814,
"grad_norm": 2.4395945072174072,
"learning_rate": 1.9425795053003533e-05,
"loss": 0.5097,
"step": 2200
},
{
"epoch": 0.039758972274743336,
"grad_norm": 2.274600028991699,
"learning_rate": 1.986749116607774e-05,
"loss": 0.4934,
"step": 2250
},
{
"epoch": 0.04064250499195985,
"grad_norm": 2.393251895904541,
"learning_rate": 2.0309187279151945e-05,
"loss": 0.5354,
"step": 2300
},
{
"epoch": 0.04152603770917637,
"grad_norm": 2.613900899887085,
"learning_rate": 2.075088339222615e-05,
"loss": 0.5236,
"step": 2350
},
{
"epoch": 0.04240957042639289,
"grad_norm": 2.233302116394043,
"learning_rate": 2.1192579505300354e-05,
"loss": 0.5057,
"step": 2400
},
{
"epoch": 0.04329310314360941,
"grad_norm": 2.2634503841400146,
"learning_rate": 2.163427561837456e-05,
"loss": 0.5448,
"step": 2450
},
{
"epoch": 0.04417663586082592,
"grad_norm": 1.6744658946990967,
"learning_rate": 2.2075971731448763e-05,
"loss": 0.5418,
"step": 2500
},
{
"epoch": 0.045060168578042445,
"grad_norm": 2.9320178031921387,
"learning_rate": 2.2517667844522968e-05,
"loss": 0.5944,
"step": 2550
},
{
"epoch": 0.04594370129525897,
"grad_norm": 2.2643797397613525,
"learning_rate": 2.2959363957597176e-05,
"loss": 0.4945,
"step": 2600
},
{
"epoch": 0.04682723401247548,
"grad_norm": 2.389902114868164,
"learning_rate": 2.340106007067138e-05,
"loss": 0.5225,
"step": 2650
},
{
"epoch": 0.047710766729692,
"grad_norm": 2.2676665782928467,
"learning_rate": 2.3842756183745584e-05,
"loss": 0.5661,
"step": 2700
},
{
"epoch": 0.04859429944690852,
"grad_norm": 2.340926170349121,
"learning_rate": 2.428445229681979e-05,
"loss": 0.6125,
"step": 2750
},
{
"epoch": 0.04947783216412504,
"grad_norm": 1.925943374633789,
"learning_rate": 2.4726148409893997e-05,
"loss": 0.5105,
"step": 2800
},
{
"epoch": 0.050361364881341554,
"grad_norm": 3.1281192302703857,
"learning_rate": 2.5167844522968198e-05,
"loss": 0.5893,
"step": 2850
},
{
"epoch": 0.051244897598558076,
"grad_norm": 2.345649242401123,
"learning_rate": 2.5609540636042406e-05,
"loss": 0.545,
"step": 2900
},
{
"epoch": 0.05212843031577459,
"grad_norm": 2.9023561477661133,
"learning_rate": 2.605123674911661e-05,
"loss": 0.5299,
"step": 2950
},
{
"epoch": 0.05301196303299111,
"grad_norm": 2.491269588470459,
"learning_rate": 2.649293286219081e-05,
"loss": 0.5186,
"step": 3000
},
{
"epoch": 0.05389549575020763,
"grad_norm": 1.842517375946045,
"learning_rate": 2.693462897526502e-05,
"loss": 0.5259,
"step": 3050
},
{
"epoch": 0.05477902846742415,
"grad_norm": 3.319514274597168,
"learning_rate": 2.7376325088339223e-05,
"loss": 0.6663,
"step": 3100
},
{
"epoch": 0.05566256118464067,
"grad_norm": 2.7143654823303223,
"learning_rate": 2.781802120141343e-05,
"loss": 0.5152,
"step": 3150
},
{
"epoch": 0.056546093901857185,
"grad_norm": 2.8187732696533203,
"learning_rate": 2.8259717314487632e-05,
"loss": 0.5417,
"step": 3200
},
{
"epoch": 0.057429626619073706,
"grad_norm": 2.8348097801208496,
"learning_rate": 2.870141342756184e-05,
"loss": 0.5039,
"step": 3250
},
{
"epoch": 0.05831315933629022,
"grad_norm": 3.6297833919525146,
"learning_rate": 2.9143109540636045e-05,
"loss": 0.4647,
"step": 3300
},
{
"epoch": 0.05919669205350674,
"grad_norm": 2.6729063987731934,
"learning_rate": 2.9584805653710253e-05,
"loss": 0.4652,
"step": 3350
},
{
"epoch": 0.06008022477072326,
"grad_norm": 3.030548572540283,
"learning_rate": 3.0026501766784454e-05,
"loss": 0.4914,
"step": 3400
},
{
"epoch": 0.06096375748793978,
"grad_norm": 1.844643235206604,
"learning_rate": 3.0468197879858658e-05,
"loss": 0.5449,
"step": 3450
},
{
"epoch": 0.061847290205156294,
"grad_norm": 1.6973118782043457,
"learning_rate": 3.090989399293286e-05,
"loss": 0.5072,
"step": 3500
},
{
"epoch": 0.06273082292237281,
"grad_norm": 2.626692295074463,
"learning_rate": 3.135159010600707e-05,
"loss": 0.5639,
"step": 3550
},
{
"epoch": 0.06361435563958934,
"grad_norm": 2.971773624420166,
"learning_rate": 3.179328621908128e-05,
"loss": 0.4729,
"step": 3600
},
{
"epoch": 0.06449788835680585,
"grad_norm": 2.134610414505005,
"learning_rate": 3.2234982332155476e-05,
"loss": 0.6047,
"step": 3650
},
{
"epoch": 0.06538142107402237,
"grad_norm": 1.8596552610397339,
"learning_rate": 3.267667844522969e-05,
"loss": 0.5369,
"step": 3700
},
{
"epoch": 0.0662649537912389,
"grad_norm": 2.5137698650360107,
"learning_rate": 3.311837455830389e-05,
"loss": 0.5014,
"step": 3750
},
{
"epoch": 0.06714848650845541,
"grad_norm": 2.8211522102355957,
"learning_rate": 3.356007067137809e-05,
"loss": 0.5128,
"step": 3800
},
{
"epoch": 0.06803201922567192,
"grad_norm": 2.095426559448242,
"learning_rate": 3.40017667844523e-05,
"loss": 0.5345,
"step": 3850
},
{
"epoch": 0.06891555194288844,
"grad_norm": 2.1965081691741943,
"learning_rate": 3.4443462897526505e-05,
"loss": 0.479,
"step": 3900
},
{
"epoch": 0.06979908466010497,
"grad_norm": 2.1722958087921143,
"learning_rate": 3.488515901060071e-05,
"loss": 0.5652,
"step": 3950
},
{
"epoch": 0.07068261737732148,
"grad_norm": 2.7183449268341064,
"learning_rate": 3.5326855123674914e-05,
"loss": 0.5272,
"step": 4000
},
{
"epoch": 0.071566150094538,
"grad_norm": 2.356076717376709,
"learning_rate": 3.576855123674912e-05,
"loss": 0.4904,
"step": 4050
},
{
"epoch": 0.07244968281175453,
"grad_norm": 1.7549006938934326,
"learning_rate": 3.621024734982332e-05,
"loss": 0.4755,
"step": 4100
},
{
"epoch": 0.07333321552897104,
"grad_norm": 2.0377912521362305,
"learning_rate": 3.665194346289753e-05,
"loss": 0.4897,
"step": 4150
},
{
"epoch": 0.07421674824618756,
"grad_norm": 2.4711716175079346,
"learning_rate": 3.709363957597173e-05,
"loss": 0.4679,
"step": 4200
},
{
"epoch": 0.07510028096340407,
"grad_norm": 2.700162649154663,
"learning_rate": 3.7535335689045936e-05,
"loss": 0.4712,
"step": 4250
},
{
"epoch": 0.0759838136806206,
"grad_norm": 1.9648590087890625,
"learning_rate": 3.797703180212015e-05,
"loss": 0.4779,
"step": 4300
},
{
"epoch": 0.07686734639783711,
"grad_norm": 2.4238970279693604,
"learning_rate": 3.8418727915194345e-05,
"loss": 0.4463,
"step": 4350
},
{
"epoch": 0.07775087911505363,
"grad_norm": 1.745356798171997,
"learning_rate": 3.8860424028268556e-05,
"loss": 0.4917,
"step": 4400
},
{
"epoch": 0.07863441183227014,
"grad_norm": 5.889612197875977,
"learning_rate": 3.930212014134276e-05,
"loss": 0.5572,
"step": 4450
},
{
"epoch": 0.07951794454948667,
"grad_norm": 2.7529609203338623,
"learning_rate": 3.9743816254416965e-05,
"loss": 0.4553,
"step": 4500
},
{
"epoch": 0.08040147726670319,
"grad_norm": 2.4175944328308105,
"learning_rate": 4.018551236749117e-05,
"loss": 0.4598,
"step": 4550
},
{
"epoch": 0.0812850099839197,
"grad_norm": 2.2330217361450195,
"learning_rate": 4.0627208480565374e-05,
"loss": 0.5445,
"step": 4600
},
{
"epoch": 0.08216854270113623,
"grad_norm": 2.4177329540252686,
"learning_rate": 4.106890459363958e-05,
"loss": 0.4537,
"step": 4650
},
{
"epoch": 0.08305207541835274,
"grad_norm": 2.6188764572143555,
"learning_rate": 4.151060070671378e-05,
"loss": 0.5158,
"step": 4700
},
{
"epoch": 0.08393560813556926,
"grad_norm": 3.5044455528259277,
"learning_rate": 4.195229681978799e-05,
"loss": 0.4598,
"step": 4750
},
{
"epoch": 0.08481914085278577,
"grad_norm": 2.2751505374908447,
"learning_rate": 4.239399293286219e-05,
"loss": 0.4662,
"step": 4800
},
{
"epoch": 0.0857026735700023,
"grad_norm": 2.0289080142974854,
"learning_rate": 4.28356890459364e-05,
"loss": 0.459,
"step": 4850
},
{
"epoch": 0.08658620628721882,
"grad_norm": 2.6102516651153564,
"learning_rate": 4.32773851590106e-05,
"loss": 0.4275,
"step": 4900
},
{
"epoch": 0.08746973900443533,
"grad_norm": 2.5842251777648926,
"learning_rate": 4.3719081272084805e-05,
"loss": 0.5575,
"step": 4950
},
{
"epoch": 0.08835327172165185,
"grad_norm": 3.6427652835845947,
"learning_rate": 4.4160777385159016e-05,
"loss": 0.4197,
"step": 5000
},
{
"epoch": 0.08923680443886838,
"grad_norm": 1.8962676525115967,
"learning_rate": 4.4602473498233214e-05,
"loss": 0.4525,
"step": 5050
},
{
"epoch": 0.09012033715608489,
"grad_norm": 2.1373822689056396,
"learning_rate": 4.5044169611307425e-05,
"loss": 0.4469,
"step": 5100
},
{
"epoch": 0.0910038698733014,
"grad_norm": 5.542126178741455,
"learning_rate": 4.548586572438163e-05,
"loss": 0.5283,
"step": 5150
},
{
"epoch": 0.09188740259051793,
"grad_norm": 2.4414310455322266,
"learning_rate": 4.5927561837455834e-05,
"loss": 0.4826,
"step": 5200
},
{
"epoch": 0.09277093530773445,
"grad_norm": 3.52422833442688,
"learning_rate": 4.636925795053004e-05,
"loss": 0.3895,
"step": 5250
},
{
"epoch": 0.09365446802495096,
"grad_norm": 2.1975631713867188,
"learning_rate": 4.681095406360424e-05,
"loss": 0.4873,
"step": 5300
},
{
"epoch": 0.09453800074216748,
"grad_norm": 3.4910616874694824,
"learning_rate": 4.725265017667845e-05,
"loss": 0.4895,
"step": 5350
},
{
"epoch": 0.095421533459384,
"grad_norm": 2.1225690841674805,
"learning_rate": 4.769434628975265e-05,
"loss": 0.4686,
"step": 5400
},
{
"epoch": 0.09630506617660052,
"grad_norm": 2.2319257259368896,
"learning_rate": 4.8136042402826856e-05,
"loss": 0.4723,
"step": 5450
},
{
"epoch": 0.09718859889381704,
"grad_norm": 2.2340879440307617,
"learning_rate": 4.857773851590106e-05,
"loss": 0.5258,
"step": 5500
},
{
"epoch": 0.09807213161103355,
"grad_norm": 3.2808139324188232,
"learning_rate": 4.901943462897527e-05,
"loss": 0.4851,
"step": 5550
},
{
"epoch": 0.09895566432825008,
"grad_norm": 2.4828484058380127,
"learning_rate": 4.946113074204947e-05,
"loss": 0.5311,
"step": 5600
},
{
"epoch": 0.0998391970454666,
"grad_norm": 1.7307246923446655,
"learning_rate": 4.990282685512368e-05,
"loss": 0.411,
"step": 5650
},
{
"epoch": 0.10072272976268311,
"grad_norm": 1.9073278903961182,
"learning_rate": 4.996171290569595e-05,
"loss": 0.4184,
"step": 5700
},
{
"epoch": 0.10160626247989964,
"grad_norm": 1.8571208715438843,
"learning_rate": 4.9912626887357406e-05,
"loss": 0.4071,
"step": 5750
},
{
"epoch": 0.10248979519711615,
"grad_norm": 1.7524621486663818,
"learning_rate": 4.986354086901887e-05,
"loss": 0.4712,
"step": 5800
},
{
"epoch": 0.10337332791433267,
"grad_norm": 4.2943434715271,
"learning_rate": 4.9814454850680335e-05,
"loss": 0.4912,
"step": 5850
},
{
"epoch": 0.10425686063154918,
"grad_norm": 2.398043632507324,
"learning_rate": 4.97653688323418e-05,
"loss": 0.5589,
"step": 5900
},
{
"epoch": 0.10514039334876571,
"grad_norm": 1.9587973356246948,
"learning_rate": 4.9716282814003265e-05,
"loss": 0.4507,
"step": 5950
},
{
"epoch": 0.10602392606598222,
"grad_norm": 2.0629475116729736,
"learning_rate": 4.966719679566473e-05,
"loss": 0.5429,
"step": 6000
},
{
"epoch": 0.10690745878319874,
"grad_norm": 1.6127039194107056,
"learning_rate": 4.961811077732619e-05,
"loss": 0.3789,
"step": 6050
},
{
"epoch": 0.10779099150041525,
"grad_norm": 2.230015993118286,
"learning_rate": 4.956902475898765e-05,
"loss": 0.3949,
"step": 6100
},
{
"epoch": 0.10867452421763178,
"grad_norm": 1.9963310956954956,
"learning_rate": 4.9519938740649116e-05,
"loss": 0.4491,
"step": 6150
},
{
"epoch": 0.1095580569348483,
"grad_norm": 2.2731542587280273,
"learning_rate": 4.947085272231058e-05,
"loss": 0.435,
"step": 6200
},
{
"epoch": 0.11044158965206481,
"grad_norm": 2.447551727294922,
"learning_rate": 4.9421766703972046e-05,
"loss": 0.3865,
"step": 6250
},
{
"epoch": 0.11132512236928134,
"grad_norm": 2.126950740814209,
"learning_rate": 4.9372680685633504e-05,
"loss": 0.4175,
"step": 6300
},
{
"epoch": 0.11220865508649785,
"grad_norm": 2.22995924949646,
"learning_rate": 4.932359466729497e-05,
"loss": 0.4387,
"step": 6350
},
{
"epoch": 0.11309218780371437,
"grad_norm": 1.5801736116409302,
"learning_rate": 4.927450864895643e-05,
"loss": 0.4554,
"step": 6400
},
{
"epoch": 0.11397572052093088,
"grad_norm": 4.113645553588867,
"learning_rate": 4.92254226306179e-05,
"loss": 0.581,
"step": 6450
},
{
"epoch": 0.11485925323814741,
"grad_norm": 1.6027569770812988,
"learning_rate": 4.917633661227936e-05,
"loss": 0.4746,
"step": 6500
},
{
"epoch": 0.11574278595536393,
"grad_norm": 2.0555272102355957,
"learning_rate": 4.912725059394083e-05,
"loss": 0.4511,
"step": 6550
},
{
"epoch": 0.11662631867258044,
"grad_norm": 2.6827495098114014,
"learning_rate": 4.9078164575602285e-05,
"loss": 0.3871,
"step": 6600
},
{
"epoch": 0.11750985138979697,
"grad_norm": 1.969202995300293,
"learning_rate": 4.902907855726375e-05,
"loss": 0.449,
"step": 6650
},
{
"epoch": 0.11839338410701349,
"grad_norm": 1.9535086154937744,
"learning_rate": 4.8979992538925214e-05,
"loss": 0.3458,
"step": 6700
},
{
"epoch": 0.11927691682423,
"grad_norm": 1.7251821756362915,
"learning_rate": 4.893090652058668e-05,
"loss": 0.4791,
"step": 6750
},
{
"epoch": 0.12016044954144652,
"grad_norm": 1.7175688743591309,
"learning_rate": 4.8881820502248144e-05,
"loss": 0.4445,
"step": 6800
},
{
"epoch": 0.12104398225866304,
"grad_norm": 3.1055896282196045,
"learning_rate": 4.88327344839096e-05,
"loss": 0.4907,
"step": 6850
},
{
"epoch": 0.12192751497587956,
"grad_norm": 3.251380681991577,
"learning_rate": 4.8783648465571066e-05,
"loss": 0.5377,
"step": 6900
},
{
"epoch": 0.12281104769309607,
"grad_norm": 2.909510850906372,
"learning_rate": 4.873456244723254e-05,
"loss": 0.5275,
"step": 6950
},
{
"epoch": 0.12369458041031259,
"grad_norm": 2.0700035095214844,
"learning_rate": 4.8685476428893995e-05,
"loss": 0.5489,
"step": 7000
},
{
"epoch": 0.12457811312752912,
"grad_norm": 1.9759315252304077,
"learning_rate": 4.863639041055546e-05,
"loss": 0.3931,
"step": 7050
},
{
"epoch": 0.12546164584474562,
"grad_norm": 1.9036837816238403,
"learning_rate": 4.8587304392216925e-05,
"loss": 0.5155,
"step": 7100
},
{
"epoch": 0.12634517856196215,
"grad_norm": 3.4224536418914795,
"learning_rate": 4.853821837387838e-05,
"loss": 0.4282,
"step": 7150
},
{
"epoch": 0.12722871127917867,
"grad_norm": 3.1725916862487793,
"learning_rate": 4.8489132355539854e-05,
"loss": 0.4639,
"step": 7200
},
{
"epoch": 0.12811224399639518,
"grad_norm": 1.7154817581176758,
"learning_rate": 4.844004633720131e-05,
"loss": 0.5294,
"step": 7250
},
{
"epoch": 0.1289957767136117,
"grad_norm": 2.130659580230713,
"learning_rate": 4.839096031886278e-05,
"loss": 0.4121,
"step": 7300
},
{
"epoch": 0.12987930943082823,
"grad_norm": 1.8878060579299927,
"learning_rate": 4.834187430052424e-05,
"loss": 0.4139,
"step": 7350
},
{
"epoch": 0.13076284214804473,
"grad_norm": 1.9885565042495728,
"learning_rate": 4.82927882821857e-05,
"loss": 0.4311,
"step": 7400
},
{
"epoch": 0.13164637486526126,
"grad_norm": 2.3639650344848633,
"learning_rate": 4.824370226384717e-05,
"loss": 0.4025,
"step": 7450
},
{
"epoch": 0.1325299075824778,
"grad_norm": 3.4997270107269287,
"learning_rate": 4.8194616245508635e-05,
"loss": 0.4791,
"step": 7500
},
{
"epoch": 0.1334134402996943,
"grad_norm": 1.644084095954895,
"learning_rate": 4.814553022717009e-05,
"loss": 0.4498,
"step": 7550
},
{
"epoch": 0.13429697301691082,
"grad_norm": 1.8292336463928223,
"learning_rate": 4.809644420883156e-05,
"loss": 0.4538,
"step": 7600
},
{
"epoch": 0.13518050573412735,
"grad_norm": 3.380443572998047,
"learning_rate": 4.804735819049302e-05,
"loss": 0.4596,
"step": 7650
},
{
"epoch": 0.13606403845134385,
"grad_norm": 1.6248747110366821,
"learning_rate": 4.799827217215449e-05,
"loss": 0.3508,
"step": 7700
},
{
"epoch": 0.13694757116856038,
"grad_norm": 1.6644774675369263,
"learning_rate": 4.794918615381595e-05,
"loss": 0.5145,
"step": 7750
},
{
"epoch": 0.13783110388577688,
"grad_norm": 1.8441638946533203,
"learning_rate": 4.790010013547741e-05,
"loss": 0.3505,
"step": 7800
},
{
"epoch": 0.1387146366029934,
"grad_norm": 1.761982798576355,
"learning_rate": 4.7851014117138874e-05,
"loss": 0.3354,
"step": 7850
},
{
"epoch": 0.13959816932020994,
"grad_norm": 3.417602777481079,
"learning_rate": 4.780192809880034e-05,
"loss": 0.4474,
"step": 7900
},
{
"epoch": 0.14048170203742644,
"grad_norm": 1.7687017917633057,
"learning_rate": 4.7752842080461804e-05,
"loss": 0.3524,
"step": 7950
},
{
"epoch": 0.14136523475464297,
"grad_norm": 3.2442593574523926,
"learning_rate": 4.770375606212327e-05,
"loss": 0.4957,
"step": 8000
},
{
"epoch": 0.1422487674718595,
"grad_norm": 1.813818335533142,
"learning_rate": 4.765467004378473e-05,
"loss": 0.4461,
"step": 8050
},
{
"epoch": 0.143132300189076,
"grad_norm": 1.936123013496399,
"learning_rate": 4.760558402544619e-05,
"loss": 0.4983,
"step": 8100
},
{
"epoch": 0.14401583290629252,
"grad_norm": 2.0068929195404053,
"learning_rate": 4.7556498007107656e-05,
"loss": 0.4535,
"step": 8150
},
{
"epoch": 0.14489936562350905,
"grad_norm": 1.6743545532226562,
"learning_rate": 4.750741198876913e-05,
"loss": 0.3668,
"step": 8200
},
{
"epoch": 0.14578289834072555,
"grad_norm": 1.9963476657867432,
"learning_rate": 4.7458325970430585e-05,
"loss": 0.4688,
"step": 8250
},
{
"epoch": 0.14666643105794208,
"grad_norm": 1.7402074337005615,
"learning_rate": 4.740923995209205e-05,
"loss": 0.3967,
"step": 8300
},
{
"epoch": 0.14754996377515858,
"grad_norm": 2.0074145793914795,
"learning_rate": 4.736015393375351e-05,
"loss": 0.4911,
"step": 8350
},
{
"epoch": 0.1484334964923751,
"grad_norm": 1.7804876565933228,
"learning_rate": 4.731106791541497e-05,
"loss": 0.4076,
"step": 8400
},
{
"epoch": 0.14931702920959164,
"grad_norm": 2.1234054565429688,
"learning_rate": 4.7261981897076444e-05,
"loss": 0.398,
"step": 8450
},
{
"epoch": 0.15020056192680814,
"grad_norm": 2.1532113552093506,
"learning_rate": 4.72128958787379e-05,
"loss": 0.4203,
"step": 8500
},
{
"epoch": 0.15108409464402467,
"grad_norm": 1.8909550905227661,
"learning_rate": 4.7163809860399366e-05,
"loss": 0.414,
"step": 8550
},
{
"epoch": 0.1519676273612412,
"grad_norm": 1.9415462017059326,
"learning_rate": 4.711472384206083e-05,
"loss": 0.3436,
"step": 8600
},
{
"epoch": 0.1528511600784577,
"grad_norm": 2.2018544673919678,
"learning_rate": 4.706563782372229e-05,
"loss": 0.436,
"step": 8650
},
{
"epoch": 0.15373469279567423,
"grad_norm": 1.5418767929077148,
"learning_rate": 4.701655180538376e-05,
"loss": 0.3761,
"step": 8700
},
{
"epoch": 0.15461822551289076,
"grad_norm": 4.974616050720215,
"learning_rate": 4.6967465787045225e-05,
"loss": 0.5579,
"step": 8750
},
{
"epoch": 0.15550175823010726,
"grad_norm": 1.8653486967086792,
"learning_rate": 4.691837976870668e-05,
"loss": 0.441,
"step": 8800
},
{
"epoch": 0.15638529094732379,
"grad_norm": 2.2241523265838623,
"learning_rate": 4.686929375036815e-05,
"loss": 0.5877,
"step": 8850
},
{
"epoch": 0.15726882366454029,
"grad_norm": 1.8084393739700317,
"learning_rate": 4.6820207732029605e-05,
"loss": 0.4081,
"step": 8900
},
{
"epoch": 0.15815235638175681,
"grad_norm": 1.5464160442352295,
"learning_rate": 4.677112171369108e-05,
"loss": 0.4648,
"step": 8950
},
{
"epoch": 0.15903588909897334,
"grad_norm": 1.7731395959854126,
"learning_rate": 4.672203569535254e-05,
"loss": 0.4321,
"step": 9000
},
{
"epoch": 0.15991942181618984,
"grad_norm": 1.8130481243133545,
"learning_rate": 4.6672949677014e-05,
"loss": 0.4226,
"step": 9050
},
{
"epoch": 0.16080295453340637,
"grad_norm": 2.4127371311187744,
"learning_rate": 4.6623863658675464e-05,
"loss": 0.3634,
"step": 9100
},
{
"epoch": 0.1616864872506229,
"grad_norm": 2.362494707107544,
"learning_rate": 4.657477764033693e-05,
"loss": 0.4252,
"step": 9150
},
{
"epoch": 0.1625700199678394,
"grad_norm": 1.855000615119934,
"learning_rate": 4.6525691621998393e-05,
"loss": 0.3899,
"step": 9200
},
{
"epoch": 0.16345355268505593,
"grad_norm": 1.8728185892105103,
"learning_rate": 4.647660560365986e-05,
"loss": 0.4335,
"step": 9250
},
{
"epoch": 0.16433708540227246,
"grad_norm": 1.977250576019287,
"learning_rate": 4.642751958532132e-05,
"loss": 0.4204,
"step": 9300
},
{
"epoch": 0.16522061811948896,
"grad_norm": 4.992434978485107,
"learning_rate": 4.637843356698278e-05,
"loss": 0.5576,
"step": 9350
},
{
"epoch": 0.1661041508367055,
"grad_norm": 1.673086166381836,
"learning_rate": 4.6329347548644245e-05,
"loss": 0.4712,
"step": 9400
},
{
"epoch": 0.166987683553922,
"grad_norm": 1.8109374046325684,
"learning_rate": 4.628026153030571e-05,
"loss": 0.366,
"step": 9450
},
{
"epoch": 0.16787121627113852,
"grad_norm": 1.9352269172668457,
"learning_rate": 4.6231175511967175e-05,
"loss": 0.3932,
"step": 9500
},
{
"epoch": 0.16875474898835505,
"grad_norm": 1.7740451097488403,
"learning_rate": 4.618208949362864e-05,
"loss": 0.4836,
"step": 9550
},
{
"epoch": 0.16963828170557155,
"grad_norm": 2.0106916427612305,
"learning_rate": 4.61330034752901e-05,
"loss": 0.3989,
"step": 9600
},
{
"epoch": 0.17052181442278808,
"grad_norm": 1.5831292867660522,
"learning_rate": 4.608391745695156e-05,
"loss": 0.4025,
"step": 9650
},
{
"epoch": 0.1714053471400046,
"grad_norm": 5.1861371994018555,
"learning_rate": 4.6034831438613027e-05,
"loss": 0.467,
"step": 9700
},
{
"epoch": 0.1722888798572211,
"grad_norm": 3.7466721534729004,
"learning_rate": 4.598574542027449e-05,
"loss": 0.3558,
"step": 9750
},
{
"epoch": 0.17317241257443763,
"grad_norm": 2.143721342086792,
"learning_rate": 4.5936659401935956e-05,
"loss": 0.3623,
"step": 9800
},
{
"epoch": 0.17405594529165416,
"grad_norm": 2.1482434272766113,
"learning_rate": 4.588757338359742e-05,
"loss": 0.3438,
"step": 9850
},
{
"epoch": 0.17493947800887066,
"grad_norm": 1.458309531211853,
"learning_rate": 4.583848736525888e-05,
"loss": 0.4193,
"step": 9900
},
{
"epoch": 0.1758230107260872,
"grad_norm": 1.8698090314865112,
"learning_rate": 4.578940134692034e-05,
"loss": 0.3173,
"step": 9950
},
{
"epoch": 0.1767065434433037,
"grad_norm": 2.087970018386841,
"learning_rate": 4.574031532858181e-05,
"loss": 0.4569,
"step": 10000
},
{
"epoch": 0.17759007616052022,
"grad_norm": 1.6226812601089478,
"learning_rate": 4.569122931024327e-05,
"loss": 0.4538,
"step": 10050
},
{
"epoch": 0.17847360887773675,
"grad_norm": 1.9845385551452637,
"learning_rate": 4.564214329190474e-05,
"loss": 0.4422,
"step": 10100
},
{
"epoch": 0.17935714159495325,
"grad_norm": 1.7016047239303589,
"learning_rate": 4.5593057273566195e-05,
"loss": 0.3747,
"step": 10150
},
{
"epoch": 0.18024067431216978,
"grad_norm": 2.2167670726776123,
"learning_rate": 4.5543971255227666e-05,
"loss": 0.3989,
"step": 10200
},
{
"epoch": 0.1811242070293863,
"grad_norm": 1.464385747909546,
"learning_rate": 4.549488523688913e-05,
"loss": 0.5315,
"step": 10250
},
{
"epoch": 0.1820077397466028,
"grad_norm": 1.2073971033096313,
"learning_rate": 4.544579921855059e-05,
"loss": 0.3565,
"step": 10300
},
{
"epoch": 0.18289127246381934,
"grad_norm": 1.1773017644882202,
"learning_rate": 4.5396713200212054e-05,
"loss": 0.4409,
"step": 10350
},
{
"epoch": 0.18377480518103587,
"grad_norm": 2.4389290809631348,
"learning_rate": 4.534762718187352e-05,
"loss": 0.3762,
"step": 10400
},
{
"epoch": 0.18465833789825237,
"grad_norm": 3.560997247695923,
"learning_rate": 4.529854116353498e-05,
"loss": 0.4571,
"step": 10450
},
{
"epoch": 0.1855418706154689,
"grad_norm": 2.0075438022613525,
"learning_rate": 4.524945514519645e-05,
"loss": 0.3561,
"step": 10500
},
{
"epoch": 0.1864254033326854,
"grad_norm": 2.405439853668213,
"learning_rate": 4.5200369126857906e-05,
"loss": 0.4595,
"step": 10550
},
{
"epoch": 0.18730893604990193,
"grad_norm": 1.6211732625961304,
"learning_rate": 4.515128310851937e-05,
"loss": 0.4576,
"step": 10600
},
{
"epoch": 0.18819246876711845,
"grad_norm": 1.7272285223007202,
"learning_rate": 4.5102197090180835e-05,
"loss": 0.4957,
"step": 10650
},
{
"epoch": 0.18907600148433495,
"grad_norm": 1.529583215713501,
"learning_rate": 4.50531110718423e-05,
"loss": 0.3533,
"step": 10700
},
{
"epoch": 0.18995953420155148,
"grad_norm": 1.3267425298690796,
"learning_rate": 4.5004025053503764e-05,
"loss": 0.5213,
"step": 10750
},
{
"epoch": 0.190843066918768,
"grad_norm": 2.40889573097229,
"learning_rate": 4.495493903516523e-05,
"loss": 0.4372,
"step": 10800
},
{
"epoch": 0.1917265996359845,
"grad_norm": 2.532017230987549,
"learning_rate": 4.4906834737193457e-05,
"loss": 0.3286,
"step": 10850
},
{
"epoch": 0.19261013235320104,
"grad_norm": 3.721505641937256,
"learning_rate": 4.485774871885493e-05,
"loss": 0.4082,
"step": 10900
},
{
"epoch": 0.19349366507041757,
"grad_norm": 2.2368271350860596,
"learning_rate": 4.4808662700516386e-05,
"loss": 0.4056,
"step": 10950
},
{
"epoch": 0.19437719778763407,
"grad_norm": 2.2011897563934326,
"learning_rate": 4.475957668217785e-05,
"loss": 0.4435,
"step": 11000
},
{
"epoch": 0.1952607305048506,
"grad_norm": 2.1512463092803955,
"learning_rate": 4.4710490663839315e-05,
"loss": 0.4272,
"step": 11050
},
{
"epoch": 0.1961442632220671,
"grad_norm": 1.5526123046875,
"learning_rate": 4.466140464550077e-05,
"loss": 0.4334,
"step": 11100
},
{
"epoch": 0.19702779593928363,
"grad_norm": 1.4258567094802856,
"learning_rate": 4.4612318627162245e-05,
"loss": 0.4479,
"step": 11150
},
{
"epoch": 0.19791132865650016,
"grad_norm": 3.2408463954925537,
"learning_rate": 4.456323260882371e-05,
"loss": 0.3545,
"step": 11200
},
{
"epoch": 0.19879486137371666,
"grad_norm": 2.1903252601623535,
"learning_rate": 4.451414659048517e-05,
"loss": 0.3192,
"step": 11250
},
{
"epoch": 0.1996783940909332,
"grad_norm": 1.9699974060058594,
"learning_rate": 4.446506057214663e-05,
"loss": 0.3883,
"step": 11300
},
{
"epoch": 0.20056192680814972,
"grad_norm": 1.7133831977844238,
"learning_rate": 4.441597455380809e-05,
"loss": 0.3312,
"step": 11350
},
{
"epoch": 0.20144545952536622,
"grad_norm": 3.0174543857574463,
"learning_rate": 4.436688853546956e-05,
"loss": 0.4888,
"step": 11400
},
{
"epoch": 0.20232899224258274,
"grad_norm": 2.010566473007202,
"learning_rate": 4.4317802517131026e-05,
"loss": 0.5102,
"step": 11450
},
{
"epoch": 0.20321252495979927,
"grad_norm": 2.093271493911743,
"learning_rate": 4.4268716498792484e-05,
"loss": 0.4133,
"step": 11500
},
{
"epoch": 0.20409605767701577,
"grad_norm": 1.9231561422348022,
"learning_rate": 4.421963048045395e-05,
"loss": 0.4255,
"step": 11550
},
{
"epoch": 0.2049795903942323,
"grad_norm": 1.561781644821167,
"learning_rate": 4.417054446211541e-05,
"loss": 0.3766,
"step": 11600
},
{
"epoch": 0.2058631231114488,
"grad_norm": 2.006748676300049,
"learning_rate": 4.412145844377688e-05,
"loss": 0.3651,
"step": 11650
},
{
"epoch": 0.20674665582866533,
"grad_norm": 1.5192091464996338,
"learning_rate": 4.407237242543834e-05,
"loss": 0.4562,
"step": 11700
},
{
"epoch": 0.20763018854588186,
"grad_norm": 1.820331335067749,
"learning_rate": 4.402328640709981e-05,
"loss": 0.3946,
"step": 11750
},
{
"epoch": 0.20851372126309836,
"grad_norm": 3.302582025527954,
"learning_rate": 4.3974200388761265e-05,
"loss": 0.4075,
"step": 11800
},
{
"epoch": 0.2093972539803149,
"grad_norm": 2.601897716522217,
"learning_rate": 4.392511437042273e-05,
"loss": 0.4304,
"step": 11850
},
{
"epoch": 0.21028078669753142,
"grad_norm": 1.58085036277771,
"learning_rate": 4.3876028352084194e-05,
"loss": 0.3404,
"step": 11900
},
{
"epoch": 0.21116431941474792,
"grad_norm": 1.7569571733474731,
"learning_rate": 4.382694233374566e-05,
"loss": 0.4013,
"step": 11950
},
{
"epoch": 0.21204785213196445,
"grad_norm": 1.9872467517852783,
"learning_rate": 4.3777856315407124e-05,
"loss": 0.4278,
"step": 12000
},
{
"epoch": 0.21293138484918098,
"grad_norm": 1.4981114864349365,
"learning_rate": 4.372877029706858e-05,
"loss": 0.3905,
"step": 12050
},
{
"epoch": 0.21381491756639748,
"grad_norm": 1.6444882154464722,
"learning_rate": 4.3679684278730046e-05,
"loss": 0.4082,
"step": 12100
},
{
"epoch": 0.214698450283614,
"grad_norm": 1.9731707572937012,
"learning_rate": 4.363059826039151e-05,
"loss": 0.3855,
"step": 12150
},
{
"epoch": 0.2155819830008305,
"grad_norm": 2.66648268699646,
"learning_rate": 4.3581512242052976e-05,
"loss": 0.4567,
"step": 12200
},
{
"epoch": 0.21646551571804704,
"grad_norm": 2.0770373344421387,
"learning_rate": 4.353242622371444e-05,
"loss": 0.4368,
"step": 12250
},
{
"epoch": 0.21734904843526356,
"grad_norm": 1.4739536046981812,
"learning_rate": 4.3483340205375905e-05,
"loss": 0.3686,
"step": 12300
},
{
"epoch": 0.21823258115248007,
"grad_norm": 1.8857239484786987,
"learning_rate": 4.343425418703736e-05,
"loss": 0.4163,
"step": 12350
},
{
"epoch": 0.2191161138696966,
"grad_norm": 1.722424030303955,
"learning_rate": 4.3385168168698834e-05,
"loss": 0.3595,
"step": 12400
},
{
"epoch": 0.21999964658691312,
"grad_norm": 1.5602166652679443,
"learning_rate": 4.333608215036029e-05,
"loss": 0.3326,
"step": 12450
},
{
"epoch": 0.22088317930412962,
"grad_norm": 1.7230535745620728,
"learning_rate": 4.328699613202176e-05,
"loss": 0.3775,
"step": 12500
},
{
"epoch": 0.22176671202134615,
"grad_norm": 1.8666094541549683,
"learning_rate": 4.323791011368322e-05,
"loss": 0.3695,
"step": 12550
},
{
"epoch": 0.22265024473856268,
"grad_norm": 3.1689233779907227,
"learning_rate": 4.318882409534468e-05,
"loss": 0.3545,
"step": 12600
},
{
"epoch": 0.22353377745577918,
"grad_norm": 1.8885284662246704,
"learning_rate": 4.313973807700615e-05,
"loss": 0.3548,
"step": 12650
},
{
"epoch": 0.2244173101729957,
"grad_norm": 1.8508330583572388,
"learning_rate": 4.3090652058667615e-05,
"loss": 0.4847,
"step": 12700
},
{
"epoch": 0.22530084289021224,
"grad_norm": 2.1445882320404053,
"learning_rate": 4.304156604032907e-05,
"loss": 0.4,
"step": 12750
},
{
"epoch": 0.22618437560742874,
"grad_norm": 1.721024990081787,
"learning_rate": 4.299248002199054e-05,
"loss": 0.4755,
"step": 12800
},
{
"epoch": 0.22706790832464527,
"grad_norm": 1.7713844776153564,
"learning_rate": 4.2943394003652e-05,
"loss": 0.3399,
"step": 12850
},
{
"epoch": 0.22795144104186177,
"grad_norm": 1.2936394214630127,
"learning_rate": 4.289528970568024e-05,
"loss": 0.3297,
"step": 12900
},
{
"epoch": 0.2288349737590783,
"grad_norm": 1.6622658967971802,
"learning_rate": 4.28462036873417e-05,
"loss": 0.4071,
"step": 12950
},
{
"epoch": 0.22971850647629483,
"grad_norm": 1.3949196338653564,
"learning_rate": 4.279711766900316e-05,
"loss": 0.4069,
"step": 13000
},
{
"epoch": 0.23060203919351133,
"grad_norm": 1.8681453466415405,
"learning_rate": 4.2748031650664624e-05,
"loss": 0.5156,
"step": 13050
},
{
"epoch": 0.23148557191072786,
"grad_norm": 1.6242793798446655,
"learning_rate": 4.2698945632326096e-05,
"loss": 0.4359,
"step": 13100
},
{
"epoch": 0.23236910462794438,
"grad_norm": 2.897428035736084,
"learning_rate": 4.2649859613987554e-05,
"loss": 0.3702,
"step": 13150
},
{
"epoch": 0.23325263734516088,
"grad_norm": 1.855938196182251,
"learning_rate": 4.260077359564902e-05,
"loss": 0.5026,
"step": 13200
},
{
"epoch": 0.2341361700623774,
"grad_norm": 1.818076252937317,
"learning_rate": 4.2551687577310476e-05,
"loss": 0.5201,
"step": 13250
},
{
"epoch": 0.23501970277959394,
"grad_norm": 1.9688682556152344,
"learning_rate": 4.250260155897194e-05,
"loss": 0.3857,
"step": 13300
},
{
"epoch": 0.23590323549681044,
"grad_norm": 2.4908297061920166,
"learning_rate": 4.245351554063341e-05,
"loss": 0.3555,
"step": 13350
},
{
"epoch": 0.23678676821402697,
"grad_norm": 1.9015276432037354,
"learning_rate": 4.240442952229487e-05,
"loss": 0.381,
"step": 13400
},
{
"epoch": 0.23767030093124347,
"grad_norm": 3.011683225631714,
"learning_rate": 4.2355343503956335e-05,
"loss": 0.3804,
"step": 13450
},
{
"epoch": 0.23855383364846,
"grad_norm": 3.5077691078186035,
"learning_rate": 4.23062574856178e-05,
"loss": 0.3666,
"step": 13500
},
{
"epoch": 0.23943736636567653,
"grad_norm": 2.875953197479248,
"learning_rate": 4.225717146727926e-05,
"loss": 0.3792,
"step": 13550
},
{
"epoch": 0.24032089908289303,
"grad_norm": 2.3432717323303223,
"learning_rate": 4.220808544894073e-05,
"loss": 0.3341,
"step": 13600
},
{
"epoch": 0.24120443180010956,
"grad_norm": 1.6648529767990112,
"learning_rate": 4.2158999430602194e-05,
"loss": 0.4906,
"step": 13650
},
{
"epoch": 0.2420879645173261,
"grad_norm": 2.034646987915039,
"learning_rate": 4.210991341226365e-05,
"loss": 0.541,
"step": 13700
},
{
"epoch": 0.2429714972345426,
"grad_norm": 1.2273883819580078,
"learning_rate": 4.2060827393925116e-05,
"loss": 0.3936,
"step": 13750
},
{
"epoch": 0.24385502995175912,
"grad_norm": 1.6031947135925293,
"learning_rate": 4.201174137558658e-05,
"loss": 0.3871,
"step": 13800
},
{
"epoch": 0.24473856266897565,
"grad_norm": 1.7289350032806396,
"learning_rate": 4.1962655357248045e-05,
"loss": 0.2983,
"step": 13850
},
{
"epoch": 0.24562209538619215,
"grad_norm": 1.792413592338562,
"learning_rate": 4.191356933890951e-05,
"loss": 0.4071,
"step": 13900
},
{
"epoch": 0.24650562810340867,
"grad_norm": 1.5456571578979492,
"learning_rate": 4.186448332057097e-05,
"loss": 0.3434,
"step": 13950
},
{
"epoch": 0.24738916082062518,
"grad_norm": 1.9666177034378052,
"learning_rate": 4.181539730223243e-05,
"loss": 0.3885,
"step": 14000
},
{
"epoch": 0.2482726935378417,
"grad_norm": 2.5290989875793457,
"learning_rate": 4.17663112838939e-05,
"loss": 0.4296,
"step": 14050
},
{
"epoch": 0.24915622625505823,
"grad_norm": 1.9654839038848877,
"learning_rate": 4.171722526555536e-05,
"loss": 0.3853,
"step": 14100
},
{
"epoch": 0.25003975897227476,
"grad_norm": 1.68603515625,
"learning_rate": 4.166813924721683e-05,
"loss": 0.4068,
"step": 14150
},
{
"epoch": 0.25092329168949123,
"grad_norm": 1.9062405824661255,
"learning_rate": 4.161905322887829e-05,
"loss": 0.4071,
"step": 14200
},
{
"epoch": 0.25180682440670776,
"grad_norm": 1.7028473615646362,
"learning_rate": 4.156996721053975e-05,
"loss": 0.3588,
"step": 14250
},
{
"epoch": 0.2526903571239243,
"grad_norm": 1.6032434701919556,
"learning_rate": 4.1520881192201214e-05,
"loss": 0.4161,
"step": 14300
},
{
"epoch": 0.2535738898411408,
"grad_norm": 1.6103026866912842,
"learning_rate": 4.147179517386268e-05,
"loss": 0.3431,
"step": 14350
},
{
"epoch": 0.25445742255835735,
"grad_norm": 3.727078914642334,
"learning_rate": 4.142270915552414e-05,
"loss": 0.3576,
"step": 14400
},
{
"epoch": 0.2553409552755739,
"grad_norm": 1.3540493249893188,
"learning_rate": 4.137362313718561e-05,
"loss": 0.3563,
"step": 14450
},
{
"epoch": 0.25622448799279035,
"grad_norm": 1.7373064756393433,
"learning_rate": 4.1324537118847066e-05,
"loss": 0.3406,
"step": 14500
},
{
"epoch": 0.2571080207100069,
"grad_norm": 2.6311392784118652,
"learning_rate": 4.127545110050853e-05,
"loss": 0.4397,
"step": 14550
},
{
"epoch": 0.2579915534272234,
"grad_norm": 1.845186471939087,
"learning_rate": 4.122636508217e-05,
"loss": 0.411,
"step": 14600
},
{
"epoch": 0.25887508614443994,
"grad_norm": 1.5897334814071655,
"learning_rate": 4.117727906383146e-05,
"loss": 0.3742,
"step": 14650
},
{
"epoch": 0.25975861886165647,
"grad_norm": 3.667428970336914,
"learning_rate": 4.1128193045492924e-05,
"loss": 0.3622,
"step": 14700
},
{
"epoch": 0.26064215157887294,
"grad_norm": 1.7393996715545654,
"learning_rate": 4.107910702715439e-05,
"loss": 0.2782,
"step": 14750
},
{
"epoch": 0.26152568429608947,
"grad_norm": 1.6495802402496338,
"learning_rate": 4.103002100881585e-05,
"loss": 0.36,
"step": 14800
},
{
"epoch": 0.262409217013306,
"grad_norm": 1.5133942365646362,
"learning_rate": 4.098093499047732e-05,
"loss": 0.486,
"step": 14850
},
{
"epoch": 0.2632927497305225,
"grad_norm": 1.848177194595337,
"learning_rate": 4.0932830692505546e-05,
"loss": 0.406,
"step": 14900
},
{
"epoch": 0.26417628244773905,
"grad_norm": 3.320469379425049,
"learning_rate": 4.088374467416701e-05,
"loss": 0.357,
"step": 14950
},
{
"epoch": 0.2650598151649556,
"grad_norm": 1.417015790939331,
"learning_rate": 4.0834658655828475e-05,
"loss": 0.2855,
"step": 15000
},
{
"epoch": 0.26594334788217205,
"grad_norm": 1.8597488403320312,
"learning_rate": 4.078557263748994e-05,
"loss": 0.4424,
"step": 15050
},
{
"epoch": 0.2668268805993886,
"grad_norm": 1.651663899421692,
"learning_rate": 4.0736486619151405e-05,
"loss": 0.352,
"step": 15100
},
{
"epoch": 0.2677104133166051,
"grad_norm": 1.452006459236145,
"learning_rate": 4.068740060081286e-05,
"loss": 0.3638,
"step": 15150
},
{
"epoch": 0.26859394603382164,
"grad_norm": 2.7887187004089355,
"learning_rate": 4.063831458247433e-05,
"loss": 0.3727,
"step": 15200
},
{
"epoch": 0.26947747875103817,
"grad_norm": 1.9209206104278564,
"learning_rate": 4.058922856413579e-05,
"loss": 0.3842,
"step": 15250
},
{
"epoch": 0.2703610114682547,
"grad_norm": 1.946022868156433,
"learning_rate": 4.054014254579726e-05,
"loss": 0.3625,
"step": 15300
},
{
"epoch": 0.27124454418547117,
"grad_norm": 1.4893426895141602,
"learning_rate": 4.049105652745872e-05,
"loss": 0.4088,
"step": 15350
},
{
"epoch": 0.2721280769026877,
"grad_norm": 1.7391968965530396,
"learning_rate": 4.0441970509120186e-05,
"loss": 0.4126,
"step": 15400
},
{
"epoch": 0.2730116096199042,
"grad_norm": 1.7254865169525146,
"learning_rate": 4.0392884490781644e-05,
"loss": 0.4662,
"step": 15450
},
{
"epoch": 0.27389514233712076,
"grad_norm": 4.502954483032227,
"learning_rate": 4.034379847244311e-05,
"loss": 0.3889,
"step": 15500
},
{
"epoch": 0.2747786750543373,
"grad_norm": 2.4406206607818604,
"learning_rate": 4.029471245410458e-05,
"loss": 0.3618,
"step": 15550
},
{
"epoch": 0.27566220777155376,
"grad_norm": 1.6272777318954468,
"learning_rate": 4.024562643576604e-05,
"loss": 0.4126,
"step": 15600
},
{
"epoch": 0.2765457404887703,
"grad_norm": 1.5262032747268677,
"learning_rate": 4.01965404174275e-05,
"loss": 0.3771,
"step": 15650
},
{
"epoch": 0.2774292732059868,
"grad_norm": 1.8245854377746582,
"learning_rate": 4.014745439908896e-05,
"loss": 0.4377,
"step": 15700
},
{
"epoch": 0.27831280592320334,
"grad_norm": 2.8566267490386963,
"learning_rate": 4.0098368380750425e-05,
"loss": 0.4041,
"step": 15750
},
{
"epoch": 0.27919633864041987,
"grad_norm": 2.0167641639709473,
"learning_rate": 4.00492823624119e-05,
"loss": 0.375,
"step": 15800
},
{
"epoch": 0.2800798713576364,
"grad_norm": 1.9363830089569092,
"learning_rate": 4.0000196344073355e-05,
"loss": 0.3339,
"step": 15850
},
{
"epoch": 0.2809634040748529,
"grad_norm": 2.208641767501831,
"learning_rate": 3.995111032573482e-05,
"loss": 0.348,
"step": 15900
},
{
"epoch": 0.2818469367920694,
"grad_norm": 1.5789657831192017,
"learning_rate": 3.9902024307396284e-05,
"loss": 0.367,
"step": 15950
},
{
"epoch": 0.28273046950928593,
"grad_norm": 1.6666336059570312,
"learning_rate": 3.985293828905775e-05,
"loss": 0.3427,
"step": 16000
},
{
"epoch": 0.28361400222650246,
"grad_norm": 3.725020170211792,
"learning_rate": 3.980385227071921e-05,
"loss": 0.3637,
"step": 16050
},
{
"epoch": 0.284497534943719,
"grad_norm": 1.5958735942840576,
"learning_rate": 3.975476625238068e-05,
"loss": 0.3489,
"step": 16100
},
{
"epoch": 0.28538106766093546,
"grad_norm": 1.3779951333999634,
"learning_rate": 3.9705680234042136e-05,
"loss": 0.4209,
"step": 16150
},
{
"epoch": 0.286264600378152,
"grad_norm": 1.6636724472045898,
"learning_rate": 3.96565942157036e-05,
"loss": 0.2984,
"step": 16200
},
{
"epoch": 0.2871481330953685,
"grad_norm": 1.705592155456543,
"learning_rate": 3.9607508197365065e-05,
"loss": 0.3877,
"step": 16250
},
{
"epoch": 0.28803166581258505,
"grad_norm": 1.5367944240570068,
"learning_rate": 3.955842217902653e-05,
"loss": 0.3508,
"step": 16300
},
{
"epoch": 0.2889151985298016,
"grad_norm": 3.140960693359375,
"learning_rate": 3.9509336160687994e-05,
"loss": 0.3443,
"step": 16350
},
{
"epoch": 0.2897987312470181,
"grad_norm": 1.2341272830963135,
"learning_rate": 3.946025014234945e-05,
"loss": 0.4346,
"step": 16400
},
{
"epoch": 0.2906822639642346,
"grad_norm": 1.9500783681869507,
"learning_rate": 3.941116412401092e-05,
"loss": 0.4262,
"step": 16450
},
{
"epoch": 0.2915657966814511,
"grad_norm": 1.344519853591919,
"learning_rate": 3.936207810567238e-05,
"loss": 0.3065,
"step": 16500
},
{
"epoch": 0.29244932939866763,
"grad_norm": 1.4747456312179565,
"learning_rate": 3.9312992087333846e-05,
"loss": 0.4003,
"step": 16550
},
{
"epoch": 0.29333286211588416,
"grad_norm": 1.5639158487319946,
"learning_rate": 3.926390606899531e-05,
"loss": 0.5295,
"step": 16600
},
{
"epoch": 0.2942163948331007,
"grad_norm": 1.9425716400146484,
"learning_rate": 3.9214820050656776e-05,
"loss": 0.3582,
"step": 16650
},
{
"epoch": 0.29509992755031716,
"grad_norm": 3.003871440887451,
"learning_rate": 3.9165734032318234e-05,
"loss": 0.3299,
"step": 16700
},
{
"epoch": 0.2959834602675337,
"grad_norm": 3.689194679260254,
"learning_rate": 3.91166480139797e-05,
"loss": 0.3493,
"step": 16750
},
{
"epoch": 0.2968669929847502,
"grad_norm": 1.9439842700958252,
"learning_rate": 3.906756199564116e-05,
"loss": 0.2752,
"step": 16800
},
{
"epoch": 0.29775052570196675,
"grad_norm": 1.8846018314361572,
"learning_rate": 3.901847597730263e-05,
"loss": 0.3254,
"step": 16850
},
{
"epoch": 0.2986340584191833,
"grad_norm": 2.9167964458465576,
"learning_rate": 3.896938995896409e-05,
"loss": 0.3352,
"step": 16900
},
{
"epoch": 0.2995175911363998,
"grad_norm": 2.6470940113067627,
"learning_rate": 3.892128566099233e-05,
"loss": 0.3812,
"step": 16950
},
{
"epoch": 0.3004011238536163,
"grad_norm": 2.1021623611450195,
"learning_rate": 3.887219964265379e-05,
"loss": 0.3332,
"step": 17000
},
{
"epoch": 0.3012846565708328,
"grad_norm": 1.9923433065414429,
"learning_rate": 3.882311362431525e-05,
"loss": 0.3472,
"step": 17050
},
{
"epoch": 0.30216818928804934,
"grad_norm": 1.5736125707626343,
"learning_rate": 3.8774027605976714e-05,
"loss": 0.4207,
"step": 17100
},
{
"epoch": 0.30305172200526587,
"grad_norm": 2.2181496620178223,
"learning_rate": 3.872494158763818e-05,
"loss": 0.3849,
"step": 17150
},
{
"epoch": 0.3039352547224824,
"grad_norm": 1.5112169981002808,
"learning_rate": 3.867585556929964e-05,
"loss": 0.3272,
"step": 17200
},
{
"epoch": 0.30481878743969887,
"grad_norm": 1.5218919515609741,
"learning_rate": 3.862676955096111e-05,
"loss": 0.3037,
"step": 17250
},
{
"epoch": 0.3057023201569154,
"grad_norm": 1.5864076614379883,
"learning_rate": 3.857768353262257e-05,
"loss": 0.2924,
"step": 17300
},
{
"epoch": 0.3065858528741319,
"grad_norm": 1.8895894289016724,
"learning_rate": 3.852859751428403e-05,
"loss": 0.4029,
"step": 17350
},
{
"epoch": 0.30746938559134845,
"grad_norm": 1.4156498908996582,
"learning_rate": 3.8479511495945495e-05,
"loss": 0.5016,
"step": 17400
},
{
"epoch": 0.308352918308565,
"grad_norm": 1.4788236618041992,
"learning_rate": 3.843042547760696e-05,
"loss": 0.3648,
"step": 17450
},
{
"epoch": 0.3092364510257815,
"grad_norm": 1.7631937265396118,
"learning_rate": 3.8381339459268424e-05,
"loss": 0.3045,
"step": 17500
},
{
"epoch": 0.310119983742998,
"grad_norm": 1.9122941493988037,
"learning_rate": 3.833225344092989e-05,
"loss": 0.3271,
"step": 17550
},
{
"epoch": 0.3110035164602145,
"grad_norm": 1.6838266849517822,
"learning_rate": 3.828316742259135e-05,
"loss": 0.519,
"step": 17600
},
{
"epoch": 0.31188704917743104,
"grad_norm": 4.507582187652588,
"learning_rate": 3.823408140425281e-05,
"loss": 0.341,
"step": 17650
},
{
"epoch": 0.31277058189464757,
"grad_norm": 1.3272327184677124,
"learning_rate": 3.8184995385914276e-05,
"loss": 0.3352,
"step": 17700
},
{
"epoch": 0.3136541146118641,
"grad_norm": 2.516676664352417,
"learning_rate": 3.813590936757574e-05,
"loss": 0.4406,
"step": 17750
},
{
"epoch": 0.31453764732908057,
"grad_norm": 1.8230887651443481,
"learning_rate": 3.8086823349237206e-05,
"loss": 0.3822,
"step": 17800
},
{
"epoch": 0.3154211800462971,
"grad_norm": 1.5267698764801025,
"learning_rate": 3.803773733089867e-05,
"loss": 0.287,
"step": 17850
},
{
"epoch": 0.31630471276351363,
"grad_norm": 2.647895574569702,
"learning_rate": 3.798865131256013e-05,
"loss": 0.4349,
"step": 17900
},
{
"epoch": 0.31718824548073016,
"grad_norm": 1.5159648656845093,
"learning_rate": 3.793956529422159e-05,
"loss": 0.3633,
"step": 17950
},
{
"epoch": 0.3180717781979467,
"grad_norm": 1.9135470390319824,
"learning_rate": 3.7890479275883064e-05,
"loss": 0.3431,
"step": 18000
},
{
"epoch": 0.3189553109151632,
"grad_norm": 1.6438477039337158,
"learning_rate": 3.784139325754452e-05,
"loss": 0.3986,
"step": 18050
},
{
"epoch": 0.3198388436323797,
"grad_norm": 1.6794339418411255,
"learning_rate": 3.779230723920599e-05,
"loss": 0.3279,
"step": 18100
},
{
"epoch": 0.3207223763495962,
"grad_norm": 1.5067431926727295,
"learning_rate": 3.7743221220867445e-05,
"loss": 0.3062,
"step": 18150
},
{
"epoch": 0.32160590906681275,
"grad_norm": 1.6953719854354858,
"learning_rate": 3.7694135202528916e-05,
"loss": 0.2973,
"step": 18200
},
{
"epoch": 0.3224894417840293,
"grad_norm": 2.819748640060425,
"learning_rate": 3.764504918419038e-05,
"loss": 0.4078,
"step": 18250
},
{
"epoch": 0.3233729745012458,
"grad_norm": 1.5743447542190552,
"learning_rate": 3.759596316585184e-05,
"loss": 0.31,
"step": 18300
},
{
"epoch": 0.3242565072184623,
"grad_norm": 1.8966853618621826,
"learning_rate": 3.7546877147513303e-05,
"loss": 0.306,
"step": 18350
},
{
"epoch": 0.3251400399356788,
"grad_norm": 2.7652056217193604,
"learning_rate": 3.749779112917477e-05,
"loss": 0.3426,
"step": 18400
},
{
"epoch": 0.32602357265289533,
"grad_norm": 3.006504535675049,
"learning_rate": 3.744870511083623e-05,
"loss": 0.2807,
"step": 18450
},
{
"epoch": 0.32690710537011186,
"grad_norm": 1.5666753053665161,
"learning_rate": 3.73996190924977e-05,
"loss": 0.3856,
"step": 18500
},
{
"epoch": 0.3277906380873284,
"grad_norm": 1.9692752361297607,
"learning_rate": 3.735053307415916e-05,
"loss": 0.3575,
"step": 18550
},
{
"epoch": 0.3286741708045449,
"grad_norm": 3.517622232437134,
"learning_rate": 3.730144705582062e-05,
"loss": 0.347,
"step": 18600
},
{
"epoch": 0.3295577035217614,
"grad_norm": 1.8076531887054443,
"learning_rate": 3.7252361037482085e-05,
"loss": 0.3195,
"step": 18650
},
{
"epoch": 0.3304412362389779,
"grad_norm": 1.8082791566848755,
"learning_rate": 3.720327501914355e-05,
"loss": 0.3543,
"step": 18700
},
{
"epoch": 0.33132476895619445,
"grad_norm": 1.3712306022644043,
"learning_rate": 3.7154189000805014e-05,
"loss": 0.3642,
"step": 18750
},
{
"epoch": 0.332208301673411,
"grad_norm": 1.5654476881027222,
"learning_rate": 3.710510298246648e-05,
"loss": 0.3415,
"step": 18800
},
{
"epoch": 0.3330918343906275,
"grad_norm": 1.4388914108276367,
"learning_rate": 3.7056016964127937e-05,
"loss": 0.3069,
"step": 18850
},
{
"epoch": 0.333975367107844,
"grad_norm": 1.5527664422988892,
"learning_rate": 3.70069309457894e-05,
"loss": 0.2962,
"step": 18900
},
{
"epoch": 0.3348588998250605,
"grad_norm": 1.6680736541748047,
"learning_rate": 3.6957844927450866e-05,
"loss": 0.3156,
"step": 18950
},
{
"epoch": 0.33574243254227704,
"grad_norm": 2.266108274459839,
"learning_rate": 3.69097406294791e-05,
"loss": 0.3791,
"step": 19000
},
{
"epoch": 0.33662596525949356,
"grad_norm": 1.4146838188171387,
"learning_rate": 3.6860654611140565e-05,
"loss": 0.3287,
"step": 19050
},
{
"epoch": 0.3375094979767101,
"grad_norm": 1.640153169631958,
"learning_rate": 3.681156859280202e-05,
"loss": 0.4034,
"step": 19100
},
{
"epoch": 0.3383930306939266,
"grad_norm": 1.670589804649353,
"learning_rate": 3.6762482574463494e-05,
"loss": 0.3476,
"step": 19150
},
{
"epoch": 0.3392765634111431,
"grad_norm": 3.375941753387451,
"learning_rate": 3.671339655612496e-05,
"loss": 0.363,
"step": 19200
},
{
"epoch": 0.3401600961283596,
"grad_norm": 1.965834379196167,
"learning_rate": 3.666431053778642e-05,
"loss": 0.3182,
"step": 19250
},
{
"epoch": 0.34104362884557615,
"grad_norm": 1.607900857925415,
"learning_rate": 3.661522451944788e-05,
"loss": 0.3238,
"step": 19300
},
{
"epoch": 0.3419271615627927,
"grad_norm": 1.4051165580749512,
"learning_rate": 3.6566138501109346e-05,
"loss": 0.3043,
"step": 19350
},
{
"epoch": 0.3428106942800092,
"grad_norm": 1.4679523706436157,
"learning_rate": 3.651705248277081e-05,
"loss": 0.3902,
"step": 19400
},
{
"epoch": 0.3436942269972257,
"grad_norm": 1.5135536193847656,
"learning_rate": 3.6467966464432276e-05,
"loss": 0.3085,
"step": 19450
},
{
"epoch": 0.3445777597144422,
"grad_norm": 2.2533581256866455,
"learning_rate": 3.6418880446093734e-05,
"loss": 0.3162,
"step": 19500
},
{
"epoch": 0.34546129243165874,
"grad_norm": 1.625067949295044,
"learning_rate": 3.63697944277552e-05,
"loss": 0.345,
"step": 19550
},
{
"epoch": 0.34634482514887527,
"grad_norm": 1.1573612689971924,
"learning_rate": 3.632070840941666e-05,
"loss": 0.3017,
"step": 19600
},
{
"epoch": 0.3472283578660918,
"grad_norm": 3.46663498878479,
"learning_rate": 3.627162239107813e-05,
"loss": 0.4232,
"step": 19650
},
{
"epoch": 0.3481118905833083,
"grad_norm": 1.5614382028579712,
"learning_rate": 3.622253637273959e-05,
"loss": 0.3363,
"step": 19700
},
{
"epoch": 0.3489954233005248,
"grad_norm": 1.3841484785079956,
"learning_rate": 3.617345035440106e-05,
"loss": 0.3484,
"step": 19750
},
{
"epoch": 0.3498789560177413,
"grad_norm": 1.941517949104309,
"learning_rate": 3.6124364336062515e-05,
"loss": 0.3719,
"step": 19800
},
{
"epoch": 0.35076248873495786,
"grad_norm": 4.908963680267334,
"learning_rate": 3.607527831772398e-05,
"loss": 0.3226,
"step": 19850
},
{
"epoch": 0.3516460214521744,
"grad_norm": 1.5221627950668335,
"learning_rate": 3.6026192299385444e-05,
"loss": 0.3636,
"step": 19900
},
{
"epoch": 0.3525295541693909,
"grad_norm": 1.8089814186096191,
"learning_rate": 3.597710628104691e-05,
"loss": 0.3704,
"step": 19950
},
{
"epoch": 0.3534130868866074,
"grad_norm": 2.786560535430908,
"learning_rate": 3.5928020262708373e-05,
"loss": 0.3459,
"step": 20000
},
{
"epoch": 0.3542966196038239,
"grad_norm": 2.97851824760437,
"learning_rate": 3.587893424436983e-05,
"loss": 0.3226,
"step": 20050
},
{
"epoch": 0.35518015232104044,
"grad_norm": 2.1979775428771973,
"learning_rate": 3.5829848226031296e-05,
"loss": 0.3256,
"step": 20100
},
{
"epoch": 0.35606368503825697,
"grad_norm": 1.762453556060791,
"learning_rate": 3.578076220769276e-05,
"loss": 0.3179,
"step": 20150
},
{
"epoch": 0.3569472177554735,
"grad_norm": 1.4908533096313477,
"learning_rate": 3.5731676189354225e-05,
"loss": 0.4226,
"step": 20200
},
{
"epoch": 0.35783075047269003,
"grad_norm": 1.3192092180252075,
"learning_rate": 3.568259017101569e-05,
"loss": 0.4196,
"step": 20250
},
{
"epoch": 0.3587142831899065,
"grad_norm": 1.421736717224121,
"learning_rate": 3.5633504152677155e-05,
"loss": 0.3618,
"step": 20300
},
{
"epoch": 0.35959781590712303,
"grad_norm": 2.0631330013275146,
"learning_rate": 3.558441813433861e-05,
"loss": 0.4093,
"step": 20350
},
{
"epoch": 0.36048134862433956,
"grad_norm": 1.6250920295715332,
"learning_rate": 3.5535332116000084e-05,
"loss": 0.3051,
"step": 20400
},
{
"epoch": 0.3613648813415561,
"grad_norm": 1.4659417867660522,
"learning_rate": 3.548624609766155e-05,
"loss": 0.3379,
"step": 20450
},
{
"epoch": 0.3622484140587726,
"grad_norm": 1.520573616027832,
"learning_rate": 3.5437160079323007e-05,
"loss": 0.3582,
"step": 20500
},
{
"epoch": 0.3631319467759891,
"grad_norm": 2.158830165863037,
"learning_rate": 3.538807406098447e-05,
"loss": 0.4004,
"step": 20550
},
{
"epoch": 0.3640154794932056,
"grad_norm": 1.7503968477249146,
"learning_rate": 3.533898804264593e-05,
"loss": 0.33,
"step": 20600
},
{
"epoch": 0.36489901221042215,
"grad_norm": 1.5064153671264648,
"learning_rate": 3.52899020243074e-05,
"loss": 0.3072,
"step": 20650
},
{
"epoch": 0.3657825449276387,
"grad_norm": 3.5023598670959473,
"learning_rate": 3.5240816005968865e-05,
"loss": 0.35,
"step": 20700
},
{
"epoch": 0.3666660776448552,
"grad_norm": 1.7911083698272705,
"learning_rate": 3.519172998763032e-05,
"loss": 0.3241,
"step": 20750
},
{
"epoch": 0.36754961036207173,
"grad_norm": 1.50026273727417,
"learning_rate": 3.514264396929179e-05,
"loss": 0.37,
"step": 20800
},
{
"epoch": 0.3684331430792882,
"grad_norm": 1.5556259155273438,
"learning_rate": 3.509355795095325e-05,
"loss": 0.2689,
"step": 20850
},
{
"epoch": 0.36931667579650473,
"grad_norm": 1.6530933380126953,
"learning_rate": 3.504447193261472e-05,
"loss": 0.4061,
"step": 20900
},
{
"epoch": 0.37020020851372126,
"grad_norm": 1.250317931175232,
"learning_rate": 3.499538591427618e-05,
"loss": 0.3412,
"step": 20950
},
{
"epoch": 0.3710837412309378,
"grad_norm": 1.9599151611328125,
"learning_rate": 3.494728161630441e-05,
"loss": 0.3619,
"step": 21000
},
{
"epoch": 0.3719672739481543,
"grad_norm": 1.3728086948394775,
"learning_rate": 3.4898195597965874e-05,
"loss": 0.314,
"step": 21050
},
{
"epoch": 0.3728508066653708,
"grad_norm": 1.6389710903167725,
"learning_rate": 3.4849109579627346e-05,
"loss": 0.2912,
"step": 21100
},
{
"epoch": 0.3737343393825873,
"grad_norm": 3.552582025527954,
"learning_rate": 3.4800023561288803e-05,
"loss": 0.3402,
"step": 21150
},
{
"epoch": 0.37461787209980385,
"grad_norm": 1.6479156017303467,
"learning_rate": 3.475093754295027e-05,
"loss": 0.3462,
"step": 21200
},
{
"epoch": 0.3755014048170204,
"grad_norm": 1.593705415725708,
"learning_rate": 3.470185152461173e-05,
"loss": 0.2775,
"step": 21250
},
{
"epoch": 0.3763849375342369,
"grad_norm": 2.1807069778442383,
"learning_rate": 3.465276550627319e-05,
"loss": 0.3825,
"step": 21300
},
{
"epoch": 0.37726847025145344,
"grad_norm": 1.6359409093856812,
"learning_rate": 3.460367948793466e-05,
"loss": 0.3931,
"step": 21350
},
{
"epoch": 0.3781520029686699,
"grad_norm": 1.5960018634796143,
"learning_rate": 3.455459346959612e-05,
"loss": 0.4059,
"step": 21400
},
{
"epoch": 0.37903553568588644,
"grad_norm": 3.367835283279419,
"learning_rate": 3.4505507451257585e-05,
"loss": 0.3264,
"step": 21450
},
{
"epoch": 0.37991906840310297,
"grad_norm": 1.5965161323547363,
"learning_rate": 3.445642143291905e-05,
"loss": 0.2605,
"step": 21500
},
{
"epoch": 0.3808026011203195,
"grad_norm": 1.5011396408081055,
"learning_rate": 3.440733541458051e-05,
"loss": 0.3658,
"step": 21550
},
{
"epoch": 0.381686133837536,
"grad_norm": 1.5021259784698486,
"learning_rate": 3.435824939624198e-05,
"loss": 0.3274,
"step": 21600
},
{
"epoch": 0.3825696665547525,
"grad_norm": 1.5224860906600952,
"learning_rate": 3.430916337790344e-05,
"loss": 0.3094,
"step": 21650
},
{
"epoch": 0.383453199271969,
"grad_norm": 3.36433482170105,
"learning_rate": 3.42600773595649e-05,
"loss": 0.3556,
"step": 21700
},
{
"epoch": 0.38433673198918555,
"grad_norm": 1.9824773073196411,
"learning_rate": 3.4210991341226366e-05,
"loss": 0.2877,
"step": 21750
},
{
"epoch": 0.3852202647064021,
"grad_norm": 1.5103614330291748,
"learning_rate": 3.416190532288783e-05,
"loss": 0.3203,
"step": 21800
},
{
"epoch": 0.3861037974236186,
"grad_norm": 1.1625959873199463,
"learning_rate": 3.4112819304549295e-05,
"loss": 0.2553,
"step": 21850
},
{
"epoch": 0.38698733014083514,
"grad_norm": 1.5695985555648804,
"learning_rate": 3.406373328621076e-05,
"loss": 0.4425,
"step": 21900
},
{
"epoch": 0.3878708628580516,
"grad_norm": 1.6758594512939453,
"learning_rate": 3.401464726787222e-05,
"loss": 0.3249,
"step": 21950
},
{
"epoch": 0.38875439557526814,
"grad_norm": 3.6129748821258545,
"learning_rate": 3.396556124953368e-05,
"loss": 0.3649,
"step": 22000
},
{
"epoch": 0.38963792829248467,
"grad_norm": 1.6155461072921753,
"learning_rate": 3.391647523119515e-05,
"loss": 0.3621,
"step": 22050
},
{
"epoch": 0.3905214610097012,
"grad_norm": 1.7477047443389893,
"learning_rate": 3.386738921285661e-05,
"loss": 0.4232,
"step": 22100
},
{
"epoch": 0.3914049937269177,
"grad_norm": 3.0512797832489014,
"learning_rate": 3.3818303194518076e-05,
"loss": 0.266,
"step": 22150
},
{
"epoch": 0.3922885264441342,
"grad_norm": 1.4074236154556274,
"learning_rate": 3.376921717617954e-05,
"loss": 0.3767,
"step": 22200
},
{
"epoch": 0.39317205916135073,
"grad_norm": 1.7168455123901367,
"learning_rate": 3.3720131157841e-05,
"loss": 0.366,
"step": 22250
},
{
"epoch": 0.39405559187856726,
"grad_norm": 3.360104560852051,
"learning_rate": 3.3671045139502464e-05,
"loss": 0.3211,
"step": 22300
},
{
"epoch": 0.3949391245957838,
"grad_norm": 1.527031660079956,
"learning_rate": 3.3621959121163935e-05,
"loss": 0.2505,
"step": 22350
},
{
"epoch": 0.3958226573130003,
"grad_norm": 1.7586029767990112,
"learning_rate": 3.357287310282539e-05,
"loss": 0.3824,
"step": 22400
},
{
"epoch": 0.39670619003021684,
"grad_norm": 2.3490004539489746,
"learning_rate": 3.352378708448686e-05,
"loss": 0.331,
"step": 22450
},
{
"epoch": 0.3975897227474333,
"grad_norm": 1.5686146020889282,
"learning_rate": 3.3474701066148316e-05,
"loss": 0.3136,
"step": 22500
},
{
"epoch": 0.39847325546464984,
"grad_norm": 1.5068285465240479,
"learning_rate": 3.342561504780978e-05,
"loss": 0.297,
"step": 22550
},
{
"epoch": 0.3993567881818664,
"grad_norm": 1.81602942943573,
"learning_rate": 3.337652902947125e-05,
"loss": 0.2933,
"step": 22600
},
{
"epoch": 0.4002403208990829,
"grad_norm": 3.4516189098358154,
"learning_rate": 3.332744301113271e-05,
"loss": 0.4026,
"step": 22650
},
{
"epoch": 0.40112385361629943,
"grad_norm": 1.5759230852127075,
"learning_rate": 3.3278356992794174e-05,
"loss": 0.3567,
"step": 22700
},
{
"epoch": 0.4020073863335159,
"grad_norm": 1.9385254383087158,
"learning_rate": 3.322927097445564e-05,
"loss": 0.3711,
"step": 22750
},
{
"epoch": 0.40289091905073243,
"grad_norm": 1.6334116458892822,
"learning_rate": 3.31801849561171e-05,
"loss": 0.378,
"step": 22800
},
{
"epoch": 0.40377445176794896,
"grad_norm": 2.0981173515319824,
"learning_rate": 3.313109893777857e-05,
"loss": 0.355,
"step": 22850
},
{
"epoch": 0.4046579844851655,
"grad_norm": 1.6996448040008545,
"learning_rate": 3.308201291944003e-05,
"loss": 0.3044,
"step": 22900
},
{
"epoch": 0.405541517202382,
"grad_norm": 1.3511463403701782,
"learning_rate": 3.303292690110149e-05,
"loss": 0.357,
"step": 22950
},
{
"epoch": 0.40642504991959855,
"grad_norm": 1.7596737146377563,
"learning_rate": 3.2983840882762956e-05,
"loss": 0.3616,
"step": 23000
},
{
"epoch": 0.407308582636815,
"grad_norm": 2.8382747173309326,
"learning_rate": 3.2934754864424413e-05,
"loss": 0.3139,
"step": 23050
},
{
"epoch": 0.40819211535403155,
"grad_norm": 3.052281618118286,
"learning_rate": 3.2885668846085885e-05,
"loss": 0.3474,
"step": 23100
},
{
"epoch": 0.4090756480712481,
"grad_norm": 1.373552680015564,
"learning_rate": 3.283756454811412e-05,
"loss": 0.3208,
"step": 23150
},
{
"epoch": 0.4099591807884646,
"grad_norm": 1.6797386407852173,
"learning_rate": 3.278847852977558e-05,
"loss": 0.3798,
"step": 23200
},
{
"epoch": 0.41084271350568113,
"grad_norm": 1.8930203914642334,
"learning_rate": 3.273939251143704e-05,
"loss": 0.3282,
"step": 23250
},
{
"epoch": 0.4117262462228976,
"grad_norm": 1.256135106086731,
"learning_rate": 3.2690306493098507e-05,
"loss": 0.3302,
"step": 23300
},
{
"epoch": 0.41260977894011414,
"grad_norm": 1.952988862991333,
"learning_rate": 3.264122047475997e-05,
"loss": 0.3599,
"step": 23350
},
{
"epoch": 0.41349331165733066,
"grad_norm": 1.3686082363128662,
"learning_rate": 3.2592134456421436e-05,
"loss": 0.3608,
"step": 23400
},
{
"epoch": 0.4143768443745472,
"grad_norm": 1.56107759475708,
"learning_rate": 3.2543048438082894e-05,
"loss": 0.3387,
"step": 23450
},
{
"epoch": 0.4152603770917637,
"grad_norm": 1.823240876197815,
"learning_rate": 3.249396241974436e-05,
"loss": 0.3987,
"step": 23500
},
{
"epoch": 0.41614390980898025,
"grad_norm": 1.2912514209747314,
"learning_rate": 3.244487640140583e-05,
"loss": 0.3387,
"step": 23550
},
{
"epoch": 0.4170274425261967,
"grad_norm": 1.5520604848861694,
"learning_rate": 3.239579038306729e-05,
"loss": 0.2989,
"step": 23600
},
{
"epoch": 0.41791097524341325,
"grad_norm": 1.4236600399017334,
"learning_rate": 3.234670436472875e-05,
"loss": 0.2629,
"step": 23650
},
{
"epoch": 0.4187945079606298,
"grad_norm": 3.2101380825042725,
"learning_rate": 3.229761834639022e-05,
"loss": 0.2905,
"step": 23700
},
{
"epoch": 0.4196780406778463,
"grad_norm": 1.3380919694900513,
"learning_rate": 3.2248532328051675e-05,
"loss": 0.3234,
"step": 23750
},
{
"epoch": 0.42056157339506284,
"grad_norm": 1.5015414953231812,
"learning_rate": 3.2199446309713146e-05,
"loss": 0.3063,
"step": 23800
},
{
"epoch": 0.4214451061122793,
"grad_norm": 1.289444923400879,
"learning_rate": 3.2150360291374604e-05,
"loss": 0.3386,
"step": 23850
},
{
"epoch": 0.42232863882949584,
"grad_norm": 2.95922589302063,
"learning_rate": 3.210127427303607e-05,
"loss": 0.3431,
"step": 23900
},
{
"epoch": 0.42321217154671237,
"grad_norm": 1.6753530502319336,
"learning_rate": 3.2052188254697534e-05,
"loss": 0.2902,
"step": 23950
},
{
"epoch": 0.4240957042639289,
"grad_norm": 1.6901003122329712,
"learning_rate": 3.2003102236359e-05,
"loss": 0.3136,
"step": 24000
},
{
"epoch": 0.4249792369811454,
"grad_norm": 4.797271251678467,
"learning_rate": 3.195401621802046e-05,
"loss": 0.4001,
"step": 24050
},
{
"epoch": 0.42586276969836195,
"grad_norm": 1.4796360731124878,
"learning_rate": 3.190493019968193e-05,
"loss": 0.285,
"step": 24100
},
{
"epoch": 0.4267463024155784,
"grad_norm": 1.4410722255706787,
"learning_rate": 3.1855844181343386e-05,
"loss": 0.4717,
"step": 24150
},
{
"epoch": 0.42762983513279496,
"grad_norm": 1.398037075996399,
"learning_rate": 3.180675816300485e-05,
"loss": 0.3391,
"step": 24200
},
{
"epoch": 0.4285133678500115,
"grad_norm": 1.3054397106170654,
"learning_rate": 3.1757672144666315e-05,
"loss": 0.2913,
"step": 24250
},
{
"epoch": 0.429396900567228,
"grad_norm": 1.7768748998641968,
"learning_rate": 3.170858612632778e-05,
"loss": 0.3417,
"step": 24300
},
{
"epoch": 0.43028043328444454,
"grad_norm": 1.2682479619979858,
"learning_rate": 3.1659500107989244e-05,
"loss": 0.2909,
"step": 24350
},
{
"epoch": 0.431163966001661,
"grad_norm": 1.791175365447998,
"learning_rate": 3.16104140896507e-05,
"loss": 0.2871,
"step": 24400
},
{
"epoch": 0.43204749871887754,
"grad_norm": 1.5249110460281372,
"learning_rate": 3.156132807131217e-05,
"loss": 0.3929,
"step": 24450
},
{
"epoch": 0.43293103143609407,
"grad_norm": 1.2778598070144653,
"learning_rate": 3.151224205297363e-05,
"loss": 0.278,
"step": 24500
},
{
"epoch": 0.4338145641533106,
"grad_norm": 3.55033278465271,
"learning_rate": 3.1463156034635096e-05,
"loss": 0.4386,
"step": 24550
},
{
"epoch": 0.43469809687052713,
"grad_norm": 1.4700381755828857,
"learning_rate": 3.141407001629656e-05,
"loss": 0.4193,
"step": 24600
},
{
"epoch": 0.43558162958774366,
"grad_norm": 1.150854468345642,
"learning_rate": 3.1364983997958025e-05,
"loss": 0.367,
"step": 24650
},
{
"epoch": 0.43646516230496013,
"grad_norm": 1.6972355842590332,
"learning_rate": 3.131589797961948e-05,
"loss": 0.3474,
"step": 24700
},
{
"epoch": 0.43734869502217666,
"grad_norm": 1.355474829673767,
"learning_rate": 3.126681196128095e-05,
"loss": 0.3116,
"step": 24750
},
{
"epoch": 0.4382322277393932,
"grad_norm": 1.4246526956558228,
"learning_rate": 3.121772594294242e-05,
"loss": 0.2733,
"step": 24800
},
{
"epoch": 0.4391157604566097,
"grad_norm": 1.5642348527908325,
"learning_rate": 3.116863992460388e-05,
"loss": 0.3046,
"step": 24850
},
{
"epoch": 0.43999929317382624,
"grad_norm": 1.5843394994735718,
"learning_rate": 3.111955390626534e-05,
"loss": 0.3627,
"step": 24900
},
{
"epoch": 0.4408828258910427,
"grad_norm": 1.6260349750518799,
"learning_rate": 3.10704678879268e-05,
"loss": 0.3403,
"step": 24950
},
{
"epoch": 0.44176635860825925,
"grad_norm": 1.7742459774017334,
"learning_rate": 3.1021381869588265e-05,
"loss": 0.349,
"step": 25000
},
{
"epoch": 0.4426498913254758,
"grad_norm": 1.4080630540847778,
"learning_rate": 3.0972295851249736e-05,
"loss": 0.3527,
"step": 25050
},
{
"epoch": 0.4435334240426923,
"grad_norm": 1.7197438478469849,
"learning_rate": 3.0923209832911194e-05,
"loss": 0.3773,
"step": 25100
},
{
"epoch": 0.44441695675990883,
"grad_norm": 1.5831055641174316,
"learning_rate": 3.087510553493943e-05,
"loss": 0.3372,
"step": 25150
},
{
"epoch": 0.44530048947712536,
"grad_norm": 1.7535090446472168,
"learning_rate": 3.082601951660089e-05,
"loss": 0.3178,
"step": 25200
},
{
"epoch": 0.44618402219434183,
"grad_norm": 1.6131466627120972,
"learning_rate": 3.077693349826236e-05,
"loss": 0.2745,
"step": 25250
},
{
"epoch": 0.44706755491155836,
"grad_norm": 1.5419201850891113,
"learning_rate": 3.072784747992382e-05,
"loss": 0.2773,
"step": 25300
},
{
"epoch": 0.4479510876287749,
"grad_norm": 1.6418931484222412,
"learning_rate": 3.067876146158528e-05,
"loss": 0.3822,
"step": 25350
},
{
"epoch": 0.4488346203459914,
"grad_norm": 1.288121223449707,
"learning_rate": 3.0629675443246745e-05,
"loss": 0.3851,
"step": 25400
},
{
"epoch": 0.44971815306320795,
"grad_norm": 1.9523035287857056,
"learning_rate": 3.058058942490821e-05,
"loss": 0.3805,
"step": 25450
},
{
"epoch": 0.4506016857804245,
"grad_norm": 3.3735404014587402,
"learning_rate": 3.0531503406569674e-05,
"loss": 0.3245,
"step": 25500
},
{
"epoch": 0.45148521849764095,
"grad_norm": 1.4013001918792725,
"learning_rate": 3.048241738823114e-05,
"loss": 0.2978,
"step": 25550
},
{
"epoch": 0.4523687512148575,
"grad_norm": 1.9055225849151611,
"learning_rate": 3.0433331369892604e-05,
"loss": 0.3397,
"step": 25600
},
{
"epoch": 0.453252283932074,
"grad_norm": 3.319705009460449,
"learning_rate": 3.0384245351554065e-05,
"loss": 0.4655,
"step": 25650
},
{
"epoch": 0.45413581664929054,
"grad_norm": 1.3729950189590454,
"learning_rate": 3.033515933321553e-05,
"loss": 0.2669,
"step": 25700
},
{
"epoch": 0.45501934936650706,
"grad_norm": 1.3527820110321045,
"learning_rate": 3.028607331487699e-05,
"loss": 0.3316,
"step": 25750
},
{
"epoch": 0.45590288208372354,
"grad_norm": 1.4500503540039062,
"learning_rate": 3.0236987296538455e-05,
"loss": 0.3395,
"step": 25800
},
{
"epoch": 0.45678641480094007,
"grad_norm": 2.8250796794891357,
"learning_rate": 3.018790127819992e-05,
"loss": 0.3631,
"step": 25850
},
{
"epoch": 0.4576699475181566,
"grad_norm": 1.1532173156738281,
"learning_rate": 3.013881525986138e-05,
"loss": 0.3418,
"step": 25900
},
{
"epoch": 0.4585534802353731,
"grad_norm": 1.687465786933899,
"learning_rate": 3.0089729241522846e-05,
"loss": 0.3351,
"step": 25950
},
{
"epoch": 0.45943701295258965,
"grad_norm": 4.05789852142334,
"learning_rate": 3.004064322318431e-05,
"loss": 0.3117,
"step": 26000
},
{
"epoch": 0.4603205456698062,
"grad_norm": 1.4303230047225952,
"learning_rate": 2.9991557204845772e-05,
"loss": 0.3197,
"step": 26050
},
{
"epoch": 0.46120407838702265,
"grad_norm": 3.692739248275757,
"learning_rate": 2.9942471186507237e-05,
"loss": 0.2856,
"step": 26100
},
{
"epoch": 0.4620876111042392,
"grad_norm": 2.6494288444519043,
"learning_rate": 2.98933851681687e-05,
"loss": 0.3668,
"step": 26150
},
{
"epoch": 0.4629711438214557,
"grad_norm": 1.832560420036316,
"learning_rate": 2.9844299149830163e-05,
"loss": 0.4672,
"step": 26200
},
{
"epoch": 0.46385467653867224,
"grad_norm": 3.4169373512268066,
"learning_rate": 2.9795213131491627e-05,
"loss": 0.373,
"step": 26250
},
{
"epoch": 0.46473820925588877,
"grad_norm": 1.5430257320404053,
"learning_rate": 2.974612711315309e-05,
"loss": 0.3232,
"step": 26300
},
{
"epoch": 0.46562174197310524,
"grad_norm": 1.674177646636963,
"learning_rate": 2.9697041094814553e-05,
"loss": 0.3461,
"step": 26350
},
{
"epoch": 0.46650527469032177,
"grad_norm": 1.7116457223892212,
"learning_rate": 2.9647955076476018e-05,
"loss": 0.2937,
"step": 26400
},
{
"epoch": 0.4673888074075383,
"grad_norm": 1.3711694478988647,
"learning_rate": 2.9599850778504252e-05,
"loss": 0.3511,
"step": 26450
},
{
"epoch": 0.4682723401247548,
"grad_norm": 3.0807628631591797,
"learning_rate": 2.9550764760165717e-05,
"loss": 0.3204,
"step": 26500
},
{
"epoch": 0.46915587284197136,
"grad_norm": 1.5949090719223022,
"learning_rate": 2.950167874182718e-05,
"loss": 0.2698,
"step": 26550
},
{
"epoch": 0.4700394055591879,
"grad_norm": 1.6748404502868652,
"learning_rate": 2.9452592723488643e-05,
"loss": 0.3019,
"step": 26600
},
{
"epoch": 0.47092293827640436,
"grad_norm": 1.6362017393112183,
"learning_rate": 2.9403506705150108e-05,
"loss": 0.276,
"step": 26650
},
{
"epoch": 0.4718064709936209,
"grad_norm": 1.5143210887908936,
"learning_rate": 2.935442068681157e-05,
"loss": 0.2572,
"step": 26700
},
{
"epoch": 0.4726900037108374,
"grad_norm": 2.1000730991363525,
"learning_rate": 2.9305334668473034e-05,
"loss": 0.2821,
"step": 26750
},
{
"epoch": 0.47357353642805394,
"grad_norm": 1.9400396347045898,
"learning_rate": 2.9256248650134498e-05,
"loss": 0.2753,
"step": 26800
},
{
"epoch": 0.47445706914527047,
"grad_norm": 1.7398908138275146,
"learning_rate": 2.920716263179596e-05,
"loss": 0.2789,
"step": 26850
},
{
"epoch": 0.47534060186248694,
"grad_norm": 1.456929087638855,
"learning_rate": 2.9158076613457424e-05,
"loss": 0.5175,
"step": 26900
},
{
"epoch": 0.4762241345797035,
"grad_norm": 1.4763001203536987,
"learning_rate": 2.910899059511889e-05,
"loss": 0.3398,
"step": 26950
},
{
"epoch": 0.47710766729692,
"grad_norm": 1.3316082954406738,
"learning_rate": 2.905990457678035e-05,
"loss": 0.3683,
"step": 27000
},
{
"epoch": 0.47799120001413653,
"grad_norm": 1.1095103025436401,
"learning_rate": 2.9010818558441815e-05,
"loss": 0.341,
"step": 27050
},
{
"epoch": 0.47887473273135306,
"grad_norm": 1.5168321132659912,
"learning_rate": 2.8961732540103276e-05,
"loss": 0.2753,
"step": 27100
},
{
"epoch": 0.4797582654485696,
"grad_norm": 1.9980124235153198,
"learning_rate": 2.891264652176474e-05,
"loss": 0.35,
"step": 27150
},
{
"epoch": 0.48064179816578606,
"grad_norm": 1.6252918243408203,
"learning_rate": 2.8863560503426205e-05,
"loss": 0.3143,
"step": 27200
},
{
"epoch": 0.4815253308830026,
"grad_norm": 1.6409038305282593,
"learning_rate": 2.8814474485087667e-05,
"loss": 0.3968,
"step": 27250
},
{
"epoch": 0.4824088636002191,
"grad_norm": 1.4830607175827026,
"learning_rate": 2.876538846674913e-05,
"loss": 0.3246,
"step": 27300
},
{
"epoch": 0.48329239631743565,
"grad_norm": 1.6359367370605469,
"learning_rate": 2.87163024484106e-05,
"loss": 0.3131,
"step": 27350
},
{
"epoch": 0.4841759290346522,
"grad_norm": 1.1834681034088135,
"learning_rate": 2.8667216430072057e-05,
"loss": 0.3078,
"step": 27400
},
{
"epoch": 0.48505946175186865,
"grad_norm": 1.3667497634887695,
"learning_rate": 2.8618130411733522e-05,
"loss": 0.419,
"step": 27450
},
{
"epoch": 0.4859429944690852,
"grad_norm": 4.66032075881958,
"learning_rate": 2.856904439339499e-05,
"loss": 0.3959,
"step": 27500
},
{
"epoch": 0.4868265271863017,
"grad_norm": 1.530393362045288,
"learning_rate": 2.8519958375056448e-05,
"loss": 0.3754,
"step": 27550
},
{
"epoch": 0.48771005990351823,
"grad_norm": 0.9399372935295105,
"learning_rate": 2.8470872356717916e-05,
"loss": 0.3163,
"step": 27600
},
{
"epoch": 0.48859359262073476,
"grad_norm": 1.654520869255066,
"learning_rate": 2.8421786338379374e-05,
"loss": 0.35,
"step": 27650
},
{
"epoch": 0.4894771253379513,
"grad_norm": 1.5777958631515503,
"learning_rate": 2.8372700320040842e-05,
"loss": 0.3397,
"step": 27700
},
{
"epoch": 0.49036065805516776,
"grad_norm": 1.4474226236343384,
"learning_rate": 2.8323614301702307e-05,
"loss": 0.3853,
"step": 27750
},
{
"epoch": 0.4912441907723843,
"grad_norm": 1.603667140007019,
"learning_rate": 2.8274528283363765e-05,
"loss": 0.2568,
"step": 27800
},
{
"epoch": 0.4921277234896008,
"grad_norm": 1.727280855178833,
"learning_rate": 2.8225442265025233e-05,
"loss": 0.3108,
"step": 27850
},
{
"epoch": 0.49301125620681735,
"grad_norm": 1.4632737636566162,
"learning_rate": 2.8176356246686697e-05,
"loss": 0.4098,
"step": 27900
},
{
"epoch": 0.4938947889240339,
"grad_norm": 1.5443991422653198,
"learning_rate": 2.812727022834816e-05,
"loss": 0.3364,
"step": 27950
},
{
"epoch": 0.49477832164125035,
"grad_norm": 1.7304097414016724,
"learning_rate": 2.8078184210009623e-05,
"loss": 0.3354,
"step": 28000
},
{
"epoch": 0.4956618543584669,
"grad_norm": 1.141662359237671,
"learning_rate": 2.8029098191671088e-05,
"loss": 0.2879,
"step": 28050
},
{
"epoch": 0.4965453870756834,
"grad_norm": 1.5769354104995728,
"learning_rate": 2.798001217333255e-05,
"loss": 0.3604,
"step": 28100
},
{
"epoch": 0.49742891979289994,
"grad_norm": 2.3104453086853027,
"learning_rate": 2.7930926154994014e-05,
"loss": 0.2612,
"step": 28150
},
{
"epoch": 0.49831245251011647,
"grad_norm": 0.764305830001831,
"learning_rate": 2.7881840136655475e-05,
"loss": 0.3593,
"step": 28200
},
{
"epoch": 0.499195985227333,
"grad_norm": 1.1693766117095947,
"learning_rate": 2.783275411831694e-05,
"loss": 0.2961,
"step": 28250
},
{
"epoch": 0.5000795179445495,
"grad_norm": 1.65450918674469,
"learning_rate": 2.7783668099978404e-05,
"loss": 0.3338,
"step": 28300
},
{
"epoch": 0.500963050661766,
"grad_norm": 1.438693642616272,
"learning_rate": 2.7734582081639866e-05,
"loss": 0.3109,
"step": 28350
},
{
"epoch": 0.5018465833789825,
"grad_norm": 1.5170999765396118,
"learning_rate": 2.768549606330133e-05,
"loss": 0.3234,
"step": 28400
},
{
"epoch": 0.502730116096199,
"grad_norm": 1.497454285621643,
"learning_rate": 2.7636410044962795e-05,
"loss": 0.3257,
"step": 28450
},
{
"epoch": 0.5036136488134155,
"grad_norm": 3.3886194229125977,
"learning_rate": 2.7587324026624256e-05,
"loss": 0.4675,
"step": 28500
},
{
"epoch": 0.504497181530632,
"grad_norm": 1.6604270935058594,
"learning_rate": 2.753823800828572e-05,
"loss": 0.4318,
"step": 28550
},
{
"epoch": 0.5053807142478486,
"grad_norm": 1.7005223035812378,
"learning_rate": 2.7489151989947186e-05,
"loss": 0.3594,
"step": 28600
},
{
"epoch": 0.5062642469650651,
"grad_norm": 1.109703540802002,
"learning_rate": 2.7440065971608647e-05,
"loss": 0.3214,
"step": 28650
},
{
"epoch": 0.5071477796822816,
"grad_norm": 1.9164469242095947,
"learning_rate": 2.739097995327011e-05,
"loss": 0.2856,
"step": 28700
},
{
"epoch": 0.5080313123994982,
"grad_norm": 1.3944114446640015,
"learning_rate": 2.7341893934931573e-05,
"loss": 0.3094,
"step": 28750
},
{
"epoch": 0.5089148451167147,
"grad_norm": 1.3844256401062012,
"learning_rate": 2.7292807916593038e-05,
"loss": 0.3933,
"step": 28800
},
{
"epoch": 0.5097983778339312,
"grad_norm": 3.18278431892395,
"learning_rate": 2.7243721898254506e-05,
"loss": 0.3432,
"step": 28850
},
{
"epoch": 0.5106819105511478,
"grad_norm": 1.7024506330490112,
"learning_rate": 2.7194635879915964e-05,
"loss": 0.3766,
"step": 28900
},
{
"epoch": 0.5115654432683642,
"grad_norm": 1.4224214553833008,
"learning_rate": 2.7145549861577428e-05,
"loss": 0.3308,
"step": 28950
},
{
"epoch": 0.5124489759855807,
"grad_norm": 1.5428136587142944,
"learning_rate": 2.7096463843238896e-05,
"loss": 0.3453,
"step": 29000
},
{
"epoch": 0.5133325087027972,
"grad_norm": 1.4710556268692017,
"learning_rate": 2.7047377824900354e-05,
"loss": 0.2904,
"step": 29050
},
{
"epoch": 0.5142160414200138,
"grad_norm": 1.5080032348632812,
"learning_rate": 2.6998291806561822e-05,
"loss": 0.2647,
"step": 29100
},
{
"epoch": 0.5150995741372303,
"grad_norm": 1.7176605463027954,
"learning_rate": 2.6949205788223287e-05,
"loss": 0.4395,
"step": 29150
},
{
"epoch": 0.5159831068544468,
"grad_norm": 1.4339267015457153,
"learning_rate": 2.6900119769884745e-05,
"loss": 0.295,
"step": 29200
},
{
"epoch": 0.5168666395716633,
"grad_norm": 1.1258848905563354,
"learning_rate": 2.6851033751546213e-05,
"loss": 0.3927,
"step": 29250
},
{
"epoch": 0.5177501722888799,
"grad_norm": 2.5667836666107178,
"learning_rate": 2.680194773320767e-05,
"loss": 0.3492,
"step": 29300
},
{
"epoch": 0.5186337050060964,
"grad_norm": 1.7218468189239502,
"learning_rate": 2.675286171486914e-05,
"loss": 0.3304,
"step": 29350
},
{
"epoch": 0.5195172377233129,
"grad_norm": 2.4908971786499023,
"learning_rate": 2.6703775696530603e-05,
"loss": 0.3557,
"step": 29400
},
{
"epoch": 0.5204007704405295,
"grad_norm": 1.787463665008545,
"learning_rate": 2.665468967819206e-05,
"loss": 0.3389,
"step": 29450
},
{
"epoch": 0.5212843031577459,
"grad_norm": 3.174107789993286,
"learning_rate": 2.660560365985353e-05,
"loss": 0.3322,
"step": 29500
},
{
"epoch": 0.5221678358749624,
"grad_norm": 1.648913025856018,
"learning_rate": 2.6556517641514994e-05,
"loss": 0.3053,
"step": 29550
},
{
"epoch": 0.5230513685921789,
"grad_norm": 1.648561954498291,
"learning_rate": 2.6507431623176455e-05,
"loss": 0.2486,
"step": 29600
},
{
"epoch": 0.5239349013093955,
"grad_norm": 1.199449062347412,
"learning_rate": 2.645834560483792e-05,
"loss": 0.282,
"step": 29650
},
{
"epoch": 0.524818434026612,
"grad_norm": 0.9432544112205505,
"learning_rate": 2.6409259586499385e-05,
"loss": 0.3791,
"step": 29700
},
{
"epoch": 0.5257019667438285,
"grad_norm": 2.9582953453063965,
"learning_rate": 2.6360173568160846e-05,
"loss": 0.3346,
"step": 29750
},
{
"epoch": 0.526585499461045,
"grad_norm": 1.5263501405715942,
"learning_rate": 2.631108754982231e-05,
"loss": 0.2743,
"step": 29800
},
{
"epoch": 0.5274690321782616,
"grad_norm": 1.63582181930542,
"learning_rate": 2.6262001531483772e-05,
"loss": 0.2927,
"step": 29850
},
{
"epoch": 0.5283525648954781,
"grad_norm": 1.843386173248291,
"learning_rate": 2.6212915513145237e-05,
"loss": 0.3775,
"step": 29900
},
{
"epoch": 0.5292360976126946,
"grad_norm": 1.236327886581421,
"learning_rate": 2.61638294948067e-05,
"loss": 0.3114,
"step": 29950
},
{
"epoch": 0.5301196303299112,
"grad_norm": 1.5327879190444946,
"learning_rate": 2.6114743476468162e-05,
"loss": 0.2383,
"step": 30000
},
{
"epoch": 0.5310031630471276,
"grad_norm": 1.6281217336654663,
"learning_rate": 2.6065657458129627e-05,
"loss": 0.3798,
"step": 30050
},
{
"epoch": 0.5318866957643441,
"grad_norm": 1.1688692569732666,
"learning_rate": 2.6016571439791092e-05,
"loss": 0.3204,
"step": 30100
},
{
"epoch": 0.5327702284815606,
"grad_norm": 1.354048490524292,
"learning_rate": 2.5967485421452553e-05,
"loss": 0.2496,
"step": 30150
},
{
"epoch": 0.5336537611987772,
"grad_norm": 2.8124821186065674,
"learning_rate": 2.5918399403114018e-05,
"loss": 0.4147,
"step": 30200
},
{
"epoch": 0.5345372939159937,
"grad_norm": 1.886425495147705,
"learning_rate": 2.5869313384775486e-05,
"loss": 0.3021,
"step": 30250
},
{
"epoch": 0.5354208266332102,
"grad_norm": 1.6316314935684204,
"learning_rate": 2.5820227366436944e-05,
"loss": 0.2758,
"step": 30300
},
{
"epoch": 0.5363043593504268,
"grad_norm": 1.3990044593811035,
"learning_rate": 2.577114134809841e-05,
"loss": 0.3166,
"step": 30350
},
{
"epoch": 0.5371878920676433,
"grad_norm": 2.1562857627868652,
"learning_rate": 2.572205532975987e-05,
"loss": 0.35,
"step": 30400
},
{
"epoch": 0.5380714247848598,
"grad_norm": 1.1287676095962524,
"learning_rate": 2.5672969311421334e-05,
"loss": 0.3391,
"step": 30450
},
{
"epoch": 0.5389549575020763,
"grad_norm": 1.7524675130844116,
"learning_rate": 2.5623883293082802e-05,
"loss": 0.3576,
"step": 30500
},
{
"epoch": 0.5398384902192929,
"grad_norm": 1.1238594055175781,
"learning_rate": 2.5575778995111033e-05,
"loss": 0.295,
"step": 30550
},
{
"epoch": 0.5407220229365094,
"grad_norm": 0.9298042058944702,
"learning_rate": 2.5526692976772498e-05,
"loss": 0.3449,
"step": 30600
},
{
"epoch": 0.5416055556537258,
"grad_norm": 1.5093685388565063,
"learning_rate": 2.547760695843396e-05,
"loss": 0.3274,
"step": 30650
},
{
"epoch": 0.5424890883709423,
"grad_norm": 1.4606502056121826,
"learning_rate": 2.5428520940095424e-05,
"loss": 0.3094,
"step": 30700
},
{
"epoch": 0.5433726210881589,
"grad_norm": 1.7957881689071655,
"learning_rate": 2.537943492175689e-05,
"loss": 0.3077,
"step": 30750
},
{
"epoch": 0.5442561538053754,
"grad_norm": 1.4665497541427612,
"learning_rate": 2.533034890341835e-05,
"loss": 0.3505,
"step": 30800
},
{
"epoch": 0.5451396865225919,
"grad_norm": 1.785367488861084,
"learning_rate": 2.5281262885079815e-05,
"loss": 0.3485,
"step": 30850
},
{
"epoch": 0.5460232192398085,
"grad_norm": 4.639885425567627,
"learning_rate": 2.523217686674128e-05,
"loss": 0.331,
"step": 30900
},
{
"epoch": 0.546906751957025,
"grad_norm": 1.308772325515747,
"learning_rate": 2.518309084840274e-05,
"loss": 0.2846,
"step": 30950
},
{
"epoch": 0.5477902846742415,
"grad_norm": 1.3961265087127686,
"learning_rate": 2.5134004830064205e-05,
"loss": 0.3647,
"step": 31000
},
{
"epoch": 0.548673817391458,
"grad_norm": 1.0688265562057495,
"learning_rate": 2.5084918811725673e-05,
"loss": 0.3475,
"step": 31050
},
{
"epoch": 0.5495573501086746,
"grad_norm": 1.7052621841430664,
"learning_rate": 2.503583279338713e-05,
"loss": 0.2833,
"step": 31100
},
{
"epoch": 0.5504408828258911,
"grad_norm": 1.5378305912017822,
"learning_rate": 2.4986746775048596e-05,
"loss": 0.3,
"step": 31150
},
{
"epoch": 0.5513244155431075,
"grad_norm": 3.8670883178710938,
"learning_rate": 2.493766075671006e-05,
"loss": 0.3568,
"step": 31200
},
{
"epoch": 0.552207948260324,
"grad_norm": 1.8015788793563843,
"learning_rate": 2.4888574738371522e-05,
"loss": 0.3268,
"step": 31250
},
{
"epoch": 0.5530914809775406,
"grad_norm": 2.7606303691864014,
"learning_rate": 2.4839488720032987e-05,
"loss": 0.4005,
"step": 31300
},
{
"epoch": 0.5539750136947571,
"grad_norm": 1.3418834209442139,
"learning_rate": 2.479040270169445e-05,
"loss": 0.2993,
"step": 31350
},
{
"epoch": 0.5548585464119736,
"grad_norm": 1.3790879249572754,
"learning_rate": 2.4741316683355912e-05,
"loss": 0.3463,
"step": 31400
},
{
"epoch": 0.5557420791291902,
"grad_norm": 1.5994555950164795,
"learning_rate": 2.4692230665017377e-05,
"loss": 0.3654,
"step": 31450
},
{
"epoch": 0.5566256118464067,
"grad_norm": 1.528947114944458,
"learning_rate": 2.4643144646678842e-05,
"loss": 0.3329,
"step": 31500
},
{
"epoch": 0.5575091445636232,
"grad_norm": 1.4391777515411377,
"learning_rate": 2.4594058628340306e-05,
"loss": 0.2794,
"step": 31550
},
{
"epoch": 0.5583926772808397,
"grad_norm": 4.419312953948975,
"learning_rate": 2.4544972610001768e-05,
"loss": 0.4189,
"step": 31600
},
{
"epoch": 0.5592762099980563,
"grad_norm": 1.5030118227005005,
"learning_rate": 2.4495886591663232e-05,
"loss": 0.3643,
"step": 31650
},
{
"epoch": 0.5601597427152728,
"grad_norm": 1.3483951091766357,
"learning_rate": 2.4446800573324697e-05,
"loss": 0.3578,
"step": 31700
},
{
"epoch": 0.5610432754324892,
"grad_norm": 1.5314035415649414,
"learning_rate": 2.439771455498616e-05,
"loss": 0.3193,
"step": 31750
},
{
"epoch": 0.5619268081497057,
"grad_norm": 1.1020389795303345,
"learning_rate": 2.4348628536647623e-05,
"loss": 0.327,
"step": 31800
},
{
"epoch": 0.5628103408669223,
"grad_norm": 1.445654034614563,
"learning_rate": 2.4299542518309084e-05,
"loss": 0.3429,
"step": 31850
},
{
"epoch": 0.5636938735841388,
"grad_norm": 1.3795325756072998,
"learning_rate": 2.425045649997055e-05,
"loss": 0.2994,
"step": 31900
},
{
"epoch": 0.5645774063013553,
"grad_norm": 1.7217411994934082,
"learning_rate": 2.4201370481632014e-05,
"loss": 0.3219,
"step": 31950
},
{
"epoch": 0.5654609390185719,
"grad_norm": 1.3482351303100586,
"learning_rate": 2.4152284463293475e-05,
"loss": 0.2902,
"step": 32000
},
{
"epoch": 0.5663444717357884,
"grad_norm": 2.785452365875244,
"learning_rate": 2.4103198444954943e-05,
"loss": 0.3896,
"step": 32050
},
{
"epoch": 0.5672280044530049,
"grad_norm": 2.5383968353271484,
"learning_rate": 2.4054112426616404e-05,
"loss": 0.2491,
"step": 32100
},
{
"epoch": 0.5681115371702214,
"grad_norm": 1.584861397743225,
"learning_rate": 2.4005026408277866e-05,
"loss": 0.2663,
"step": 32150
},
{
"epoch": 0.568995069887438,
"grad_norm": 1.5586644411087036,
"learning_rate": 2.395594038993933e-05,
"loss": 0.3433,
"step": 32200
},
{
"epoch": 0.5698786026046545,
"grad_norm": 1.4697036743164062,
"learning_rate": 2.3906854371600795e-05,
"loss": 0.3375,
"step": 32250
},
{
"epoch": 0.5707621353218709,
"grad_norm": 2.39277720451355,
"learning_rate": 2.385776835326226e-05,
"loss": 0.2891,
"step": 32300
},
{
"epoch": 0.5716456680390875,
"grad_norm": 1.5755674839019775,
"learning_rate": 2.380868233492372e-05,
"loss": 0.296,
"step": 32350
},
{
"epoch": 0.572529200756304,
"grad_norm": 1.5802369117736816,
"learning_rate": 2.3759596316585182e-05,
"loss": 0.2478,
"step": 32400
},
{
"epoch": 0.5734127334735205,
"grad_norm": 2.731212615966797,
"learning_rate": 2.371051029824665e-05,
"loss": 0.3514,
"step": 32450
},
{
"epoch": 0.574296266190737,
"grad_norm": 1.70058274269104,
"learning_rate": 2.366142427990811e-05,
"loss": 0.2741,
"step": 32500
},
{
"epoch": 0.5751797989079536,
"grad_norm": 3.394753932952881,
"learning_rate": 2.3612338261569576e-05,
"loss": 0.3546,
"step": 32550
},
{
"epoch": 0.5760633316251701,
"grad_norm": 2.7270805835723877,
"learning_rate": 2.356423396359781e-05,
"loss": 0.3927,
"step": 32600
},
{
"epoch": 0.5769468643423866,
"grad_norm": 2.3731272220611572,
"learning_rate": 2.3515147945259272e-05,
"loss": 0.2725,
"step": 32650
},
{
"epoch": 0.5778303970596032,
"grad_norm": 1.4900075197219849,
"learning_rate": 2.3466061926920737e-05,
"loss": 0.3167,
"step": 32700
},
{
"epoch": 0.5787139297768197,
"grad_norm": 1.2145545482635498,
"learning_rate": 2.34169759085822e-05,
"loss": 0.3249,
"step": 32750
},
{
"epoch": 0.5795974624940362,
"grad_norm": 1.725298285484314,
"learning_rate": 2.3367889890243662e-05,
"loss": 0.2443,
"step": 32800
},
{
"epoch": 0.5804809952112526,
"grad_norm": 1.316084384918213,
"learning_rate": 2.331880387190513e-05,
"loss": 0.4113,
"step": 32850
},
{
"epoch": 0.5813645279284692,
"grad_norm": 1.8195414543151855,
"learning_rate": 2.3269717853566592e-05,
"loss": 0.3106,
"step": 32900
},
{
"epoch": 0.5822480606456857,
"grad_norm": 1.1715435981750488,
"learning_rate": 2.3220631835228053e-05,
"loss": 0.2841,
"step": 32950
},
{
"epoch": 0.5831315933629022,
"grad_norm": 1.3928303718566895,
"learning_rate": 2.3171545816889518e-05,
"loss": 0.2786,
"step": 33000
},
{
"epoch": 0.5840151260801187,
"grad_norm": 1.4881165027618408,
"learning_rate": 2.3122459798550982e-05,
"loss": 0.3576,
"step": 33050
},
{
"epoch": 0.5848986587973353,
"grad_norm": 2.8615384101867676,
"learning_rate": 2.3073373780212447e-05,
"loss": 0.2475,
"step": 33100
},
{
"epoch": 0.5857821915145518,
"grad_norm": 1.819924235343933,
"learning_rate": 2.302428776187391e-05,
"loss": 0.348,
"step": 33150
},
{
"epoch": 0.5866657242317683,
"grad_norm": 1.5402089357376099,
"learning_rate": 2.297520174353537e-05,
"loss": 0.2779,
"step": 33200
},
{
"epoch": 0.5875492569489849,
"grad_norm": 1.7234498262405396,
"learning_rate": 2.2926115725196838e-05,
"loss": 0.3166,
"step": 33250
},
{
"epoch": 0.5884327896662014,
"grad_norm": 1.4789388179779053,
"learning_rate": 2.28770297068583e-05,
"loss": 0.3448,
"step": 33300
},
{
"epoch": 0.5893163223834179,
"grad_norm": 0.8780321478843689,
"learning_rate": 2.2827943688519764e-05,
"loss": 0.2409,
"step": 33350
},
{
"epoch": 0.5901998551006343,
"grad_norm": 1.9462053775787354,
"learning_rate": 2.277885767018123e-05,
"loss": 0.3313,
"step": 33400
},
{
"epoch": 0.5910833878178509,
"grad_norm": 1.6026935577392578,
"learning_rate": 2.272977165184269e-05,
"loss": 0.2981,
"step": 33450
},
{
"epoch": 0.5919669205350674,
"grad_norm": 2.3030807971954346,
"learning_rate": 2.2680685633504154e-05,
"loss": 0.269,
"step": 33500
},
{
"epoch": 0.5928504532522839,
"grad_norm": 2.8911454677581787,
"learning_rate": 2.2631599615165616e-05,
"loss": 0.4098,
"step": 33550
},
{
"epoch": 0.5937339859695004,
"grad_norm": 1.4643045663833618,
"learning_rate": 2.258251359682708e-05,
"loss": 0.2924,
"step": 33600
},
{
"epoch": 0.594617518686717,
"grad_norm": 2.0076584815979004,
"learning_rate": 2.2533427578488545e-05,
"loss": 0.2952,
"step": 33650
},
{
"epoch": 0.5955010514039335,
"grad_norm": 1.203574299812317,
"learning_rate": 2.2484341560150006e-05,
"loss": 0.2754,
"step": 33700
},
{
"epoch": 0.59638458412115,
"grad_norm": 2.815420150756836,
"learning_rate": 2.243525554181147e-05,
"loss": 0.3434,
"step": 33750
},
{
"epoch": 0.5972681168383666,
"grad_norm": 1.487236499786377,
"learning_rate": 2.2386169523472935e-05,
"loss": 0.2541,
"step": 33800
},
{
"epoch": 0.5981516495555831,
"grad_norm": 1.532326102256775,
"learning_rate": 2.23370835051344e-05,
"loss": 0.2923,
"step": 33850
},
{
"epoch": 0.5990351822727996,
"grad_norm": 1.543256402015686,
"learning_rate": 2.228799748679586e-05,
"loss": 0.2361,
"step": 33900
},
{
"epoch": 0.599918714990016,
"grad_norm": 1.5733423233032227,
"learning_rate": 2.2238911468457326e-05,
"loss": 0.4459,
"step": 33950
},
{
"epoch": 0.6008022477072326,
"grad_norm": 1.2398439645767212,
"learning_rate": 2.218982545011879e-05,
"loss": 0.3169,
"step": 34000
},
{
"epoch": 0.6016857804244491,
"grad_norm": 1.1555734872817993,
"learning_rate": 2.2140739431780252e-05,
"loss": 0.3146,
"step": 34050
},
{
"epoch": 0.6025693131416656,
"grad_norm": 1.4827885627746582,
"learning_rate": 2.2091653413441717e-05,
"loss": 0.3125,
"step": 34100
},
{
"epoch": 0.6034528458588821,
"grad_norm": 1.5724104642868042,
"learning_rate": 2.204256739510318e-05,
"loss": 0.2738,
"step": 34150
},
{
"epoch": 0.6043363785760987,
"grad_norm": 1.5903054475784302,
"learning_rate": 2.1993481376764643e-05,
"loss": 0.3062,
"step": 34200
},
{
"epoch": 0.6052199112933152,
"grad_norm": 1.5402554273605347,
"learning_rate": 2.1944395358426107e-05,
"loss": 0.3566,
"step": 34250
},
{
"epoch": 0.6061034440105317,
"grad_norm": 1.7631182670593262,
"learning_rate": 2.189530934008757e-05,
"loss": 0.3466,
"step": 34300
},
{
"epoch": 0.6069869767277483,
"grad_norm": 1.2873070240020752,
"learning_rate": 2.1846223321749033e-05,
"loss": 0.2608,
"step": 34350
},
{
"epoch": 0.6078705094449648,
"grad_norm": 1.8117417097091675,
"learning_rate": 2.1797137303410498e-05,
"loss": 0.3239,
"step": 34400
},
{
"epoch": 0.6087540421621813,
"grad_norm": 1.5316294431686401,
"learning_rate": 2.174805128507196e-05,
"loss": 0.3557,
"step": 34450
},
{
"epoch": 0.6096375748793977,
"grad_norm": 1.539382815361023,
"learning_rate": 2.1698965266733427e-05,
"loss": 0.4175,
"step": 34500
},
{
"epoch": 0.6105211075966143,
"grad_norm": 1.6773380041122437,
"learning_rate": 2.164987924839489e-05,
"loss": 0.3104,
"step": 34550
},
{
"epoch": 0.6114046403138308,
"grad_norm": 1.3534982204437256,
"learning_rate": 2.1601774950423123e-05,
"loss": 0.3305,
"step": 34600
},
{
"epoch": 0.6122881730310473,
"grad_norm": 1.416923999786377,
"learning_rate": 2.1552688932084584e-05,
"loss": 0.2832,
"step": 34650
},
{
"epoch": 0.6131717057482639,
"grad_norm": 1.7992863655090332,
"learning_rate": 2.150360291374605e-05,
"loss": 0.3007,
"step": 34700
},
{
"epoch": 0.6140552384654804,
"grad_norm": 1.3988946676254272,
"learning_rate": 2.1454516895407514e-05,
"loss": 0.3932,
"step": 34750
},
{
"epoch": 0.6149387711826969,
"grad_norm": 1.7125048637390137,
"learning_rate": 2.1405430877068978e-05,
"loss": 0.3,
"step": 34800
},
{
"epoch": 0.6158223038999134,
"grad_norm": 1.4415560960769653,
"learning_rate": 2.135634485873044e-05,
"loss": 0.2785,
"step": 34850
},
{
"epoch": 0.61670583661713,
"grad_norm": 1.8688596487045288,
"learning_rate": 2.1307258840391904e-05,
"loss": 0.3015,
"step": 34900
},
{
"epoch": 0.6175893693343465,
"grad_norm": 3.085685968399048,
"learning_rate": 2.125817282205337e-05,
"loss": 0.3291,
"step": 34950
},
{
"epoch": 0.618472902051563,
"grad_norm": 1.3053193092346191,
"learning_rate": 2.120908680371483e-05,
"loss": 0.2634,
"step": 35000
},
{
"epoch": 0.6193564347687794,
"grad_norm": 1.4780889749526978,
"learning_rate": 2.1160000785376295e-05,
"loss": 0.3212,
"step": 35050
},
{
"epoch": 0.620239967485996,
"grad_norm": 1.699916124343872,
"learning_rate": 2.1110914767037756e-05,
"loss": 0.2965,
"step": 35100
},
{
"epoch": 0.6211235002032125,
"grad_norm": 1.6198956966400146,
"learning_rate": 2.106182874869922e-05,
"loss": 0.3557,
"step": 35150
},
{
"epoch": 0.622007032920429,
"grad_norm": 1.2697581052780151,
"learning_rate": 2.1012742730360685e-05,
"loss": 0.3535,
"step": 35200
},
{
"epoch": 0.6228905656376456,
"grad_norm": 1.9256399869918823,
"learning_rate": 2.0963656712022147e-05,
"loss": 0.4183,
"step": 35250
},
{
"epoch": 0.6237740983548621,
"grad_norm": 1.4346308708190918,
"learning_rate": 2.0914570693683615e-05,
"loss": 0.3355,
"step": 35300
},
{
"epoch": 0.6246576310720786,
"grad_norm": 1.3797852993011475,
"learning_rate": 2.0865484675345076e-05,
"loss": 0.3626,
"step": 35350
},
{
"epoch": 0.6255411637892951,
"grad_norm": 3.1976869106292725,
"learning_rate": 2.0816398657006537e-05,
"loss": 0.2542,
"step": 35400
},
{
"epoch": 0.6264246965065117,
"grad_norm": 1.4315252304077148,
"learning_rate": 2.0767312638668002e-05,
"loss": 0.2555,
"step": 35450
},
{
"epoch": 0.6273082292237282,
"grad_norm": 2.861154079437256,
"learning_rate": 2.0718226620329467e-05,
"loss": 0.3418,
"step": 35500
},
{
"epoch": 0.6281917619409447,
"grad_norm": 1.378416895866394,
"learning_rate": 2.066914060199093e-05,
"loss": 0.3118,
"step": 35550
},
{
"epoch": 0.6290752946581611,
"grad_norm": 4.129642486572266,
"learning_rate": 2.0620054583652393e-05,
"loss": 0.2889,
"step": 35600
},
{
"epoch": 0.6299588273753777,
"grad_norm": 1.478084683418274,
"learning_rate": 2.0570968565313857e-05,
"loss": 0.319,
"step": 35650
},
{
"epoch": 0.6308423600925942,
"grad_norm": 3.230463743209839,
"learning_rate": 2.0521882546975322e-05,
"loss": 0.312,
"step": 35700
},
{
"epoch": 0.6317258928098107,
"grad_norm": 1.2029914855957031,
"learning_rate": 2.0472796528636783e-05,
"loss": 0.2776,
"step": 35750
},
{
"epoch": 0.6326094255270273,
"grad_norm": 1.6909867525100708,
"learning_rate": 2.0423710510298248e-05,
"loss": 0.2574,
"step": 35800
},
{
"epoch": 0.6334929582442438,
"grad_norm": 1.6969387531280518,
"learning_rate": 2.0374624491959713e-05,
"loss": 0.2816,
"step": 35850
},
{
"epoch": 0.6343764909614603,
"grad_norm": 2.4319510459899902,
"learning_rate": 2.0325538473621174e-05,
"loss": 0.3392,
"step": 35900
},
{
"epoch": 0.6352600236786768,
"grad_norm": 1.4081567525863647,
"learning_rate": 2.027645245528264e-05,
"loss": 0.2856,
"step": 35950
},
{
"epoch": 0.6361435563958934,
"grad_norm": 1.2967078685760498,
"learning_rate": 2.02273664369441e-05,
"loss": 0.286,
"step": 36000
},
{
"epoch": 0.6370270891131099,
"grad_norm": 1.3550012111663818,
"learning_rate": 2.0178280418605568e-05,
"loss": 0.3555,
"step": 36050
},
{
"epoch": 0.6379106218303264,
"grad_norm": 1.9244177341461182,
"learning_rate": 2.012919440026703e-05,
"loss": 0.3065,
"step": 36100
},
{
"epoch": 0.6387941545475428,
"grad_norm": 2.1921980381011963,
"learning_rate": 2.008010838192849e-05,
"loss": 0.3696,
"step": 36150
},
{
"epoch": 0.6396776872647594,
"grad_norm": 1.6438093185424805,
"learning_rate": 2.0031022363589955e-05,
"loss": 0.2626,
"step": 36200
},
{
"epoch": 0.6405612199819759,
"grad_norm": 1.4499566555023193,
"learning_rate": 1.998193634525142e-05,
"loss": 0.2681,
"step": 36250
},
{
"epoch": 0.6414447526991924,
"grad_norm": 1.7677289247512817,
"learning_rate": 1.9932850326912884e-05,
"loss": 0.3015,
"step": 36300
},
{
"epoch": 0.642328285416409,
"grad_norm": 2.6856095790863037,
"learning_rate": 1.9883764308574346e-05,
"loss": 0.3587,
"step": 36350
},
{
"epoch": 0.6432118181336255,
"grad_norm": 1.9422292709350586,
"learning_rate": 1.983467829023581e-05,
"loss": 0.4138,
"step": 36400
},
{
"epoch": 0.644095350850842,
"grad_norm": 0.9389033317565918,
"learning_rate": 1.9785592271897275e-05,
"loss": 0.3785,
"step": 36450
},
{
"epoch": 0.6449788835680585,
"grad_norm": 1.297255516052246,
"learning_rate": 1.9736506253558736e-05,
"loss": 0.3385,
"step": 36500
},
{
"epoch": 0.6458624162852751,
"grad_norm": 1.0876415967941284,
"learning_rate": 1.96874202352202e-05,
"loss": 0.3423,
"step": 36550
},
{
"epoch": 0.6467459490024916,
"grad_norm": 1.2366421222686768,
"learning_rate": 1.9638334216881666e-05,
"loss": 0.3793,
"step": 36600
},
{
"epoch": 0.6476294817197081,
"grad_norm": 4.349328517913818,
"learning_rate": 1.95902299189099e-05,
"loss": 0.3819,
"step": 36650
},
{
"epoch": 0.6485130144369246,
"grad_norm": 1.804661750793457,
"learning_rate": 1.954114390057136e-05,
"loss": 0.3111,
"step": 36700
},
{
"epoch": 0.6493965471541411,
"grad_norm": 2.6138484477996826,
"learning_rate": 1.9492057882232826e-05,
"loss": 0.3299,
"step": 36750
},
{
"epoch": 0.6502800798713576,
"grad_norm": 1.8608500957489014,
"learning_rate": 1.9442971863894287e-05,
"loss": 0.3994,
"step": 36800
},
{
"epoch": 0.6511636125885741,
"grad_norm": 2.2977466583251953,
"learning_rate": 1.9393885845555755e-05,
"loss": 0.3595,
"step": 36850
},
{
"epoch": 0.6520471453057907,
"grad_norm": 1.6370161771774292,
"learning_rate": 1.9344799827217217e-05,
"loss": 0.3572,
"step": 36900
},
{
"epoch": 0.6529306780230072,
"grad_norm": 1.4357324838638306,
"learning_rate": 1.9295713808878678e-05,
"loss": 0.3085,
"step": 36950
},
{
"epoch": 0.6538142107402237,
"grad_norm": 1.8057055473327637,
"learning_rate": 1.9246627790540143e-05,
"loss": 0.2647,
"step": 37000
},
{
"epoch": 0.6546977434574403,
"grad_norm": 1.230721354484558,
"learning_rate": 1.9197541772201607e-05,
"loss": 0.2622,
"step": 37050
},
{
"epoch": 0.6555812761746568,
"grad_norm": 1.6303822994232178,
"learning_rate": 1.9148455753863072e-05,
"loss": 0.3814,
"step": 37100
},
{
"epoch": 0.6564648088918733,
"grad_norm": 1.2327115535736084,
"learning_rate": 1.9099369735524533e-05,
"loss": 0.4174,
"step": 37150
},
{
"epoch": 0.6573483416090898,
"grad_norm": 1.4918360710144043,
"learning_rate": 1.9050283717185998e-05,
"loss": 0.3473,
"step": 37200
},
{
"epoch": 0.6582318743263063,
"grad_norm": 1.6960564851760864,
"learning_rate": 1.9001197698847463e-05,
"loss": 0.3598,
"step": 37250
},
{
"epoch": 0.6591154070435228,
"grad_norm": 1.8127328157424927,
"learning_rate": 1.8952111680508924e-05,
"loss": 0.2822,
"step": 37300
},
{
"epoch": 0.6599989397607393,
"grad_norm": 1.7553006410598755,
"learning_rate": 1.890302566217039e-05,
"loss": 0.2538,
"step": 37350
},
{
"epoch": 0.6608824724779558,
"grad_norm": 1.111005187034607,
"learning_rate": 1.8853939643831853e-05,
"loss": 0.2212,
"step": 37400
},
{
"epoch": 0.6617660051951724,
"grad_norm": 1.2916769981384277,
"learning_rate": 1.8804853625493314e-05,
"loss": 0.2687,
"step": 37450
},
{
"epoch": 0.6626495379123889,
"grad_norm": 1.5212571620941162,
"learning_rate": 1.875576760715478e-05,
"loss": 0.3288,
"step": 37500
},
{
"epoch": 0.6635330706296054,
"grad_norm": 1.5829190015792847,
"learning_rate": 1.870668158881624e-05,
"loss": 0.4221,
"step": 37550
},
{
"epoch": 0.664416603346822,
"grad_norm": 1.4784077405929565,
"learning_rate": 1.8657595570477705e-05,
"loss": 0.3711,
"step": 37600
},
{
"epoch": 0.6653001360640385,
"grad_norm": 1.907202959060669,
"learning_rate": 1.860850955213917e-05,
"loss": 0.2418,
"step": 37650
},
{
"epoch": 0.666183668781255,
"grad_norm": 1.4358186721801758,
"learning_rate": 1.855942353380063e-05,
"loss": 0.3,
"step": 37700
},
{
"epoch": 0.6670672014984715,
"grad_norm": 1.4791388511657715,
"learning_rate": 1.85103375154621e-05,
"loss": 0.3147,
"step": 37750
},
{
"epoch": 0.667950734215688,
"grad_norm": 1.383799433708191,
"learning_rate": 1.846125149712356e-05,
"loss": 0.2895,
"step": 37800
},
{
"epoch": 0.6688342669329045,
"grad_norm": 1.7297286987304688,
"learning_rate": 1.8412165478785025e-05,
"loss": 0.2649,
"step": 37850
},
{
"epoch": 0.669717799650121,
"grad_norm": 1.2361524105072021,
"learning_rate": 1.8363079460446486e-05,
"loss": 0.287,
"step": 37900
},
{
"epoch": 0.6706013323673375,
"grad_norm": 1.472721815109253,
"learning_rate": 1.831399344210795e-05,
"loss": 0.267,
"step": 37950
},
{
"epoch": 0.6714848650845541,
"grad_norm": 1.7498071193695068,
"learning_rate": 1.8264907423769416e-05,
"loss": 0.2862,
"step": 38000
},
{
"epoch": 0.6723683978017706,
"grad_norm": 1.389864444732666,
"learning_rate": 1.8215821405430877e-05,
"loss": 0.2931,
"step": 38050
},
{
"epoch": 0.6732519305189871,
"grad_norm": 1.2709695100784302,
"learning_rate": 1.816673538709234e-05,
"loss": 0.3039,
"step": 38100
},
{
"epoch": 0.6741354632362037,
"grad_norm": 1.2036606073379517,
"learning_rate": 1.8117649368753806e-05,
"loss": 0.3067,
"step": 38150
},
{
"epoch": 0.6750189959534202,
"grad_norm": 1.3336296081542969,
"learning_rate": 1.8068563350415268e-05,
"loss": 0.3072,
"step": 38200
},
{
"epoch": 0.6759025286706367,
"grad_norm": 1.8485578298568726,
"learning_rate": 1.8019477332076732e-05,
"loss": 0.2744,
"step": 38250
},
{
"epoch": 0.6767860613878532,
"grad_norm": 1.350595235824585,
"learning_rate": 1.7970391313738197e-05,
"loss": 0.3098,
"step": 38300
},
{
"epoch": 0.6776695941050697,
"grad_norm": 1.8860970735549927,
"learning_rate": 1.7921305295399658e-05,
"loss": 0.2837,
"step": 38350
},
{
"epoch": 0.6785531268222862,
"grad_norm": 1.3870184421539307,
"learning_rate": 1.7872219277061123e-05,
"loss": 0.2998,
"step": 38400
},
{
"epoch": 0.6794366595395027,
"grad_norm": 1.5092830657958984,
"learning_rate": 1.7823133258722584e-05,
"loss": 0.2477,
"step": 38450
},
{
"epoch": 0.6803201922567192,
"grad_norm": 1.4017945528030396,
"learning_rate": 1.7774047240384052e-05,
"loss": 0.3343,
"step": 38500
},
{
"epoch": 0.6812037249739358,
"grad_norm": 1.5817060470581055,
"learning_rate": 1.7724961222045513e-05,
"loss": 0.2411,
"step": 38550
},
{
"epoch": 0.6820872576911523,
"grad_norm": 1.4471608400344849,
"learning_rate": 1.7675875203706975e-05,
"loss": 0.2641,
"step": 38600
},
{
"epoch": 0.6829707904083688,
"grad_norm": 1.6398324966430664,
"learning_rate": 1.762777090573521e-05,
"loss": 0.3673,
"step": 38650
},
{
"epoch": 0.6838543231255854,
"grad_norm": 1.5645078420639038,
"learning_rate": 1.7578684887396674e-05,
"loss": 0.3456,
"step": 38700
},
{
"epoch": 0.6847378558428019,
"grad_norm": 1.4957185983657837,
"learning_rate": 1.752959886905814e-05,
"loss": 0.2554,
"step": 38750
},
{
"epoch": 0.6856213885600184,
"grad_norm": 1.5689042806625366,
"learning_rate": 1.7480512850719603e-05,
"loss": 0.2289,
"step": 38800
},
{
"epoch": 0.686504921277235,
"grad_norm": 1.4531927108764648,
"learning_rate": 1.7431426832381064e-05,
"loss": 0.2409,
"step": 38850
},
{
"epoch": 0.6873884539944514,
"grad_norm": 3.168332576751709,
"learning_rate": 1.738234081404253e-05,
"loss": 0.3395,
"step": 38900
},
{
"epoch": 0.6882719867116679,
"grad_norm": 1.5215202569961548,
"learning_rate": 1.7333254795703994e-05,
"loss": 0.3296,
"step": 38950
},
{
"epoch": 0.6891555194288844,
"grad_norm": 1.9889358282089233,
"learning_rate": 1.7284168777365455e-05,
"loss": 0.349,
"step": 39000
},
{
"epoch": 0.690039052146101,
"grad_norm": 1.6141583919525146,
"learning_rate": 1.723508275902692e-05,
"loss": 0.2841,
"step": 39050
},
{
"epoch": 0.6909225848633175,
"grad_norm": 1.3648995161056519,
"learning_rate": 1.7185996740688384e-05,
"loss": 0.3101,
"step": 39100
},
{
"epoch": 0.691806117580534,
"grad_norm": 1.897626280784607,
"learning_rate": 1.7136910722349846e-05,
"loss": 0.3094,
"step": 39150
},
{
"epoch": 0.6926896502977505,
"grad_norm": 1.4272273778915405,
"learning_rate": 1.708782470401131e-05,
"loss": 0.2485,
"step": 39200
},
{
"epoch": 0.6935731830149671,
"grad_norm": 1.1664527654647827,
"learning_rate": 1.703873868567277e-05,
"loss": 0.2985,
"step": 39250
},
{
"epoch": 0.6944567157321836,
"grad_norm": 1.5030759572982788,
"learning_rate": 1.698965266733424e-05,
"loss": 0.3039,
"step": 39300
},
{
"epoch": 0.6953402484494001,
"grad_norm": 1.2608274221420288,
"learning_rate": 1.69405666489957e-05,
"loss": 0.2616,
"step": 39350
},
{
"epoch": 0.6962237811666167,
"grad_norm": 1.173496961593628,
"learning_rate": 1.6891480630657162e-05,
"loss": 0.2932,
"step": 39400
},
{
"epoch": 0.6971073138838331,
"grad_norm": 1.3213509321212769,
"learning_rate": 1.6842394612318627e-05,
"loss": 0.3707,
"step": 39450
},
{
"epoch": 0.6979908466010496,
"grad_norm": 1.9010616540908813,
"learning_rate": 1.679330859398009e-05,
"loss": 0.2728,
"step": 39500
},
{
"epoch": 0.6988743793182661,
"grad_norm": 1.143967866897583,
"learning_rate": 1.6744222575641556e-05,
"loss": 0.2482,
"step": 39550
},
{
"epoch": 0.6997579120354827,
"grad_norm": 1.108268141746521,
"learning_rate": 1.6695136557303018e-05,
"loss": 0.3475,
"step": 39600
},
{
"epoch": 0.7006414447526992,
"grad_norm": 0.6908143758773804,
"learning_rate": 1.6646050538964482e-05,
"loss": 0.3447,
"step": 39650
},
{
"epoch": 0.7015249774699157,
"grad_norm": 1.5447782278060913,
"learning_rate": 1.6596964520625947e-05,
"loss": 0.2503,
"step": 39700
},
{
"epoch": 0.7024085101871322,
"grad_norm": 1.515202283859253,
"learning_rate": 1.6547878502287408e-05,
"loss": 0.3593,
"step": 39750
},
{
"epoch": 0.7032920429043488,
"grad_norm": 4.640558242797852,
"learning_rate": 1.6498792483948873e-05,
"loss": 0.2933,
"step": 39800
},
{
"epoch": 0.7041755756215653,
"grad_norm": 1.1238136291503906,
"learning_rate": 1.6449706465610338e-05,
"loss": 0.3565,
"step": 39850
},
{
"epoch": 0.7050591083387818,
"grad_norm": 1.5694066286087036,
"learning_rate": 1.64006204472718e-05,
"loss": 0.2838,
"step": 39900
},
{
"epoch": 0.7059426410559984,
"grad_norm": 1.808310866355896,
"learning_rate": 1.6351534428933263e-05,
"loss": 0.2604,
"step": 39950
},
{
"epoch": 0.7068261737732148,
"grad_norm": 1.6668068170547485,
"learning_rate": 1.6302448410594725e-05,
"loss": 0.2286,
"step": 40000
},
{
"epoch": 0.7077097064904313,
"grad_norm": 1.541528344154358,
"learning_rate": 1.6253362392256193e-05,
"loss": 0.2766,
"step": 40050
},
{
"epoch": 0.7085932392076478,
"grad_norm": 1.4408469200134277,
"learning_rate": 1.6204276373917654e-05,
"loss": 0.2649,
"step": 40100
},
{
"epoch": 0.7094767719248644,
"grad_norm": 1.57314932346344,
"learning_rate": 1.6155190355579115e-05,
"loss": 0.3184,
"step": 40150
},
{
"epoch": 0.7103603046420809,
"grad_norm": 2.2145802974700928,
"learning_rate": 1.6106104337240583e-05,
"loss": 0.334,
"step": 40200
},
{
"epoch": 0.7112438373592974,
"grad_norm": 1.348560094833374,
"learning_rate": 1.6057018318902045e-05,
"loss": 0.3609,
"step": 40250
},
{
"epoch": 0.7121273700765139,
"grad_norm": 0.9920164942741394,
"learning_rate": 1.600793230056351e-05,
"loss": 0.306,
"step": 40300
},
{
"epoch": 0.7130109027937305,
"grad_norm": 1.3321669101715088,
"learning_rate": 1.595884628222497e-05,
"loss": 0.2629,
"step": 40350
},
{
"epoch": 0.713894435510947,
"grad_norm": 1.257283329963684,
"learning_rate": 1.5909760263886435e-05,
"loss": 0.2311,
"step": 40400
},
{
"epoch": 0.7147779682281635,
"grad_norm": 1.3199965953826904,
"learning_rate": 1.58606742455479e-05,
"loss": 0.2909,
"step": 40450
},
{
"epoch": 0.7156615009453801,
"grad_norm": 1.4839483499526978,
"learning_rate": 1.581158822720936e-05,
"loss": 0.343,
"step": 40500
},
{
"epoch": 0.7165450336625965,
"grad_norm": 1.0020859241485596,
"learning_rate": 1.5762502208870826e-05,
"loss": 0.3013,
"step": 40550
},
{
"epoch": 0.717428566379813,
"grad_norm": 1.3758106231689453,
"learning_rate": 1.571341619053229e-05,
"loss": 0.3128,
"step": 40600
},
{
"epoch": 0.7183120990970295,
"grad_norm": 1.5661957263946533,
"learning_rate": 1.5664330172193752e-05,
"loss": 0.2073,
"step": 40650
},
{
"epoch": 0.7191956318142461,
"grad_norm": 1.4217487573623657,
"learning_rate": 1.5615244153855217e-05,
"loss": 0.2873,
"step": 40700
},
{
"epoch": 0.7200791645314626,
"grad_norm": 1.6037381887435913,
"learning_rate": 1.556713985588345e-05,
"loss": 0.3705,
"step": 40750
},
{
"epoch": 0.7209626972486791,
"grad_norm": 1.7782158851623535,
"learning_rate": 1.5518053837544916e-05,
"loss": 0.2985,
"step": 40800
},
{
"epoch": 0.7218462299658956,
"grad_norm": 1.8306645154953003,
"learning_rate": 1.546896781920638e-05,
"loss": 0.3355,
"step": 40850
},
{
"epoch": 0.7227297626831122,
"grad_norm": 1.4840078353881836,
"learning_rate": 1.541988180086784e-05,
"loss": 0.3322,
"step": 40900
},
{
"epoch": 0.7236132954003287,
"grad_norm": 2.7773265838623047,
"learning_rate": 1.5370795782529303e-05,
"loss": 0.3424,
"step": 40950
},
{
"epoch": 0.7244968281175452,
"grad_norm": 1.3592840433120728,
"learning_rate": 1.532170976419077e-05,
"loss": 0.2834,
"step": 41000
},
{
"epoch": 0.7253803608347618,
"grad_norm": 2.0050973892211914,
"learning_rate": 1.5272623745852232e-05,
"loss": 0.2776,
"step": 41050
},
{
"epoch": 0.7262638935519782,
"grad_norm": 1.4293886423110962,
"learning_rate": 1.5223537727513695e-05,
"loss": 0.2924,
"step": 41100
},
{
"epoch": 0.7271474262691947,
"grad_norm": 1.5391188859939575,
"learning_rate": 1.5174451709175158e-05,
"loss": 0.3982,
"step": 41150
},
{
"epoch": 0.7280309589864112,
"grad_norm": 1.4493207931518555,
"learning_rate": 1.5125365690836625e-05,
"loss": 0.3379,
"step": 41200
},
{
"epoch": 0.7289144917036278,
"grad_norm": 2.0417702198028564,
"learning_rate": 1.5076279672498086e-05,
"loss": 0.2876,
"step": 41250
},
{
"epoch": 0.7297980244208443,
"grad_norm": 1.7424287796020508,
"learning_rate": 1.5027193654159549e-05,
"loss": 0.3289,
"step": 41300
},
{
"epoch": 0.7306815571380608,
"grad_norm": 1.1353446245193481,
"learning_rate": 1.4978107635821015e-05,
"loss": 0.3119,
"step": 41350
},
{
"epoch": 0.7315650898552774,
"grad_norm": 1.4479618072509766,
"learning_rate": 1.4929021617482478e-05,
"loss": 0.3007,
"step": 41400
},
{
"epoch": 0.7324486225724939,
"grad_norm": 2.910383939743042,
"learning_rate": 1.4879935599143941e-05,
"loss": 0.3499,
"step": 41450
},
{
"epoch": 0.7333321552897104,
"grad_norm": 1.0191997289657593,
"learning_rate": 1.4830849580805404e-05,
"loss": 0.3016,
"step": 41500
},
{
"epoch": 0.7342156880069269,
"grad_norm": 0.9859305620193481,
"learning_rate": 1.4781763562466869e-05,
"loss": 0.3232,
"step": 41550
},
{
"epoch": 0.7350992207241435,
"grad_norm": 1.4275975227355957,
"learning_rate": 1.4732677544128332e-05,
"loss": 0.2577,
"step": 41600
},
{
"epoch": 0.7359827534413599,
"grad_norm": 1.0362133979797363,
"learning_rate": 1.4683591525789795e-05,
"loss": 0.2928,
"step": 41650
},
{
"epoch": 0.7368662861585764,
"grad_norm": 2.07706618309021,
"learning_rate": 1.4634505507451258e-05,
"loss": 0.2814,
"step": 41700
},
{
"epoch": 0.7377498188757929,
"grad_norm": 1.6817320585250854,
"learning_rate": 1.4585419489112722e-05,
"loss": 0.2962,
"step": 41750
},
{
"epoch": 0.7386333515930095,
"grad_norm": 2.021404981613159,
"learning_rate": 1.4536333470774185e-05,
"loss": 0.2756,
"step": 41800
},
{
"epoch": 0.739516884310226,
"grad_norm": 1.3860830068588257,
"learning_rate": 1.4487247452435648e-05,
"loss": 0.3287,
"step": 41850
},
{
"epoch": 0.7404004170274425,
"grad_norm": 1.266453742980957,
"learning_rate": 1.4438161434097111e-05,
"loss": 0.34,
"step": 41900
},
{
"epoch": 0.741283949744659,
"grad_norm": 1.5601640939712524,
"learning_rate": 1.4389075415758576e-05,
"loss": 0.3402,
"step": 41950
},
{
"epoch": 0.7421674824618756,
"grad_norm": 1.6014955043792725,
"learning_rate": 1.4339989397420039e-05,
"loss": 0.2404,
"step": 42000
},
{
"epoch": 0.7430510151790921,
"grad_norm": 1.4757792949676514,
"learning_rate": 1.4290903379081502e-05,
"loss": 0.2655,
"step": 42050
},
{
"epoch": 0.7439345478963086,
"grad_norm": 1.6618765592575073,
"learning_rate": 1.4241817360742968e-05,
"loss": 0.3288,
"step": 42100
},
{
"epoch": 0.7448180806135252,
"grad_norm": 1.3700001239776611,
"learning_rate": 1.4192731342404431e-05,
"loss": 0.3333,
"step": 42150
},
{
"epoch": 0.7457016133307416,
"grad_norm": 1.4557344913482666,
"learning_rate": 1.4143645324065894e-05,
"loss": 0.3602,
"step": 42200
},
{
"epoch": 0.7465851460479581,
"grad_norm": 1.546533226966858,
"learning_rate": 1.4094559305727355e-05,
"loss": 0.3631,
"step": 42250
},
{
"epoch": 0.7474686787651746,
"grad_norm": 1.3664302825927734,
"learning_rate": 1.4045473287388822e-05,
"loss": 0.2374,
"step": 42300
},
{
"epoch": 0.7483522114823912,
"grad_norm": 4.161416053771973,
"learning_rate": 1.3996387269050285e-05,
"loss": 0.3347,
"step": 42350
},
{
"epoch": 0.7492357441996077,
"grad_norm": 1.3883721828460693,
"learning_rate": 1.3947301250711748e-05,
"loss": 0.3031,
"step": 42400
},
{
"epoch": 0.7501192769168242,
"grad_norm": 2.6039016246795654,
"learning_rate": 1.389821523237321e-05,
"loss": 0.2817,
"step": 42450
},
{
"epoch": 0.7510028096340408,
"grad_norm": 4.557380676269531,
"learning_rate": 1.3849129214034675e-05,
"loss": 0.3433,
"step": 42500
},
{
"epoch": 0.7518863423512573,
"grad_norm": 2.938749074935913,
"learning_rate": 1.3800043195696138e-05,
"loss": 0.3273,
"step": 42550
},
{
"epoch": 0.7527698750684738,
"grad_norm": 1.3836658000946045,
"learning_rate": 1.3750957177357601e-05,
"loss": 0.3123,
"step": 42600
},
{
"epoch": 0.7536534077856903,
"grad_norm": 4.600383758544922,
"learning_rate": 1.3701871159019066e-05,
"loss": 0.29,
"step": 42650
},
{
"epoch": 0.7545369405029069,
"grad_norm": 4.080932140350342,
"learning_rate": 1.3652785140680529e-05,
"loss": 0.3352,
"step": 42700
},
{
"epoch": 0.7554204732201233,
"grad_norm": 1.4026703834533691,
"learning_rate": 1.3603699122341992e-05,
"loss": 0.3143,
"step": 42750
},
{
"epoch": 0.7563040059373398,
"grad_norm": 1.4037362337112427,
"learning_rate": 1.3555594824370226e-05,
"loss": 0.2833,
"step": 42800
},
{
"epoch": 0.7571875386545563,
"grad_norm": 1.5535756349563599,
"learning_rate": 1.350650880603169e-05,
"loss": 0.2851,
"step": 42850
},
{
"epoch": 0.7580710713717729,
"grad_norm": 1.3919951915740967,
"learning_rate": 1.3457422787693156e-05,
"loss": 0.2956,
"step": 42900
},
{
"epoch": 0.7589546040889894,
"grad_norm": 1.531242847442627,
"learning_rate": 1.3408336769354619e-05,
"loss": 0.3038,
"step": 42950
},
{
"epoch": 0.7598381368062059,
"grad_norm": 1.4396170377731323,
"learning_rate": 1.335925075101608e-05,
"loss": 0.2195,
"step": 43000
},
{
"epoch": 0.7607216695234225,
"grad_norm": 1.4077396392822266,
"learning_rate": 1.3310164732677543e-05,
"loss": 0.2588,
"step": 43050
},
{
"epoch": 0.761605202240639,
"grad_norm": 2.881322145462036,
"learning_rate": 1.326107871433901e-05,
"loss": 0.3065,
"step": 43100
},
{
"epoch": 0.7624887349578555,
"grad_norm": 1.5936981439590454,
"learning_rate": 1.3211992696000472e-05,
"loss": 0.3252,
"step": 43150
},
{
"epoch": 0.763372267675072,
"grad_norm": 1.4670791625976562,
"learning_rate": 1.3162906677661935e-05,
"loss": 0.2418,
"step": 43200
},
{
"epoch": 0.7642558003922886,
"grad_norm": 1.6417291164398193,
"learning_rate": 1.31138206593234e-05,
"loss": 0.4002,
"step": 43250
},
{
"epoch": 0.765139333109505,
"grad_norm": 1.5653693675994873,
"learning_rate": 1.3064734640984863e-05,
"loss": 0.3379,
"step": 43300
},
{
"epoch": 0.7660228658267215,
"grad_norm": 1.279615879058838,
"learning_rate": 1.3015648622646326e-05,
"loss": 0.3574,
"step": 43350
},
{
"epoch": 0.766906398543938,
"grad_norm": 1.693057894706726,
"learning_rate": 1.2966562604307789e-05,
"loss": 0.3664,
"step": 43400
},
{
"epoch": 0.7677899312611546,
"grad_norm": 2.931711196899414,
"learning_rate": 1.2917476585969254e-05,
"loss": 0.3274,
"step": 43450
},
{
"epoch": 0.7686734639783711,
"grad_norm": 1.3495726585388184,
"learning_rate": 1.2868390567630716e-05,
"loss": 0.2443,
"step": 43500
},
{
"epoch": 0.7695569966955876,
"grad_norm": 1.4437354803085327,
"learning_rate": 1.281930454929218e-05,
"loss": 0.2998,
"step": 43550
},
{
"epoch": 0.7704405294128042,
"grad_norm": 4.394979000091553,
"learning_rate": 1.2770218530953642e-05,
"loss": 0.3619,
"step": 43600
},
{
"epoch": 0.7713240621300207,
"grad_norm": 2.726393461227417,
"learning_rate": 1.2721132512615109e-05,
"loss": 0.2586,
"step": 43650
},
{
"epoch": 0.7722075948472372,
"grad_norm": 1.146583080291748,
"learning_rate": 1.2672046494276572e-05,
"loss": 0.251,
"step": 43700
},
{
"epoch": 0.7730911275644538,
"grad_norm": 1.2839117050170898,
"learning_rate": 1.2622960475938033e-05,
"loss": 0.3154,
"step": 43750
},
{
"epoch": 0.7739746602816703,
"grad_norm": 1.3681036233901978,
"learning_rate": 1.25738744575995e-05,
"loss": 0.3309,
"step": 43800
},
{
"epoch": 0.7748581929988867,
"grad_norm": 1.3661130666732788,
"learning_rate": 1.2524788439260962e-05,
"loss": 0.2768,
"step": 43850
},
{
"epoch": 0.7757417257161032,
"grad_norm": 1.3001888990402222,
"learning_rate": 1.2475702420922425e-05,
"loss": 0.3049,
"step": 43900
},
{
"epoch": 0.7766252584333198,
"grad_norm": 1.4377065896987915,
"learning_rate": 1.2426616402583888e-05,
"loss": 0.2939,
"step": 43950
},
{
"epoch": 0.7775087911505363,
"grad_norm": 2.084547519683838,
"learning_rate": 1.2377530384245351e-05,
"loss": 0.2909,
"step": 44000
},
{
"epoch": 0.7783923238677528,
"grad_norm": 1.421271562576294,
"learning_rate": 1.2328444365906816e-05,
"loss": 0.2157,
"step": 44050
},
{
"epoch": 0.7792758565849693,
"grad_norm": 1.2172672748565674,
"learning_rate": 1.2279358347568279e-05,
"loss": 0.2785,
"step": 44100
},
{
"epoch": 0.7801593893021859,
"grad_norm": 1.5359545946121216,
"learning_rate": 1.2230272329229744e-05,
"loss": 0.2386,
"step": 44150
},
{
"epoch": 0.7810429220194024,
"grad_norm": 1.3057314157485962,
"learning_rate": 1.2181186310891207e-05,
"loss": 0.3297,
"step": 44200
},
{
"epoch": 0.7819264547366189,
"grad_norm": 1.0489422082901,
"learning_rate": 1.213210029255267e-05,
"loss": 0.2576,
"step": 44250
},
{
"epoch": 0.7828099874538355,
"grad_norm": 2.2906908988952637,
"learning_rate": 1.2083014274214133e-05,
"loss": 0.2678,
"step": 44300
},
{
"epoch": 0.783693520171052,
"grad_norm": 1.5887507200241089,
"learning_rate": 1.2033928255875597e-05,
"loss": 0.3625,
"step": 44350
},
{
"epoch": 0.7845770528882684,
"grad_norm": 1.592004418373108,
"learning_rate": 1.1984842237537062e-05,
"loss": 0.311,
"step": 44400
},
{
"epoch": 0.7854605856054849,
"grad_norm": 1.2201918363571167,
"learning_rate": 1.1935756219198523e-05,
"loss": 0.2924,
"step": 44450
},
{
"epoch": 0.7863441183227015,
"grad_norm": 1.6248202323913574,
"learning_rate": 1.1886670200859988e-05,
"loss": 0.2602,
"step": 44500
},
{
"epoch": 0.787227651039918,
"grad_norm": 2.032122850418091,
"learning_rate": 1.183758418252145e-05,
"loss": 0.3936,
"step": 44550
},
{
"epoch": 0.7881111837571345,
"grad_norm": 1.149383783340454,
"learning_rate": 1.1788498164182915e-05,
"loss": 0.229,
"step": 44600
},
{
"epoch": 0.788994716474351,
"grad_norm": 4.3157148361206055,
"learning_rate": 1.1739412145844378e-05,
"loss": 0.3017,
"step": 44650
},
{
"epoch": 0.7898782491915676,
"grad_norm": 1.8925341367721558,
"learning_rate": 1.1690326127505841e-05,
"loss": 0.2795,
"step": 44700
},
{
"epoch": 0.7907617819087841,
"grad_norm": 1.678846001625061,
"learning_rate": 1.1641240109167304e-05,
"loss": 0.279,
"step": 44750
},
{
"epoch": 0.7916453146260006,
"grad_norm": 1.3051953315734863,
"learning_rate": 1.1592154090828769e-05,
"loss": 0.227,
"step": 44800
},
{
"epoch": 0.7925288473432172,
"grad_norm": 1.5428054332733154,
"learning_rate": 1.1543068072490232e-05,
"loss": 0.25,
"step": 44850
},
{
"epoch": 0.7934123800604337,
"grad_norm": 4.3441057205200195,
"learning_rate": 1.1494963774518466e-05,
"loss": 0.3381,
"step": 44900
},
{
"epoch": 0.7942959127776501,
"grad_norm": 1.589739203453064,
"learning_rate": 1.1445877756179931e-05,
"loss": 0.2914,
"step": 44950
},
{
"epoch": 0.7951794454948666,
"grad_norm": 1.8874093294143677,
"learning_rate": 1.1396791737841392e-05,
"loss": 0.282,
"step": 45000
},
{
"epoch": 0.7960629782120832,
"grad_norm": 1.1699323654174805,
"learning_rate": 1.1347705719502857e-05,
"loss": 0.2596,
"step": 45050
},
{
"epoch": 0.7969465109292997,
"grad_norm": 1.4282245635986328,
"learning_rate": 1.129861970116432e-05,
"loss": 0.311,
"step": 45100
},
{
"epoch": 0.7978300436465162,
"grad_norm": 1.4583613872528076,
"learning_rate": 1.1249533682825785e-05,
"loss": 0.2368,
"step": 45150
},
{
"epoch": 0.7987135763637327,
"grad_norm": 1.3440780639648438,
"learning_rate": 1.120044766448725e-05,
"loss": 0.3147,
"step": 45200
},
{
"epoch": 0.7995971090809493,
"grad_norm": 1.4544724225997925,
"learning_rate": 1.115136164614871e-05,
"loss": 0.2964,
"step": 45250
},
{
"epoch": 0.8004806417981658,
"grad_norm": 1.5149188041687012,
"learning_rate": 1.1102275627810175e-05,
"loss": 0.34,
"step": 45300
},
{
"epoch": 0.8013641745153823,
"grad_norm": 1.4456932544708252,
"learning_rate": 1.1053189609471638e-05,
"loss": 0.3249,
"step": 45350
},
{
"epoch": 0.8022477072325989,
"grad_norm": 4.4482574462890625,
"learning_rate": 1.1004103591133103e-05,
"loss": 0.4092,
"step": 45400
},
{
"epoch": 0.8031312399498154,
"grad_norm": 1.7785700559616089,
"learning_rate": 1.0955017572794566e-05,
"loss": 0.2601,
"step": 45450
},
{
"epoch": 0.8040147726670318,
"grad_norm": 1.560614824295044,
"learning_rate": 1.0905931554456029e-05,
"loss": 0.306,
"step": 45500
},
{
"epoch": 0.8048983053842483,
"grad_norm": 1.2510974407196045,
"learning_rate": 1.0856845536117492e-05,
"loss": 0.2888,
"step": 45550
},
{
"epoch": 0.8057818381014649,
"grad_norm": 2.532653331756592,
"learning_rate": 1.0807759517778957e-05,
"loss": 0.3101,
"step": 45600
},
{
"epoch": 0.8066653708186814,
"grad_norm": 1.2750502824783325,
"learning_rate": 1.075867349944042e-05,
"loss": 0.3148,
"step": 45650
},
{
"epoch": 0.8075489035358979,
"grad_norm": 2.6815085411071777,
"learning_rate": 1.0709587481101884e-05,
"loss": 0.3975,
"step": 45700
},
{
"epoch": 0.8084324362531145,
"grad_norm": 1.3361300230026245,
"learning_rate": 1.0660501462763347e-05,
"loss": 0.391,
"step": 45750
},
{
"epoch": 0.809315968970331,
"grad_norm": 1.438496470451355,
"learning_rate": 1.061141544442481e-05,
"loss": 0.3055,
"step": 45800
},
{
"epoch": 0.8101995016875475,
"grad_norm": 1.3673596382141113,
"learning_rate": 1.0562329426086275e-05,
"loss": 0.2434,
"step": 45850
},
{
"epoch": 0.811083034404764,
"grad_norm": 2.5049281120300293,
"learning_rate": 1.0513243407747738e-05,
"loss": 0.285,
"step": 45900
},
{
"epoch": 0.8119665671219806,
"grad_norm": 4.577225208282471,
"learning_rate": 1.04641573894092e-05,
"loss": 0.3952,
"step": 45950
},
{
"epoch": 0.8128500998391971,
"grad_norm": 1.4778873920440674,
"learning_rate": 1.0415071371070664e-05,
"loss": 0.2802,
"step": 46000
},
{
"epoch": 0.8137336325564135,
"grad_norm": 0.8188498020172119,
"learning_rate": 1.0365985352732128e-05,
"loss": 0.2687,
"step": 46050
},
{
"epoch": 0.81461716527363,
"grad_norm": 1.3656600713729858,
"learning_rate": 1.0316899334393591e-05,
"loss": 0.2787,
"step": 46100
},
{
"epoch": 0.8155006979908466,
"grad_norm": 1.3830708265304565,
"learning_rate": 1.0267813316055056e-05,
"loss": 0.2943,
"step": 46150
},
{
"epoch": 0.8163842307080631,
"grad_norm": 1.866623878479004,
"learning_rate": 1.0218727297716519e-05,
"loss": 0.3386,
"step": 46200
},
{
"epoch": 0.8172677634252796,
"grad_norm": 1.5368878841400146,
"learning_rate": 1.0169641279377982e-05,
"loss": 0.3227,
"step": 46250
},
{
"epoch": 0.8181512961424962,
"grad_norm": 3.281324625015259,
"learning_rate": 1.0120555261039447e-05,
"loss": 0.2582,
"step": 46300
},
{
"epoch": 0.8190348288597127,
"grad_norm": 1.4477012157440186,
"learning_rate": 1.007146924270091e-05,
"loss": 0.285,
"step": 46350
},
{
"epoch": 0.8199183615769292,
"grad_norm": 2.9229135513305664,
"learning_rate": 1.0022383224362374e-05,
"loss": 0.2989,
"step": 46400
},
{
"epoch": 0.8208018942941457,
"grad_norm": 1.5713211297988892,
"learning_rate": 9.973297206023836e-06,
"loss": 0.2761,
"step": 46450
},
{
"epoch": 0.8216854270113623,
"grad_norm": 1.5968650579452515,
"learning_rate": 9.9242111876853e-06,
"loss": 0.299,
"step": 46500
},
{
"epoch": 0.8225689597285788,
"grad_norm": 1.5933504104614258,
"learning_rate": 9.875125169346763e-06,
"loss": 0.2908,
"step": 46550
},
{
"epoch": 0.8234524924457952,
"grad_norm": 0.9819146990776062,
"learning_rate": 9.826039151008228e-06,
"loss": 0.3134,
"step": 46600
},
{
"epoch": 0.8243360251630117,
"grad_norm": 1.4541929960250854,
"learning_rate": 9.776953132669691e-06,
"loss": 0.2603,
"step": 46650
},
{
"epoch": 0.8252195578802283,
"grad_norm": 1.499306559562683,
"learning_rate": 9.727867114331154e-06,
"loss": 0.2965,
"step": 46700
},
{
"epoch": 0.8261030905974448,
"grad_norm": 2.7763867378234863,
"learning_rate": 9.678781095992617e-06,
"loss": 0.2506,
"step": 46750
},
{
"epoch": 0.8269866233146613,
"grad_norm": 1.4240554571151733,
"learning_rate": 9.629695077654082e-06,
"loss": 0.2258,
"step": 46800
},
{
"epoch": 0.8278701560318779,
"grad_norm": 1.71811842918396,
"learning_rate": 9.580609059315546e-06,
"loss": 0.2209,
"step": 46850
},
{
"epoch": 0.8287536887490944,
"grad_norm": 3.2347002029418945,
"learning_rate": 9.53152304097701e-06,
"loss": 0.2756,
"step": 46900
},
{
"epoch": 0.8296372214663109,
"grad_norm": 2.865858554840088,
"learning_rate": 9.483418743005244e-06,
"loss": 0.3345,
"step": 46950
},
{
"epoch": 0.8305207541835274,
"grad_norm": 1.4720476865768433,
"learning_rate": 9.434332724666705e-06,
"loss": 0.3016,
"step": 47000
},
{
"epoch": 0.831404286900744,
"grad_norm": 2.7783117294311523,
"learning_rate": 9.38524670632817e-06,
"loss": 0.2712,
"step": 47050
},
{
"epoch": 0.8322878196179605,
"grad_norm": 4.626585483551025,
"learning_rate": 9.336160687989634e-06,
"loss": 0.3721,
"step": 47100
},
{
"epoch": 0.8331713523351769,
"grad_norm": 1.097589135169983,
"learning_rate": 9.287074669651097e-06,
"loss": 0.3483,
"step": 47150
},
{
"epoch": 0.8340548850523934,
"grad_norm": 1.6428859233856201,
"learning_rate": 9.237988651312562e-06,
"loss": 0.3002,
"step": 47200
},
{
"epoch": 0.83493841776961,
"grad_norm": 1.0670841932296753,
"learning_rate": 9.188902632974023e-06,
"loss": 0.2333,
"step": 47250
},
{
"epoch": 0.8358219504868265,
"grad_norm": 1.9052667617797852,
"learning_rate": 9.139816614635488e-06,
"loss": 0.3043,
"step": 47300
},
{
"epoch": 0.836705483204043,
"grad_norm": 2.9311811923980713,
"learning_rate": 9.09073059629695e-06,
"loss": 0.2992,
"step": 47350
},
{
"epoch": 0.8375890159212596,
"grad_norm": 1.2520331144332886,
"learning_rate": 9.041644577958415e-06,
"loss": 0.3367,
"step": 47400
},
{
"epoch": 0.8384725486384761,
"grad_norm": 1.483476996421814,
"learning_rate": 8.992558559619878e-06,
"loss": 0.3135,
"step": 47450
},
{
"epoch": 0.8393560813556926,
"grad_norm": 1.5691540241241455,
"learning_rate": 8.943472541281341e-06,
"loss": 0.2681,
"step": 47500
},
{
"epoch": 0.8402396140729091,
"grad_norm": 1.2460750341415405,
"learning_rate": 8.894386522942804e-06,
"loss": 0.2764,
"step": 47550
},
{
"epoch": 0.8411231467901257,
"grad_norm": 1.3095312118530273,
"learning_rate": 8.845300504604269e-06,
"loss": 0.3211,
"step": 47600
},
{
"epoch": 0.8420066795073422,
"grad_norm": 1.5162594318389893,
"learning_rate": 8.796214486265734e-06,
"loss": 0.3081,
"step": 47650
},
{
"epoch": 0.8428902122245586,
"grad_norm": 1.3636444807052612,
"learning_rate": 8.747128467927197e-06,
"loss": 0.2929,
"step": 47700
},
{
"epoch": 0.8437737449417752,
"grad_norm": 1.2207202911376953,
"learning_rate": 8.69804244958866e-06,
"loss": 0.3466,
"step": 47750
},
{
"epoch": 0.8446572776589917,
"grad_norm": 1.294301152229309,
"learning_rate": 8.648956431250123e-06,
"loss": 0.3012,
"step": 47800
},
{
"epoch": 0.8455408103762082,
"grad_norm": 1.188514232635498,
"learning_rate": 8.599870412911587e-06,
"loss": 0.2953,
"step": 47850
},
{
"epoch": 0.8464243430934247,
"grad_norm": 1.5537595748901367,
"learning_rate": 8.55078439457305e-06,
"loss": 0.2914,
"step": 47900
},
{
"epoch": 0.8473078758106413,
"grad_norm": 1.640060544013977,
"learning_rate": 8.501698376234513e-06,
"loss": 0.3219,
"step": 47950
},
{
"epoch": 0.8481914085278578,
"grad_norm": 1.896763801574707,
"learning_rate": 8.452612357895976e-06,
"loss": 0.3104,
"step": 48000
},
{
"epoch": 0.8490749412450743,
"grad_norm": 1.4819157123565674,
"learning_rate": 8.403526339557441e-06,
"loss": 0.3676,
"step": 48050
},
{
"epoch": 0.8499584739622909,
"grad_norm": 1.882551670074463,
"learning_rate": 8.354440321218904e-06,
"loss": 0.2989,
"step": 48100
},
{
"epoch": 0.8508420066795074,
"grad_norm": 1.3162806034088135,
"learning_rate": 8.305354302880369e-06,
"loss": 0.3259,
"step": 48150
},
{
"epoch": 0.8517255393967239,
"grad_norm": 1.8228886127471924,
"learning_rate": 8.256268284541832e-06,
"loss": 0.2949,
"step": 48200
},
{
"epoch": 0.8526090721139403,
"grad_norm": 1.490918517112732,
"learning_rate": 8.207182266203294e-06,
"loss": 0.3914,
"step": 48250
},
{
"epoch": 0.8534926048311569,
"grad_norm": 1.5268231630325317,
"learning_rate": 8.158096247864759e-06,
"loss": 0.2645,
"step": 48300
},
{
"epoch": 0.8543761375483734,
"grad_norm": 1.9607213735580444,
"learning_rate": 8.109010229526222e-06,
"loss": 0.3358,
"step": 48350
},
{
"epoch": 0.8552596702655899,
"grad_norm": 1.4697561264038086,
"learning_rate": 8.059924211187687e-06,
"loss": 0.2939,
"step": 48400
},
{
"epoch": 0.8561432029828064,
"grad_norm": 4.159787654876709,
"learning_rate": 8.010838192849148e-06,
"loss": 0.4063,
"step": 48450
},
{
"epoch": 0.857026735700023,
"grad_norm": 2.129241466522217,
"learning_rate": 7.961752174510613e-06,
"loss": 0.3492,
"step": 48500
},
{
"epoch": 0.8579102684172395,
"grad_norm": 1.48981511592865,
"learning_rate": 7.912666156172076e-06,
"loss": 0.2794,
"step": 48550
},
{
"epoch": 0.858793801134456,
"grad_norm": 2.017918348312378,
"learning_rate": 7.86358013783354e-06,
"loss": 0.2854,
"step": 48600
},
{
"epoch": 0.8596773338516726,
"grad_norm": 1.4077606201171875,
"learning_rate": 7.814494119495003e-06,
"loss": 0.2664,
"step": 48650
},
{
"epoch": 0.8605608665688891,
"grad_norm": 1.417729139328003,
"learning_rate": 7.765408101156466e-06,
"loss": 0.3028,
"step": 48700
},
{
"epoch": 0.8614443992861056,
"grad_norm": 1.0813167095184326,
"learning_rate": 7.716322082817931e-06,
"loss": 0.2579,
"step": 48750
},
{
"epoch": 0.862327932003322,
"grad_norm": 2.008650064468384,
"learning_rate": 7.667236064479394e-06,
"loss": 0.3404,
"step": 48800
},
{
"epoch": 0.8632114647205386,
"grad_norm": 1.3516128063201904,
"learning_rate": 7.618150046140858e-06,
"loss": 0.3535,
"step": 48850
},
{
"epoch": 0.8640949974377551,
"grad_norm": 1.4083527326583862,
"learning_rate": 7.569064027802321e-06,
"loss": 0.2224,
"step": 48900
},
{
"epoch": 0.8649785301549716,
"grad_norm": 1.421423077583313,
"learning_rate": 7.5199780094637854e-06,
"loss": 0.2701,
"step": 48950
},
{
"epoch": 0.8658620628721881,
"grad_norm": 2.421118974685669,
"learning_rate": 7.4708919911252476e-06,
"loss": 0.2506,
"step": 49000
},
{
"epoch": 0.8667455955894047,
"grad_norm": 1.3858048915863037,
"learning_rate": 7.421805972786712e-06,
"loss": 0.3032,
"step": 49050
},
{
"epoch": 0.8676291283066212,
"grad_norm": 1.2791121006011963,
"learning_rate": 7.373701674814947e-06,
"loss": 0.3881,
"step": 49100
},
{
"epoch": 0.8685126610238377,
"grad_norm": 1.6947706937789917,
"learning_rate": 7.32461565647641e-06,
"loss": 0.253,
"step": 49150
},
{
"epoch": 0.8693961937410543,
"grad_norm": 1.4587703943252563,
"learning_rate": 7.2755296381378734e-06,
"loss": 0.2827,
"step": 49200
},
{
"epoch": 0.8702797264582708,
"grad_norm": 1.333967924118042,
"learning_rate": 7.2264436197993364e-06,
"loss": 0.266,
"step": 49250
},
{
"epoch": 0.8711632591754873,
"grad_norm": 1.3817411661148071,
"learning_rate": 7.1773576014608e-06,
"loss": 0.3008,
"step": 49300
},
{
"epoch": 0.8720467918927037,
"grad_norm": 1.269362211227417,
"learning_rate": 7.128271583122263e-06,
"loss": 0.3406,
"step": 49350
},
{
"epoch": 0.8729303246099203,
"grad_norm": 1.5153824090957642,
"learning_rate": 7.079185564783727e-06,
"loss": 0.3348,
"step": 49400
},
{
"epoch": 0.8738138573271368,
"grad_norm": 1.6337603330612183,
"learning_rate": 7.03009954644519e-06,
"loss": 0.3021,
"step": 49450
},
{
"epoch": 0.8746973900443533,
"grad_norm": 1.4994523525238037,
"learning_rate": 6.981013528106655e-06,
"loss": 0.3182,
"step": 49500
},
{
"epoch": 0.8755809227615698,
"grad_norm": 1.2485002279281616,
"learning_rate": 6.9319275097681185e-06,
"loss": 0.3063,
"step": 49550
},
{
"epoch": 0.8764644554787864,
"grad_norm": 1.538524866104126,
"learning_rate": 6.8828414914295815e-06,
"loss": 0.2385,
"step": 49600
},
{
"epoch": 0.8773479881960029,
"grad_norm": 1.3927173614501953,
"learning_rate": 6.833755473091045e-06,
"loss": 0.3152,
"step": 49650
},
{
"epoch": 0.8782315209132194,
"grad_norm": 1.4090054035186768,
"learning_rate": 6.784669454752508e-06,
"loss": 0.267,
"step": 49700
},
{
"epoch": 0.879115053630436,
"grad_norm": 1.5765697956085205,
"learning_rate": 6.735583436413972e-06,
"loss": 0.2599,
"step": 49750
},
{
"epoch": 0.8799985863476525,
"grad_norm": 1.617443323135376,
"learning_rate": 6.686497418075435e-06,
"loss": 0.3226,
"step": 49800
},
{
"epoch": 0.880882119064869,
"grad_norm": 1.385986089706421,
"learning_rate": 6.6374113997369e-06,
"loss": 0.3516,
"step": 49850
},
{
"epoch": 0.8817656517820854,
"grad_norm": 1.4890649318695068,
"learning_rate": 6.588325381398362e-06,
"loss": 0.2912,
"step": 49900
},
{
"epoch": 0.882649184499302,
"grad_norm": 2.459829807281494,
"learning_rate": 6.5392393630598265e-06,
"loss": 0.2853,
"step": 49950
},
{
"epoch": 0.8835327172165185,
"grad_norm": 1.6274219751358032,
"learning_rate": 6.4901533447212895e-06,
"loss": 0.3212,
"step": 50000
},
{
"epoch": 0.884416249933735,
"grad_norm": 2.2164740562438965,
"learning_rate": 6.441067326382753e-06,
"loss": 0.3399,
"step": 50050
},
{
"epoch": 0.8852997826509515,
"grad_norm": 3.567988157272339,
"learning_rate": 6.391981308044218e-06,
"loss": 0.3104,
"step": 50100
},
{
"epoch": 0.8861833153681681,
"grad_norm": 1.5539664030075073,
"learning_rate": 6.34289528970568e-06,
"loss": 0.368,
"step": 50150
},
{
"epoch": 0.8870668480853846,
"grad_norm": 1.6674470901489258,
"learning_rate": 6.293809271367145e-06,
"loss": 0.2848,
"step": 50200
},
{
"epoch": 0.8879503808026011,
"grad_norm": 1.1558799743652344,
"learning_rate": 6.244723253028607e-06,
"loss": 0.4137,
"step": 50250
},
{
"epoch": 0.8888339135198177,
"grad_norm": 1.2852174043655396,
"learning_rate": 6.195637234690072e-06,
"loss": 0.3597,
"step": 50300
},
{
"epoch": 0.8897174462370342,
"grad_norm": 2.747140407562256,
"learning_rate": 6.146551216351535e-06,
"loss": 0.3246,
"step": 50350
},
{
"epoch": 0.8906009789542507,
"grad_norm": 1.5731008052825928,
"learning_rate": 6.097465198012998e-06,
"loss": 0.2658,
"step": 50400
},
{
"epoch": 0.8914845116714671,
"grad_norm": 1.7012232542037964,
"learning_rate": 6.048379179674462e-06,
"loss": 0.2954,
"step": 50450
},
{
"epoch": 0.8923680443886837,
"grad_norm": 1.2959450483322144,
"learning_rate": 5.999293161335925e-06,
"loss": 0.3035,
"step": 50500
},
{
"epoch": 0.8932515771059002,
"grad_norm": 1.6592167615890503,
"learning_rate": 5.950207142997389e-06,
"loss": 0.2654,
"step": 50550
},
{
"epoch": 0.8941351098231167,
"grad_norm": 1.2229481935501099,
"learning_rate": 5.901121124658853e-06,
"loss": 0.2742,
"step": 50600
},
{
"epoch": 0.8950186425403333,
"grad_norm": 1.4973150491714478,
"learning_rate": 5.852035106320316e-06,
"loss": 0.3435,
"step": 50650
},
{
"epoch": 0.8959021752575498,
"grad_norm": 1.2695672512054443,
"learning_rate": 5.80294908798178e-06,
"loss": 0.2947,
"step": 50700
},
{
"epoch": 0.8967857079747663,
"grad_norm": 0.9303974509239197,
"learning_rate": 5.753863069643243e-06,
"loss": 0.3013,
"step": 50750
},
{
"epoch": 0.8976692406919828,
"grad_norm": 1.5696642398834229,
"learning_rate": 5.704777051304706e-06,
"loss": 0.3845,
"step": 50800
},
{
"epoch": 0.8985527734091994,
"grad_norm": 1.9302955865859985,
"learning_rate": 5.65569103296617e-06,
"loss": 0.2882,
"step": 50850
},
{
"epoch": 0.8994363061264159,
"grad_norm": 2.9837305545806885,
"learning_rate": 5.606605014627634e-06,
"loss": 0.3639,
"step": 50900
},
{
"epoch": 0.9003198388436324,
"grad_norm": 1.3305821418762207,
"learning_rate": 5.557518996289098e-06,
"loss": 0.2332,
"step": 50950
},
{
"epoch": 0.901203371560849,
"grad_norm": 1.2136187553405762,
"learning_rate": 5.508432977950561e-06,
"loss": 0.3363,
"step": 51000
},
{
"epoch": 0.9020869042780654,
"grad_norm": 1.782301664352417,
"learning_rate": 5.459346959612025e-06,
"loss": 0.2674,
"step": 51050
},
{
"epoch": 0.9029704369952819,
"grad_norm": 1.5983684062957764,
"learning_rate": 5.411242661640259e-06,
"loss": 0.331,
"step": 51100
},
{
"epoch": 0.9038539697124984,
"grad_norm": 1.749089002609253,
"learning_rate": 5.362156643301722e-06,
"loss": 0.2881,
"step": 51150
},
{
"epoch": 0.904737502429715,
"grad_norm": 1.2461782693862915,
"learning_rate": 5.313070624963186e-06,
"loss": 0.3553,
"step": 51200
},
{
"epoch": 0.9056210351469315,
"grad_norm": 2.317101001739502,
"learning_rate": 5.26398460662465e-06,
"loss": 0.2936,
"step": 51250
},
{
"epoch": 0.906504567864148,
"grad_norm": 1.3416547775268555,
"learning_rate": 5.214898588286113e-06,
"loss": 0.2987,
"step": 51300
},
{
"epoch": 0.9073881005813645,
"grad_norm": 3.7747082710266113,
"learning_rate": 5.1658125699475765e-06,
"loss": 0.3343,
"step": 51350
},
{
"epoch": 0.9082716332985811,
"grad_norm": 1.4777984619140625,
"learning_rate": 5.1167265516090395e-06,
"loss": 0.2919,
"step": 51400
},
{
"epoch": 0.9091551660157976,
"grad_norm": 1.3142715692520142,
"learning_rate": 5.067640533270503e-06,
"loss": 0.2997,
"step": 51450
},
{
"epoch": 0.9100386987330141,
"grad_norm": 1.3387079238891602,
"learning_rate": 5.018554514931967e-06,
"loss": 0.2247,
"step": 51500
},
{
"epoch": 0.9109222314502307,
"grad_norm": 1.9581636190414429,
"learning_rate": 4.96946849659343e-06,
"loss": 0.2918,
"step": 51550
},
{
"epoch": 0.9118057641674471,
"grad_norm": 1.3822007179260254,
"learning_rate": 4.920382478254894e-06,
"loss": 0.3295,
"step": 51600
},
{
"epoch": 0.9126892968846636,
"grad_norm": 1.4896866083145142,
"learning_rate": 4.871296459916358e-06,
"loss": 0.2493,
"step": 51650
},
{
"epoch": 0.9135728296018801,
"grad_norm": 4.590723037719727,
"learning_rate": 4.8222104415778216e-06,
"loss": 0.3088,
"step": 51700
},
{
"epoch": 0.9144563623190967,
"grad_norm": 1.653506875038147,
"learning_rate": 4.7731244232392845e-06,
"loss": 0.2287,
"step": 51750
},
{
"epoch": 0.9153398950363132,
"grad_norm": 1.7086869478225708,
"learning_rate": 4.724038404900748e-06,
"loss": 0.2067,
"step": 51800
},
{
"epoch": 0.9162234277535297,
"grad_norm": 1.1146478652954102,
"learning_rate": 4.674952386562212e-06,
"loss": 0.2735,
"step": 51850
},
{
"epoch": 0.9171069604707462,
"grad_norm": 2.2454397678375244,
"learning_rate": 4.625866368223675e-06,
"loss": 0.3976,
"step": 51900
},
{
"epoch": 0.9179904931879628,
"grad_norm": 1.902377724647522,
"learning_rate": 4.576780349885139e-06,
"loss": 0.2939,
"step": 51950
},
{
"epoch": 0.9188740259051793,
"grad_norm": 4.320808410644531,
"learning_rate": 4.527694331546602e-06,
"loss": 0.3193,
"step": 52000
},
{
"epoch": 0.9197575586223958,
"grad_norm": 1.4950217008590698,
"learning_rate": 4.478608313208066e-06,
"loss": 0.2988,
"step": 52050
},
{
"epoch": 0.9206410913396124,
"grad_norm": 1.5405720472335815,
"learning_rate": 4.42952229486953e-06,
"loss": 0.2286,
"step": 52100
},
{
"epoch": 0.9215246240568288,
"grad_norm": 1.5918203592300415,
"learning_rate": 4.3804362765309926e-06,
"loss": 0.371,
"step": 52150
},
{
"epoch": 0.9224081567740453,
"grad_norm": 1.2329323291778564,
"learning_rate": 4.331350258192457e-06,
"loss": 0.2825,
"step": 52200
},
{
"epoch": 0.9232916894912618,
"grad_norm": 1.2270597219467163,
"learning_rate": 4.28226423985392e-06,
"loss": 0.3728,
"step": 52250
},
{
"epoch": 0.9241752222084784,
"grad_norm": 1.8672150373458862,
"learning_rate": 4.233178221515384e-06,
"loss": 0.3196,
"step": 52300
},
{
"epoch": 0.9250587549256949,
"grad_norm": 1.6005786657333374,
"learning_rate": 4.184092203176848e-06,
"loss": 0.3322,
"step": 52350
},
{
"epoch": 0.9259422876429114,
"grad_norm": 1.4158750772476196,
"learning_rate": 4.135006184838311e-06,
"loss": 0.3258,
"step": 52400
},
{
"epoch": 0.926825820360128,
"grad_norm": 4.394749164581299,
"learning_rate": 4.085920166499775e-06,
"loss": 0.3419,
"step": 52450
},
{
"epoch": 0.9277093530773445,
"grad_norm": 0.856221616268158,
"learning_rate": 4.036834148161238e-06,
"loss": 0.2997,
"step": 52500
},
{
"epoch": 0.928592885794561,
"grad_norm": 1.1520658731460571,
"learning_rate": 3.9877481298227014e-06,
"loss": 0.2425,
"step": 52550
},
{
"epoch": 0.9294764185117775,
"grad_norm": 1.2415558099746704,
"learning_rate": 3.938662111484165e-06,
"loss": 0.2708,
"step": 52600
},
{
"epoch": 0.9303599512289941,
"grad_norm": 2.747580051422119,
"learning_rate": 3.889576093145628e-06,
"loss": 0.2201,
"step": 52650
},
{
"epoch": 0.9312434839462105,
"grad_norm": 2.005228281021118,
"learning_rate": 3.840490074807092e-06,
"loss": 0.2958,
"step": 52700
},
{
"epoch": 0.932127016663427,
"grad_norm": 1.6022164821624756,
"learning_rate": 3.791404056468556e-06,
"loss": 0.2647,
"step": 52750
},
{
"epoch": 0.9330105493806435,
"grad_norm": 1.7913720607757568,
"learning_rate": 3.7423180381300193e-06,
"loss": 0.2329,
"step": 52800
},
{
"epoch": 0.9338940820978601,
"grad_norm": 2.5619053840637207,
"learning_rate": 3.693232019791483e-06,
"loss": 0.2984,
"step": 52850
},
{
"epoch": 0.9347776148150766,
"grad_norm": 1.545856237411499,
"learning_rate": 3.6441460014529465e-06,
"loss": 0.2546,
"step": 52900
},
{
"epoch": 0.9356611475322931,
"grad_norm": 2.4920833110809326,
"learning_rate": 3.59505998311441e-06,
"loss": 0.2433,
"step": 52950
},
{
"epoch": 0.9365446802495097,
"grad_norm": 1.65108323097229,
"learning_rate": 3.5459739647758733e-06,
"loss": 0.3632,
"step": 53000
},
{
"epoch": 0.9374282129667262,
"grad_norm": 2.5942931175231934,
"learning_rate": 3.496887946437337e-06,
"loss": 0.2602,
"step": 53050
},
{
"epoch": 0.9383117456839427,
"grad_norm": 1.256638526916504,
"learning_rate": 3.4478019280988005e-06,
"loss": 0.2066,
"step": 53100
},
{
"epoch": 0.9391952784011592,
"grad_norm": 3.677544593811035,
"learning_rate": 3.399697630127035e-06,
"loss": 0.2957,
"step": 53150
},
{
"epoch": 0.9400788111183758,
"grad_norm": 1.3518919944763184,
"learning_rate": 3.3506116117884983e-06,
"loss": 0.3931,
"step": 53200
},
{
"epoch": 0.9409623438355922,
"grad_norm": 1.065996766090393,
"learning_rate": 3.3015255934499617e-06,
"loss": 0.3384,
"step": 53250
},
{
"epoch": 0.9418458765528087,
"grad_norm": 1.57516610622406,
"learning_rate": 3.252439575111425e-06,
"loss": 0.234,
"step": 53300
},
{
"epoch": 0.9427294092700252,
"grad_norm": 1.2013062238693237,
"learning_rate": 3.203353556772889e-06,
"loss": 0.2244,
"step": 53350
},
{
"epoch": 0.9436129419872418,
"grad_norm": 1.448370099067688,
"learning_rate": 3.1542675384343524e-06,
"loss": 0.2736,
"step": 53400
},
{
"epoch": 0.9444964747044583,
"grad_norm": 1.7333183288574219,
"learning_rate": 3.105181520095816e-06,
"loss": 0.3088,
"step": 53450
},
{
"epoch": 0.9453800074216748,
"grad_norm": 1.5718059539794922,
"learning_rate": 3.0560955017572796e-06,
"loss": 0.3135,
"step": 53500
},
{
"epoch": 0.9462635401388914,
"grad_norm": 1.3086848258972168,
"learning_rate": 3.007009483418743e-06,
"loss": 0.2813,
"step": 53550
},
{
"epoch": 0.9471470728561079,
"grad_norm": 1.3118650913238525,
"learning_rate": 2.9579234650802064e-06,
"loss": 0.2333,
"step": 53600
},
{
"epoch": 0.9480306055733244,
"grad_norm": 3.0708839893341064,
"learning_rate": 2.9088374467416706e-06,
"loss": 0.3022,
"step": 53650
},
{
"epoch": 0.9489141382905409,
"grad_norm": 1.637635588645935,
"learning_rate": 2.859751428403134e-06,
"loss": 0.303,
"step": 53700
},
{
"epoch": 0.9497976710077575,
"grad_norm": 2.5479607582092285,
"learning_rate": 2.8106654100645974e-06,
"loss": 0.2651,
"step": 53750
},
{
"epoch": 0.9506812037249739,
"grad_norm": 4.394486427307129,
"learning_rate": 2.761579391726061e-06,
"loss": 0.3384,
"step": 53800
},
{
"epoch": 0.9515647364421904,
"grad_norm": 3.1194252967834473,
"learning_rate": 2.712493373387524e-06,
"loss": 0.3324,
"step": 53850
},
{
"epoch": 0.952448269159407,
"grad_norm": 1.082737684249878,
"learning_rate": 2.663407355048988e-06,
"loss": 0.2253,
"step": 53900
},
{
"epoch": 0.9533318018766235,
"grad_norm": 1.0127415657043457,
"learning_rate": 2.614321336710452e-06,
"loss": 0.2942,
"step": 53950
},
{
"epoch": 0.95421533459384,
"grad_norm": 4.512701988220215,
"learning_rate": 2.5652353183719152e-06,
"loss": 0.2997,
"step": 54000
},
{
"epoch": 0.9550988673110565,
"grad_norm": 1.0720359086990356,
"learning_rate": 2.5161493000333786e-06,
"loss": 0.3954,
"step": 54050
},
{
"epoch": 0.9559824000282731,
"grad_norm": 1.608279824256897,
"learning_rate": 2.467063281694842e-06,
"loss": 0.3496,
"step": 54100
},
{
"epoch": 0.9568659327454896,
"grad_norm": 1.2330106496810913,
"learning_rate": 2.4179772633563054e-06,
"loss": 0.2609,
"step": 54150
},
{
"epoch": 0.9577494654627061,
"grad_norm": 1.4279929399490356,
"learning_rate": 2.3688912450177693e-06,
"loss": 0.3942,
"step": 54200
},
{
"epoch": 0.9586329981799226,
"grad_norm": 1.4870383739471436,
"learning_rate": 2.319805226679233e-06,
"loss": 0.3794,
"step": 54250
},
{
"epoch": 0.9595165308971392,
"grad_norm": 3.1990461349487305,
"learning_rate": 2.2707192083406965e-06,
"loss": 0.2834,
"step": 54300
},
{
"epoch": 0.9604000636143556,
"grad_norm": 1.178895115852356,
"learning_rate": 2.22163319000216e-06,
"loss": 0.3271,
"step": 54350
},
{
"epoch": 0.9612835963315721,
"grad_norm": 1.724674105644226,
"learning_rate": 2.1725471716636233e-06,
"loss": 0.3048,
"step": 54400
},
{
"epoch": 0.9621671290487886,
"grad_norm": 1.5154780149459839,
"learning_rate": 2.1234611533250867e-06,
"loss": 0.2813,
"step": 54450
},
{
"epoch": 0.9630506617660052,
"grad_norm": 1.3216954469680786,
"learning_rate": 2.0743751349865505e-06,
"loss": 0.3229,
"step": 54500
},
{
"epoch": 0.9639341944832217,
"grad_norm": 1.5333393812179565,
"learning_rate": 2.0252891166480143e-06,
"loss": 0.3064,
"step": 54550
},
{
"epoch": 0.9648177272004382,
"grad_norm": 1.3715639114379883,
"learning_rate": 1.9762030983094777e-06,
"loss": 0.2925,
"step": 54600
},
{
"epoch": 0.9657012599176548,
"grad_norm": 3.4723856449127197,
"learning_rate": 1.927117079970941e-06,
"loss": 0.301,
"step": 54650
},
{
"epoch": 0.9665847926348713,
"grad_norm": 3.3657915592193604,
"learning_rate": 1.8780310616324047e-06,
"loss": 0.2484,
"step": 54700
},
{
"epoch": 0.9674683253520878,
"grad_norm": 3.2125537395477295,
"learning_rate": 1.8289450432938681e-06,
"loss": 0.3228,
"step": 54750
},
{
"epoch": 0.9683518580693043,
"grad_norm": 3.5145859718322754,
"learning_rate": 1.779859024955332e-06,
"loss": 0.2935,
"step": 54800
},
{
"epoch": 0.9692353907865209,
"grad_norm": 1.5993742942810059,
"learning_rate": 1.7307730066167953e-06,
"loss": 0.3085,
"step": 54850
},
{
"epoch": 0.9701189235037373,
"grad_norm": 4.223308086395264,
"learning_rate": 1.681686988278259e-06,
"loss": 0.3112,
"step": 54900
},
{
"epoch": 0.9710024562209538,
"grad_norm": 1.7939913272857666,
"learning_rate": 1.6326009699397223e-06,
"loss": 0.2889,
"step": 54950
},
{
"epoch": 0.9718859889381704,
"grad_norm": 1.1405465602874756,
"learning_rate": 1.583514951601186e-06,
"loss": 0.2746,
"step": 55000
},
{
"epoch": 0.9727695216553869,
"grad_norm": 1.8150931596755981,
"learning_rate": 1.5344289332626496e-06,
"loss": 0.2772,
"step": 55050
},
{
"epoch": 0.9736530543726034,
"grad_norm": 1.4807177782058716,
"learning_rate": 1.485342914924113e-06,
"loss": 0.2965,
"step": 55100
},
{
"epoch": 0.9745365870898199,
"grad_norm": 1.4012283086776733,
"learning_rate": 1.4362568965855766e-06,
"loss": 0.2382,
"step": 55150
},
{
"epoch": 0.9754201198070365,
"grad_norm": 1.435829520225525,
"learning_rate": 1.388152598613811e-06,
"loss": 0.3863,
"step": 55200
},
{
"epoch": 0.976303652524253,
"grad_norm": 1.0731230974197388,
"learning_rate": 1.3390665802752744e-06,
"loss": 0.2909,
"step": 55250
},
{
"epoch": 0.9771871852414695,
"grad_norm": 1.6253186464309692,
"learning_rate": 1.289980561936738e-06,
"loss": 0.3787,
"step": 55300
},
{
"epoch": 0.978070717958686,
"grad_norm": 1.9667285680770874,
"learning_rate": 1.2408945435982016e-06,
"loss": 0.3196,
"step": 55350
},
{
"epoch": 0.9789542506759026,
"grad_norm": 1.1798194646835327,
"learning_rate": 1.191808525259665e-06,
"loss": 0.3738,
"step": 55400
},
{
"epoch": 0.979837783393119,
"grad_norm": 1.5018582344055176,
"learning_rate": 1.1427225069211286e-06,
"loss": 0.3588,
"step": 55450
},
{
"epoch": 0.9807213161103355,
"grad_norm": 1.1979721784591675,
"learning_rate": 1.0936364885825922e-06,
"loss": 0.3559,
"step": 55500
},
{
"epoch": 0.9816048488275521,
"grad_norm": 3.014507532119751,
"learning_rate": 1.0445504702440556e-06,
"loss": 0.4382,
"step": 55550
},
{
"epoch": 0.9824883815447686,
"grad_norm": 1.5364562273025513,
"learning_rate": 9.954644519055192e-07,
"loss": 0.2588,
"step": 55600
},
{
"epoch": 0.9833719142619851,
"grad_norm": 1.011873483657837,
"learning_rate": 9.463784335669829e-07,
"loss": 0.3132,
"step": 55650
},
{
"epoch": 0.9842554469792016,
"grad_norm": 2.5110092163085938,
"learning_rate": 8.972924152284464e-07,
"loss": 0.2922,
"step": 55700
},
{
"epoch": 0.9851389796964182,
"grad_norm": 1.2086411714553833,
"learning_rate": 8.4820639688991e-07,
"loss": 0.2409,
"step": 55750
},
{
"epoch": 0.9860225124136347,
"grad_norm": 1.5035746097564697,
"learning_rate": 7.991203785513735e-07,
"loss": 0.2704,
"step": 55800
},
{
"epoch": 0.9869060451308512,
"grad_norm": 1.3643758296966553,
"learning_rate": 7.50034360212837e-07,
"loss": 0.2855,
"step": 55850
},
{
"epoch": 0.9877895778480678,
"grad_norm": 1.2211904525756836,
"learning_rate": 7.009483418743006e-07,
"loss": 0.241,
"step": 55900
},
{
"epoch": 0.9886731105652843,
"grad_norm": 3.049858570098877,
"learning_rate": 6.518623235357641e-07,
"loss": 0.2616,
"step": 55950
},
{
"epoch": 0.9895566432825007,
"grad_norm": 1.8196197748184204,
"learning_rate": 6.027763051972277e-07,
"loss": 0.2572,
"step": 56000
},
{
"epoch": 0.9904401759997172,
"grad_norm": 1.814112663269043,
"learning_rate": 5.536902868586912e-07,
"loss": 0.3437,
"step": 56050
},
{
"epoch": 0.9913237087169338,
"grad_norm": 2.0368192195892334,
"learning_rate": 5.046042685201547e-07,
"loss": 0.2681,
"step": 56100
},
{
"epoch": 0.9922072414341503,
"grad_norm": 1.4389891624450684,
"learning_rate": 4.555182501816183e-07,
"loss": 0.2366,
"step": 56150
},
{
"epoch": 0.9930907741513668,
"grad_norm": 2.772890567779541,
"learning_rate": 4.0643223184308187e-07,
"loss": 0.2505,
"step": 56200
},
{
"epoch": 0.9939743068685833,
"grad_norm": 1.548779010772705,
"learning_rate": 3.5734621350454537e-07,
"loss": 0.3341,
"step": 56250
},
{
"epoch": 0.9948578395857999,
"grad_norm": 1.6362569332122803,
"learning_rate": 3.0826019516600893e-07,
"loss": 0.3292,
"step": 56300
},
{
"epoch": 0.9957413723030164,
"grad_norm": 1.707270622253418,
"learning_rate": 2.591741768274725e-07,
"loss": 0.3199,
"step": 56350
},
{
"epoch": 0.9966249050202329,
"grad_norm": 2.1296205520629883,
"learning_rate": 2.1008815848893604e-07,
"loss": 0.284,
"step": 56400
},
{
"epoch": 0.9975084377374495,
"grad_norm": 1.6319339275360107,
"learning_rate": 1.6100214015039955e-07,
"loss": 0.3286,
"step": 56450
},
{
"epoch": 0.998391970454666,
"grad_norm": 1.348299503326416,
"learning_rate": 1.119161218118631e-07,
"loss": 0.3321,
"step": 56500
},
{
"epoch": 0.9992755031718824,
"grad_norm": 1.5483179092407227,
"learning_rate": 6.283010347332666e-08,
"loss": 0.3073,
"step": 56550
}
],
"logging_steps": 50,
"max_steps": 56591,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}