thenlpresearcher's picture
Upload fine-tuned IndicTrans2 (en→mar without punctuation)
6145139 verified
{
"best_global_step": 4000,
"best_metric": 31.3442,
"best_model_checkpoint": "indictrans2-en-indic-dist-200M-en-indic-iitb-finetuned-eng_Latn-to-mar_Deva/checkpoint-4000",
"epoch": 3.878910532085336,
"eval_steps": 4000,
"global_step": 92000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004216207100092757,
"grad_norm": 10.629733085632324,
"learning_rate": 1.999165190994182e-05,
"loss": 6.2178,
"step": 100
},
{
"epoch": 0.008432414200185513,
"grad_norm": 10.794089317321777,
"learning_rate": 1.9983219495741632e-05,
"loss": 4.5911,
"step": 200
},
{
"epoch": 0.01264862130027827,
"grad_norm": 11.978194236755371,
"learning_rate": 1.9974787081541448e-05,
"loss": 3.3837,
"step": 300
},
{
"epoch": 0.016864828400371026,
"grad_norm": 10.057585716247559,
"learning_rate": 1.996635466734126e-05,
"loss": 2.3077,
"step": 400
},
{
"epoch": 0.021081035500463783,
"grad_norm": 6.098118782043457,
"learning_rate": 1.9957922253141076e-05,
"loss": 1.4151,
"step": 500
},
{
"epoch": 0.02529724260055654,
"grad_norm": 2.8122458457946777,
"learning_rate": 1.994948983894089e-05,
"loss": 0.8033,
"step": 600
},
{
"epoch": 0.029513449700649296,
"grad_norm": 1.5206619501113892,
"learning_rate": 1.9941057424740704e-05,
"loss": 0.5806,
"step": 700
},
{
"epoch": 0.03372965680074205,
"grad_norm": 2.194688558578491,
"learning_rate": 1.993262501054052e-05,
"loss": 0.5271,
"step": 800
},
{
"epoch": 0.037945863900834806,
"grad_norm": 1.3595080375671387,
"learning_rate": 1.9924192596340336e-05,
"loss": 0.5115,
"step": 900
},
{
"epoch": 0.042162071000927566,
"grad_norm": 1.391483187675476,
"learning_rate": 1.9915760182140148e-05,
"loss": 0.529,
"step": 1000
},
{
"epoch": 0.04637827810102032,
"grad_norm": 1.4901494979858398,
"learning_rate": 1.9907327767939964e-05,
"loss": 0.5027,
"step": 1100
},
{
"epoch": 0.05059448520111308,
"grad_norm": 1.3334758281707764,
"learning_rate": 1.9898895353739776e-05,
"loss": 0.4803,
"step": 1200
},
{
"epoch": 0.05481069230120583,
"grad_norm": 1.407790184020996,
"learning_rate": 1.9890462939539592e-05,
"loss": 0.4897,
"step": 1300
},
{
"epoch": 0.05902689940129859,
"grad_norm": 0.8968947529792786,
"learning_rate": 1.9882030525339408e-05,
"loss": 0.4822,
"step": 1400
},
{
"epoch": 0.06324310650139135,
"grad_norm": 1.3116236925125122,
"learning_rate": 1.987359811113922e-05,
"loss": 0.4815,
"step": 1500
},
{
"epoch": 0.0674593136014841,
"grad_norm": 1.0337741374969482,
"learning_rate": 1.9865165696939036e-05,
"loss": 0.4704,
"step": 1600
},
{
"epoch": 0.07167552070157686,
"grad_norm": 1.1927516460418701,
"learning_rate": 1.985673328273885e-05,
"loss": 0.4721,
"step": 1700
},
{
"epoch": 0.07589172780166961,
"grad_norm": 0.9303850531578064,
"learning_rate": 1.9848300868538664e-05,
"loss": 0.4703,
"step": 1800
},
{
"epoch": 0.08010793490176238,
"grad_norm": 1.325838565826416,
"learning_rate": 1.9839868454338477e-05,
"loss": 0.4648,
"step": 1900
},
{
"epoch": 0.08432414200185513,
"grad_norm": 0.8775719404220581,
"learning_rate": 1.9831436040138292e-05,
"loss": 0.4916,
"step": 2000
},
{
"epoch": 0.08854034910194788,
"grad_norm": 1.0944805145263672,
"learning_rate": 1.9823003625938108e-05,
"loss": 0.4771,
"step": 2100
},
{
"epoch": 0.09275655620204064,
"grad_norm": 1.6143155097961426,
"learning_rate": 1.9814571211737924e-05,
"loss": 0.466,
"step": 2200
},
{
"epoch": 0.0969727633021334,
"grad_norm": 1.1526069641113281,
"learning_rate": 1.9806138797537736e-05,
"loss": 0.4543,
"step": 2300
},
{
"epoch": 0.10118897040222616,
"grad_norm": 1.3782904148101807,
"learning_rate": 1.9797706383337552e-05,
"loss": 0.4671,
"step": 2400
},
{
"epoch": 0.10540517750231891,
"grad_norm": 1.2834385633468628,
"learning_rate": 1.9789273969137365e-05,
"loss": 0.4682,
"step": 2500
},
{
"epoch": 0.10962138460241166,
"grad_norm": 1.5170279741287231,
"learning_rate": 1.978084155493718e-05,
"loss": 0.4645,
"step": 2600
},
{
"epoch": 0.11383759170250443,
"grad_norm": 1.4424182176589966,
"learning_rate": 1.9772409140736996e-05,
"loss": 0.4746,
"step": 2700
},
{
"epoch": 0.11805379880259718,
"grad_norm": 1.9791672229766846,
"learning_rate": 1.976397672653681e-05,
"loss": 0.4495,
"step": 2800
},
{
"epoch": 0.12227000590268994,
"grad_norm": 1.112899899482727,
"learning_rate": 1.9755544312336624e-05,
"loss": 0.452,
"step": 2900
},
{
"epoch": 0.1264862130027827,
"grad_norm": 1.2428970336914062,
"learning_rate": 1.9747111898136437e-05,
"loss": 0.4559,
"step": 3000
},
{
"epoch": 0.13070242010287544,
"grad_norm": 0.8663131594657898,
"learning_rate": 1.9738679483936252e-05,
"loss": 0.4377,
"step": 3100
},
{
"epoch": 0.1349186272029682,
"grad_norm": 1.1876728534698486,
"learning_rate": 1.9730247069736065e-05,
"loss": 0.4525,
"step": 3200
},
{
"epoch": 0.13913483430306098,
"grad_norm": 0.9078199863433838,
"learning_rate": 1.9721814655535884e-05,
"loss": 0.4382,
"step": 3300
},
{
"epoch": 0.14335104140315372,
"grad_norm": 0.9842768907546997,
"learning_rate": 1.9713382241335696e-05,
"loss": 0.4292,
"step": 3400
},
{
"epoch": 0.14756724850324648,
"grad_norm": 1.2574409246444702,
"learning_rate": 1.9704949827135512e-05,
"loss": 0.4372,
"step": 3500
},
{
"epoch": 0.15178345560333922,
"grad_norm": 1.0580419301986694,
"learning_rate": 1.9696517412935325e-05,
"loss": 0.4531,
"step": 3600
},
{
"epoch": 0.155999662703432,
"grad_norm": 1.4315508604049683,
"learning_rate": 1.968808499873514e-05,
"loss": 0.4503,
"step": 3700
},
{
"epoch": 0.16021586980352476,
"grad_norm": 1.4435293674468994,
"learning_rate": 1.9679652584534953e-05,
"loss": 0.4472,
"step": 3800
},
{
"epoch": 0.1644320769036175,
"grad_norm": 1.1712230443954468,
"learning_rate": 1.967122017033477e-05,
"loss": 0.4495,
"step": 3900
},
{
"epoch": 0.16864828400371026,
"grad_norm": 0.952250599861145,
"learning_rate": 1.966278775613458e-05,
"loss": 0.4274,
"step": 4000
},
{
"epoch": 0.16864828400371026,
"eval_bleu": 9.1559,
"eval_bleurt": null,
"eval_chrfpp": 31.3442,
"eval_comet": 0.5302,
"eval_gen_len": 20.8691,
"eval_loss": 0.4290911853313446,
"eval_runtime": 1006.2399,
"eval_samples_per_second": 47.141,
"eval_steps_per_second": 2.947,
"step": 4000
},
{
"epoch": 0.17286449110380303,
"grad_norm": 0.8649118542671204,
"learning_rate": 1.9654355341934397e-05,
"loss": 0.4538,
"step": 4100
},
{
"epoch": 0.17708069820389577,
"grad_norm": 1.0827510356903076,
"learning_rate": 1.9645922927734212e-05,
"loss": 0.4334,
"step": 4200
},
{
"epoch": 0.18129690530398854,
"grad_norm": 1.2431646585464478,
"learning_rate": 1.9637490513534025e-05,
"loss": 0.4319,
"step": 4300
},
{
"epoch": 0.18551311240408128,
"grad_norm": 1.222221851348877,
"learning_rate": 1.962905809933384e-05,
"loss": 0.4401,
"step": 4400
},
{
"epoch": 0.18972931950417404,
"grad_norm": 0.8401798605918884,
"learning_rate": 1.9620625685133656e-05,
"loss": 0.4312,
"step": 4500
},
{
"epoch": 0.1939455266042668,
"grad_norm": 1.088202714920044,
"learning_rate": 1.9612193270933472e-05,
"loss": 0.4476,
"step": 4600
},
{
"epoch": 0.19816173370435955,
"grad_norm": 1.2317404747009277,
"learning_rate": 1.9603760856733285e-05,
"loss": 0.4456,
"step": 4700
},
{
"epoch": 0.20237794080445232,
"grad_norm": 2.003664255142212,
"learning_rate": 1.95953284425331e-05,
"loss": 0.4337,
"step": 4800
},
{
"epoch": 0.20659414790454508,
"grad_norm": 0.987022340297699,
"learning_rate": 1.9586896028332913e-05,
"loss": 0.4368,
"step": 4900
},
{
"epoch": 0.21081035500463782,
"grad_norm": 1.0784544944763184,
"learning_rate": 1.957846361413273e-05,
"loss": 0.4447,
"step": 5000
},
{
"epoch": 0.2150265621047306,
"grad_norm": 0.9378799796104431,
"learning_rate": 1.957003119993254e-05,
"loss": 0.4217,
"step": 5100
},
{
"epoch": 0.21924276920482333,
"grad_norm": 1.2435382604599,
"learning_rate": 1.9561598785732357e-05,
"loss": 0.4357,
"step": 5200
},
{
"epoch": 0.2234589763049161,
"grad_norm": 1.362450361251831,
"learning_rate": 1.955316637153217e-05,
"loss": 0.4507,
"step": 5300
},
{
"epoch": 0.22767518340500886,
"grad_norm": 1.155408263206482,
"learning_rate": 1.9544733957331985e-05,
"loss": 0.4283,
"step": 5400
},
{
"epoch": 0.2318913905051016,
"grad_norm": 1.3443132638931274,
"learning_rate": 1.95363015431318e-05,
"loss": 0.4333,
"step": 5500
},
{
"epoch": 0.23610759760519437,
"grad_norm": 1.2103915214538574,
"learning_rate": 1.9527869128931616e-05,
"loss": 0.4337,
"step": 5600
},
{
"epoch": 0.24032380470528714,
"grad_norm": 1.0087685585021973,
"learning_rate": 1.951943671473143e-05,
"loss": 0.416,
"step": 5700
},
{
"epoch": 0.24454001180537988,
"grad_norm": 1.2199865579605103,
"learning_rate": 1.9511004300531245e-05,
"loss": 0.4331,
"step": 5800
},
{
"epoch": 0.24875621890547264,
"grad_norm": 0.9081279039382935,
"learning_rate": 1.9502571886331057e-05,
"loss": 0.4227,
"step": 5900
},
{
"epoch": 0.2529724260055654,
"grad_norm": 1.2454545497894287,
"learning_rate": 1.9494139472130873e-05,
"loss": 0.4295,
"step": 6000
},
{
"epoch": 0.25718863310565815,
"grad_norm": 1.2220704555511475,
"learning_rate": 1.948570705793069e-05,
"loss": 0.4344,
"step": 6100
},
{
"epoch": 0.2614048402057509,
"grad_norm": 0.9363239407539368,
"learning_rate": 1.94772746437305e-05,
"loss": 0.4311,
"step": 6200
},
{
"epoch": 0.2656210473058437,
"grad_norm": 1.3526592254638672,
"learning_rate": 1.9468842229530317e-05,
"loss": 0.4196,
"step": 6300
},
{
"epoch": 0.2698372544059364,
"grad_norm": 1.1111302375793457,
"learning_rate": 1.946040981533013e-05,
"loss": 0.4196,
"step": 6400
},
{
"epoch": 0.27405346150602916,
"grad_norm": 1.1077983379364014,
"learning_rate": 1.9451977401129945e-05,
"loss": 0.43,
"step": 6500
},
{
"epoch": 0.27826966860612196,
"grad_norm": 1.3143802881240845,
"learning_rate": 1.9443544986929757e-05,
"loss": 0.4085,
"step": 6600
},
{
"epoch": 0.2824858757062147,
"grad_norm": 0.971645176410675,
"learning_rate": 1.9435112572729573e-05,
"loss": 0.431,
"step": 6700
},
{
"epoch": 0.28670208280630743,
"grad_norm": 1.212292194366455,
"learning_rate": 1.942668015852939e-05,
"loss": 0.4243,
"step": 6800
},
{
"epoch": 0.29091828990640023,
"grad_norm": 1.331641435623169,
"learning_rate": 1.9418247744329205e-05,
"loss": 0.4142,
"step": 6900
},
{
"epoch": 0.29513449700649297,
"grad_norm": 1.3871821165084839,
"learning_rate": 1.9409815330129017e-05,
"loss": 0.416,
"step": 7000
},
{
"epoch": 0.2993507041065857,
"grad_norm": 1.1271859407424927,
"learning_rate": 1.9401382915928833e-05,
"loss": 0.4228,
"step": 7100
},
{
"epoch": 0.30356691120667845,
"grad_norm": 0.9559811353683472,
"learning_rate": 1.9392950501728645e-05,
"loss": 0.4135,
"step": 7200
},
{
"epoch": 0.30778311830677124,
"grad_norm": 0.8394259810447693,
"learning_rate": 1.938451808752846e-05,
"loss": 0.4333,
"step": 7300
},
{
"epoch": 0.311999325406864,
"grad_norm": 0.82978755235672,
"learning_rate": 1.9376085673328277e-05,
"loss": 0.4198,
"step": 7400
},
{
"epoch": 0.3162155325069567,
"grad_norm": 1.0237234830856323,
"learning_rate": 1.936765325912809e-05,
"loss": 0.4333,
"step": 7500
},
{
"epoch": 0.3204317396070495,
"grad_norm": 1.2100563049316406,
"learning_rate": 1.9359220844927905e-05,
"loss": 0.4286,
"step": 7600
},
{
"epoch": 0.32464794670714225,
"grad_norm": 1.3460373878479004,
"learning_rate": 1.9350788430727717e-05,
"loss": 0.4275,
"step": 7700
},
{
"epoch": 0.328864153807235,
"grad_norm": 1.0469090938568115,
"learning_rate": 1.9342356016527533e-05,
"loss": 0.4285,
"step": 7800
},
{
"epoch": 0.3330803609073278,
"grad_norm": 1.6244388818740845,
"learning_rate": 1.9333923602327346e-05,
"loss": 0.4311,
"step": 7900
},
{
"epoch": 0.3372965680074205,
"grad_norm": 1.1776193380355835,
"learning_rate": 1.9325491188127165e-05,
"loss": 0.414,
"step": 8000
},
{
"epoch": 0.3372965680074205,
"eval_bleu": 9.4762,
"eval_bleurt": null,
"eval_chrfpp": 31.7112,
"eval_comet": 0.5327,
"eval_gen_len": 20.8762,
"eval_loss": 0.40556150674819946,
"eval_runtime": 1022.9164,
"eval_samples_per_second": 46.372,
"eval_steps_per_second": 2.899,
"step": 8000
},
{
"epoch": 0.34151277510751327,
"grad_norm": 1.427694320678711,
"learning_rate": 1.9317058773926977e-05,
"loss": 0.4135,
"step": 8100
},
{
"epoch": 0.34572898220760606,
"grad_norm": 1.3969519138336182,
"learning_rate": 1.9308626359726793e-05,
"loss": 0.424,
"step": 8200
},
{
"epoch": 0.3499451893076988,
"grad_norm": 1.012691617012024,
"learning_rate": 1.9300193945526605e-05,
"loss": 0.4239,
"step": 8300
},
{
"epoch": 0.35416139640779154,
"grad_norm": 1.0593016147613525,
"learning_rate": 1.929176153132642e-05,
"loss": 0.4066,
"step": 8400
},
{
"epoch": 0.35837760350788433,
"grad_norm": 0.7688089609146118,
"learning_rate": 1.9283329117126233e-05,
"loss": 0.4097,
"step": 8500
},
{
"epoch": 0.3625938106079771,
"grad_norm": 1.1880069971084595,
"learning_rate": 1.927489670292605e-05,
"loss": 0.4231,
"step": 8600
},
{
"epoch": 0.3668100177080698,
"grad_norm": 1.010106086730957,
"learning_rate": 1.9266464288725865e-05,
"loss": 0.4211,
"step": 8700
},
{
"epoch": 0.37102622480816255,
"grad_norm": 1.1799863576889038,
"learning_rate": 1.9258031874525677e-05,
"loss": 0.414,
"step": 8800
},
{
"epoch": 0.37524243190825535,
"grad_norm": 1.1016535758972168,
"learning_rate": 1.9249599460325493e-05,
"loss": 0.4167,
"step": 8900
},
{
"epoch": 0.3794586390083481,
"grad_norm": 0.9111543297767639,
"learning_rate": 1.9241167046125306e-05,
"loss": 0.4116,
"step": 9000
},
{
"epoch": 0.3836748461084408,
"grad_norm": 1.0465009212493896,
"learning_rate": 1.923273463192512e-05,
"loss": 0.4029,
"step": 9100
},
{
"epoch": 0.3878910532085336,
"grad_norm": 1.1918885707855225,
"learning_rate": 1.9224302217724937e-05,
"loss": 0.4129,
"step": 9200
},
{
"epoch": 0.39210726030862636,
"grad_norm": 0.9597665667533875,
"learning_rate": 1.9215869803524753e-05,
"loss": 0.4158,
"step": 9300
},
{
"epoch": 0.3963234674087191,
"grad_norm": 1.3496443033218384,
"learning_rate": 1.9207437389324565e-05,
"loss": 0.4103,
"step": 9400
},
{
"epoch": 0.4005396745088119,
"grad_norm": 0.9334352016448975,
"learning_rate": 1.919900497512438e-05,
"loss": 0.4165,
"step": 9500
},
{
"epoch": 0.40475588160890463,
"grad_norm": 0.7650086879730225,
"learning_rate": 1.9190572560924194e-05,
"loss": 0.4126,
"step": 9600
},
{
"epoch": 0.40897208870899737,
"grad_norm": 0.8084037899971008,
"learning_rate": 1.918214014672401e-05,
"loss": 0.409,
"step": 9700
},
{
"epoch": 0.41318829580909017,
"grad_norm": 2.67522931098938,
"learning_rate": 1.9173707732523822e-05,
"loss": 0.4211,
"step": 9800
},
{
"epoch": 0.4174045029091829,
"grad_norm": 1.3551160097122192,
"learning_rate": 1.9165275318323637e-05,
"loss": 0.4273,
"step": 9900
},
{
"epoch": 0.42162071000927565,
"grad_norm": 1.408456802368164,
"learning_rate": 1.9156842904123453e-05,
"loss": 0.4069,
"step": 10000
},
{
"epoch": 0.4258369171093684,
"grad_norm": 0.8736149668693542,
"learning_rate": 1.9148410489923266e-05,
"loss": 0.4086,
"step": 10100
},
{
"epoch": 0.4300531242094612,
"grad_norm": 0.9605196118354797,
"learning_rate": 1.913997807572308e-05,
"loss": 0.4032,
"step": 10200
},
{
"epoch": 0.4342693313095539,
"grad_norm": 0.9522096514701843,
"learning_rate": 1.9131545661522894e-05,
"loss": 0.4225,
"step": 10300
},
{
"epoch": 0.43848553840964666,
"grad_norm": 1.1162022352218628,
"learning_rate": 1.912311324732271e-05,
"loss": 0.4092,
"step": 10400
},
{
"epoch": 0.44270174550973945,
"grad_norm": 1.0420705080032349,
"learning_rate": 1.9114680833122525e-05,
"loss": 0.4015,
"step": 10500
},
{
"epoch": 0.4469179526098322,
"grad_norm": 1.5382574796676636,
"learning_rate": 1.910624841892234e-05,
"loss": 0.4084,
"step": 10600
},
{
"epoch": 0.45113415970992493,
"grad_norm": 1.0530604124069214,
"learning_rate": 1.9097816004722154e-05,
"loss": 0.4182,
"step": 10700
},
{
"epoch": 0.4553503668100177,
"grad_norm": 1.0158157348632812,
"learning_rate": 1.908938359052197e-05,
"loss": 0.3976,
"step": 10800
},
{
"epoch": 0.45956657391011047,
"grad_norm": 1.01460862159729,
"learning_rate": 1.9080951176321782e-05,
"loss": 0.4104,
"step": 10900
},
{
"epoch": 0.4637827810102032,
"grad_norm": 0.9880945682525635,
"learning_rate": 1.9072518762121598e-05,
"loss": 0.4077,
"step": 11000
},
{
"epoch": 0.467998988110296,
"grad_norm": 0.9603067636489868,
"learning_rate": 1.906408634792141e-05,
"loss": 0.4121,
"step": 11100
},
{
"epoch": 0.47221519521038874,
"grad_norm": 1.2611957788467407,
"learning_rate": 1.9055653933721226e-05,
"loss": 0.4074,
"step": 11200
},
{
"epoch": 0.4764314023104815,
"grad_norm": 1.094708800315857,
"learning_rate": 1.9047221519521038e-05,
"loss": 0.3905,
"step": 11300
},
{
"epoch": 0.4806476094105743,
"grad_norm": 0.9691109657287598,
"learning_rate": 1.9038789105320854e-05,
"loss": 0.4066,
"step": 11400
},
{
"epoch": 0.484863816510667,
"grad_norm": 1.3236321210861206,
"learning_rate": 1.903035669112067e-05,
"loss": 0.4216,
"step": 11500
},
{
"epoch": 0.48908002361075975,
"grad_norm": 0.8272280693054199,
"learning_rate": 1.9021924276920485e-05,
"loss": 0.4048,
"step": 11600
},
{
"epoch": 0.4932962307108525,
"grad_norm": 1.0969959497451782,
"learning_rate": 1.9013491862720298e-05,
"loss": 0.409,
"step": 11700
},
{
"epoch": 0.4975124378109453,
"grad_norm": 1.1777257919311523,
"learning_rate": 1.9005059448520114e-05,
"loss": 0.3956,
"step": 11800
},
{
"epoch": 0.5017286449110381,
"grad_norm": 1.3242672681808472,
"learning_rate": 1.899662703431993e-05,
"loss": 0.4,
"step": 11900
},
{
"epoch": 0.5059448520111308,
"grad_norm": 0.9798252582550049,
"learning_rate": 1.8988194620119742e-05,
"loss": 0.4048,
"step": 12000
},
{
"epoch": 0.5059448520111308,
"eval_bleu": 9.7085,
"eval_bleurt": null,
"eval_chrfpp": 32.0437,
"eval_comet": 0.534,
"eval_gen_len": 20.8716,
"eval_loss": 0.3886358141899109,
"eval_runtime": 1131.9138,
"eval_samples_per_second": 41.907,
"eval_steps_per_second": 2.619,
"step": 12000
},
{
"epoch": 0.5101610591112236,
"grad_norm": 1.1658809185028076,
"learning_rate": 1.8979762205919558e-05,
"loss": 0.4064,
"step": 12100
},
{
"epoch": 0.5143772662113163,
"grad_norm": 1.077453374862671,
"learning_rate": 1.897132979171937e-05,
"loss": 0.3909,
"step": 12200
},
{
"epoch": 0.518593473311409,
"grad_norm": 0.7280858159065247,
"learning_rate": 1.8962897377519186e-05,
"loss": 0.3804,
"step": 12300
},
{
"epoch": 0.5228096804115018,
"grad_norm": 0.9924391508102417,
"learning_rate": 1.8954464963318998e-05,
"loss": 0.3938,
"step": 12400
},
{
"epoch": 0.5270258875115945,
"grad_norm": 1.1247611045837402,
"learning_rate": 1.8946032549118814e-05,
"loss": 0.4073,
"step": 12500
},
{
"epoch": 0.5312420946116874,
"grad_norm": 1.0452404022216797,
"learning_rate": 1.8937600134918626e-05,
"loss": 0.4064,
"step": 12600
},
{
"epoch": 0.5354583017117801,
"grad_norm": 1.024165153503418,
"learning_rate": 1.8929167720718445e-05,
"loss": 0.4067,
"step": 12700
},
{
"epoch": 0.5396745088118728,
"grad_norm": 1.2145025730133057,
"learning_rate": 1.8920735306518258e-05,
"loss": 0.4021,
"step": 12800
},
{
"epoch": 0.5438907159119656,
"grad_norm": 1.086727499961853,
"learning_rate": 1.8912302892318074e-05,
"loss": 0.3882,
"step": 12900
},
{
"epoch": 0.5481069230120583,
"grad_norm": 0.8512001633644104,
"learning_rate": 1.8903870478117886e-05,
"loss": 0.4171,
"step": 13000
},
{
"epoch": 0.5523231301121511,
"grad_norm": 1.3099777698516846,
"learning_rate": 1.8895438063917702e-05,
"loss": 0.4128,
"step": 13100
},
{
"epoch": 0.5565393372122439,
"grad_norm": 1.1675434112548828,
"learning_rate": 1.8887005649717514e-05,
"loss": 0.4096,
"step": 13200
},
{
"epoch": 0.5607555443123367,
"grad_norm": 0.8974719047546387,
"learning_rate": 1.887857323551733e-05,
"loss": 0.4226,
"step": 13300
},
{
"epoch": 0.5649717514124294,
"grad_norm": 0.967807948589325,
"learning_rate": 1.8870140821317146e-05,
"loss": 0.3983,
"step": 13400
},
{
"epoch": 0.5691879585125221,
"grad_norm": 1.1763675212860107,
"learning_rate": 1.8861708407116958e-05,
"loss": 0.4071,
"step": 13500
},
{
"epoch": 0.5734041656126149,
"grad_norm": 1.1269315481185913,
"learning_rate": 1.8853275992916774e-05,
"loss": 0.395,
"step": 13600
},
{
"epoch": 0.5776203727127076,
"grad_norm": 1.2609223127365112,
"learning_rate": 1.8844843578716586e-05,
"loss": 0.387,
"step": 13700
},
{
"epoch": 0.5818365798128005,
"grad_norm": 1.0252714157104492,
"learning_rate": 1.8836411164516402e-05,
"loss": 0.4011,
"step": 13800
},
{
"epoch": 0.5860527869128932,
"grad_norm": 0.8436282277107239,
"learning_rate": 1.8827978750316218e-05,
"loss": 0.3984,
"step": 13900
},
{
"epoch": 0.5902689940129859,
"grad_norm": 0.9598125219345093,
"learning_rate": 1.8819546336116034e-05,
"loss": 0.3923,
"step": 14000
},
{
"epoch": 0.5944852011130787,
"grad_norm": 0.8917134404182434,
"learning_rate": 1.8811113921915846e-05,
"loss": 0.4149,
"step": 14100
},
{
"epoch": 0.5987014082131714,
"grad_norm": 0.9456690549850464,
"learning_rate": 1.8802681507715662e-05,
"loss": 0.3969,
"step": 14200
},
{
"epoch": 0.6029176153132642,
"grad_norm": 1.3810299634933472,
"learning_rate": 1.8794249093515474e-05,
"loss": 0.4047,
"step": 14300
},
{
"epoch": 0.6071338224133569,
"grad_norm": 0.9798800945281982,
"learning_rate": 1.878581667931529e-05,
"loss": 0.3943,
"step": 14400
},
{
"epoch": 0.6113500295134497,
"grad_norm": 1.0832455158233643,
"learning_rate": 1.8777384265115102e-05,
"loss": 0.4021,
"step": 14500
},
{
"epoch": 0.6155662366135425,
"grad_norm": 1.0834710597991943,
"learning_rate": 1.8768951850914918e-05,
"loss": 0.4116,
"step": 14600
},
{
"epoch": 0.6197824437136352,
"grad_norm": 1.3946242332458496,
"learning_rate": 1.8760519436714734e-05,
"loss": 0.386,
"step": 14700
},
{
"epoch": 0.623998650813728,
"grad_norm": 1.0280137062072754,
"learning_rate": 1.8752087022514546e-05,
"loss": 0.3944,
"step": 14800
},
{
"epoch": 0.6282148579138207,
"grad_norm": 0.9602075815200806,
"learning_rate": 1.8743654608314362e-05,
"loss": 0.3982,
"step": 14900
},
{
"epoch": 0.6324310650139134,
"grad_norm": 0.9291537404060364,
"learning_rate": 1.8735222194114175e-05,
"loss": 0.3926,
"step": 15000
},
{
"epoch": 0.6366472721140063,
"grad_norm": 0.811850905418396,
"learning_rate": 1.872678977991399e-05,
"loss": 0.4024,
"step": 15100
},
{
"epoch": 0.640863479214099,
"grad_norm": 0.8130801916122437,
"learning_rate": 1.8718357365713806e-05,
"loss": 0.3984,
"step": 15200
},
{
"epoch": 0.6450796863141918,
"grad_norm": 0.8128789067268372,
"learning_rate": 1.8709924951513622e-05,
"loss": 0.3832,
"step": 15300
},
{
"epoch": 0.6492958934142845,
"grad_norm": 1.1947672367095947,
"learning_rate": 1.8701492537313434e-05,
"loss": 0.3912,
"step": 15400
},
{
"epoch": 0.6535121005143772,
"grad_norm": 1.2158654928207397,
"learning_rate": 1.869306012311325e-05,
"loss": 0.3876,
"step": 15500
},
{
"epoch": 0.65772830761447,
"grad_norm": 0.8703183531761169,
"learning_rate": 1.8684627708913062e-05,
"loss": 0.3862,
"step": 15600
},
{
"epoch": 0.6619445147145627,
"grad_norm": 0.9874376058578491,
"learning_rate": 1.8676195294712878e-05,
"loss": 0.3886,
"step": 15700
},
{
"epoch": 0.6661607218146556,
"grad_norm": 1.0630080699920654,
"learning_rate": 1.866776288051269e-05,
"loss": 0.3941,
"step": 15800
},
{
"epoch": 0.6703769289147483,
"grad_norm": 1.0598597526550293,
"learning_rate": 1.8659330466312506e-05,
"loss": 0.3898,
"step": 15900
},
{
"epoch": 0.674593136014841,
"grad_norm": 0.6748641729354858,
"learning_rate": 1.8650898052112322e-05,
"loss": 0.3972,
"step": 16000
},
{
"epoch": 0.674593136014841,
"eval_bleu": 10.0416,
"eval_bleurt": null,
"eval_chrfpp": 32.3205,
"eval_comet": 0.5353,
"eval_gen_len": 20.8769,
"eval_loss": 0.3783491253852844,
"eval_runtime": 1363.5698,
"eval_samples_per_second": 34.787,
"eval_steps_per_second": 2.174,
"step": 16000
},
{
"epoch": 0.6788093431149338,
"grad_norm": 1.0870927572250366,
"learning_rate": 1.8642465637912135e-05,
"loss": 0.4016,
"step": 16100
},
{
"epoch": 0.6830255502150265,
"grad_norm": 1.2668064832687378,
"learning_rate": 1.863403322371195e-05,
"loss": 0.3733,
"step": 16200
},
{
"epoch": 0.6872417573151193,
"grad_norm": 1.145337700843811,
"learning_rate": 1.8625600809511766e-05,
"loss": 0.3888,
"step": 16300
},
{
"epoch": 0.6914579644152121,
"grad_norm": 1.0644266605377197,
"learning_rate": 1.861716839531158e-05,
"loss": 0.4011,
"step": 16400
},
{
"epoch": 0.6956741715153049,
"grad_norm": 1.0268157720565796,
"learning_rate": 1.8608735981111394e-05,
"loss": 0.3988,
"step": 16500
},
{
"epoch": 0.6998903786153976,
"grad_norm": 1.1735461950302124,
"learning_rate": 1.860030356691121e-05,
"loss": 0.3953,
"step": 16600
},
{
"epoch": 0.7041065857154903,
"grad_norm": 1.0253026485443115,
"learning_rate": 1.8591871152711023e-05,
"loss": 0.392,
"step": 16700
},
{
"epoch": 0.7083227928155831,
"grad_norm": 1.061868667602539,
"learning_rate": 1.858343873851084e-05,
"loss": 0.3851,
"step": 16800
},
{
"epoch": 0.7125389999156758,
"grad_norm": 1.8942055702209473,
"learning_rate": 1.857500632431065e-05,
"loss": 0.4071,
"step": 16900
},
{
"epoch": 0.7167552070157687,
"grad_norm": 1.0703763961791992,
"learning_rate": 1.8566573910110466e-05,
"loss": 0.3964,
"step": 17000
},
{
"epoch": 0.7209714141158614,
"grad_norm": 1.1151158809661865,
"learning_rate": 1.855814149591028e-05,
"loss": 0.3837,
"step": 17100
},
{
"epoch": 0.7251876212159541,
"grad_norm": 0.9483737349510193,
"learning_rate": 1.8549709081710095e-05,
"loss": 0.3994,
"step": 17200
},
{
"epoch": 0.7294038283160469,
"grad_norm": 1.2941887378692627,
"learning_rate": 1.8541276667509907e-05,
"loss": 0.3924,
"step": 17300
},
{
"epoch": 0.7336200354161396,
"grad_norm": 0.8903588652610779,
"learning_rate": 1.8532844253309723e-05,
"loss": 0.3915,
"step": 17400
},
{
"epoch": 0.7378362425162324,
"grad_norm": 1.2245477437973022,
"learning_rate": 1.852441183910954e-05,
"loss": 0.38,
"step": 17500
},
{
"epoch": 0.7420524496163251,
"grad_norm": 1.2684203386306763,
"learning_rate": 1.8515979424909354e-05,
"loss": 0.4012,
"step": 17600
},
{
"epoch": 0.746268656716418,
"grad_norm": 0.9190846681594849,
"learning_rate": 1.8507547010709167e-05,
"loss": 0.3874,
"step": 17700
},
{
"epoch": 0.7504848638165107,
"grad_norm": 1.3668287992477417,
"learning_rate": 1.8499114596508983e-05,
"loss": 0.3803,
"step": 17800
},
{
"epoch": 0.7547010709166034,
"grad_norm": 1.227296233177185,
"learning_rate": 1.84906821823088e-05,
"loss": 0.3736,
"step": 17900
},
{
"epoch": 0.7589172780166962,
"grad_norm": 0.9625111222267151,
"learning_rate": 1.848224976810861e-05,
"loss": 0.4079,
"step": 18000
},
{
"epoch": 0.7631334851167889,
"grad_norm": 1.1573898792266846,
"learning_rate": 1.8473817353908427e-05,
"loss": 0.3766,
"step": 18100
},
{
"epoch": 0.7673496922168817,
"grad_norm": 1.0590816736221313,
"learning_rate": 1.846538493970824e-05,
"loss": 0.3768,
"step": 18200
},
{
"epoch": 0.7715658993169745,
"grad_norm": 1.1647247076034546,
"learning_rate": 1.8456952525508055e-05,
"loss": 0.4032,
"step": 18300
},
{
"epoch": 0.7757821064170672,
"grad_norm": 1.1958105564117432,
"learning_rate": 1.8448520111307867e-05,
"loss": 0.3776,
"step": 18400
},
{
"epoch": 0.77999831351716,
"grad_norm": 0.9784579277038574,
"learning_rate": 1.8440087697107683e-05,
"loss": 0.3828,
"step": 18500
},
{
"epoch": 0.7842145206172527,
"grad_norm": 0.7342677712440491,
"learning_rate": 1.8431655282907495e-05,
"loss": 0.3854,
"step": 18600
},
{
"epoch": 0.7884307277173455,
"grad_norm": 1.1513690948486328,
"learning_rate": 1.8423222868707314e-05,
"loss": 0.379,
"step": 18700
},
{
"epoch": 0.7926469348174382,
"grad_norm": 1.2128503322601318,
"learning_rate": 1.8414790454507127e-05,
"loss": 0.3873,
"step": 18800
},
{
"epoch": 0.7968631419175309,
"grad_norm": 0.9662160277366638,
"learning_rate": 1.8406358040306943e-05,
"loss": 0.3875,
"step": 18900
},
{
"epoch": 0.8010793490176238,
"grad_norm": 1.006768822669983,
"learning_rate": 1.8397925626106755e-05,
"loss": 0.3791,
"step": 19000
},
{
"epoch": 0.8052955561177165,
"grad_norm": 1.0220060348510742,
"learning_rate": 1.838949321190657e-05,
"loss": 0.3772,
"step": 19100
},
{
"epoch": 0.8095117632178093,
"grad_norm": 1.3875762224197388,
"learning_rate": 1.8381060797706383e-05,
"loss": 0.3827,
"step": 19200
},
{
"epoch": 0.813727970317902,
"grad_norm": 1.1512092351913452,
"learning_rate": 1.83726283835062e-05,
"loss": 0.3765,
"step": 19300
},
{
"epoch": 0.8179441774179947,
"grad_norm": 1.107202172279358,
"learning_rate": 1.8364195969306015e-05,
"loss": 0.3951,
"step": 19400
},
{
"epoch": 0.8221603845180875,
"grad_norm": 1.2689570188522339,
"learning_rate": 1.8355763555105827e-05,
"loss": 0.396,
"step": 19500
},
{
"epoch": 0.8263765916181803,
"grad_norm": 0.8329848051071167,
"learning_rate": 1.8347331140905643e-05,
"loss": 0.3845,
"step": 19600
},
{
"epoch": 0.8305927987182731,
"grad_norm": 1.0717341899871826,
"learning_rate": 1.8338898726705455e-05,
"loss": 0.3804,
"step": 19700
},
{
"epoch": 0.8348090058183658,
"grad_norm": 0.8131351470947266,
"learning_rate": 1.8330466312505274e-05,
"loss": 0.3799,
"step": 19800
},
{
"epoch": 0.8390252129184586,
"grad_norm": 0.992375373840332,
"learning_rate": 1.8322033898305087e-05,
"loss": 0.3841,
"step": 19900
},
{
"epoch": 0.8432414200185513,
"grad_norm": 1.202879548072815,
"learning_rate": 1.8313601484104903e-05,
"loss": 0.3791,
"step": 20000
},
{
"epoch": 0.8432414200185513,
"eval_bleu": 10.1124,
"eval_bleurt": null,
"eval_chrfpp": 32.5597,
"eval_comet": 0.5374,
"eval_gen_len": 20.8772,
"eval_loss": 0.36685481667518616,
"eval_runtime": 1369.9335,
"eval_samples_per_second": 34.626,
"eval_steps_per_second": 2.164,
"step": 20000
},
{
"epoch": 0.847457627118644,
"grad_norm": 0.9284172058105469,
"learning_rate": 1.8305169069904715e-05,
"loss": 0.3832,
"step": 20100
},
{
"epoch": 0.8516738342187368,
"grad_norm": 1.2193272113800049,
"learning_rate": 1.829673665570453e-05,
"loss": 0.3777,
"step": 20200
},
{
"epoch": 0.8558900413188296,
"grad_norm": 0.7489703297615051,
"learning_rate": 1.8288304241504343e-05,
"loss": 0.383,
"step": 20300
},
{
"epoch": 0.8601062484189224,
"grad_norm": 0.9435054063796997,
"learning_rate": 1.827987182730416e-05,
"loss": 0.3691,
"step": 20400
},
{
"epoch": 0.8643224555190151,
"grad_norm": 1.2876486778259277,
"learning_rate": 1.827143941310397e-05,
"loss": 0.3679,
"step": 20500
},
{
"epoch": 0.8685386626191078,
"grad_norm": 1.0788872241973877,
"learning_rate": 1.8263006998903787e-05,
"loss": 0.388,
"step": 20600
},
{
"epoch": 0.8727548697192006,
"grad_norm": 1.425753116607666,
"learning_rate": 1.8254574584703603e-05,
"loss": 0.3845,
"step": 20700
},
{
"epoch": 0.8769710768192933,
"grad_norm": 1.7826398611068726,
"learning_rate": 1.8246142170503415e-05,
"loss": 0.3874,
"step": 20800
},
{
"epoch": 0.8811872839193862,
"grad_norm": 0.7879995107650757,
"learning_rate": 1.823770975630323e-05,
"loss": 0.3894,
"step": 20900
},
{
"epoch": 0.8854034910194789,
"grad_norm": 1.137299656867981,
"learning_rate": 1.8229277342103047e-05,
"loss": 0.3782,
"step": 21000
},
{
"epoch": 0.8896196981195716,
"grad_norm": 1.3012561798095703,
"learning_rate": 1.822084492790286e-05,
"loss": 0.3841,
"step": 21100
},
{
"epoch": 0.8938359052196644,
"grad_norm": 1.351110577583313,
"learning_rate": 1.8212412513702675e-05,
"loss": 0.3868,
"step": 21200
},
{
"epoch": 0.8980521123197571,
"grad_norm": 1.3879759311676025,
"learning_rate": 1.820398009950249e-05,
"loss": 0.3907,
"step": 21300
},
{
"epoch": 0.9022683194198499,
"grad_norm": 1.047498345375061,
"learning_rate": 1.8195547685302303e-05,
"loss": 0.3961,
"step": 21400
},
{
"epoch": 0.9064845265199427,
"grad_norm": 0.7418652176856995,
"learning_rate": 1.818711527110212e-05,
"loss": 0.3814,
"step": 21500
},
{
"epoch": 0.9107007336200355,
"grad_norm": 0.9947733879089355,
"learning_rate": 1.817868285690193e-05,
"loss": 0.3724,
"step": 21600
},
{
"epoch": 0.9149169407201282,
"grad_norm": 1.223792314529419,
"learning_rate": 1.8170250442701747e-05,
"loss": 0.377,
"step": 21700
},
{
"epoch": 0.9191331478202209,
"grad_norm": 0.9043552875518799,
"learning_rate": 1.816181802850156e-05,
"loss": 0.3775,
"step": 21800
},
{
"epoch": 0.9233493549203137,
"grad_norm": 0.8440760374069214,
"learning_rate": 1.8153385614301375e-05,
"loss": 0.3818,
"step": 21900
},
{
"epoch": 0.9275655620204064,
"grad_norm": 3.621572732925415,
"learning_rate": 1.814495320010119e-05,
"loss": 0.3895,
"step": 22000
},
{
"epoch": 0.9317817691204991,
"grad_norm": 1.0998283624649048,
"learning_rate": 1.8136520785901004e-05,
"loss": 0.3612,
"step": 22100
},
{
"epoch": 0.935997976220592,
"grad_norm": 1.0755438804626465,
"learning_rate": 1.812808837170082e-05,
"loss": 0.3841,
"step": 22200
},
{
"epoch": 0.9402141833206847,
"grad_norm": 1.0700905323028564,
"learning_rate": 1.8119655957500635e-05,
"loss": 0.3858,
"step": 22300
},
{
"epoch": 0.9444303904207775,
"grad_norm": 0.925391674041748,
"learning_rate": 1.8111223543300448e-05,
"loss": 0.3716,
"step": 22400
},
{
"epoch": 0.9486465975208702,
"grad_norm": 1.4689534902572632,
"learning_rate": 1.8102791129100263e-05,
"loss": 0.3806,
"step": 22500
},
{
"epoch": 0.952862804620963,
"grad_norm": 1.0605189800262451,
"learning_rate": 1.809435871490008e-05,
"loss": 0.3706,
"step": 22600
},
{
"epoch": 0.9570790117210557,
"grad_norm": 1.186305046081543,
"learning_rate": 1.808592630069989e-05,
"loss": 0.3748,
"step": 22700
},
{
"epoch": 0.9612952188211485,
"grad_norm": 1.3206912279129028,
"learning_rate": 1.8077493886499707e-05,
"loss": 0.3814,
"step": 22800
},
{
"epoch": 0.9655114259212413,
"grad_norm": 1.2885257005691528,
"learning_rate": 1.806906147229952e-05,
"loss": 0.3823,
"step": 22900
},
{
"epoch": 0.969727633021334,
"grad_norm": 1.059088945388794,
"learning_rate": 1.8060629058099335e-05,
"loss": 0.3886,
"step": 23000
},
{
"epoch": 0.9739438401214268,
"grad_norm": 1.139894962310791,
"learning_rate": 1.8052196643899148e-05,
"loss": 0.3679,
"step": 23100
},
{
"epoch": 0.9781600472215195,
"grad_norm": 1.169776201248169,
"learning_rate": 1.8043764229698964e-05,
"loss": 0.3844,
"step": 23200
},
{
"epoch": 0.9823762543216122,
"grad_norm": 1.1594703197479248,
"learning_rate": 1.803533181549878e-05,
"loss": 0.3791,
"step": 23300
},
{
"epoch": 0.986592461421705,
"grad_norm": 1.0355985164642334,
"learning_rate": 1.8026899401298595e-05,
"loss": 0.378,
"step": 23400
},
{
"epoch": 0.9908086685217978,
"grad_norm": 1.0133675336837769,
"learning_rate": 1.8018466987098408e-05,
"loss": 0.3666,
"step": 23500
},
{
"epoch": 0.9950248756218906,
"grad_norm": 0.8024215698242188,
"learning_rate": 1.8010034572898223e-05,
"loss": 0.3523,
"step": 23600
},
{
"epoch": 0.9992410827219833,
"grad_norm": 0.9694296717643738,
"learning_rate": 1.8001602158698036e-05,
"loss": 0.3694,
"step": 23700
},
{
"epoch": 1.0034572898220762,
"grad_norm": 0.8517723083496094,
"learning_rate": 1.799316974449785e-05,
"loss": 0.3637,
"step": 23800
},
{
"epoch": 1.0076734969221688,
"grad_norm": 0.951740562915802,
"learning_rate": 1.7984737330297667e-05,
"loss": 0.3541,
"step": 23900
},
{
"epoch": 1.0118897040222616,
"grad_norm": 1.074562907218933,
"learning_rate": 1.797630491609748e-05,
"loss": 0.3388,
"step": 24000
},
{
"epoch": 1.0118897040222616,
"eval_bleu": 10.3014,
"eval_bleurt": null,
"eval_chrfpp": 32.7152,
"eval_comet": 0.5376,
"eval_gen_len": 20.8763,
"eval_loss": 0.3606056571006775,
"eval_runtime": 1366.2367,
"eval_samples_per_second": 34.719,
"eval_steps_per_second": 2.17,
"step": 24000
},
{
"epoch": 1.0161059111223543,
"grad_norm": 0.845503568649292,
"learning_rate": 1.7967872501897296e-05,
"loss": 0.337,
"step": 24100
},
{
"epoch": 1.0203221182224471,
"grad_norm": 0.886055588722229,
"learning_rate": 1.7959440087697108e-05,
"loss": 0.3557,
"step": 24200
},
{
"epoch": 1.0245383253225397,
"grad_norm": 0.8072330355644226,
"learning_rate": 1.7951007673496924e-05,
"loss": 0.3492,
"step": 24300
},
{
"epoch": 1.0287545324226326,
"grad_norm": 1.0184261798858643,
"learning_rate": 1.7942575259296736e-05,
"loss": 0.3394,
"step": 24400
},
{
"epoch": 1.0329707395227254,
"grad_norm": 0.9989670515060425,
"learning_rate": 1.7934142845096552e-05,
"loss": 0.3368,
"step": 24500
},
{
"epoch": 1.037186946622818,
"grad_norm": 1.0039615631103516,
"learning_rate": 1.7925710430896368e-05,
"loss": 0.3566,
"step": 24600
},
{
"epoch": 1.041403153722911,
"grad_norm": 0.9721380472183228,
"learning_rate": 1.7917278016696183e-05,
"loss": 0.3468,
"step": 24700
},
{
"epoch": 1.0456193608230036,
"grad_norm": 1.2254228591918945,
"learning_rate": 1.7908845602495996e-05,
"loss": 0.3574,
"step": 24800
},
{
"epoch": 1.0498355679230964,
"grad_norm": 0.8590681552886963,
"learning_rate": 1.790041318829581e-05,
"loss": 0.3629,
"step": 24900
},
{
"epoch": 1.054051775023189,
"grad_norm": 0.9355018734931946,
"learning_rate": 1.7891980774095624e-05,
"loss": 0.3321,
"step": 25000
},
{
"epoch": 1.0582679821232819,
"grad_norm": 0.9381804466247559,
"learning_rate": 1.788354835989544e-05,
"loss": 0.3349,
"step": 25100
},
{
"epoch": 1.0624841892233747,
"grad_norm": 1.0869252681732178,
"learning_rate": 1.7875115945695256e-05,
"loss": 0.3464,
"step": 25200
},
{
"epoch": 1.0667003963234674,
"grad_norm": 1.0041707754135132,
"learning_rate": 1.7866683531495068e-05,
"loss": 0.3503,
"step": 25300
},
{
"epoch": 1.0709166034235602,
"grad_norm": 0.8508927226066589,
"learning_rate": 1.7858251117294884e-05,
"loss": 0.3552,
"step": 25400
},
{
"epoch": 1.0751328105236528,
"grad_norm": 1.2893351316452026,
"learning_rate": 1.7849818703094696e-05,
"loss": 0.3491,
"step": 25500
},
{
"epoch": 1.0793490176237457,
"grad_norm": 0.8087054491043091,
"learning_rate": 1.7841386288894512e-05,
"loss": 0.3256,
"step": 25600
},
{
"epoch": 1.0835652247238385,
"grad_norm": 1.098964810371399,
"learning_rate": 1.7832953874694324e-05,
"loss": 0.3378,
"step": 25700
},
{
"epoch": 1.0877814318239312,
"grad_norm": 0.9316391944885254,
"learning_rate": 1.7824521460494143e-05,
"loss": 0.341,
"step": 25800
},
{
"epoch": 1.091997638924024,
"grad_norm": 1.127323865890503,
"learning_rate": 1.7816089046293956e-05,
"loss": 0.3574,
"step": 25900
},
{
"epoch": 1.0962138460241166,
"grad_norm": 1.1783215999603271,
"learning_rate": 1.780765663209377e-05,
"loss": 0.3501,
"step": 26000
},
{
"epoch": 1.1004300531242095,
"grad_norm": 0.8408191800117493,
"learning_rate": 1.7799224217893584e-05,
"loss": 0.3501,
"step": 26100
},
{
"epoch": 1.1046462602243021,
"grad_norm": 1.200312614440918,
"learning_rate": 1.77907918036934e-05,
"loss": 0.3377,
"step": 26200
},
{
"epoch": 1.108862467324395,
"grad_norm": 1.207794189453125,
"learning_rate": 1.7782359389493212e-05,
"loss": 0.3426,
"step": 26300
},
{
"epoch": 1.1130786744244878,
"grad_norm": 1.1393693685531616,
"learning_rate": 1.7773926975293028e-05,
"loss": 0.3393,
"step": 26400
},
{
"epoch": 1.1172948815245805,
"grad_norm": 1.1110385656356812,
"learning_rate": 1.776549456109284e-05,
"loss": 0.3428,
"step": 26500
},
{
"epoch": 1.1215110886246733,
"grad_norm": 0.830635130405426,
"learning_rate": 1.7757062146892656e-05,
"loss": 0.3451,
"step": 26600
},
{
"epoch": 1.125727295724766,
"grad_norm": 0.9378274083137512,
"learning_rate": 1.7748629732692472e-05,
"loss": 0.3408,
"step": 26700
},
{
"epoch": 1.1299435028248588,
"grad_norm": 1.1354353427886963,
"learning_rate": 1.7740197318492284e-05,
"loss": 0.3526,
"step": 26800
},
{
"epoch": 1.1341597099249516,
"grad_norm": 1.0900987386703491,
"learning_rate": 1.77317649042921e-05,
"loss": 0.3471,
"step": 26900
},
{
"epoch": 1.1383759170250443,
"grad_norm": 0.799541175365448,
"learning_rate": 1.7723332490091916e-05,
"loss": 0.3434,
"step": 27000
},
{
"epoch": 1.142592124125137,
"grad_norm": 0.8897498250007629,
"learning_rate": 1.771490007589173e-05,
"loss": 0.3309,
"step": 27100
},
{
"epoch": 1.1468083312252297,
"grad_norm": 1.2157037258148193,
"learning_rate": 1.7706467661691544e-05,
"loss": 0.3478,
"step": 27200
},
{
"epoch": 1.1510245383253226,
"grad_norm": 1.3008877038955688,
"learning_rate": 1.769803524749136e-05,
"loss": 0.3476,
"step": 27300
},
{
"epoch": 1.1552407454254152,
"grad_norm": 0.851649820804596,
"learning_rate": 1.7689602833291172e-05,
"loss": 0.3475,
"step": 27400
},
{
"epoch": 1.159456952525508,
"grad_norm": 1.1734685897827148,
"learning_rate": 1.7681170419090988e-05,
"loss": 0.3388,
"step": 27500
},
{
"epoch": 1.163673159625601,
"grad_norm": 0.779128909111023,
"learning_rate": 1.76727380048908e-05,
"loss": 0.3432,
"step": 27600
},
{
"epoch": 1.1678893667256935,
"grad_norm": 0.845840334892273,
"learning_rate": 1.7664305590690616e-05,
"loss": 0.3395,
"step": 27700
},
{
"epoch": 1.1721055738257864,
"grad_norm": 0.9524010419845581,
"learning_rate": 1.765587317649043e-05,
"loss": 0.3224,
"step": 27800
},
{
"epoch": 1.176321780925879,
"grad_norm": 0.8855528235435486,
"learning_rate": 1.7647440762290244e-05,
"loss": 0.3383,
"step": 27900
},
{
"epoch": 1.1805379880259719,
"grad_norm": 1.00590980052948,
"learning_rate": 1.763900834809006e-05,
"loss": 0.3404,
"step": 28000
},
{
"epoch": 1.1805379880259719,
"eval_bleu": 10.3175,
"eval_bleurt": null,
"eval_chrfpp": 32.8418,
"eval_comet": 0.5385,
"eval_gen_len": 20.8769,
"eval_loss": 0.35359427332878113,
"eval_runtime": 1378.0895,
"eval_samples_per_second": 34.421,
"eval_steps_per_second": 2.152,
"step": 28000
},
{
"epoch": 1.1847541951260645,
"grad_norm": 0.936487078666687,
"learning_rate": 1.7630575933889876e-05,
"loss": 0.3491,
"step": 28100
},
{
"epoch": 1.1889704022261574,
"grad_norm": 1.366697907447815,
"learning_rate": 1.762214351968969e-05,
"loss": 0.3393,
"step": 28200
},
{
"epoch": 1.1931866093262502,
"grad_norm": 1.3890807628631592,
"learning_rate": 1.7613711105489504e-05,
"loss": 0.3445,
"step": 28300
},
{
"epoch": 1.1974028164263428,
"grad_norm": 1.003692388534546,
"learning_rate": 1.7605278691289317e-05,
"loss": 0.3438,
"step": 28400
},
{
"epoch": 1.2016190235264357,
"grad_norm": 1.1394814252853394,
"learning_rate": 1.7596846277089132e-05,
"loss": 0.353,
"step": 28500
},
{
"epoch": 1.2058352306265283,
"grad_norm": 1.1011195182800293,
"learning_rate": 1.7588413862888948e-05,
"loss": 0.33,
"step": 28600
},
{
"epoch": 1.2100514377266212,
"grad_norm": 1.3941056728363037,
"learning_rate": 1.757998144868876e-05,
"loss": 0.3491,
"step": 28700
},
{
"epoch": 1.2142676448267138,
"grad_norm": 1.0769147872924805,
"learning_rate": 1.7571549034488576e-05,
"loss": 0.3349,
"step": 28800
},
{
"epoch": 1.2184838519268066,
"grad_norm": 1.0283029079437256,
"learning_rate": 1.756311662028839e-05,
"loss": 0.3331,
"step": 28900
},
{
"epoch": 1.2227000590268995,
"grad_norm": 1.1583529710769653,
"learning_rate": 1.7554684206088204e-05,
"loss": 0.3415,
"step": 29000
},
{
"epoch": 1.2269162661269921,
"grad_norm": 0.9916228652000427,
"learning_rate": 1.7546251791888017e-05,
"loss": 0.3423,
"step": 29100
},
{
"epoch": 1.231132473227085,
"grad_norm": 1.2261525392532349,
"learning_rate": 1.7537819377687833e-05,
"loss": 0.3474,
"step": 29200
},
{
"epoch": 1.2353486803271776,
"grad_norm": 1.1459957361221313,
"learning_rate": 1.752938696348765e-05,
"loss": 0.3494,
"step": 29300
},
{
"epoch": 1.2395648874272704,
"grad_norm": 1.4597856998443604,
"learning_rate": 1.7520954549287464e-05,
"loss": 0.3506,
"step": 29400
},
{
"epoch": 1.243781094527363,
"grad_norm": 1.1697700023651123,
"learning_rate": 1.7512522135087277e-05,
"loss": 0.3234,
"step": 29500
},
{
"epoch": 1.247997301627456,
"grad_norm": 1.378232717514038,
"learning_rate": 1.7504089720887092e-05,
"loss": 0.3384,
"step": 29600
},
{
"epoch": 1.2522135087275488,
"grad_norm": 1.1748912334442139,
"learning_rate": 1.7495657306686905e-05,
"loss": 0.3359,
"step": 29700
},
{
"epoch": 1.2564297158276414,
"grad_norm": 0.7615249752998352,
"learning_rate": 1.748722489248672e-05,
"loss": 0.3419,
"step": 29800
},
{
"epoch": 1.2606459229277343,
"grad_norm": 0.9890500903129578,
"learning_rate": 1.7478792478286536e-05,
"loss": 0.3424,
"step": 29900
},
{
"epoch": 1.264862130027827,
"grad_norm": 1.0541248321533203,
"learning_rate": 1.747036006408635e-05,
"loss": 0.3325,
"step": 30000
},
{
"epoch": 1.2690783371279197,
"grad_norm": 1.171378493309021,
"learning_rate": 1.7461927649886164e-05,
"loss": 0.3435,
"step": 30100
},
{
"epoch": 1.2732945442280124,
"grad_norm": 1.087592363357544,
"learning_rate": 1.7453495235685977e-05,
"loss": 0.3263,
"step": 30200
},
{
"epoch": 1.2775107513281052,
"grad_norm": 1.0550174713134766,
"learning_rate": 1.7445062821485793e-05,
"loss": 0.3434,
"step": 30300
},
{
"epoch": 1.281726958428198,
"grad_norm": 0.9705281257629395,
"learning_rate": 1.7436630407285605e-05,
"loss": 0.345,
"step": 30400
},
{
"epoch": 1.2859431655282907,
"grad_norm": 1.1588115692138672,
"learning_rate": 1.7428197993085424e-05,
"loss": 0.334,
"step": 30500
},
{
"epoch": 1.2901593726283835,
"grad_norm": 0.9370762705802917,
"learning_rate": 1.7419765578885237e-05,
"loss": 0.333,
"step": 30600
},
{
"epoch": 1.2943755797284764,
"grad_norm": 1.0201505422592163,
"learning_rate": 1.7411333164685052e-05,
"loss": 0.339,
"step": 30700
},
{
"epoch": 1.298591786828569,
"grad_norm": 0.8545118570327759,
"learning_rate": 1.7402900750484865e-05,
"loss": 0.337,
"step": 30800
},
{
"epoch": 1.3028079939286616,
"grad_norm": 1.1026926040649414,
"learning_rate": 1.739446833628468e-05,
"loss": 0.3363,
"step": 30900
},
{
"epoch": 1.3070242010287545,
"grad_norm": 0.9570561051368713,
"learning_rate": 1.7386035922084493e-05,
"loss": 0.3424,
"step": 31000
},
{
"epoch": 1.3112404081288473,
"grad_norm": 1.092779278755188,
"learning_rate": 1.737760350788431e-05,
"loss": 0.3331,
"step": 31100
},
{
"epoch": 1.31545661522894,
"grad_norm": 0.7841922044754028,
"learning_rate": 1.7369171093684125e-05,
"loss": 0.3357,
"step": 31200
},
{
"epoch": 1.3196728223290328,
"grad_norm": 0.9633954167366028,
"learning_rate": 1.7360738679483937e-05,
"loss": 0.3341,
"step": 31300
},
{
"epoch": 1.3238890294291257,
"grad_norm": 1.0769535303115845,
"learning_rate": 1.7352306265283753e-05,
"loss": 0.3403,
"step": 31400
},
{
"epoch": 1.3281052365292183,
"grad_norm": 0.9702937602996826,
"learning_rate": 1.7343873851083565e-05,
"loss": 0.3379,
"step": 31500
},
{
"epoch": 1.3323214436293112,
"grad_norm": 0.8990470767021179,
"learning_rate": 1.733544143688338e-05,
"loss": 0.3299,
"step": 31600
},
{
"epoch": 1.3365376507294038,
"grad_norm": 1.2237523794174194,
"learning_rate": 1.7327009022683197e-05,
"loss": 0.3422,
"step": 31700
},
{
"epoch": 1.3407538578294966,
"grad_norm": 1.086236596107483,
"learning_rate": 1.7318576608483012e-05,
"loss": 0.3395,
"step": 31800
},
{
"epoch": 1.3449700649295893,
"grad_norm": 1.2538822889328003,
"learning_rate": 1.7310144194282825e-05,
"loss": 0.3279,
"step": 31900
},
{
"epoch": 1.349186272029682,
"grad_norm": 1.317533016204834,
"learning_rate": 1.730171178008264e-05,
"loss": 0.3322,
"step": 32000
},
{
"epoch": 1.349186272029682,
"eval_bleu": 10.4799,
"eval_bleurt": null,
"eval_chrfpp": 33.0168,
"eval_comet": 0.5397,
"eval_gen_len": 20.874,
"eval_loss": 0.346453994512558,
"eval_runtime": 1388.5027,
"eval_samples_per_second": 34.163,
"eval_steps_per_second": 2.135,
"step": 32000
},
{
"epoch": 1.353402479129775,
"grad_norm": 0.9494001865386963,
"learning_rate": 1.7293279365882453e-05,
"loss": 0.3409,
"step": 32100
},
{
"epoch": 1.3576186862298676,
"grad_norm": 0.9309782981872559,
"learning_rate": 1.728484695168227e-05,
"loss": 0.3305,
"step": 32200
},
{
"epoch": 1.3618348933299604,
"grad_norm": 1.0548337697982788,
"learning_rate": 1.727641453748208e-05,
"loss": 0.3471,
"step": 32300
},
{
"epoch": 1.366051100430053,
"grad_norm": 0.9669992923736572,
"learning_rate": 1.7267982123281897e-05,
"loss": 0.3416,
"step": 32400
},
{
"epoch": 1.370267307530146,
"grad_norm": 1.1200644969940186,
"learning_rate": 1.725954970908171e-05,
"loss": 0.337,
"step": 32500
},
{
"epoch": 1.3744835146302385,
"grad_norm": 1.1911766529083252,
"learning_rate": 1.7251117294881525e-05,
"loss": 0.3256,
"step": 32600
},
{
"epoch": 1.3786997217303314,
"grad_norm": 0.8328487873077393,
"learning_rate": 1.724268488068134e-05,
"loss": 0.3432,
"step": 32700
},
{
"epoch": 1.3829159288304242,
"grad_norm": 1.1673336029052734,
"learning_rate": 1.7234252466481153e-05,
"loss": 0.3405,
"step": 32800
},
{
"epoch": 1.3871321359305169,
"grad_norm": 0.9292609691619873,
"learning_rate": 1.722582005228097e-05,
"loss": 0.339,
"step": 32900
},
{
"epoch": 1.3913483430306097,
"grad_norm": 0.9854961037635803,
"learning_rate": 1.7217387638080785e-05,
"loss": 0.3358,
"step": 33000
},
{
"epoch": 1.3955645501307024,
"grad_norm": 0.8137360215187073,
"learning_rate": 1.72089552238806e-05,
"loss": 0.3453,
"step": 33100
},
{
"epoch": 1.3997807572307952,
"grad_norm": 1.2690805196762085,
"learning_rate": 1.7200522809680413e-05,
"loss": 0.3361,
"step": 33200
},
{
"epoch": 1.4039969643308878,
"grad_norm": 1.0749177932739258,
"learning_rate": 1.719209039548023e-05,
"loss": 0.3495,
"step": 33300
},
{
"epoch": 1.4082131714309807,
"grad_norm": 1.118641972541809,
"learning_rate": 1.718365798128004e-05,
"loss": 0.3387,
"step": 33400
},
{
"epoch": 1.4124293785310735,
"grad_norm": 1.184773564338684,
"learning_rate": 1.7175225567079857e-05,
"loss": 0.3272,
"step": 33500
},
{
"epoch": 1.4166455856311662,
"grad_norm": 0.9476341009140015,
"learning_rate": 1.716679315287967e-05,
"loss": 0.3544,
"step": 33600
},
{
"epoch": 1.420861792731259,
"grad_norm": 0.9392078518867493,
"learning_rate": 1.7158360738679485e-05,
"loss": 0.3405,
"step": 33700
},
{
"epoch": 1.4250779998313516,
"grad_norm": 1.2548301219940186,
"learning_rate": 1.7149928324479298e-05,
"loss": 0.3167,
"step": 33800
},
{
"epoch": 1.4292942069314445,
"grad_norm": 1.0808441638946533,
"learning_rate": 1.7141495910279113e-05,
"loss": 0.3395,
"step": 33900
},
{
"epoch": 1.4335104140315371,
"grad_norm": 0.8488920331001282,
"learning_rate": 1.713306349607893e-05,
"loss": 0.3252,
"step": 34000
},
{
"epoch": 1.43772662113163,
"grad_norm": 1.0139002799987793,
"learning_rate": 1.7124631081878745e-05,
"loss": 0.3359,
"step": 34100
},
{
"epoch": 1.4419428282317228,
"grad_norm": 1.288271427154541,
"learning_rate": 1.7116198667678557e-05,
"loss": 0.3312,
"step": 34200
},
{
"epoch": 1.4461590353318154,
"grad_norm": 0.9306642413139343,
"learning_rate": 1.7107766253478373e-05,
"loss": 0.336,
"step": 34300
},
{
"epoch": 1.4503752424319083,
"grad_norm": 1.1435469388961792,
"learning_rate": 1.7099333839278185e-05,
"loss": 0.3313,
"step": 34400
},
{
"epoch": 1.4545914495320011,
"grad_norm": 0.9129034280776978,
"learning_rate": 1.7090901425078e-05,
"loss": 0.3351,
"step": 34500
},
{
"epoch": 1.4588076566320938,
"grad_norm": 1.0107824802398682,
"learning_rate": 1.7082469010877817e-05,
"loss": 0.3357,
"step": 34600
},
{
"epoch": 1.4630238637321864,
"grad_norm": 1.1336674690246582,
"learning_rate": 1.707403659667763e-05,
"loss": 0.3274,
"step": 34700
},
{
"epoch": 1.4672400708322793,
"grad_norm": 1.1610007286071777,
"learning_rate": 1.7065604182477445e-05,
"loss": 0.3438,
"step": 34800
},
{
"epoch": 1.471456277932372,
"grad_norm": 1.0345039367675781,
"learning_rate": 1.7057171768277258e-05,
"loss": 0.3356,
"step": 34900
},
{
"epoch": 1.4756724850324647,
"grad_norm": 0.9744789600372314,
"learning_rate": 1.7048739354077073e-05,
"loss": 0.3346,
"step": 35000
},
{
"epoch": 1.4798886921325576,
"grad_norm": 1.4097639322280884,
"learning_rate": 1.7040306939876886e-05,
"loss": 0.3279,
"step": 35100
},
{
"epoch": 1.4841048992326504,
"grad_norm": 1.1328394412994385,
"learning_rate": 1.7031874525676705e-05,
"loss": 0.3351,
"step": 35200
},
{
"epoch": 1.488321106332743,
"grad_norm": 1.0193605422973633,
"learning_rate": 1.7023442111476517e-05,
"loss": 0.3286,
"step": 35300
},
{
"epoch": 1.4925373134328357,
"grad_norm": 0.8527234792709351,
"learning_rate": 1.7015009697276333e-05,
"loss": 0.326,
"step": 35400
},
{
"epoch": 1.4967535205329285,
"grad_norm": 0.8829551339149475,
"learning_rate": 1.7006577283076146e-05,
"loss": 0.3322,
"step": 35500
},
{
"epoch": 1.5009697276330214,
"grad_norm": 1.0889208316802979,
"learning_rate": 1.699814486887596e-05,
"loss": 0.3413,
"step": 35600
},
{
"epoch": 1.505185934733114,
"grad_norm": 1.0842567682266235,
"learning_rate": 1.6989712454675774e-05,
"loss": 0.337,
"step": 35700
},
{
"epoch": 1.5094021418332069,
"grad_norm": 1.0290625095367432,
"learning_rate": 1.698128004047559e-05,
"loss": 0.3292,
"step": 35800
},
{
"epoch": 1.5136183489332997,
"grad_norm": 0.9727330803871155,
"learning_rate": 1.6972847626275405e-05,
"loss": 0.3388,
"step": 35900
},
{
"epoch": 1.5178345560333923,
"grad_norm": 0.9701403975486755,
"learning_rate": 1.6964415212075218e-05,
"loss": 0.3211,
"step": 36000
},
{
"epoch": 1.5178345560333923,
"eval_bleu": 10.7275,
"eval_bleurt": null,
"eval_chrfpp": 33.2537,
"eval_comet": 0.5414,
"eval_gen_len": 20.8726,
"eval_loss": 0.34036117792129517,
"eval_runtime": 1386.4377,
"eval_samples_per_second": 34.214,
"eval_steps_per_second": 2.139,
"step": 36000
},
{
"epoch": 1.522050763133485,
"grad_norm": 1.1016792058944702,
"learning_rate": 1.6955982797875033e-05,
"loss": 0.3267,
"step": 36100
},
{
"epoch": 1.5262669702335778,
"grad_norm": 0.9762911796569824,
"learning_rate": 1.6947550383674846e-05,
"loss": 0.3191,
"step": 36200
},
{
"epoch": 1.5304831773336707,
"grad_norm": 0.9911622405052185,
"learning_rate": 1.693911796947466e-05,
"loss": 0.3428,
"step": 36300
},
{
"epoch": 1.5346993844337633,
"grad_norm": 1.3638495206832886,
"learning_rate": 1.6930685555274477e-05,
"loss": 0.3353,
"step": 36400
},
{
"epoch": 1.5389155915338562,
"grad_norm": 0.9522203803062439,
"learning_rate": 1.6922253141074293e-05,
"loss": 0.3368,
"step": 36500
},
{
"epoch": 1.543131798633949,
"grad_norm": 0.9717823266983032,
"learning_rate": 1.6913820726874106e-05,
"loss": 0.3382,
"step": 36600
},
{
"epoch": 1.5473480057340416,
"grad_norm": 1.055655837059021,
"learning_rate": 1.690538831267392e-05,
"loss": 0.3312,
"step": 36700
},
{
"epoch": 1.5515642128341343,
"grad_norm": 1.0646960735321045,
"learning_rate": 1.6896955898473734e-05,
"loss": 0.3249,
"step": 36800
},
{
"epoch": 1.5557804199342273,
"grad_norm": 1.2053914070129395,
"learning_rate": 1.688852348427355e-05,
"loss": 0.3305,
"step": 36900
},
{
"epoch": 1.55999662703432,
"grad_norm": 1.0148818492889404,
"learning_rate": 1.6880091070073362e-05,
"loss": 0.3328,
"step": 37000
},
{
"epoch": 1.5642128341344126,
"grad_norm": 1.2227891683578491,
"learning_rate": 1.6871658655873178e-05,
"loss": 0.3327,
"step": 37100
},
{
"epoch": 1.5684290412345054,
"grad_norm": 1.0082377195358276,
"learning_rate": 1.6863226241672993e-05,
"loss": 0.328,
"step": 37200
},
{
"epoch": 1.5726452483345983,
"grad_norm": 0.9407429695129395,
"learning_rate": 1.6854793827472806e-05,
"loss": 0.3401,
"step": 37300
},
{
"epoch": 1.576861455434691,
"grad_norm": 1.115344762802124,
"learning_rate": 1.684636141327262e-05,
"loss": 0.3401,
"step": 37400
},
{
"epoch": 1.5810776625347835,
"grad_norm": 1.064095377922058,
"learning_rate": 1.6837928999072434e-05,
"loss": 0.3166,
"step": 37500
},
{
"epoch": 1.5852938696348766,
"grad_norm": 1.0824617147445679,
"learning_rate": 1.682949658487225e-05,
"loss": 0.312,
"step": 37600
},
{
"epoch": 1.5895100767349692,
"grad_norm": 1.1635481119155884,
"learning_rate": 1.6821064170672066e-05,
"loss": 0.3342,
"step": 37700
},
{
"epoch": 1.5937262838350619,
"grad_norm": 1.2488656044006348,
"learning_rate": 1.681263175647188e-05,
"loss": 0.3358,
"step": 37800
},
{
"epoch": 1.5979424909351547,
"grad_norm": 1.297699213027954,
"learning_rate": 1.6804199342271694e-05,
"loss": 0.3383,
"step": 37900
},
{
"epoch": 1.6021586980352476,
"grad_norm": 1.4961750507354736,
"learning_rate": 1.679576692807151e-05,
"loss": 0.3302,
"step": 38000
},
{
"epoch": 1.6063749051353402,
"grad_norm": 1.1194815635681152,
"learning_rate": 1.6787334513871322e-05,
"loss": 0.3234,
"step": 38100
},
{
"epoch": 1.610591112235433,
"grad_norm": 0.9607496857643127,
"learning_rate": 1.6778902099671138e-05,
"loss": 0.3364,
"step": 38200
},
{
"epoch": 1.614807319335526,
"grad_norm": 1.6336873769760132,
"learning_rate": 1.677046968547095e-05,
"loss": 0.3416,
"step": 38300
},
{
"epoch": 1.6190235264356185,
"grad_norm": 1.0101227760314941,
"learning_rate": 1.6762037271270766e-05,
"loss": 0.3269,
"step": 38400
},
{
"epoch": 1.6232397335357112,
"grad_norm": 0.7373623251914978,
"learning_rate": 1.6753604857070582e-05,
"loss": 0.3485,
"step": 38500
},
{
"epoch": 1.627455940635804,
"grad_norm": 0.9564256072044373,
"learning_rate": 1.6745172442870394e-05,
"loss": 0.3302,
"step": 38600
},
{
"epoch": 1.6316721477358969,
"grad_norm": 1.0523947477340698,
"learning_rate": 1.673674002867021e-05,
"loss": 0.3379,
"step": 38700
},
{
"epoch": 1.6358883548359895,
"grad_norm": 1.6321437358856201,
"learning_rate": 1.6728307614470026e-05,
"loss": 0.3287,
"step": 38800
},
{
"epoch": 1.6401045619360823,
"grad_norm": 0.7536235451698303,
"learning_rate": 1.6719875200269838e-05,
"loss": 0.3215,
"step": 38900
},
{
"epoch": 1.6443207690361752,
"grad_norm": 1.126569390296936,
"learning_rate": 1.6711442786069654e-05,
"loss": 0.3361,
"step": 39000
},
{
"epoch": 1.6485369761362678,
"grad_norm": 1.2291463613510132,
"learning_rate": 1.670301037186947e-05,
"loss": 0.338,
"step": 39100
},
{
"epoch": 1.6527531832363604,
"grad_norm": 1.1694891452789307,
"learning_rate": 1.6694577957669282e-05,
"loss": 0.3318,
"step": 39200
},
{
"epoch": 1.6569693903364533,
"grad_norm": 1.023356318473816,
"learning_rate": 1.6686145543469098e-05,
"loss": 0.3269,
"step": 39300
},
{
"epoch": 1.6611855974365461,
"grad_norm": 1.048325777053833,
"learning_rate": 1.667771312926891e-05,
"loss": 0.3208,
"step": 39400
},
{
"epoch": 1.6654018045366388,
"grad_norm": 0.9685364961624146,
"learning_rate": 1.6669280715068726e-05,
"loss": 0.3311,
"step": 39500
},
{
"epoch": 1.6696180116367316,
"grad_norm": 1.1764518022537231,
"learning_rate": 1.666084830086854e-05,
"loss": 0.3387,
"step": 39600
},
{
"epoch": 1.6738342187368245,
"grad_norm": 0.9446860551834106,
"learning_rate": 1.6652415886668354e-05,
"loss": 0.3387,
"step": 39700
},
{
"epoch": 1.678050425836917,
"grad_norm": 0.9704703092575073,
"learning_rate": 1.6643983472468167e-05,
"loss": 0.333,
"step": 39800
},
{
"epoch": 1.6822666329370097,
"grad_norm": 1.2208021879196167,
"learning_rate": 1.6635551058267982e-05,
"loss": 0.3335,
"step": 39900
},
{
"epoch": 1.6864828400371026,
"grad_norm": 0.8652202486991882,
"learning_rate": 1.6627118644067798e-05,
"loss": 0.3161,
"step": 40000
},
{
"epoch": 1.6864828400371026,
"eval_bleu": 10.8027,
"eval_bleurt": null,
"eval_chrfpp": 33.334,
"eval_comet": 0.5405,
"eval_gen_len": 20.8782,
"eval_loss": 0.3363133370876312,
"eval_runtime": 1388.0432,
"eval_samples_per_second": 34.174,
"eval_steps_per_second": 2.136,
"step": 40000
},
{
"epoch": 1.6906990471371954,
"grad_norm": 1.0811352729797363,
"learning_rate": 1.6618686229867614e-05,
"loss": 0.3193,
"step": 40100
},
{
"epoch": 1.694915254237288,
"grad_norm": 1.07560396194458,
"learning_rate": 1.6610253815667426e-05,
"loss": 0.3316,
"step": 40200
},
{
"epoch": 1.699131461337381,
"grad_norm": 1.0102812051773071,
"learning_rate": 1.6601821401467242e-05,
"loss": 0.3391,
"step": 40300
},
{
"epoch": 1.7033476684374738,
"grad_norm": 1.0425939559936523,
"learning_rate": 1.6593388987267058e-05,
"loss": 0.3322,
"step": 40400
},
{
"epoch": 1.7075638755375664,
"grad_norm": 1.0230315923690796,
"learning_rate": 1.658495657306687e-05,
"loss": 0.3282,
"step": 40500
},
{
"epoch": 1.711780082637659,
"grad_norm": 1.18186616897583,
"learning_rate": 1.6576524158866686e-05,
"loss": 0.3468,
"step": 40600
},
{
"epoch": 1.7159962897377519,
"grad_norm": 0.9138390421867371,
"learning_rate": 1.65680917446665e-05,
"loss": 0.3321,
"step": 40700
},
{
"epoch": 1.7202124968378447,
"grad_norm": 0.9740304350852966,
"learning_rate": 1.6559659330466314e-05,
"loss": 0.3218,
"step": 40800
},
{
"epoch": 1.7244287039379373,
"grad_norm": 1.2450716495513916,
"learning_rate": 1.6551226916266127e-05,
"loss": 0.3257,
"step": 40900
},
{
"epoch": 1.7286449110380302,
"grad_norm": 1.093040943145752,
"learning_rate": 1.6542794502065942e-05,
"loss": 0.3245,
"step": 41000
},
{
"epoch": 1.732861118138123,
"grad_norm": 0.9208073616027832,
"learning_rate": 1.6534362087865755e-05,
"loss": 0.3252,
"step": 41100
},
{
"epoch": 1.7370773252382157,
"grad_norm": 0.7854002714157104,
"learning_rate": 1.6525929673665574e-05,
"loss": 0.3304,
"step": 41200
},
{
"epoch": 1.7412935323383083,
"grad_norm": 0.9421478509902954,
"learning_rate": 1.6517497259465386e-05,
"loss": 0.3218,
"step": 41300
},
{
"epoch": 1.7455097394384014,
"grad_norm": 1.282019853591919,
"learning_rate": 1.6509064845265202e-05,
"loss": 0.3224,
"step": 41400
},
{
"epoch": 1.749725946538494,
"grad_norm": 1.2438828945159912,
"learning_rate": 1.6500632431065014e-05,
"loss": 0.3343,
"step": 41500
},
{
"epoch": 1.7539421536385866,
"grad_norm": 1.0174343585968018,
"learning_rate": 1.649220001686483e-05,
"loss": 0.3212,
"step": 41600
},
{
"epoch": 1.7581583607386795,
"grad_norm": 1.1391730308532715,
"learning_rate": 1.6483767602664643e-05,
"loss": 0.3145,
"step": 41700
},
{
"epoch": 1.7623745678387723,
"grad_norm": 1.1963273286819458,
"learning_rate": 1.647533518846446e-05,
"loss": 0.3153,
"step": 41800
},
{
"epoch": 1.766590774938865,
"grad_norm": 1.0125856399536133,
"learning_rate": 1.6466902774264274e-05,
"loss": 0.3251,
"step": 41900
},
{
"epoch": 1.7708069820389576,
"grad_norm": 1.2673448324203491,
"learning_rate": 1.6458470360064087e-05,
"loss": 0.3231,
"step": 42000
},
{
"epoch": 1.7750231891390507,
"grad_norm": 0.9198032021522522,
"learning_rate": 1.6450037945863902e-05,
"loss": 0.3332,
"step": 42100
},
{
"epoch": 1.7792393962391433,
"grad_norm": 1.0789839029312134,
"learning_rate": 1.6441605531663715e-05,
"loss": 0.3289,
"step": 42200
},
{
"epoch": 1.783455603339236,
"grad_norm": 1.2579984664916992,
"learning_rate": 1.643317311746353e-05,
"loss": 0.3166,
"step": 42300
},
{
"epoch": 1.7876718104393288,
"grad_norm": 1.1121423244476318,
"learning_rate": 1.6424740703263346e-05,
"loss": 0.3417,
"step": 42400
},
{
"epoch": 1.7918880175394216,
"grad_norm": 0.8245619535446167,
"learning_rate": 1.6416308289063162e-05,
"loss": 0.3274,
"step": 42500
},
{
"epoch": 1.7961042246395142,
"grad_norm": 1.3833199739456177,
"learning_rate": 1.6407875874862975e-05,
"loss": 0.3228,
"step": 42600
},
{
"epoch": 1.800320431739607,
"grad_norm": 1.387864112854004,
"learning_rate": 1.639944346066279e-05,
"loss": 0.3373,
"step": 42700
},
{
"epoch": 1.8045366388397,
"grad_norm": 1.053475260734558,
"learning_rate": 1.6391011046462603e-05,
"loss": 0.3292,
"step": 42800
},
{
"epoch": 1.8087528459397926,
"grad_norm": 1.1653027534484863,
"learning_rate": 1.638257863226242e-05,
"loss": 0.3243,
"step": 42900
},
{
"epoch": 1.8129690530398852,
"grad_norm": 0.7447928786277771,
"learning_rate": 1.637414621806223e-05,
"loss": 0.3211,
"step": 43000
},
{
"epoch": 1.817185260139978,
"grad_norm": 1.282525897026062,
"learning_rate": 1.6365713803862047e-05,
"loss": 0.3356,
"step": 43100
},
{
"epoch": 1.821401467240071,
"grad_norm": 0.9279470443725586,
"learning_rate": 1.6357281389661862e-05,
"loss": 0.3316,
"step": 43200
},
{
"epoch": 1.8256176743401635,
"grad_norm": 1.2694237232208252,
"learning_rate": 1.6348848975461675e-05,
"loss": 0.3219,
"step": 43300
},
{
"epoch": 1.8298338814402564,
"grad_norm": 0.9450383186340332,
"learning_rate": 1.634041656126149e-05,
"loss": 0.3309,
"step": 43400
},
{
"epoch": 1.8340500885403492,
"grad_norm": 0.7561673521995544,
"learning_rate": 1.6331984147061306e-05,
"loss": 0.3241,
"step": 43500
},
{
"epoch": 1.8382662956404419,
"grad_norm": 1.1571000814437866,
"learning_rate": 1.632355173286112e-05,
"loss": 0.3125,
"step": 43600
},
{
"epoch": 1.8424825027405345,
"grad_norm": 0.931656002998352,
"learning_rate": 1.6315119318660935e-05,
"loss": 0.3318,
"step": 43700
},
{
"epoch": 1.8466987098406273,
"grad_norm": 0.8409183025360107,
"learning_rate": 1.630668690446075e-05,
"loss": 0.3288,
"step": 43800
},
{
"epoch": 1.8509149169407202,
"grad_norm": 0.9442520141601562,
"learning_rate": 1.6298254490260563e-05,
"loss": 0.3215,
"step": 43900
},
{
"epoch": 1.8551311240408128,
"grad_norm": 0.9926084876060486,
"learning_rate": 1.628982207606038e-05,
"loss": 0.3248,
"step": 44000
},
{
"epoch": 1.8551311240408128,
"eval_bleu": 10.8661,
"eval_bleurt": null,
"eval_chrfpp": 33.5077,
"eval_comet": 0.5417,
"eval_gen_len": 20.8761,
"eval_loss": 0.33092769980430603,
"eval_runtime": 1383.7975,
"eval_samples_per_second": 34.279,
"eval_steps_per_second": 2.143,
"step": 44000
},
{
"epoch": 1.8593473311409057,
"grad_norm": 1.0377424955368042,
"learning_rate": 1.628138966186019e-05,
"loss": 0.345,
"step": 44100
},
{
"epoch": 1.8635635382409985,
"grad_norm": 0.848667562007904,
"learning_rate": 1.6272957247660007e-05,
"loss": 0.3422,
"step": 44200
},
{
"epoch": 1.8677797453410911,
"grad_norm": 0.9515209197998047,
"learning_rate": 1.626452483345982e-05,
"loss": 0.3259,
"step": 44300
},
{
"epoch": 1.8719959524411838,
"grad_norm": 1.2481857538223267,
"learning_rate": 1.6256092419259635e-05,
"loss": 0.3302,
"step": 44400
},
{
"epoch": 1.8762121595412766,
"grad_norm": 0.7382723093032837,
"learning_rate": 1.624766000505945e-05,
"loss": 0.3171,
"step": 44500
},
{
"epoch": 1.8804283666413695,
"grad_norm": 1.148926854133606,
"learning_rate": 1.6239227590859263e-05,
"loss": 0.3227,
"step": 44600
},
{
"epoch": 1.884644573741462,
"grad_norm": 0.9098696112632751,
"learning_rate": 1.623079517665908e-05,
"loss": 0.3117,
"step": 44700
},
{
"epoch": 1.888860780841555,
"grad_norm": 1.4447691440582275,
"learning_rate": 1.6222362762458895e-05,
"loss": 0.3316,
"step": 44800
},
{
"epoch": 1.8930769879416478,
"grad_norm": 0.9378564953804016,
"learning_rate": 1.6213930348258707e-05,
"loss": 0.3345,
"step": 44900
},
{
"epoch": 1.8972931950417404,
"grad_norm": 1.1684097051620483,
"learning_rate": 1.6205497934058523e-05,
"loss": 0.3177,
"step": 45000
},
{
"epoch": 1.901509402141833,
"grad_norm": 1.1021177768707275,
"learning_rate": 1.619706551985834e-05,
"loss": 0.3355,
"step": 45100
},
{
"epoch": 1.905725609241926,
"grad_norm": 1.5381673574447632,
"learning_rate": 1.618863310565815e-05,
"loss": 0.3279,
"step": 45200
},
{
"epoch": 1.9099418163420188,
"grad_norm": 1.1123120784759521,
"learning_rate": 1.6180200691457967e-05,
"loss": 0.3231,
"step": 45300
},
{
"epoch": 1.9141580234421114,
"grad_norm": 0.9993001818656921,
"learning_rate": 1.617176827725778e-05,
"loss": 0.3179,
"step": 45400
},
{
"epoch": 1.9183742305422042,
"grad_norm": 1.2264147996902466,
"learning_rate": 1.6163335863057595e-05,
"loss": 0.3189,
"step": 45500
},
{
"epoch": 1.922590437642297,
"grad_norm": 0.9310553669929504,
"learning_rate": 1.6154903448857407e-05,
"loss": 0.3325,
"step": 45600
},
{
"epoch": 1.9268066447423897,
"grad_norm": 0.8175243139266968,
"learning_rate": 1.6146471034657223e-05,
"loss": 0.3304,
"step": 45700
},
{
"epoch": 1.9310228518424823,
"grad_norm": 1.0943084955215454,
"learning_rate": 1.6138038620457035e-05,
"loss": 0.3283,
"step": 45800
},
{
"epoch": 1.9352390589425754,
"grad_norm": 1.1721843481063843,
"learning_rate": 1.6129606206256855e-05,
"loss": 0.3369,
"step": 45900
},
{
"epoch": 1.939455266042668,
"grad_norm": 1.3300516605377197,
"learning_rate": 1.6121173792056667e-05,
"loss": 0.325,
"step": 46000
},
{
"epoch": 1.9436714731427607,
"grad_norm": 0.9788861274719238,
"learning_rate": 1.6112741377856483e-05,
"loss": 0.3213,
"step": 46100
},
{
"epoch": 1.9478876802428535,
"grad_norm": 0.9827736616134644,
"learning_rate": 1.6104308963656295e-05,
"loss": 0.3232,
"step": 46200
},
{
"epoch": 1.9521038873429464,
"grad_norm": 1.8439685106277466,
"learning_rate": 1.609587654945611e-05,
"loss": 0.3343,
"step": 46300
},
{
"epoch": 1.956320094443039,
"grad_norm": 1.3670806884765625,
"learning_rate": 1.6087444135255927e-05,
"loss": 0.3207,
"step": 46400
},
{
"epoch": 1.9605363015431319,
"grad_norm": 1.2202095985412598,
"learning_rate": 1.607901172105574e-05,
"loss": 0.3179,
"step": 46500
},
{
"epoch": 1.9647525086432247,
"grad_norm": 0.9660270810127258,
"learning_rate": 1.6070579306855555e-05,
"loss": 0.3305,
"step": 46600
},
{
"epoch": 1.9689687157433173,
"grad_norm": 0.8134570121765137,
"learning_rate": 1.6062146892655367e-05,
"loss": 0.3276,
"step": 46700
},
{
"epoch": 1.97318492284341,
"grad_norm": 0.957103431224823,
"learning_rate": 1.6053714478455183e-05,
"loss": 0.3262,
"step": 46800
},
{
"epoch": 1.9774011299435028,
"grad_norm": 0.9770568609237671,
"learning_rate": 1.6045282064254996e-05,
"loss": 0.334,
"step": 46900
},
{
"epoch": 1.9816173370435957,
"grad_norm": 0.9361381530761719,
"learning_rate": 1.603684965005481e-05,
"loss": 0.3256,
"step": 47000
},
{
"epoch": 1.9858335441436883,
"grad_norm": 0.9023398756980896,
"learning_rate": 1.6028417235854627e-05,
"loss": 0.3176,
"step": 47100
},
{
"epoch": 1.9900497512437811,
"grad_norm": 1.149931788444519,
"learning_rate": 1.6019984821654443e-05,
"loss": 0.3158,
"step": 47200
},
{
"epoch": 1.994265958343874,
"grad_norm": 1.1268340349197388,
"learning_rate": 1.6011552407454255e-05,
"loss": 0.3266,
"step": 47300
},
{
"epoch": 1.9984821654439666,
"grad_norm": 1.1518200635910034,
"learning_rate": 1.600311999325407e-05,
"loss": 0.3279,
"step": 47400
},
{
"epoch": 2.0026983725440592,
"grad_norm": 0.8826459646224976,
"learning_rate": 1.5994687579053883e-05,
"loss": 0.3068,
"step": 47500
},
{
"epoch": 2.0069145796441523,
"grad_norm": 3.155869722366333,
"learning_rate": 1.59862551648537e-05,
"loss": 0.2818,
"step": 47600
},
{
"epoch": 2.011130786744245,
"grad_norm": 1.0178985595703125,
"learning_rate": 1.597782275065351e-05,
"loss": 0.3089,
"step": 47700
},
{
"epoch": 2.0153469938443376,
"grad_norm": 0.9122896790504456,
"learning_rate": 1.5969390336453327e-05,
"loss": 0.3009,
"step": 47800
},
{
"epoch": 2.01956320094443,
"grad_norm": 0.8784326910972595,
"learning_rate": 1.5960957922253143e-05,
"loss": 0.2948,
"step": 47900
},
{
"epoch": 2.0237794080445233,
"grad_norm": 1.5919113159179688,
"learning_rate": 1.5952525508052956e-05,
"loss": 0.2849,
"step": 48000
},
{
"epoch": 2.0237794080445233,
"eval_bleu": 10.9104,
"eval_bleurt": null,
"eval_chrfpp": 33.5186,
"eval_comet": 0.5415,
"eval_gen_len": 20.8749,
"eval_loss": 0.3277857005596161,
"eval_runtime": 1380.1121,
"eval_samples_per_second": 34.37,
"eval_steps_per_second": 2.148,
"step": 48000
},
{
"epoch": 2.027995615144616,
"grad_norm": 1.1097805500030518,
"learning_rate": 1.594409309385277e-05,
"loss": 0.2929,
"step": 48100
},
{
"epoch": 2.0322118222447085,
"grad_norm": 1.0113049745559692,
"learning_rate": 1.5935660679652584e-05,
"loss": 0.2906,
"step": 48200
},
{
"epoch": 2.0364280293448016,
"grad_norm": 0.9952514171600342,
"learning_rate": 1.5927228265452403e-05,
"loss": 0.2948,
"step": 48300
},
{
"epoch": 2.0406442364448942,
"grad_norm": 0.9028350114822388,
"learning_rate": 1.5918795851252215e-05,
"loss": 0.3016,
"step": 48400
},
{
"epoch": 2.044860443544987,
"grad_norm": 1.228007197380066,
"learning_rate": 1.591036343705203e-05,
"loss": 0.2829,
"step": 48500
},
{
"epoch": 2.0490766506450795,
"grad_norm": 0.9562857747077942,
"learning_rate": 1.5901931022851844e-05,
"loss": 0.2995,
"step": 48600
},
{
"epoch": 2.0532928577451726,
"grad_norm": 0.7891142964363098,
"learning_rate": 1.589349860865166e-05,
"loss": 0.3013,
"step": 48700
},
{
"epoch": 2.057509064845265,
"grad_norm": 0.8914188742637634,
"learning_rate": 1.588506619445147e-05,
"loss": 0.3034,
"step": 48800
},
{
"epoch": 2.061725271945358,
"grad_norm": 1.180168867111206,
"learning_rate": 1.5876633780251287e-05,
"loss": 0.3043,
"step": 48900
},
{
"epoch": 2.065941479045451,
"grad_norm": 1.1622966527938843,
"learning_rate": 1.58682013660511e-05,
"loss": 0.2952,
"step": 49000
},
{
"epoch": 2.0701576861455435,
"grad_norm": 1.1493375301361084,
"learning_rate": 1.5859768951850916e-05,
"loss": 0.3084,
"step": 49100
},
{
"epoch": 2.074373893245636,
"grad_norm": 1.2714732885360718,
"learning_rate": 1.585133653765073e-05,
"loss": 0.294,
"step": 49200
},
{
"epoch": 2.0785901003457288,
"grad_norm": 0.6899680495262146,
"learning_rate": 1.5842904123450544e-05,
"loss": 0.2936,
"step": 49300
},
{
"epoch": 2.082806307445822,
"grad_norm": 0.9679650068283081,
"learning_rate": 1.583447170925036e-05,
"loss": 0.296,
"step": 49400
},
{
"epoch": 2.0870225145459145,
"grad_norm": 0.8275384902954102,
"learning_rate": 1.5826039295050175e-05,
"loss": 0.3055,
"step": 49500
},
{
"epoch": 2.091238721646007,
"grad_norm": 1.0451972484588623,
"learning_rate": 1.5817606880849988e-05,
"loss": 0.2983,
"step": 49600
},
{
"epoch": 2.0954549287461,
"grad_norm": 0.8411651253700256,
"learning_rate": 1.5809174466649804e-05,
"loss": 0.3067,
"step": 49700
},
{
"epoch": 2.099671135846193,
"grad_norm": 1.1985809803009033,
"learning_rate": 1.580074205244962e-05,
"loss": 0.2959,
"step": 49800
},
{
"epoch": 2.1038873429462854,
"grad_norm": 1.143189787864685,
"learning_rate": 1.5792309638249432e-05,
"loss": 0.3003,
"step": 49900
},
{
"epoch": 2.108103550046378,
"grad_norm": 1.1173498630523682,
"learning_rate": 1.5783877224049248e-05,
"loss": 0.2941,
"step": 50000
},
{
"epoch": 2.112319757146471,
"grad_norm": 1.442973017692566,
"learning_rate": 1.577544480984906e-05,
"loss": 0.2909,
"step": 50100
},
{
"epoch": 2.1165359642465638,
"grad_norm": 2.1009395122528076,
"learning_rate": 1.5767012395648876e-05,
"loss": 0.2989,
"step": 50200
},
{
"epoch": 2.1207521713466564,
"grad_norm": 0.8531181216239929,
"learning_rate": 1.5758579981448688e-05,
"loss": 0.2897,
"step": 50300
},
{
"epoch": 2.1249683784467495,
"grad_norm": 0.9189686179161072,
"learning_rate": 1.5750147567248504e-05,
"loss": 0.3031,
"step": 50400
},
{
"epoch": 2.129184585546842,
"grad_norm": 1.1998023986816406,
"learning_rate": 1.574171515304832e-05,
"loss": 0.3012,
"step": 50500
},
{
"epoch": 2.1334007926469347,
"grad_norm": 1.015147089958191,
"learning_rate": 1.5733282738848132e-05,
"loss": 0.3005,
"step": 50600
},
{
"epoch": 2.137616999747028,
"grad_norm": 0.907986581325531,
"learning_rate": 1.5724850324647948e-05,
"loss": 0.293,
"step": 50700
},
{
"epoch": 2.1418332068471204,
"grad_norm": 0.7289599776268005,
"learning_rate": 1.5716417910447764e-05,
"loss": 0.2911,
"step": 50800
},
{
"epoch": 2.146049413947213,
"grad_norm": 0.9426230788230896,
"learning_rate": 1.5707985496247576e-05,
"loss": 0.2916,
"step": 50900
},
{
"epoch": 2.1502656210473057,
"grad_norm": 0.7667168378829956,
"learning_rate": 1.5699553082047392e-05,
"loss": 0.2903,
"step": 51000
},
{
"epoch": 2.1544818281473987,
"grad_norm": 1.0113255977630615,
"learning_rate": 1.5691120667847208e-05,
"loss": 0.2982,
"step": 51100
},
{
"epoch": 2.1586980352474914,
"grad_norm": 1.3701939582824707,
"learning_rate": 1.568268825364702e-05,
"loss": 0.3025,
"step": 51200
},
{
"epoch": 2.162914242347584,
"grad_norm": 0.9606438875198364,
"learning_rate": 1.5674255839446836e-05,
"loss": 0.2999,
"step": 51300
},
{
"epoch": 2.167130449447677,
"grad_norm": 1.252716064453125,
"learning_rate": 1.5665823425246648e-05,
"loss": 0.2951,
"step": 51400
},
{
"epoch": 2.1713466565477697,
"grad_norm": 1.0813300609588623,
"learning_rate": 1.5657391011046464e-05,
"loss": 0.3044,
"step": 51500
},
{
"epoch": 2.1755628636478623,
"grad_norm": 1.3432576656341553,
"learning_rate": 1.5648958596846276e-05,
"loss": 0.2975,
"step": 51600
},
{
"epoch": 2.179779070747955,
"grad_norm": 1.1878820657730103,
"learning_rate": 1.5640526182646092e-05,
"loss": 0.2983,
"step": 51700
},
{
"epoch": 2.183995277848048,
"grad_norm": 0.9856054186820984,
"learning_rate": 1.5632093768445908e-05,
"loss": 0.2989,
"step": 51800
},
{
"epoch": 2.1882114849481407,
"grad_norm": 1.0320172309875488,
"learning_rate": 1.5623661354245724e-05,
"loss": 0.3075,
"step": 51900
},
{
"epoch": 2.1924276920482333,
"grad_norm": 1.0002996921539307,
"learning_rate": 1.5615228940045536e-05,
"loss": 0.2842,
"step": 52000
},
{
"epoch": 2.1924276920482333,
"eval_bleu": 11.0893,
"eval_bleurt": null,
"eval_chrfpp": 33.7186,
"eval_comet": 0.5425,
"eval_gen_len": 20.875,
"eval_loss": 0.3242399990558624,
"eval_runtime": 1379.6714,
"eval_samples_per_second": 34.381,
"eval_steps_per_second": 2.149,
"step": 52000
},
{
"epoch": 2.1966438991483264,
"grad_norm": 1.0548425912857056,
"learning_rate": 1.5606796525845352e-05,
"loss": 0.2999,
"step": 52100
},
{
"epoch": 2.200860106248419,
"grad_norm": 0.9152701497077942,
"learning_rate": 1.5598364111645164e-05,
"loss": 0.295,
"step": 52200
},
{
"epoch": 2.2050763133485116,
"grad_norm": 0.6967754364013672,
"learning_rate": 1.558993169744498e-05,
"loss": 0.3083,
"step": 52300
},
{
"epoch": 2.2092925204486042,
"grad_norm": 1.1519029140472412,
"learning_rate": 1.5581499283244796e-05,
"loss": 0.2902,
"step": 52400
},
{
"epoch": 2.2135087275486973,
"grad_norm": 1.1648145914077759,
"learning_rate": 1.5573066869044608e-05,
"loss": 0.3046,
"step": 52500
},
{
"epoch": 2.21772493464879,
"grad_norm": 1.3708479404449463,
"learning_rate": 1.5564634454844424e-05,
"loss": 0.2916,
"step": 52600
},
{
"epoch": 2.2219411417488826,
"grad_norm": 1.298677682876587,
"learning_rate": 1.5556202040644236e-05,
"loss": 0.2973,
"step": 52700
},
{
"epoch": 2.2261573488489756,
"grad_norm": 0.917349100112915,
"learning_rate": 1.5547769626444052e-05,
"loss": 0.296,
"step": 52800
},
{
"epoch": 2.2303735559490683,
"grad_norm": 0.9578775763511658,
"learning_rate": 1.5539337212243865e-05,
"loss": 0.2904,
"step": 52900
},
{
"epoch": 2.234589763049161,
"grad_norm": 1.1845663785934448,
"learning_rate": 1.5530904798043684e-05,
"loss": 0.2993,
"step": 53000
},
{
"epoch": 2.2388059701492535,
"grad_norm": 0.9572575688362122,
"learning_rate": 1.5522472383843496e-05,
"loss": 0.3096,
"step": 53100
},
{
"epoch": 2.2430221772493466,
"grad_norm": 1.1121599674224854,
"learning_rate": 1.5514039969643312e-05,
"loss": 0.2969,
"step": 53200
},
{
"epoch": 2.2472383843494392,
"grad_norm": 1.1499770879745483,
"learning_rate": 1.5505607555443124e-05,
"loss": 0.3047,
"step": 53300
},
{
"epoch": 2.251454591449532,
"grad_norm": 0.9810878038406372,
"learning_rate": 1.549717514124294e-05,
"loss": 0.3008,
"step": 53400
},
{
"epoch": 2.255670798549625,
"grad_norm": 0.9504501819610596,
"learning_rate": 1.5488742727042752e-05,
"loss": 0.303,
"step": 53500
},
{
"epoch": 2.2598870056497176,
"grad_norm": 0.8722612857818604,
"learning_rate": 1.5480310312842568e-05,
"loss": 0.3017,
"step": 53600
},
{
"epoch": 2.26410321274981,
"grad_norm": 1.1492185592651367,
"learning_rate": 1.5471877898642384e-05,
"loss": 0.2932,
"step": 53700
},
{
"epoch": 2.2683194198499033,
"grad_norm": 1.248023509979248,
"learning_rate": 1.5463445484442196e-05,
"loss": 0.3085,
"step": 53800
},
{
"epoch": 2.272535626949996,
"grad_norm": 0.9625715017318726,
"learning_rate": 1.5455013070242012e-05,
"loss": 0.2991,
"step": 53900
},
{
"epoch": 2.2767518340500885,
"grad_norm": 1.1480776071548462,
"learning_rate": 1.5446580656041825e-05,
"loss": 0.2919,
"step": 54000
},
{
"epoch": 2.280968041150181,
"grad_norm": 1.2776678800582886,
"learning_rate": 1.543814824184164e-05,
"loss": 0.2985,
"step": 54100
},
{
"epoch": 2.285184248250274,
"grad_norm": 1.0501973628997803,
"learning_rate": 1.5429715827641456e-05,
"loss": 0.3052,
"step": 54200
},
{
"epoch": 2.289400455350367,
"grad_norm": 1.2569295167922974,
"learning_rate": 1.5421283413441272e-05,
"loss": 0.2963,
"step": 54300
},
{
"epoch": 2.2936166624504595,
"grad_norm": 1.1297552585601807,
"learning_rate": 1.5412850999241084e-05,
"loss": 0.2987,
"step": 54400
},
{
"epoch": 2.297832869550552,
"grad_norm": 1.2715651988983154,
"learning_rate": 1.54044185850409e-05,
"loss": 0.2974,
"step": 54500
},
{
"epoch": 2.302049076650645,
"grad_norm": 1.2650036811828613,
"learning_rate": 1.5395986170840712e-05,
"loss": 0.3003,
"step": 54600
},
{
"epoch": 2.306265283750738,
"grad_norm": 1.1509555578231812,
"learning_rate": 1.5387553756640528e-05,
"loss": 0.3007,
"step": 54700
},
{
"epoch": 2.3104814908508304,
"grad_norm": 0.7887945771217346,
"learning_rate": 1.537912134244034e-05,
"loss": 0.2914,
"step": 54800
},
{
"epoch": 2.3146976979509235,
"grad_norm": 1.1291335821151733,
"learning_rate": 1.5370688928240156e-05,
"loss": 0.3031,
"step": 54900
},
{
"epoch": 2.318913905051016,
"grad_norm": 1.0857185125350952,
"learning_rate": 1.536225651403997e-05,
"loss": 0.2953,
"step": 55000
},
{
"epoch": 2.3231301121511088,
"grad_norm": 0.8543188571929932,
"learning_rate": 1.5353824099839785e-05,
"loss": 0.2902,
"step": 55100
},
{
"epoch": 2.327346319251202,
"grad_norm": 0.8242142796516418,
"learning_rate": 1.53453916856396e-05,
"loss": 0.2837,
"step": 55200
},
{
"epoch": 2.3315625263512945,
"grad_norm": 0.9572939872741699,
"learning_rate": 1.5336959271439413e-05,
"loss": 0.3028,
"step": 55300
},
{
"epoch": 2.335778733451387,
"grad_norm": 0.9514021277427673,
"learning_rate": 1.532852685723923e-05,
"loss": 0.2904,
"step": 55400
},
{
"epoch": 2.3399949405514797,
"grad_norm": 0.9875904321670532,
"learning_rate": 1.5320094443039044e-05,
"loss": 0.3006,
"step": 55500
},
{
"epoch": 2.344211147651573,
"grad_norm": 1.1220569610595703,
"learning_rate": 1.531166202883886e-05,
"loss": 0.2915,
"step": 55600
},
{
"epoch": 2.3484273547516654,
"grad_norm": 1.2072755098342896,
"learning_rate": 1.5303229614638673e-05,
"loss": 0.2922,
"step": 55700
},
{
"epoch": 2.352643561851758,
"grad_norm": 0.9680849313735962,
"learning_rate": 1.5294797200438488e-05,
"loss": 0.2919,
"step": 55800
},
{
"epoch": 2.3568597689518507,
"grad_norm": 1.0141241550445557,
"learning_rate": 1.52863647862383e-05,
"loss": 0.2953,
"step": 55900
},
{
"epoch": 2.3610759760519437,
"grad_norm": 1.1104940176010132,
"learning_rate": 1.5277932372038116e-05,
"loss": 0.3066,
"step": 56000
},
{
"epoch": 2.3610759760519437,
"eval_bleu": 11.0717,
"eval_bleurt": null,
"eval_chrfpp": 33.766,
"eval_comet": 0.5431,
"eval_gen_len": 20.8744,
"eval_loss": 0.31978920102119446,
"eval_runtime": 1367.3903,
"eval_samples_per_second": 34.69,
"eval_steps_per_second": 2.168,
"step": 56000
},
{
"epoch": 2.3652921831520364,
"grad_norm": 0.8834062218666077,
"learning_rate": 1.526949995783793e-05,
"loss": 0.2964,
"step": 56100
},
{
"epoch": 2.369508390252129,
"grad_norm": 1.1473215818405151,
"learning_rate": 1.5261067543637745e-05,
"loss": 0.3116,
"step": 56200
},
{
"epoch": 2.373724597352222,
"grad_norm": 1.0120465755462646,
"learning_rate": 1.5252635129437559e-05,
"loss": 0.29,
"step": 56300
},
{
"epoch": 2.3779408044523147,
"grad_norm": 1.1583025455474854,
"learning_rate": 1.5244202715237373e-05,
"loss": 0.2926,
"step": 56400
},
{
"epoch": 2.3821570115524073,
"grad_norm": 1.3375540971755981,
"learning_rate": 1.5235770301037187e-05,
"loss": 0.2999,
"step": 56500
},
{
"epoch": 2.3863732186525004,
"grad_norm": 0.9002558588981628,
"learning_rate": 1.5227337886837004e-05,
"loss": 0.297,
"step": 56600
},
{
"epoch": 2.390589425752593,
"grad_norm": 0.9687677025794983,
"learning_rate": 1.5218905472636818e-05,
"loss": 0.2872,
"step": 56700
},
{
"epoch": 2.3948056328526857,
"grad_norm": 0.8272023797035217,
"learning_rate": 1.5210473058436633e-05,
"loss": 0.2959,
"step": 56800
},
{
"epoch": 2.3990218399527783,
"grad_norm": 0.8559250235557556,
"learning_rate": 1.5202040644236447e-05,
"loss": 0.2851,
"step": 56900
},
{
"epoch": 2.4032380470528714,
"grad_norm": 0.9264187812805176,
"learning_rate": 1.519360823003626e-05,
"loss": 0.3003,
"step": 57000
},
{
"epoch": 2.407454254152964,
"grad_norm": 0.9628717303276062,
"learning_rate": 1.5185175815836075e-05,
"loss": 0.2871,
"step": 57100
},
{
"epoch": 2.4116704612530566,
"grad_norm": 1.0523897409439087,
"learning_rate": 1.5176743401635889e-05,
"loss": 0.3034,
"step": 57200
},
{
"epoch": 2.4158866683531497,
"grad_norm": 1.1983698606491089,
"learning_rate": 1.5168310987435703e-05,
"loss": 0.2959,
"step": 57300
},
{
"epoch": 2.4201028754532423,
"grad_norm": 0.9500690698623657,
"learning_rate": 1.5159878573235519e-05,
"loss": 0.2952,
"step": 57400
},
{
"epoch": 2.424319082553335,
"grad_norm": 1.007147192955017,
"learning_rate": 1.5151446159035333e-05,
"loss": 0.2875,
"step": 57500
},
{
"epoch": 2.4285352896534276,
"grad_norm": 1.1029393672943115,
"learning_rate": 1.5143013744835147e-05,
"loss": 0.2912,
"step": 57600
},
{
"epoch": 2.4327514967535206,
"grad_norm": 0.9687654376029968,
"learning_rate": 1.5134581330634961e-05,
"loss": 0.2944,
"step": 57700
},
{
"epoch": 2.4369677038536133,
"grad_norm": 1.0068873167037964,
"learning_rate": 1.5126148916434777e-05,
"loss": 0.2941,
"step": 57800
},
{
"epoch": 2.441183910953706,
"grad_norm": 1.0818562507629395,
"learning_rate": 1.5117716502234591e-05,
"loss": 0.2997,
"step": 57900
},
{
"epoch": 2.445400118053799,
"grad_norm": 0.8829095363616943,
"learning_rate": 1.5109284088034407e-05,
"loss": 0.2951,
"step": 58000
},
{
"epoch": 2.4496163251538916,
"grad_norm": 0.9035953283309937,
"learning_rate": 1.510085167383422e-05,
"loss": 0.3027,
"step": 58100
},
{
"epoch": 2.4538325322539842,
"grad_norm": 0.9314539432525635,
"learning_rate": 1.5092419259634035e-05,
"loss": 0.287,
"step": 58200
},
{
"epoch": 2.4580487393540773,
"grad_norm": 1.2419513463974,
"learning_rate": 1.5083986845433849e-05,
"loss": 0.2901,
"step": 58300
},
{
"epoch": 2.46226494645417,
"grad_norm": 1.06702721118927,
"learning_rate": 1.5075554431233663e-05,
"loss": 0.2941,
"step": 58400
},
{
"epoch": 2.4664811535542626,
"grad_norm": 1.1835907697677612,
"learning_rate": 1.5067122017033477e-05,
"loss": 0.296,
"step": 58500
},
{
"epoch": 2.470697360654355,
"grad_norm": 0.8179803490638733,
"learning_rate": 1.5058689602833291e-05,
"loss": 0.2888,
"step": 58600
},
{
"epoch": 2.4749135677544483,
"grad_norm": 0.777482807636261,
"learning_rate": 1.5050257188633105e-05,
"loss": 0.2923,
"step": 58700
},
{
"epoch": 2.479129774854541,
"grad_norm": 1.0873775482177734,
"learning_rate": 1.5041824774432921e-05,
"loss": 0.2942,
"step": 58800
},
{
"epoch": 2.4833459819546335,
"grad_norm": 0.9708207249641418,
"learning_rate": 1.5033392360232737e-05,
"loss": 0.2917,
"step": 58900
},
{
"epoch": 2.487562189054726,
"grad_norm": 1.1274676322937012,
"learning_rate": 1.5024959946032551e-05,
"loss": 0.2895,
"step": 59000
},
{
"epoch": 2.491778396154819,
"grad_norm": 1.5154836177825928,
"learning_rate": 1.5016527531832365e-05,
"loss": 0.2952,
"step": 59100
},
{
"epoch": 2.495994603254912,
"grad_norm": 1.0997233390808105,
"learning_rate": 1.5008095117632179e-05,
"loss": 0.3037,
"step": 59200
},
{
"epoch": 2.5002108103550045,
"grad_norm": 0.8567424416542053,
"learning_rate": 1.4999662703431995e-05,
"loss": 0.2986,
"step": 59300
},
{
"epoch": 2.5044270174550975,
"grad_norm": 1.1187572479248047,
"learning_rate": 1.4991230289231809e-05,
"loss": 0.2912,
"step": 59400
},
{
"epoch": 2.50864322455519,
"grad_norm": 1.0140553712844849,
"learning_rate": 1.4982797875031623e-05,
"loss": 0.2853,
"step": 59500
},
{
"epoch": 2.512859431655283,
"grad_norm": 1.2574357986450195,
"learning_rate": 1.4974365460831437e-05,
"loss": 0.2785,
"step": 59600
},
{
"epoch": 2.517075638755376,
"grad_norm": 1.3628848791122437,
"learning_rate": 1.4965933046631251e-05,
"loss": 0.295,
"step": 59700
},
{
"epoch": 2.5212918458554685,
"grad_norm": 1.3058345317840576,
"learning_rate": 1.4957500632431065e-05,
"loss": 0.2837,
"step": 59800
},
{
"epoch": 2.525508052955561,
"grad_norm": 1.1940069198608398,
"learning_rate": 1.494906821823088e-05,
"loss": 0.2934,
"step": 59900
},
{
"epoch": 2.529724260055654,
"grad_norm": 0.9008951187133789,
"learning_rate": 1.4940635804030694e-05,
"loss": 0.2827,
"step": 60000
},
{
"epoch": 2.529724260055654,
"eval_bleu": 11.1804,
"eval_bleurt": null,
"eval_chrfpp": 33.8664,
"eval_comet": 0.5434,
"eval_gen_len": 20.874,
"eval_loss": 0.3158092796802521,
"eval_runtime": 1392.2104,
"eval_samples_per_second": 34.072,
"eval_steps_per_second": 2.13,
"step": 60000
},
{
"epoch": 2.533940467155747,
"grad_norm": 1.0412942171096802,
"learning_rate": 1.4932203389830511e-05,
"loss": 0.3027,
"step": 60100
},
{
"epoch": 2.5381566742558395,
"grad_norm": 1.2109469175338745,
"learning_rate": 1.4923770975630325e-05,
"loss": 0.2911,
"step": 60200
},
{
"epoch": 2.542372881355932,
"grad_norm": 1.0820285081863403,
"learning_rate": 1.491533856143014e-05,
"loss": 0.2946,
"step": 60300
},
{
"epoch": 2.5465890884560247,
"grad_norm": 1.6041064262390137,
"learning_rate": 1.4906906147229953e-05,
"loss": 0.2969,
"step": 60400
},
{
"epoch": 2.550805295556118,
"grad_norm": 1.1145155429840088,
"learning_rate": 1.4898473733029767e-05,
"loss": 0.285,
"step": 60500
},
{
"epoch": 2.5550215026562104,
"grad_norm": 1.105671763420105,
"learning_rate": 1.4890041318829581e-05,
"loss": 0.2979,
"step": 60600
},
{
"epoch": 2.559237709756303,
"grad_norm": 1.5386360883712769,
"learning_rate": 1.4881608904629397e-05,
"loss": 0.296,
"step": 60700
},
{
"epoch": 2.563453916856396,
"grad_norm": 0.9032478928565979,
"learning_rate": 1.4873176490429211e-05,
"loss": 0.3074,
"step": 60800
},
{
"epoch": 2.5676701239564887,
"grad_norm": 0.9565399885177612,
"learning_rate": 1.4864744076229025e-05,
"loss": 0.2831,
"step": 60900
},
{
"epoch": 2.5718863310565814,
"grad_norm": 0.8517098426818848,
"learning_rate": 1.485631166202884e-05,
"loss": 0.2882,
"step": 61000
},
{
"epoch": 2.5761025381566744,
"grad_norm": 0.9721641540527344,
"learning_rate": 1.4847879247828654e-05,
"loss": 0.29,
"step": 61100
},
{
"epoch": 2.580318745256767,
"grad_norm": 1.0403635501861572,
"learning_rate": 1.4839446833628468e-05,
"loss": 0.291,
"step": 61200
},
{
"epoch": 2.5845349523568597,
"grad_norm": 0.9895289540290833,
"learning_rate": 1.4831014419428285e-05,
"loss": 0.2879,
"step": 61300
},
{
"epoch": 2.5887511594569528,
"grad_norm": 0.7700739502906799,
"learning_rate": 1.48225820052281e-05,
"loss": 0.2902,
"step": 61400
},
{
"epoch": 2.5929673665570454,
"grad_norm": 1.0654332637786865,
"learning_rate": 1.4814149591027913e-05,
"loss": 0.2909,
"step": 61500
},
{
"epoch": 2.597183573657138,
"grad_norm": 1.0778576135635376,
"learning_rate": 1.4805717176827727e-05,
"loss": 0.2935,
"step": 61600
},
{
"epoch": 2.6013997807572307,
"grad_norm": 0.9637481570243835,
"learning_rate": 1.4797284762627541e-05,
"loss": 0.2909,
"step": 61700
},
{
"epoch": 2.6056159878573233,
"grad_norm": 0.9111833572387695,
"learning_rate": 1.4788852348427356e-05,
"loss": 0.2945,
"step": 61800
},
{
"epoch": 2.6098321949574164,
"grad_norm": 1.1596795320510864,
"learning_rate": 1.478041993422717e-05,
"loss": 0.2805,
"step": 61900
},
{
"epoch": 2.614048402057509,
"grad_norm": 1.007734775543213,
"learning_rate": 1.4771987520026985e-05,
"loss": 0.2975,
"step": 62000
},
{
"epoch": 2.6182646091576016,
"grad_norm": 1.0803258419036865,
"learning_rate": 1.47635551058268e-05,
"loss": 0.285,
"step": 62100
},
{
"epoch": 2.6224808162576947,
"grad_norm": 1.2612115144729614,
"learning_rate": 1.4755122691626614e-05,
"loss": 0.2805,
"step": 62200
},
{
"epoch": 2.6266970233577873,
"grad_norm": 1.2345725297927856,
"learning_rate": 1.4746690277426428e-05,
"loss": 0.3011,
"step": 62300
},
{
"epoch": 2.63091323045788,
"grad_norm": 0.9898720383644104,
"learning_rate": 1.4738257863226242e-05,
"loss": 0.2929,
"step": 62400
},
{
"epoch": 2.635129437557973,
"grad_norm": 1.1107537746429443,
"learning_rate": 1.4729825449026058e-05,
"loss": 0.2869,
"step": 62500
},
{
"epoch": 2.6393456446580656,
"grad_norm": 1.1848292350769043,
"learning_rate": 1.4721393034825873e-05,
"loss": 0.2916,
"step": 62600
},
{
"epoch": 2.6435618517581583,
"grad_norm": 1.2696866989135742,
"learning_rate": 1.4712960620625687e-05,
"loss": 0.2927,
"step": 62700
},
{
"epoch": 2.6477780588582513,
"grad_norm": 0.9077075123786926,
"learning_rate": 1.4704528206425502e-05,
"loss": 0.3002,
"step": 62800
},
{
"epoch": 2.651994265958344,
"grad_norm": 1.1843475103378296,
"learning_rate": 1.4696095792225316e-05,
"loss": 0.2846,
"step": 62900
},
{
"epoch": 2.6562104730584366,
"grad_norm": 0.8227006196975708,
"learning_rate": 1.468766337802513e-05,
"loss": 0.2951,
"step": 63000
},
{
"epoch": 2.6604266801585292,
"grad_norm": 1.1044954061508179,
"learning_rate": 1.4679230963824944e-05,
"loss": 0.2914,
"step": 63100
},
{
"epoch": 2.6646428872586223,
"grad_norm": 0.7992355227470398,
"learning_rate": 1.4670798549624758e-05,
"loss": 0.291,
"step": 63200
},
{
"epoch": 2.668859094358715,
"grad_norm": 1.0405184030532837,
"learning_rate": 1.4662366135424572e-05,
"loss": 0.3053,
"step": 63300
},
{
"epoch": 2.6730753014588076,
"grad_norm": 0.9635149240493774,
"learning_rate": 1.4653933721224388e-05,
"loss": 0.2981,
"step": 63400
},
{
"epoch": 2.6772915085589,
"grad_norm": 0.9436842799186707,
"learning_rate": 1.4645501307024202e-05,
"loss": 0.2995,
"step": 63500
},
{
"epoch": 2.6815077156589933,
"grad_norm": 1.4944032430648804,
"learning_rate": 1.4637068892824016e-05,
"loss": 0.2766,
"step": 63600
},
{
"epoch": 2.685723922759086,
"grad_norm": 1.2989920377731323,
"learning_rate": 1.4628636478623832e-05,
"loss": 0.284,
"step": 63700
},
{
"epoch": 2.6899401298591785,
"grad_norm": 1.1201857328414917,
"learning_rate": 1.4620204064423646e-05,
"loss": 0.2878,
"step": 63800
},
{
"epoch": 2.6941563369592716,
"grad_norm": 0.7587829232215881,
"learning_rate": 1.4611771650223462e-05,
"loss": 0.2815,
"step": 63900
},
{
"epoch": 2.698372544059364,
"grad_norm": 1.278507113456726,
"learning_rate": 1.4603339236023276e-05,
"loss": 0.2879,
"step": 64000
},
{
"epoch": 2.698372544059364,
"eval_bleu": 11.3398,
"eval_bleurt": null,
"eval_chrfpp": 33.9415,
"eval_comet": 0.5433,
"eval_gen_len": 20.8771,
"eval_loss": 0.3120929002761841,
"eval_runtime": 1359.9271,
"eval_samples_per_second": 34.881,
"eval_steps_per_second": 2.18,
"step": 64000
},
{
"epoch": 2.702588751159457,
"grad_norm": 1.3177260160446167,
"learning_rate": 1.459490682182309e-05,
"loss": 0.2893,
"step": 64100
},
{
"epoch": 2.70680495825955,
"grad_norm": 1.136610507965088,
"learning_rate": 1.4586474407622904e-05,
"loss": 0.3055,
"step": 64200
},
{
"epoch": 2.7110211653596425,
"grad_norm": 1.152239203453064,
"learning_rate": 1.4578041993422718e-05,
"loss": 0.2845,
"step": 64300
},
{
"epoch": 2.715237372459735,
"grad_norm": 1.1815099716186523,
"learning_rate": 1.4569609579222532e-05,
"loss": 0.2855,
"step": 64400
},
{
"epoch": 2.7194535795598282,
"grad_norm": 0.8875076174736023,
"learning_rate": 1.4561177165022346e-05,
"loss": 0.2902,
"step": 64500
},
{
"epoch": 2.723669786659921,
"grad_norm": 0.9175387024879456,
"learning_rate": 1.455274475082216e-05,
"loss": 0.2843,
"step": 64600
},
{
"epoch": 2.7278859937600135,
"grad_norm": 0.9587578773498535,
"learning_rate": 1.4544312336621976e-05,
"loss": 0.2877,
"step": 64700
},
{
"epoch": 2.732102200860106,
"grad_norm": 0.8387385606765747,
"learning_rate": 1.453587992242179e-05,
"loss": 0.2882,
"step": 64800
},
{
"epoch": 2.7363184079601988,
"grad_norm": 1.0625752210617065,
"learning_rate": 1.4527447508221606e-05,
"loss": 0.3063,
"step": 64900
},
{
"epoch": 2.740534615060292,
"grad_norm": 1.0522043704986572,
"learning_rate": 1.451901509402142e-05,
"loss": 0.2854,
"step": 65000
},
{
"epoch": 2.7447508221603845,
"grad_norm": 1.0325335264205933,
"learning_rate": 1.4510582679821234e-05,
"loss": 0.287,
"step": 65100
},
{
"epoch": 2.748967029260477,
"grad_norm": 0.7415598630905151,
"learning_rate": 1.4502150265621048e-05,
"loss": 0.2783,
"step": 65200
},
{
"epoch": 2.75318323636057,
"grad_norm": 1.2780919075012207,
"learning_rate": 1.4493717851420864e-05,
"loss": 0.2854,
"step": 65300
},
{
"epoch": 2.757399443460663,
"grad_norm": 1.0465195178985596,
"learning_rate": 1.4485285437220678e-05,
"loss": 0.2942,
"step": 65400
},
{
"epoch": 2.7616156505607554,
"grad_norm": 0.8310420513153076,
"learning_rate": 1.4476853023020492e-05,
"loss": 0.2896,
"step": 65500
},
{
"epoch": 2.7658318576608485,
"grad_norm": 0.9426363706588745,
"learning_rate": 1.4468420608820306e-05,
"loss": 0.2839,
"step": 65600
},
{
"epoch": 2.770048064760941,
"grad_norm": 1.195570945739746,
"learning_rate": 1.445998819462012e-05,
"loss": 0.2894,
"step": 65700
},
{
"epoch": 2.7742642718610337,
"grad_norm": 1.1623390913009644,
"learning_rate": 1.4451555780419934e-05,
"loss": 0.2897,
"step": 65800
},
{
"epoch": 2.778480478961127,
"grad_norm": 0.8429798483848572,
"learning_rate": 1.4443123366219748e-05,
"loss": 0.2877,
"step": 65900
},
{
"epoch": 2.7826966860612194,
"grad_norm": 1.0380526781082153,
"learning_rate": 1.4434690952019562e-05,
"loss": 0.2745,
"step": 66000
},
{
"epoch": 2.786912893161312,
"grad_norm": 1.0655642747879028,
"learning_rate": 1.442625853781938e-05,
"loss": 0.2906,
"step": 66100
},
{
"epoch": 2.7911291002614047,
"grad_norm": 0.9089457392692566,
"learning_rate": 1.4417826123619194e-05,
"loss": 0.2918,
"step": 66200
},
{
"epoch": 2.7953453073614973,
"grad_norm": 0.9997662305831909,
"learning_rate": 1.4409393709419008e-05,
"loss": 0.2835,
"step": 66300
},
{
"epoch": 2.7995615144615904,
"grad_norm": 1.1101837158203125,
"learning_rate": 1.4400961295218822e-05,
"loss": 0.2774,
"step": 66400
},
{
"epoch": 2.803777721561683,
"grad_norm": 0.9055351614952087,
"learning_rate": 1.4392528881018636e-05,
"loss": 0.2976,
"step": 66500
},
{
"epoch": 2.8079939286617757,
"grad_norm": 0.9779496192932129,
"learning_rate": 1.4384096466818452e-05,
"loss": 0.3006,
"step": 66600
},
{
"epoch": 2.8122101357618687,
"grad_norm": 1.180202603340149,
"learning_rate": 1.4375664052618266e-05,
"loss": 0.2886,
"step": 66700
},
{
"epoch": 2.8164263428619614,
"grad_norm": 1.1998876333236694,
"learning_rate": 1.436723163841808e-05,
"loss": 0.2888,
"step": 66800
},
{
"epoch": 2.820642549962054,
"grad_norm": 1.1736562252044678,
"learning_rate": 1.4358799224217894e-05,
"loss": 0.2894,
"step": 66900
},
{
"epoch": 2.824858757062147,
"grad_norm": 1.5462160110473633,
"learning_rate": 1.4350366810017708e-05,
"loss": 0.2958,
"step": 67000
},
{
"epoch": 2.8290749641622397,
"grad_norm": 1.0704152584075928,
"learning_rate": 1.4341934395817523e-05,
"loss": 0.2809,
"step": 67100
},
{
"epoch": 2.8332911712623323,
"grad_norm": 1.1228965520858765,
"learning_rate": 1.433350198161734e-05,
"loss": 0.2859,
"step": 67200
},
{
"epoch": 2.8375073783624254,
"grad_norm": 0.8643785715103149,
"learning_rate": 1.4325069567417154e-05,
"loss": 0.2812,
"step": 67300
},
{
"epoch": 2.841723585462518,
"grad_norm": 1.112558364868164,
"learning_rate": 1.4316637153216968e-05,
"loss": 0.2825,
"step": 67400
},
{
"epoch": 2.8459397925626106,
"grad_norm": 0.8024058938026428,
"learning_rate": 1.4308204739016782e-05,
"loss": 0.2807,
"step": 67500
},
{
"epoch": 2.8501559996627033,
"grad_norm": 1.2020084857940674,
"learning_rate": 1.4299772324816596e-05,
"loss": 0.2945,
"step": 67600
},
{
"epoch": 2.8543722067627963,
"grad_norm": 0.9170039296150208,
"learning_rate": 1.429133991061641e-05,
"loss": 0.2821,
"step": 67700
},
{
"epoch": 2.858588413862889,
"grad_norm": 1.3312523365020752,
"learning_rate": 1.4282907496416225e-05,
"loss": 0.2861,
"step": 67800
},
{
"epoch": 2.8628046209629816,
"grad_norm": 1.2471208572387695,
"learning_rate": 1.4274475082216039e-05,
"loss": 0.2832,
"step": 67900
},
{
"epoch": 2.8670208280630742,
"grad_norm": 1.0585116147994995,
"learning_rate": 1.4266042668015854e-05,
"loss": 0.2884,
"step": 68000
},
{
"epoch": 2.8670208280630742,
"eval_bleu": 11.4168,
"eval_bleurt": null,
"eval_chrfpp": 34.1465,
"eval_comet": 0.5446,
"eval_gen_len": 20.8719,
"eval_loss": 0.3076566457748413,
"eval_runtime": 1396.301,
"eval_samples_per_second": 33.972,
"eval_steps_per_second": 2.123,
"step": 68000
},
{
"epoch": 2.8712370351631673,
"grad_norm": 0.7719435095787048,
"learning_rate": 1.4257610253815668e-05,
"loss": 0.2931,
"step": 68100
},
{
"epoch": 2.87545324226326,
"grad_norm": 0.9555220007896423,
"learning_rate": 1.4249177839615483e-05,
"loss": 0.3001,
"step": 68200
},
{
"epoch": 2.8796694493633526,
"grad_norm": 1.2510745525360107,
"learning_rate": 1.4240745425415297e-05,
"loss": 0.2911,
"step": 68300
},
{
"epoch": 2.8838856564634456,
"grad_norm": 1.013611078262329,
"learning_rate": 1.4232313011215112e-05,
"loss": 0.2784,
"step": 68400
},
{
"epoch": 2.8881018635635383,
"grad_norm": 1.0333224534988403,
"learning_rate": 1.4223880597014928e-05,
"loss": 0.2862,
"step": 68500
},
{
"epoch": 2.892318070663631,
"grad_norm": 0.9320237040519714,
"learning_rate": 1.4215448182814742e-05,
"loss": 0.2913,
"step": 68600
},
{
"epoch": 2.896534277763724,
"grad_norm": 0.8303669691085815,
"learning_rate": 1.4207015768614556e-05,
"loss": 0.2892,
"step": 68700
},
{
"epoch": 2.9007504848638166,
"grad_norm": 0.9247381091117859,
"learning_rate": 1.419858335441437e-05,
"loss": 0.2853,
"step": 68800
},
{
"epoch": 2.904966691963909,
"grad_norm": 0.8872043490409851,
"learning_rate": 1.4190150940214185e-05,
"loss": 0.2772,
"step": 68900
},
{
"epoch": 2.9091828990640023,
"grad_norm": 1.039004921913147,
"learning_rate": 1.4181718526013999e-05,
"loss": 0.2749,
"step": 69000
},
{
"epoch": 2.913399106164095,
"grad_norm": 0.9193853139877319,
"learning_rate": 1.4173286111813813e-05,
"loss": 0.2809,
"step": 69100
},
{
"epoch": 2.9176153132641875,
"grad_norm": 1.53365957736969,
"learning_rate": 1.4164853697613627e-05,
"loss": 0.2907,
"step": 69200
},
{
"epoch": 2.92183152036428,
"grad_norm": 0.719420850276947,
"learning_rate": 1.4156421283413443e-05,
"loss": 0.2783,
"step": 69300
},
{
"epoch": 2.926047727464373,
"grad_norm": 0.9032190442085266,
"learning_rate": 1.4147988869213257e-05,
"loss": 0.2757,
"step": 69400
},
{
"epoch": 2.930263934564466,
"grad_norm": 0.9444390535354614,
"learning_rate": 1.413955645501307e-05,
"loss": 0.2884,
"step": 69500
},
{
"epoch": 2.9344801416645585,
"grad_norm": 1.3838810920715332,
"learning_rate": 1.4131124040812887e-05,
"loss": 0.2893,
"step": 69600
},
{
"epoch": 2.938696348764651,
"grad_norm": 1.2459306716918945,
"learning_rate": 1.41226916266127e-05,
"loss": 0.2893,
"step": 69700
},
{
"epoch": 2.942912555864744,
"grad_norm": 1.2073965072631836,
"learning_rate": 1.4114259212412515e-05,
"loss": 0.2844,
"step": 69800
},
{
"epoch": 2.947128762964837,
"grad_norm": 0.9537835717201233,
"learning_rate": 1.410582679821233e-05,
"loss": 0.2851,
"step": 69900
},
{
"epoch": 2.9513449700649295,
"grad_norm": 1.0783562660217285,
"learning_rate": 1.4097394384012145e-05,
"loss": 0.2769,
"step": 70000
},
{
"epoch": 2.9555611771650225,
"grad_norm": 1.0770165920257568,
"learning_rate": 1.4088961969811959e-05,
"loss": 0.2914,
"step": 70100
},
{
"epoch": 2.959777384265115,
"grad_norm": 1.1360630989074707,
"learning_rate": 1.4080529555611773e-05,
"loss": 0.2985,
"step": 70200
},
{
"epoch": 2.963993591365208,
"grad_norm": 0.7796394228935242,
"learning_rate": 1.4072097141411587e-05,
"loss": 0.2934,
"step": 70300
},
{
"epoch": 2.968209798465301,
"grad_norm": 1.3134346008300781,
"learning_rate": 1.4063664727211401e-05,
"loss": 0.2926,
"step": 70400
},
{
"epoch": 2.9724260055653935,
"grad_norm": 1.082274317741394,
"learning_rate": 1.4055232313011215e-05,
"loss": 0.2934,
"step": 70500
},
{
"epoch": 2.976642212665486,
"grad_norm": 1.1806004047393799,
"learning_rate": 1.4046799898811029e-05,
"loss": 0.2824,
"step": 70600
},
{
"epoch": 2.9808584197655787,
"grad_norm": 1.0039913654327393,
"learning_rate": 1.4038367484610845e-05,
"loss": 0.2887,
"step": 70700
},
{
"epoch": 2.9850746268656714,
"grad_norm": 0.9361388087272644,
"learning_rate": 1.402993507041066e-05,
"loss": 0.3021,
"step": 70800
},
{
"epoch": 2.9892908339657644,
"grad_norm": 0.9558672904968262,
"learning_rate": 1.4021502656210475e-05,
"loss": 0.2891,
"step": 70900
},
{
"epoch": 2.993507041065857,
"grad_norm": 1.2940090894699097,
"learning_rate": 1.4013070242010289e-05,
"loss": 0.2901,
"step": 71000
},
{
"epoch": 2.9977232481659497,
"grad_norm": 0.9156692028045654,
"learning_rate": 1.4004637827810103e-05,
"loss": 0.2858,
"step": 71100
},
{
"epoch": 3.0019394552660428,
"grad_norm": 1.4320809841156006,
"learning_rate": 1.3996205413609919e-05,
"loss": 0.2682,
"step": 71200
},
{
"epoch": 3.0061556623661354,
"grad_norm": 1.0174983739852905,
"learning_rate": 1.3987772999409733e-05,
"loss": 0.2601,
"step": 71300
},
{
"epoch": 3.010371869466228,
"grad_norm": 1.1944080591201782,
"learning_rate": 1.3979340585209547e-05,
"loss": 0.2587,
"step": 71400
},
{
"epoch": 3.014588076566321,
"grad_norm": 1.1349588632583618,
"learning_rate": 1.3970908171009361e-05,
"loss": 0.275,
"step": 71500
},
{
"epoch": 3.0188042836664137,
"grad_norm": 1.0986281633377075,
"learning_rate": 1.3962475756809175e-05,
"loss": 0.2624,
"step": 71600
},
{
"epoch": 3.0230204907665064,
"grad_norm": 1.050392746925354,
"learning_rate": 1.395404334260899e-05,
"loss": 0.2676,
"step": 71700
},
{
"epoch": 3.027236697866599,
"grad_norm": 2.1180052757263184,
"learning_rate": 1.3945610928408803e-05,
"loss": 0.2541,
"step": 71800
},
{
"epoch": 3.031452904966692,
"grad_norm": 1.1823991537094116,
"learning_rate": 1.3937178514208617e-05,
"loss": 0.2618,
"step": 71900
},
{
"epoch": 3.0356691120667847,
"grad_norm": 0.8114222288131714,
"learning_rate": 1.3928746100008435e-05,
"loss": 0.264,
"step": 72000
},
{
"epoch": 3.0356691120667847,
"eval_bleu": 11.3651,
"eval_bleurt": null,
"eval_chrfpp": 34.13,
"eval_comet": 0.5443,
"eval_gen_len": 20.8717,
"eval_loss": 0.3058512210845947,
"eval_runtime": 1368.271,
"eval_samples_per_second": 34.668,
"eval_steps_per_second": 2.167,
"step": 72000
},
{
"epoch": 3.0398853191668773,
"grad_norm": 0.9190216064453125,
"learning_rate": 1.3920313685808249e-05,
"loss": 0.256,
"step": 72100
},
{
"epoch": 3.0441015262669704,
"grad_norm": 1.0350401401519775,
"learning_rate": 1.3911881271608063e-05,
"loss": 0.2637,
"step": 72200
},
{
"epoch": 3.048317733367063,
"grad_norm": 1.3594918251037598,
"learning_rate": 1.3903448857407877e-05,
"loss": 0.258,
"step": 72300
},
{
"epoch": 3.0525339404671556,
"grad_norm": 0.9170486927032471,
"learning_rate": 1.3895016443207691e-05,
"loss": 0.2646,
"step": 72400
},
{
"epoch": 3.0567501475672487,
"grad_norm": 1.1841360330581665,
"learning_rate": 1.3886584029007505e-05,
"loss": 0.2685,
"step": 72500
},
{
"epoch": 3.0609663546673413,
"grad_norm": 0.9434228539466858,
"learning_rate": 1.3878151614807321e-05,
"loss": 0.2689,
"step": 72600
},
{
"epoch": 3.065182561767434,
"grad_norm": 0.9371470212936401,
"learning_rate": 1.3869719200607135e-05,
"loss": 0.2657,
"step": 72700
},
{
"epoch": 3.0693987688675266,
"grad_norm": 0.862743616104126,
"learning_rate": 1.386128678640695e-05,
"loss": 0.2554,
"step": 72800
},
{
"epoch": 3.0736149759676197,
"grad_norm": 1.0926709175109863,
"learning_rate": 1.3852854372206763e-05,
"loss": 0.2596,
"step": 72900
},
{
"epoch": 3.0778311830677123,
"grad_norm": 1.4358494281768799,
"learning_rate": 1.3844421958006577e-05,
"loss": 0.269,
"step": 73000
},
{
"epoch": 3.082047390167805,
"grad_norm": 1.0654805898666382,
"learning_rate": 1.3835989543806391e-05,
"loss": 0.2584,
"step": 73100
},
{
"epoch": 3.086263597267898,
"grad_norm": 1.1404222249984741,
"learning_rate": 1.3827557129606209e-05,
"loss": 0.2629,
"step": 73200
},
{
"epoch": 3.0904798043679906,
"grad_norm": 1.7569445371627808,
"learning_rate": 1.3819124715406023e-05,
"loss": 0.2762,
"step": 73300
},
{
"epoch": 3.0946960114680833,
"grad_norm": 1.0361456871032715,
"learning_rate": 1.3810692301205837e-05,
"loss": 0.2652,
"step": 73400
},
{
"epoch": 3.098912218568176,
"grad_norm": 1.2701947689056396,
"learning_rate": 1.3802259887005651e-05,
"loss": 0.272,
"step": 73500
},
{
"epoch": 3.103128425668269,
"grad_norm": 1.0151307582855225,
"learning_rate": 1.3793827472805465e-05,
"loss": 0.2643,
"step": 73600
},
{
"epoch": 3.1073446327683616,
"grad_norm": 1.0204205513000488,
"learning_rate": 1.378539505860528e-05,
"loss": 0.2533,
"step": 73700
},
{
"epoch": 3.111560839868454,
"grad_norm": 0.9302712678909302,
"learning_rate": 1.3776962644405094e-05,
"loss": 0.2656,
"step": 73800
},
{
"epoch": 3.1157770469685473,
"grad_norm": 0.9464835524559021,
"learning_rate": 1.3768530230204908e-05,
"loss": 0.2571,
"step": 73900
},
{
"epoch": 3.11999325406864,
"grad_norm": 0.83738774061203,
"learning_rate": 1.3760097816004723e-05,
"loss": 0.2612,
"step": 74000
},
{
"epoch": 3.1242094611687325,
"grad_norm": 1.519640326499939,
"learning_rate": 1.3751665401804537e-05,
"loss": 0.2708,
"step": 74100
},
{
"epoch": 3.128425668268825,
"grad_norm": 0.8798648715019226,
"learning_rate": 1.3743232987604352e-05,
"loss": 0.2619,
"step": 74200
},
{
"epoch": 3.1326418753689182,
"grad_norm": 1.1586161851882935,
"learning_rate": 1.3734800573404167e-05,
"loss": 0.2617,
"step": 74300
},
{
"epoch": 3.136858082469011,
"grad_norm": 1.134314775466919,
"learning_rate": 1.3726368159203981e-05,
"loss": 0.2664,
"step": 74400
},
{
"epoch": 3.1410742895691035,
"grad_norm": 0.8345361351966858,
"learning_rate": 1.3717935745003797e-05,
"loss": 0.2614,
"step": 74500
},
{
"epoch": 3.1452904966691966,
"grad_norm": 1.4608139991760254,
"learning_rate": 1.3709503330803611e-05,
"loss": 0.275,
"step": 74600
},
{
"epoch": 3.149506703769289,
"grad_norm": 1.0728867053985596,
"learning_rate": 1.3701070916603425e-05,
"loss": 0.2715,
"step": 74700
},
{
"epoch": 3.153722910869382,
"grad_norm": 0.96731036901474,
"learning_rate": 1.369263850240324e-05,
"loss": 0.2708,
"step": 74800
},
{
"epoch": 3.1579391179694745,
"grad_norm": 1.0788986682891846,
"learning_rate": 1.3684206088203054e-05,
"loss": 0.2666,
"step": 74900
},
{
"epoch": 3.1621553250695675,
"grad_norm": 1.2752233743667603,
"learning_rate": 1.3675773674002868e-05,
"loss": 0.2622,
"step": 75000
},
{
"epoch": 3.16637153216966,
"grad_norm": 1.037100076675415,
"learning_rate": 1.3667341259802682e-05,
"loss": 0.2663,
"step": 75100
},
{
"epoch": 3.170587739269753,
"grad_norm": 1.1296089887619019,
"learning_rate": 1.3658908845602496e-05,
"loss": 0.2693,
"step": 75200
},
{
"epoch": 3.174803946369846,
"grad_norm": 1.0998258590698242,
"learning_rate": 1.3650476431402312e-05,
"loss": 0.2659,
"step": 75300
},
{
"epoch": 3.1790201534699385,
"grad_norm": 0.9814489483833313,
"learning_rate": 1.3642044017202126e-05,
"loss": 0.2599,
"step": 75400
},
{
"epoch": 3.183236360570031,
"grad_norm": 1.1682645082473755,
"learning_rate": 1.3633611603001941e-05,
"loss": 0.2558,
"step": 75500
},
{
"epoch": 3.1874525676701237,
"grad_norm": 0.9016832113265991,
"learning_rate": 1.3625179188801756e-05,
"loss": 0.256,
"step": 75600
},
{
"epoch": 3.191668774770217,
"grad_norm": 1.146004319190979,
"learning_rate": 1.361674677460157e-05,
"loss": 0.261,
"step": 75700
},
{
"epoch": 3.1958849818703094,
"grad_norm": 0.9824726581573486,
"learning_rate": 1.3608314360401384e-05,
"loss": 0.2599,
"step": 75800
},
{
"epoch": 3.200101188970402,
"grad_norm": 0.9161651730537415,
"learning_rate": 1.35998819462012e-05,
"loss": 0.2707,
"step": 75900
},
{
"epoch": 3.204317396070495,
"grad_norm": 0.7958844900131226,
"learning_rate": 1.3591449532001014e-05,
"loss": 0.2653,
"step": 76000
},
{
"epoch": 3.204317396070495,
"eval_bleu": 11.5178,
"eval_bleurt": null,
"eval_chrfpp": 34.2395,
"eval_comet": 0.5445,
"eval_gen_len": 20.8749,
"eval_loss": 0.30260327458381653,
"eval_runtime": 1385.9778,
"eval_samples_per_second": 34.225,
"eval_steps_per_second": 2.139,
"step": 76000
},
{
"epoch": 3.2085336031705878,
"grad_norm": 1.1375194787979126,
"learning_rate": 1.3583017117800828e-05,
"loss": 0.28,
"step": 76100
},
{
"epoch": 3.2127498102706804,
"grad_norm": 1.3891476392745972,
"learning_rate": 1.3574584703600642e-05,
"loss": 0.2673,
"step": 76200
},
{
"epoch": 3.2169660173707735,
"grad_norm": 0.8804958462715149,
"learning_rate": 1.3566152289400456e-05,
"loss": 0.2609,
"step": 76300
},
{
"epoch": 3.221182224470866,
"grad_norm": 1.148685097694397,
"learning_rate": 1.355771987520027e-05,
"loss": 0.2584,
"step": 76400
},
{
"epoch": 3.2253984315709587,
"grad_norm": 1.2225135564804077,
"learning_rate": 1.3549287461000084e-05,
"loss": 0.2693,
"step": 76500
},
{
"epoch": 3.2296146386710514,
"grad_norm": 1.3453797101974487,
"learning_rate": 1.3540855046799898e-05,
"loss": 0.2582,
"step": 76600
},
{
"epoch": 3.2338308457711444,
"grad_norm": 1.1462249755859375,
"learning_rate": 1.3532422632599716e-05,
"loss": 0.261,
"step": 76700
},
{
"epoch": 3.238047052871237,
"grad_norm": 1.3225855827331543,
"learning_rate": 1.352399021839953e-05,
"loss": 0.2593,
"step": 76800
},
{
"epoch": 3.2422632599713297,
"grad_norm": 1.0181751251220703,
"learning_rate": 1.3515557804199344e-05,
"loss": 0.2735,
"step": 76900
},
{
"epoch": 3.2464794670714223,
"grad_norm": 1.4639792442321777,
"learning_rate": 1.3507125389999158e-05,
"loss": 0.2684,
"step": 77000
},
{
"epoch": 3.2506956741715154,
"grad_norm": 0.9273201823234558,
"learning_rate": 1.3498692975798972e-05,
"loss": 0.2691,
"step": 77100
},
{
"epoch": 3.254911881271608,
"grad_norm": 1.0048563480377197,
"learning_rate": 1.3490260561598788e-05,
"loss": 0.2694,
"step": 77200
},
{
"epoch": 3.2591280883717006,
"grad_norm": 0.7959649562835693,
"learning_rate": 1.3481828147398602e-05,
"loss": 0.2644,
"step": 77300
},
{
"epoch": 3.2633442954717937,
"grad_norm": 0.9960177540779114,
"learning_rate": 1.3473395733198416e-05,
"loss": 0.2634,
"step": 77400
},
{
"epoch": 3.2675605025718863,
"grad_norm": 0.8392547965049744,
"learning_rate": 1.346496331899823e-05,
"loss": 0.2641,
"step": 77500
},
{
"epoch": 3.271776709671979,
"grad_norm": 0.8676571846008301,
"learning_rate": 1.3456530904798044e-05,
"loss": 0.2651,
"step": 77600
},
{
"epoch": 3.275992916772072,
"grad_norm": 0.8622851371765137,
"learning_rate": 1.3448098490597858e-05,
"loss": 0.2579,
"step": 77700
},
{
"epoch": 3.2802091238721647,
"grad_norm": 1.0479894876480103,
"learning_rate": 1.3439666076397672e-05,
"loss": 0.2743,
"step": 77800
},
{
"epoch": 3.2844253309722573,
"grad_norm": 0.8865060806274414,
"learning_rate": 1.343123366219749e-05,
"loss": 0.2675,
"step": 77900
},
{
"epoch": 3.28864153807235,
"grad_norm": 0.9038619995117188,
"learning_rate": 1.3422801247997304e-05,
"loss": 0.2595,
"step": 78000
},
{
"epoch": 3.292857745172443,
"grad_norm": 1.3131364583969116,
"learning_rate": 1.3414368833797118e-05,
"loss": 0.2638,
"step": 78100
},
{
"epoch": 3.2970739522725356,
"grad_norm": 0.7389425039291382,
"learning_rate": 1.3405936419596932e-05,
"loss": 0.261,
"step": 78200
},
{
"epoch": 3.3012901593726283,
"grad_norm": 1.0259007215499878,
"learning_rate": 1.3397504005396746e-05,
"loss": 0.2699,
"step": 78300
},
{
"epoch": 3.305506366472721,
"grad_norm": 1.1851677894592285,
"learning_rate": 1.338907159119656e-05,
"loss": 0.2675,
"step": 78400
},
{
"epoch": 3.309722573572814,
"grad_norm": 1.12969172000885,
"learning_rate": 1.3380639176996374e-05,
"loss": 0.2663,
"step": 78500
},
{
"epoch": 3.3139387806729066,
"grad_norm": 0.6820844411849976,
"learning_rate": 1.337220676279619e-05,
"loss": 0.2741,
"step": 78600
},
{
"epoch": 3.318154987772999,
"grad_norm": 0.805769681930542,
"learning_rate": 1.3363774348596004e-05,
"loss": 0.2604,
"step": 78700
},
{
"epoch": 3.3223711948730923,
"grad_norm": 1.0743379592895508,
"learning_rate": 1.3355341934395818e-05,
"loss": 0.2718,
"step": 78800
},
{
"epoch": 3.326587401973185,
"grad_norm": 0.9101067185401917,
"learning_rate": 1.3346909520195632e-05,
"loss": 0.2663,
"step": 78900
},
{
"epoch": 3.3308036090732775,
"grad_norm": 1.1092708110809326,
"learning_rate": 1.3338477105995446e-05,
"loss": 0.2643,
"step": 79000
},
{
"epoch": 3.3350198161733706,
"grad_norm": 0.9594613313674927,
"learning_rate": 1.3330044691795264e-05,
"loss": 0.2634,
"step": 79100
},
{
"epoch": 3.3392360232734632,
"grad_norm": 0.7595967650413513,
"learning_rate": 1.3321612277595078e-05,
"loss": 0.2634,
"step": 79200
},
{
"epoch": 3.343452230373556,
"grad_norm": 1.062562346458435,
"learning_rate": 1.3313179863394892e-05,
"loss": 0.2641,
"step": 79300
},
{
"epoch": 3.347668437473649,
"grad_norm": 1.0648949146270752,
"learning_rate": 1.3304747449194706e-05,
"loss": 0.2768,
"step": 79400
},
{
"epoch": 3.3518846445737416,
"grad_norm": 1.2904027700424194,
"learning_rate": 1.329631503499452e-05,
"loss": 0.2666,
"step": 79500
},
{
"epoch": 3.356100851673834,
"grad_norm": 1.052890658378601,
"learning_rate": 1.3287882620794334e-05,
"loss": 0.2621,
"step": 79600
},
{
"epoch": 3.360317058773927,
"grad_norm": 0.9428207874298096,
"learning_rate": 1.3279450206594148e-05,
"loss": 0.2743,
"step": 79700
},
{
"epoch": 3.36453326587402,
"grad_norm": 1.0574067831039429,
"learning_rate": 1.3271017792393962e-05,
"loss": 0.2701,
"step": 79800
},
{
"epoch": 3.3687494729741125,
"grad_norm": 1.0170483589172363,
"learning_rate": 1.3262585378193778e-05,
"loss": 0.2719,
"step": 79900
},
{
"epoch": 3.372965680074205,
"grad_norm": 0.9573329091072083,
"learning_rate": 1.3254152963993592e-05,
"loss": 0.259,
"step": 80000
},
{
"epoch": 3.372965680074205,
"eval_bleu": 11.5551,
"eval_bleurt": null,
"eval_chrfpp": 34.3387,
"eval_comet": 0.5455,
"eval_gen_len": 20.8748,
"eval_loss": 0.29944270849227905,
"eval_runtime": 1379.6059,
"eval_samples_per_second": 34.383,
"eval_steps_per_second": 2.149,
"step": 80000
},
{
"epoch": 3.377181887174298,
"grad_norm": 1.0012997388839722,
"learning_rate": 1.3245720549793406e-05,
"loss": 0.2702,
"step": 80100
},
{
"epoch": 3.381398094274391,
"grad_norm": 1.2515530586242676,
"learning_rate": 1.323728813559322e-05,
"loss": 0.2662,
"step": 80200
},
{
"epoch": 3.3856143013744835,
"grad_norm": 1.2128268480300903,
"learning_rate": 1.3228855721393036e-05,
"loss": 0.2622,
"step": 80300
},
{
"epoch": 3.389830508474576,
"grad_norm": 0.9349983930587769,
"learning_rate": 1.322042330719285e-05,
"loss": 0.2524,
"step": 80400
},
{
"epoch": 3.394046715574669,
"grad_norm": 0.8696044087409973,
"learning_rate": 1.3211990892992666e-05,
"loss": 0.2622,
"step": 80500
},
{
"epoch": 3.398262922674762,
"grad_norm": 1.0299559831619263,
"learning_rate": 1.320355847879248e-05,
"loss": 0.2612,
"step": 80600
},
{
"epoch": 3.4024791297748544,
"grad_norm": 1.1014291048049927,
"learning_rate": 1.3195126064592294e-05,
"loss": 0.269,
"step": 80700
},
{
"epoch": 3.4066953368749475,
"grad_norm": 1.00558340549469,
"learning_rate": 1.3186693650392108e-05,
"loss": 0.2634,
"step": 80800
},
{
"epoch": 3.41091154397504,
"grad_norm": 0.9738940596580505,
"learning_rate": 1.3178261236191923e-05,
"loss": 0.2572,
"step": 80900
},
{
"epoch": 3.415127751075133,
"grad_norm": 0.9514461755752563,
"learning_rate": 1.3169828821991737e-05,
"loss": 0.2692,
"step": 81000
},
{
"epoch": 3.4193439581752254,
"grad_norm": 1.0373955965042114,
"learning_rate": 1.316139640779155e-05,
"loss": 0.252,
"step": 81100
},
{
"epoch": 3.4235601652753185,
"grad_norm": 1.02577805519104,
"learning_rate": 1.3152963993591365e-05,
"loss": 0.264,
"step": 81200
},
{
"epoch": 3.427776372375411,
"grad_norm": 1.1359772682189941,
"learning_rate": 1.314453157939118e-05,
"loss": 0.2577,
"step": 81300
},
{
"epoch": 3.4319925794755037,
"grad_norm": 1.2096680402755737,
"learning_rate": 1.3136099165190995e-05,
"loss": 0.2599,
"step": 81400
},
{
"epoch": 3.4362087865755964,
"grad_norm": 0.9178110957145691,
"learning_rate": 1.312766675099081e-05,
"loss": 0.2806,
"step": 81500
},
{
"epoch": 3.4404249936756894,
"grad_norm": 1.1373695135116577,
"learning_rate": 1.3119234336790625e-05,
"loss": 0.2569,
"step": 81600
},
{
"epoch": 3.444641200775782,
"grad_norm": 1.1340562105178833,
"learning_rate": 1.3110801922590439e-05,
"loss": 0.2647,
"step": 81700
},
{
"epoch": 3.4488574078758747,
"grad_norm": 0.8706813454627991,
"learning_rate": 1.3102369508390254e-05,
"loss": 0.2629,
"step": 81800
},
{
"epoch": 3.4530736149759678,
"grad_norm": 1.1597013473510742,
"learning_rate": 1.3093937094190068e-05,
"loss": 0.2644,
"step": 81900
},
{
"epoch": 3.4572898220760604,
"grad_norm": 0.9667551517486572,
"learning_rate": 1.3085504679989883e-05,
"loss": 0.2599,
"step": 82000
},
{
"epoch": 3.461506029176153,
"grad_norm": 1.319297194480896,
"learning_rate": 1.3077072265789697e-05,
"loss": 0.2536,
"step": 82100
},
{
"epoch": 3.465722236276246,
"grad_norm": 1.0382535457611084,
"learning_rate": 1.306863985158951e-05,
"loss": 0.2639,
"step": 82200
},
{
"epoch": 3.4699384433763387,
"grad_norm": 1.1606062650680542,
"learning_rate": 1.3060207437389325e-05,
"loss": 0.2602,
"step": 82300
},
{
"epoch": 3.4741546504764313,
"grad_norm": 0.8028622269630432,
"learning_rate": 1.3051775023189139e-05,
"loss": 0.2652,
"step": 82400
},
{
"epoch": 3.478370857576524,
"grad_norm": 1.2183704376220703,
"learning_rate": 1.3043342608988953e-05,
"loss": 0.2639,
"step": 82500
},
{
"epoch": 3.482587064676617,
"grad_norm": 1.0649685859680176,
"learning_rate": 1.303491019478877e-05,
"loss": 0.2601,
"step": 82600
},
{
"epoch": 3.4868032717767097,
"grad_norm": 0.9648706316947937,
"learning_rate": 1.3026477780588585e-05,
"loss": 0.2594,
"step": 82700
},
{
"epoch": 3.4910194788768023,
"grad_norm": 1.0118120908737183,
"learning_rate": 1.3018045366388399e-05,
"loss": 0.2598,
"step": 82800
},
{
"epoch": 3.4952356859768954,
"grad_norm": 0.8838013410568237,
"learning_rate": 1.3009612952188213e-05,
"loss": 0.2677,
"step": 82900
},
{
"epoch": 3.499451893076988,
"grad_norm": 1.0722988843917847,
"learning_rate": 1.3001180537988027e-05,
"loss": 0.2644,
"step": 83000
},
{
"epoch": 3.5036681001770806,
"grad_norm": 1.016909122467041,
"learning_rate": 1.2992748123787841e-05,
"loss": 0.2568,
"step": 83100
},
{
"epoch": 3.5078843072771733,
"grad_norm": 0.9746114611625671,
"learning_rate": 1.2984315709587657e-05,
"loss": 0.2703,
"step": 83200
},
{
"epoch": 3.5121005143772663,
"grad_norm": 1.0796164274215698,
"learning_rate": 1.297588329538747e-05,
"loss": 0.2494,
"step": 83300
},
{
"epoch": 3.516316721477359,
"grad_norm": 0.9735890030860901,
"learning_rate": 1.2967450881187285e-05,
"loss": 0.2587,
"step": 83400
},
{
"epoch": 3.5205329285774516,
"grad_norm": 0.9859138131141663,
"learning_rate": 1.2959018466987099e-05,
"loss": 0.2733,
"step": 83500
},
{
"epoch": 3.5247491356775447,
"grad_norm": 1.0381730794906616,
"learning_rate": 1.2950586052786913e-05,
"loss": 0.2641,
"step": 83600
},
{
"epoch": 3.5289653427776373,
"grad_norm": 0.8919042348861694,
"learning_rate": 1.2942153638586727e-05,
"loss": 0.2571,
"step": 83700
},
{
"epoch": 3.53318154987773,
"grad_norm": 0.9157905578613281,
"learning_rate": 1.2933721224386545e-05,
"loss": 0.2703,
"step": 83800
},
{
"epoch": 3.537397756977823,
"grad_norm": 1.2394428253173828,
"learning_rate": 1.2925288810186359e-05,
"loss": 0.2614,
"step": 83900
},
{
"epoch": 3.5416139640779156,
"grad_norm": 0.7191671133041382,
"learning_rate": 1.2916856395986173e-05,
"loss": 0.2635,
"step": 84000
},
{
"epoch": 3.5416139640779156,
"eval_bleu": 11.6956,
"eval_bleurt": null,
"eval_chrfpp": 34.4214,
"eval_comet": 0.5453,
"eval_gen_len": 20.8742,
"eval_loss": 0.2975287437438965,
"eval_runtime": 1344.7261,
"eval_samples_per_second": 35.275,
"eval_steps_per_second": 2.205,
"step": 84000
},
{
"epoch": 3.5458301711780082,
"grad_norm": 0.8943409323692322,
"learning_rate": 1.2908423981785987e-05,
"loss": 0.2593,
"step": 84100
},
{
"epoch": 3.550046378278101,
"grad_norm": 0.978228747844696,
"learning_rate": 1.2899991567585801e-05,
"loss": 0.272,
"step": 84200
},
{
"epoch": 3.5542625853781935,
"grad_norm": 0.7906679511070251,
"learning_rate": 1.2891559153385615e-05,
"loss": 0.2636,
"step": 84300
},
{
"epoch": 3.5584787924782866,
"grad_norm": 1.0755661725997925,
"learning_rate": 1.2883126739185429e-05,
"loss": 0.2673,
"step": 84400
},
{
"epoch": 3.562694999578379,
"grad_norm": 1.0431406497955322,
"learning_rate": 1.2874694324985245e-05,
"loss": 0.2591,
"step": 84500
},
{
"epoch": 3.566911206678472,
"grad_norm": 1.1435611248016357,
"learning_rate": 1.2866261910785059e-05,
"loss": 0.2745,
"step": 84600
},
{
"epoch": 3.571127413778565,
"grad_norm": 1.0531567335128784,
"learning_rate": 1.2857829496584873e-05,
"loss": 0.2641,
"step": 84700
},
{
"epoch": 3.5753436208786575,
"grad_norm": 1.1592568159103394,
"learning_rate": 1.2849397082384687e-05,
"loss": 0.2602,
"step": 84800
},
{
"epoch": 3.57955982797875,
"grad_norm": 0.975192666053772,
"learning_rate": 1.2840964668184501e-05,
"loss": 0.2643,
"step": 84900
},
{
"epoch": 3.5837760350788432,
"grad_norm": 1.0815240144729614,
"learning_rate": 1.2832532253984317e-05,
"loss": 0.2566,
"step": 85000
},
{
"epoch": 3.587992242178936,
"grad_norm": 0.9638839960098267,
"learning_rate": 1.2824099839784133e-05,
"loss": 0.256,
"step": 85100
},
{
"epoch": 3.5922084492790285,
"grad_norm": 0.9916542768478394,
"learning_rate": 1.2815667425583947e-05,
"loss": 0.266,
"step": 85200
},
{
"epoch": 3.5964246563791216,
"grad_norm": 0.7632570266723633,
"learning_rate": 1.2807235011383761e-05,
"loss": 0.255,
"step": 85300
},
{
"epoch": 3.600640863479214,
"grad_norm": 1.1871920824050903,
"learning_rate": 1.2798802597183575e-05,
"loss": 0.2631,
"step": 85400
},
{
"epoch": 3.604857070579307,
"grad_norm": 1.2854896783828735,
"learning_rate": 1.279037018298339e-05,
"loss": 0.2681,
"step": 85500
},
{
"epoch": 3.6090732776793994,
"grad_norm": 1.0291328430175781,
"learning_rate": 1.2781937768783203e-05,
"loss": 0.2668,
"step": 85600
},
{
"epoch": 3.6132894847794925,
"grad_norm": 1.1781797409057617,
"learning_rate": 1.2773505354583017e-05,
"loss": 0.2684,
"step": 85700
},
{
"epoch": 3.617505691879585,
"grad_norm": 1.0925040245056152,
"learning_rate": 1.2765072940382831e-05,
"loss": 0.2631,
"step": 85800
},
{
"epoch": 3.621721898979678,
"grad_norm": 1.1996210813522339,
"learning_rate": 1.2756640526182647e-05,
"loss": 0.2543,
"step": 85900
},
{
"epoch": 3.6259381060797704,
"grad_norm": 1.081076979637146,
"learning_rate": 1.2748208111982461e-05,
"loss": 0.2647,
"step": 86000
},
{
"epoch": 3.6301543131798635,
"grad_norm": 1.1334370374679565,
"learning_rate": 1.2739775697782275e-05,
"loss": 0.2657,
"step": 86100
},
{
"epoch": 3.634370520279956,
"grad_norm": 1.1829060316085815,
"learning_rate": 1.2731343283582091e-05,
"loss": 0.2628,
"step": 86200
},
{
"epoch": 3.6385867273800487,
"grad_norm": 0.9868782758712769,
"learning_rate": 1.2722910869381905e-05,
"loss": 0.2728,
"step": 86300
},
{
"epoch": 3.642802934480142,
"grad_norm": 1.0498135089874268,
"learning_rate": 1.271447845518172e-05,
"loss": 0.2621,
"step": 86400
},
{
"epoch": 3.6470191415802344,
"grad_norm": 1.3446903228759766,
"learning_rate": 1.2706046040981535e-05,
"loss": 0.2667,
"step": 86500
},
{
"epoch": 3.651235348680327,
"grad_norm": 1.084639072418213,
"learning_rate": 1.269761362678135e-05,
"loss": 0.27,
"step": 86600
},
{
"epoch": 3.65545155578042,
"grad_norm": 1.2545477151870728,
"learning_rate": 1.2689181212581163e-05,
"loss": 0.259,
"step": 86700
},
{
"epoch": 3.6596677628805128,
"grad_norm": 1.0665180683135986,
"learning_rate": 1.2680748798380977e-05,
"loss": 0.2604,
"step": 86800
},
{
"epoch": 3.6638839699806054,
"grad_norm": 1.826547384262085,
"learning_rate": 1.2672316384180791e-05,
"loss": 0.2746,
"step": 86900
},
{
"epoch": 3.6681001770806985,
"grad_norm": 1.2480037212371826,
"learning_rate": 1.2663883969980606e-05,
"loss": 0.2674,
"step": 87000
},
{
"epoch": 3.672316384180791,
"grad_norm": 1.1907069683074951,
"learning_rate": 1.265545155578042e-05,
"loss": 0.2825,
"step": 87100
},
{
"epoch": 3.6765325912808837,
"grad_norm": 0.9594092965126038,
"learning_rate": 1.2647019141580234e-05,
"loss": 0.2723,
"step": 87200
},
{
"epoch": 3.6807487983809763,
"grad_norm": 1.1402744054794312,
"learning_rate": 1.263858672738005e-05,
"loss": 0.2647,
"step": 87300
},
{
"epoch": 3.684965005481069,
"grad_norm": 1.1719417572021484,
"learning_rate": 1.2630154313179865e-05,
"loss": 0.2661,
"step": 87400
},
{
"epoch": 3.689181212581162,
"grad_norm": 1.0842151641845703,
"learning_rate": 1.262172189897968e-05,
"loss": 0.2643,
"step": 87500
},
{
"epoch": 3.6933974196812547,
"grad_norm": 0.8954026103019714,
"learning_rate": 1.2613289484779493e-05,
"loss": 0.264,
"step": 87600
},
{
"epoch": 3.6976136267813473,
"grad_norm": 0.8815357089042664,
"learning_rate": 1.2604857070579308e-05,
"loss": 0.2618,
"step": 87700
},
{
"epoch": 3.7018298338814404,
"grad_norm": 0.9481285214424133,
"learning_rate": 1.2596424656379123e-05,
"loss": 0.2525,
"step": 87800
},
{
"epoch": 3.706046040981533,
"grad_norm": 1.143824577331543,
"learning_rate": 1.2587992242178937e-05,
"loss": 0.2661,
"step": 87900
},
{
"epoch": 3.7102622480816256,
"grad_norm": 1.0877296924591064,
"learning_rate": 1.2579559827978752e-05,
"loss": 0.2528,
"step": 88000
},
{
"epoch": 3.7102622480816256,
"eval_bleu": 11.7437,
"eval_bleurt": null,
"eval_chrfpp": 34.4824,
"eval_comet": 0.5455,
"eval_gen_len": 20.8768,
"eval_loss": 0.2948751747608185,
"eval_runtime": 1328.1507,
"eval_samples_per_second": 35.715,
"eval_steps_per_second": 2.232,
"step": 88000
},
{
"epoch": 3.7144784551817187,
"grad_norm": 0.8538644313812256,
"learning_rate": 1.2571127413778566e-05,
"loss": 0.2588,
"step": 88100
},
{
"epoch": 3.7186946622818113,
"grad_norm": 0.7428072094917297,
"learning_rate": 1.256269499957838e-05,
"loss": 0.2604,
"step": 88200
},
{
"epoch": 3.722910869381904,
"grad_norm": 0.9546633362770081,
"learning_rate": 1.2554262585378194e-05,
"loss": 0.2636,
"step": 88300
},
{
"epoch": 3.727127076481997,
"grad_norm": 1.0499143600463867,
"learning_rate": 1.2545830171178008e-05,
"loss": 0.2483,
"step": 88400
},
{
"epoch": 3.7313432835820897,
"grad_norm": 0.892047643661499,
"learning_rate": 1.2537397756977822e-05,
"loss": 0.2567,
"step": 88500
},
{
"epoch": 3.7355594906821823,
"grad_norm": 0.9587951302528381,
"learning_rate": 1.252896534277764e-05,
"loss": 0.2694,
"step": 88600
},
{
"epoch": 3.739775697782275,
"grad_norm": 1.1876782178878784,
"learning_rate": 1.2520532928577454e-05,
"loss": 0.2626,
"step": 88700
},
{
"epoch": 3.7439919048823675,
"grad_norm": 0.9685199856758118,
"learning_rate": 1.2512100514377268e-05,
"loss": 0.2623,
"step": 88800
},
{
"epoch": 3.7482081119824606,
"grad_norm": 1.2437102794647217,
"learning_rate": 1.2503668100177082e-05,
"loss": 0.2553,
"step": 88900
},
{
"epoch": 3.7524243190825533,
"grad_norm": 0.9781164526939392,
"learning_rate": 1.2495235685976896e-05,
"loss": 0.2614,
"step": 89000
},
{
"epoch": 3.756640526182646,
"grad_norm": 1.0566153526306152,
"learning_rate": 1.248680327177671e-05,
"loss": 0.2673,
"step": 89100
},
{
"epoch": 3.760856733282739,
"grad_norm": 0.990650475025177,
"learning_rate": 1.2478370857576526e-05,
"loss": 0.2666,
"step": 89200
},
{
"epoch": 3.7650729403828316,
"grad_norm": 1.4134660959243774,
"learning_rate": 1.246993844337634e-05,
"loss": 0.2627,
"step": 89300
},
{
"epoch": 3.769289147482924,
"grad_norm": 0.844741940498352,
"learning_rate": 1.2461506029176154e-05,
"loss": 0.255,
"step": 89400
},
{
"epoch": 3.7735053545830173,
"grad_norm": 1.163191795349121,
"learning_rate": 1.2453073614975968e-05,
"loss": 0.2581,
"step": 89500
},
{
"epoch": 3.77772156168311,
"grad_norm": 1.472217082977295,
"learning_rate": 1.2444641200775782e-05,
"loss": 0.2619,
"step": 89600
},
{
"epoch": 3.7819377687832025,
"grad_norm": 0.9781330823898315,
"learning_rate": 1.24362087865756e-05,
"loss": 0.2537,
"step": 89700
},
{
"epoch": 3.7861539758832956,
"grad_norm": 1.3486849069595337,
"learning_rate": 1.2427776372375414e-05,
"loss": 0.2613,
"step": 89800
},
{
"epoch": 3.7903701829833882,
"grad_norm": 0.9725906848907471,
"learning_rate": 1.2419343958175228e-05,
"loss": 0.2559,
"step": 89900
},
{
"epoch": 3.794586390083481,
"grad_norm": 1.3243602514266968,
"learning_rate": 1.2410911543975042e-05,
"loss": 0.26,
"step": 90000
},
{
"epoch": 3.7988025971835735,
"grad_norm": 1.0398342609405518,
"learning_rate": 1.2402479129774856e-05,
"loss": 0.2559,
"step": 90100
},
{
"epoch": 3.8030188042836666,
"grad_norm": 1.1146111488342285,
"learning_rate": 1.239404671557467e-05,
"loss": 0.2634,
"step": 90200
},
{
"epoch": 3.807235011383759,
"grad_norm": 1.127182960510254,
"learning_rate": 1.2385614301374484e-05,
"loss": 0.2606,
"step": 90300
},
{
"epoch": 3.811451218483852,
"grad_norm": 1.015506386756897,
"learning_rate": 1.2377181887174298e-05,
"loss": 0.2678,
"step": 90400
},
{
"epoch": 3.8156674255839444,
"grad_norm": 1.116790533065796,
"learning_rate": 1.2368749472974114e-05,
"loss": 0.2509,
"step": 90500
},
{
"epoch": 3.8198836326840375,
"grad_norm": 0.8489646911621094,
"learning_rate": 1.2360317058773928e-05,
"loss": 0.2583,
"step": 90600
},
{
"epoch": 3.82409983978413,
"grad_norm": 1.4936515092849731,
"learning_rate": 1.2351884644573742e-05,
"loss": 0.2573,
"step": 90700
},
{
"epoch": 3.828316046884223,
"grad_norm": 1.065934419631958,
"learning_rate": 1.2343452230373556e-05,
"loss": 0.2634,
"step": 90800
},
{
"epoch": 3.832532253984316,
"grad_norm": 1.003338098526001,
"learning_rate": 1.2335019816173372e-05,
"loss": 0.268,
"step": 90900
},
{
"epoch": 3.8367484610844085,
"grad_norm": 1.0324583053588867,
"learning_rate": 1.2326587401973186e-05,
"loss": 0.2549,
"step": 91000
},
{
"epoch": 3.840964668184501,
"grad_norm": 1.2623025178909302,
"learning_rate": 1.2318154987773002e-05,
"loss": 0.2692,
"step": 91100
},
{
"epoch": 3.845180875284594,
"grad_norm": 0.9342853426933289,
"learning_rate": 1.2309722573572816e-05,
"loss": 0.254,
"step": 91200
},
{
"epoch": 3.849397082384687,
"grad_norm": 1.087953805923462,
"learning_rate": 1.230129015937263e-05,
"loss": 0.2584,
"step": 91300
},
{
"epoch": 3.8536132894847794,
"grad_norm": 0.8858796954154968,
"learning_rate": 1.2292857745172444e-05,
"loss": 0.249,
"step": 91400
},
{
"epoch": 3.8578294965848725,
"grad_norm": 0.9204941987991333,
"learning_rate": 1.2284425330972258e-05,
"loss": 0.2557,
"step": 91500
},
{
"epoch": 3.862045703684965,
"grad_norm": 1.2459721565246582,
"learning_rate": 1.2275992916772072e-05,
"loss": 0.2699,
"step": 91600
},
{
"epoch": 3.8662619107850578,
"grad_norm": 0.9872584342956543,
"learning_rate": 1.2267560502571886e-05,
"loss": 0.2583,
"step": 91700
},
{
"epoch": 3.8704781178851504,
"grad_norm": 1.076714277267456,
"learning_rate": 1.22591280883717e-05,
"loss": 0.264,
"step": 91800
},
{
"epoch": 3.874694324985243,
"grad_norm": 1.9756258726119995,
"learning_rate": 1.2250695674171516e-05,
"loss": 0.2596,
"step": 91900
},
{
"epoch": 3.878910532085336,
"grad_norm": 0.9622089266777039,
"learning_rate": 1.224226325997133e-05,
"loss": 0.2579,
"step": 92000
},
{
"epoch": 3.878910532085336,
"eval_bleu": 11.8761,
"eval_bleurt": null,
"eval_chrfpp": 34.5875,
"eval_comet": 0.5465,
"eval_gen_len": 20.8682,
"eval_loss": 0.29051172733306885,
"eval_runtime": 1094.2642,
"eval_samples_per_second": 43.349,
"eval_steps_per_second": 2.71,
"step": 92000
}
],
"logging_steps": 100,
"max_steps": 237180,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 4000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 9.060418285102694e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}