{
"best_global_step": 18000,
"best_metric": 87.79281231152616,
"best_model_checkpoint": "checkpoints_7B_lora_translated/ru-kz-final/checkpoint-18000",
"epoch": 1.041572321347704,
"eval_steps": 1000,
"global_step": 23000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00045285753102074087,
"grad_norm": 3.4883720874786377,
"learning_rate": 8.148483476686284e-07,
"loss": 5.0788,
"step": 10
},
{
"epoch": 0.0009057150620414817,
"grad_norm": 4.04372501373291,
"learning_rate": 1.720235400633771e-06,
"loss": 5.133,
"step": 20
},
{
"epoch": 0.0013585725930622225,
"grad_norm": 4.182616233825684,
"learning_rate": 2.6256224535989136e-06,
"loss": 5.1652,
"step": 30
},
{
"epoch": 0.0018114301240829635,
"grad_norm": 4.259949684143066,
"learning_rate": 3.5310095065640563e-06,
"loss": 4.9479,
"step": 40
},
{
"epoch": 0.002264287655103704,
"grad_norm": 4.507060527801514,
"learning_rate": 4.4363965595291986e-06,
"loss": 4.9178,
"step": 50
},
{
"epoch": 0.002717145186124445,
"grad_norm": 7.0745086669921875,
"learning_rate": 5.341783612494342e-06,
"loss": 4.558,
"step": 60
},
{
"epoch": 0.003170002717145186,
"grad_norm": 9.023025512695312,
"learning_rate": 6.247170665459484e-06,
"loss": 3.7948,
"step": 70
},
{
"epoch": 0.003622860248165927,
"grad_norm": 7.761899948120117,
"learning_rate": 7.152557718424627e-06,
"loss": 3.2346,
"step": 80
},
{
"epoch": 0.004075717779186668,
"grad_norm": 4.877920150756836,
"learning_rate": 8.05794477138977e-06,
"loss": 2.0696,
"step": 90
},
{
"epoch": 0.004528575310207408,
"grad_norm": 1.9245119094848633,
"learning_rate": 8.963331824354912e-06,
"loss": 1.4604,
"step": 100
},
{
"epoch": 0.00498143284122815,
"grad_norm": 2.389509439468384,
"learning_rate": 9.868718877320054e-06,
"loss": 1.2735,
"step": 110
},
{
"epoch": 0.00543429037224889,
"grad_norm": 1.2380106449127197,
"learning_rate": 1.0774105930285198e-05,
"loss": 1.1165,
"step": 120
},
{
"epoch": 0.0058871479032696315,
"grad_norm": 0.7591245770454407,
"learning_rate": 1.167949298325034e-05,
"loss": 0.9562,
"step": 130
},
{
"epoch": 0.006340005434290372,
"grad_norm": 0.6966114044189453,
"learning_rate": 1.2584880036215482e-05,
"loss": 0.8385,
"step": 140
},
{
"epoch": 0.006792862965311113,
"grad_norm": 0.6013082265853882,
"learning_rate": 1.3490267089180624e-05,
"loss": 0.7487,
"step": 150
},
{
"epoch": 0.007245720496331854,
"grad_norm": 0.5370333790779114,
"learning_rate": 1.4395654142145767e-05,
"loss": 0.7016,
"step": 160
},
{
"epoch": 0.007698578027352595,
"grad_norm": 0.4565121531486511,
"learning_rate": 1.530104119511091e-05,
"loss": 0.6324,
"step": 170
},
{
"epoch": 0.008151435558373336,
"grad_norm": 0.4546894431114197,
"learning_rate": 1.6206428248076053e-05,
"loss": 0.6874,
"step": 180
},
{
"epoch": 0.008604293089394076,
"grad_norm": 0.38576531410217285,
"learning_rate": 1.7111815301041197e-05,
"loss": 0.6267,
"step": 190
},
{
"epoch": 0.009057150620414817,
"grad_norm": 0.4952828884124756,
"learning_rate": 1.8017202354006337e-05,
"loss": 0.5919,
"step": 200
},
{
"epoch": 0.009510008151435559,
"grad_norm": 0.3792784512042999,
"learning_rate": 1.892258940697148e-05,
"loss": 0.6007,
"step": 210
},
{
"epoch": 0.0099628656824563,
"grad_norm": 0.46156740188598633,
"learning_rate": 1.9827976459936622e-05,
"loss": 0.5762,
"step": 220
},
{
"epoch": 0.01041572321347704,
"grad_norm": 0.4529961347579956,
"learning_rate": 2.0733363512901766e-05,
"loss": 0.55,
"step": 230
},
{
"epoch": 0.01086858074449778,
"grad_norm": 0.47309136390686035,
"learning_rate": 2.163875056586691e-05,
"loss": 0.5293,
"step": 240
},
{
"epoch": 0.011321438275518523,
"grad_norm": 0.4536254405975342,
"learning_rate": 2.254413761883205e-05,
"loss": 0.4997,
"step": 250
},
{
"epoch": 0.011774295806539263,
"grad_norm": 0.6276798248291016,
"learning_rate": 2.3449524671797194e-05,
"loss": 0.541,
"step": 260
},
{
"epoch": 0.012227153337560004,
"grad_norm": 0.5525098443031311,
"learning_rate": 2.4354911724762335e-05,
"loss": 0.538,
"step": 270
},
{
"epoch": 0.012680010868580744,
"grad_norm": 0.40901291370391846,
"learning_rate": 2.5260298777727482e-05,
"loss": 0.5161,
"step": 280
},
{
"epoch": 0.013132868399601485,
"grad_norm": 0.46389591693878174,
"learning_rate": 2.6165685830692623e-05,
"loss": 0.4911,
"step": 290
},
{
"epoch": 0.013585725930622227,
"grad_norm": 0.5157365798950195,
"learning_rate": 2.7071072883657767e-05,
"loss": 0.5064,
"step": 300
},
{
"epoch": 0.014038583461642967,
"grad_norm": 0.5174415707588196,
"learning_rate": 2.7976459936622907e-05,
"loss": 0.4649,
"step": 310
},
{
"epoch": 0.014491440992663708,
"grad_norm": 0.5348315834999084,
"learning_rate": 2.888184698958805e-05,
"loss": 0.4376,
"step": 320
},
{
"epoch": 0.014944298523684448,
"grad_norm": 0.592741072177887,
"learning_rate": 2.9787234042553192e-05,
"loss": 0.4873,
"step": 330
},
{
"epoch": 0.01539715605470519,
"grad_norm": 0.6220499277114868,
"learning_rate": 3.0692621095518336e-05,
"loss": 0.4712,
"step": 340
},
{
"epoch": 0.01585001358572593,
"grad_norm": 0.6278738975524902,
"learning_rate": 3.159800814848348e-05,
"loss": 0.4564,
"step": 350
},
{
"epoch": 0.01630287111674667,
"grad_norm": 0.7244141697883606,
"learning_rate": 3.2503395201448624e-05,
"loss": 0.4701,
"step": 360
},
{
"epoch": 0.016755728647767414,
"grad_norm": 0.6748137474060059,
"learning_rate": 3.340878225441376e-05,
"loss": 0.4641,
"step": 370
},
{
"epoch": 0.017208586178788152,
"grad_norm": 0.5243514180183411,
"learning_rate": 3.4314169307378905e-05,
"loss": 0.445,
"step": 380
},
{
"epoch": 0.017661443709808895,
"grad_norm": 0.7680507302284241,
"learning_rate": 3.521955636034405e-05,
"loss": 0.4426,
"step": 390
},
{
"epoch": 0.018114301240829633,
"grad_norm": 0.5714876651763916,
"learning_rate": 3.612494341330919e-05,
"loss": 0.4399,
"step": 400
},
{
"epoch": 0.018567158771850376,
"grad_norm": 0.8051562905311584,
"learning_rate": 3.703033046627433e-05,
"loss": 0.4358,
"step": 410
},
{
"epoch": 0.019020016302871118,
"grad_norm": 0.6920434236526489,
"learning_rate": 3.793571751923948e-05,
"loss": 0.4504,
"step": 420
},
{
"epoch": 0.019472873833891857,
"grad_norm": 0.548530101776123,
"learning_rate": 3.884110457220462e-05,
"loss": 0.4391,
"step": 430
},
{
"epoch": 0.0199257313649126,
"grad_norm": 0.8991818428039551,
"learning_rate": 3.974649162516976e-05,
"loss": 0.4269,
"step": 440
},
{
"epoch": 0.02037858889593334,
"grad_norm": 0.7665138244628906,
"learning_rate": 4.0651878678134906e-05,
"loss": 0.4311,
"step": 450
},
{
"epoch": 0.02083144642695408,
"grad_norm": 0.7382726669311523,
"learning_rate": 4.155726573110005e-05,
"loss": 0.3808,
"step": 460
},
{
"epoch": 0.021284303957974822,
"grad_norm": 0.7119817733764648,
"learning_rate": 4.246265278406519e-05,
"loss": 0.4154,
"step": 470
},
{
"epoch": 0.02173716148899556,
"grad_norm": 0.6397897005081177,
"learning_rate": 4.336803983703033e-05,
"loss": 0.4125,
"step": 480
},
{
"epoch": 0.022190019020016303,
"grad_norm": 0.6061927080154419,
"learning_rate": 4.4273426889995475e-05,
"loss": 0.4004,
"step": 490
},
{
"epoch": 0.022642876551037045,
"grad_norm": 0.6459689140319824,
"learning_rate": 4.517881394296062e-05,
"loss": 0.3976,
"step": 500
},
{
"epoch": 0.023095734082057784,
"grad_norm": 0.8386490345001221,
"learning_rate": 4.6084200995925756e-05,
"loss": 0.4249,
"step": 510
},
{
"epoch": 0.023548591613078526,
"grad_norm": 0.6931592226028442,
"learning_rate": 4.6989588048890906e-05,
"loss": 0.4099,
"step": 520
},
{
"epoch": 0.024001449144099265,
"grad_norm": 0.6584775447845459,
"learning_rate": 4.7894975101856044e-05,
"loss": 0.405,
"step": 530
},
{
"epoch": 0.024454306675120007,
"grad_norm": 0.728175938129425,
"learning_rate": 4.880036215482119e-05,
"loss": 0.4054,
"step": 540
},
{
"epoch": 0.02490716420614075,
"grad_norm": 0.8367078304290771,
"learning_rate": 4.970574920778633e-05,
"loss": 0.3941,
"step": 550
},
{
"epoch": 0.025360021737161488,
"grad_norm": 0.8447272777557373,
"learning_rate": 5.061113626075147e-05,
"loss": 0.3893,
"step": 560
},
{
"epoch": 0.02581287926818223,
"grad_norm": 0.8425686359405518,
"learning_rate": 5.151652331371661e-05,
"loss": 0.4156,
"step": 570
},
{
"epoch": 0.02626573679920297,
"grad_norm": 0.8669265508651733,
"learning_rate": 5.2421910366681757e-05,
"loss": 0.4148,
"step": 580
},
{
"epoch": 0.02671859433022371,
"grad_norm": 0.7871622443199158,
"learning_rate": 5.332729741964691e-05,
"loss": 0.3944,
"step": 590
},
{
"epoch": 0.027171451861244453,
"grad_norm": 0.7147888541221619,
"learning_rate": 5.423268447261205e-05,
"loss": 0.4041,
"step": 600
},
{
"epoch": 0.027624309392265192,
"grad_norm": 0.8785873651504517,
"learning_rate": 5.513807152557718e-05,
"loss": 0.3851,
"step": 610
},
{
"epoch": 0.028077166923285934,
"grad_norm": 0.7281315326690674,
"learning_rate": 5.604345857854233e-05,
"loss": 0.4027,
"step": 620
},
{
"epoch": 0.028530024454306677,
"grad_norm": 0.9609182476997375,
"learning_rate": 5.6948845631507476e-05,
"loss": 0.3872,
"step": 630
},
{
"epoch": 0.028982881985327415,
"grad_norm": 0.8392449021339417,
"learning_rate": 5.785423268447262e-05,
"loss": 0.4003,
"step": 640
},
{
"epoch": 0.029435739516348158,
"grad_norm": 0.7625656723976135,
"learning_rate": 5.875961973743776e-05,
"loss": 0.3884,
"step": 650
},
{
"epoch": 0.029888597047368896,
"grad_norm": 0.8860791921615601,
"learning_rate": 5.96650067904029e-05,
"loss": 0.4055,
"step": 660
},
{
"epoch": 0.03034145457838964,
"grad_norm": 0.7659401893615723,
"learning_rate": 6.0570393843368045e-05,
"loss": 0.3866,
"step": 670
},
{
"epoch": 0.03079431210941038,
"grad_norm": 0.8208166360855103,
"learning_rate": 6.14757808963332e-05,
"loss": 0.3746,
"step": 680
},
{
"epoch": 0.03124716964043112,
"grad_norm": 0.9270297288894653,
"learning_rate": 6.238116794929832e-05,
"loss": 0.3617,
"step": 690
},
{
"epoch": 0.03170002717145186,
"grad_norm": 0.8403503894805908,
"learning_rate": 6.328655500226347e-05,
"loss": 0.4067,
"step": 700
},
{
"epoch": 0.0321528847024726,
"grad_norm": 0.8086050152778625,
"learning_rate": 6.419194205522862e-05,
"loss": 0.3664,
"step": 710
},
{
"epoch": 0.03260574223349334,
"grad_norm": 0.9025173187255859,
"learning_rate": 6.509732910819376e-05,
"loss": 0.3812,
"step": 720
},
{
"epoch": 0.033058599764514085,
"grad_norm": 0.8973671197891235,
"learning_rate": 6.60027161611589e-05,
"loss": 0.3865,
"step": 730
},
{
"epoch": 0.03351145729553483,
"grad_norm": 0.7736189365386963,
"learning_rate": 6.690810321412405e-05,
"loss": 0.3643,
"step": 740
},
{
"epoch": 0.03396431482655556,
"grad_norm": 0.7647067308425903,
"learning_rate": 6.781349026708918e-05,
"loss": 0.394,
"step": 750
},
{
"epoch": 0.034417172357576305,
"grad_norm": 0.8811724185943604,
"learning_rate": 6.871887732005433e-05,
"loss": 0.3877,
"step": 760
},
{
"epoch": 0.03487002988859705,
"grad_norm": 0.7932597398757935,
"learning_rate": 6.962426437301947e-05,
"loss": 0.3703,
"step": 770
},
{
"epoch": 0.03532288741961779,
"grad_norm": 0.7502638697624207,
"learning_rate": 7.052965142598461e-05,
"loss": 0.3616,
"step": 780
},
{
"epoch": 0.03577574495063853,
"grad_norm": 0.9235597252845764,
"learning_rate": 7.143503847894976e-05,
"loss": 0.3856,
"step": 790
},
{
"epoch": 0.03622860248165927,
"grad_norm": 0.839526355266571,
"learning_rate": 7.23404255319149e-05,
"loss": 0.366,
"step": 800
},
{
"epoch": 0.03668146001268001,
"grad_norm": 0.9115842580795288,
"learning_rate": 7.324581258488003e-05,
"loss": 0.3608,
"step": 810
},
{
"epoch": 0.03713431754370075,
"grad_norm": 0.8510302901268005,
"learning_rate": 7.415119963784518e-05,
"loss": 0.3471,
"step": 820
},
{
"epoch": 0.03758717507472149,
"grad_norm": 0.8537122011184692,
"learning_rate": 7.505658669081032e-05,
"loss": 0.3643,
"step": 830
},
{
"epoch": 0.038040032605742236,
"grad_norm": 0.9007611870765686,
"learning_rate": 7.596197374377547e-05,
"loss": 0.3695,
"step": 840
},
{
"epoch": 0.03849289013676298,
"grad_norm": 0.8921451568603516,
"learning_rate": 7.686736079674061e-05,
"loss": 0.3751,
"step": 850
},
{
"epoch": 0.03894574766778371,
"grad_norm": 0.7536324858665466,
"learning_rate": 7.777274784970575e-05,
"loss": 0.36,
"step": 860
},
{
"epoch": 0.039398605198804455,
"grad_norm": 0.6539735198020935,
"learning_rate": 7.86781349026709e-05,
"loss": 0.3678,
"step": 870
},
{
"epoch": 0.0398514627298252,
"grad_norm": 0.7920020222663879,
"learning_rate": 7.958352195563605e-05,
"loss": 0.3668,
"step": 880
},
{
"epoch": 0.04030432026084594,
"grad_norm": 1.0227323770523071,
"learning_rate": 8.048890900860117e-05,
"loss": 0.36,
"step": 890
},
{
"epoch": 0.04075717779186668,
"grad_norm": 0.7640708684921265,
"learning_rate": 8.139429606156632e-05,
"loss": 0.3253,
"step": 900
},
{
"epoch": 0.04121003532288742,
"grad_norm": 0.7461748123168945,
"learning_rate": 8.229968311453147e-05,
"loss": 0.3713,
"step": 910
},
{
"epoch": 0.04166289285390816,
"grad_norm": 0.7150042653083801,
"learning_rate": 8.320507016749661e-05,
"loss": 0.349,
"step": 920
},
{
"epoch": 0.0421157503849289,
"grad_norm": 0.7429611682891846,
"learning_rate": 8.411045722046175e-05,
"loss": 0.3226,
"step": 930
},
{
"epoch": 0.042568607915949644,
"grad_norm": 0.7522969841957092,
"learning_rate": 8.50158442734269e-05,
"loss": 0.3427,
"step": 940
},
{
"epoch": 0.043021465446970386,
"grad_norm": 0.8997257947921753,
"learning_rate": 8.592123132639204e-05,
"loss": 0.3471,
"step": 950
},
{
"epoch": 0.04347432297799112,
"grad_norm": 0.8524265885353088,
"learning_rate": 8.682661837935719e-05,
"loss": 0.3817,
"step": 960
},
{
"epoch": 0.043927180509011864,
"grad_norm": 0.6731559634208679,
"learning_rate": 8.773200543232232e-05,
"loss": 0.3432,
"step": 970
},
{
"epoch": 0.044380038040032606,
"grad_norm": 0.8779836893081665,
"learning_rate": 8.863739248528746e-05,
"loss": 0.3283,
"step": 980
},
{
"epoch": 0.04483289557105335,
"grad_norm": 0.6812531352043152,
"learning_rate": 8.954277953825261e-05,
"loss": 0.3326,
"step": 990
},
{
"epoch": 0.04528575310207409,
"grad_norm": 0.7251217365264893,
"learning_rate": 9.044816659121775e-05,
"loss": 0.3444,
"step": 1000
},
{
"epoch": 0.04528575310207409,
"eval_chrf": 66.89902036813824,
"eval_loss": 0.4501848816871643,
"eval_runtime": 26.7238,
"eval_samples_per_second": 0.374,
"eval_steps_per_second": 0.037,
"step": 1000
},
{
"epoch": 0.045738610633094826,
"grad_norm": 0.9436652660369873,
"learning_rate": 9.135355364418289e-05,
"loss": 0.3641,
"step": 1010
},
{
"epoch": 0.04619146816411557,
"grad_norm": 0.711843729019165,
"learning_rate": 9.225894069714804e-05,
"loss": 0.3503,
"step": 1020
},
{
"epoch": 0.04664432569513631,
"grad_norm": 0.7915553450584412,
"learning_rate": 9.316432775011317e-05,
"loss": 0.3693,
"step": 1030
},
{
"epoch": 0.04709718322615705,
"grad_norm": 0.6526038646697998,
"learning_rate": 9.406971480307832e-05,
"loss": 0.3255,
"step": 1040
},
{
"epoch": 0.047550040757177794,
"grad_norm": 0.6790297031402588,
"learning_rate": 9.497510185604346e-05,
"loss": 0.3277,
"step": 1050
},
{
"epoch": 0.04800289828819853,
"grad_norm": 0.8189061284065247,
"learning_rate": 9.58804889090086e-05,
"loss": 0.3457,
"step": 1060
},
{
"epoch": 0.04845575581921927,
"grad_norm": 0.7368476986885071,
"learning_rate": 9.678587596197375e-05,
"loss": 0.3014,
"step": 1070
},
{
"epoch": 0.048908613350240014,
"grad_norm": 0.998906135559082,
"learning_rate": 9.76912630149389e-05,
"loss": 0.3247,
"step": 1080
},
{
"epoch": 0.049361470881260756,
"grad_norm": 0.7826228737831116,
"learning_rate": 9.859665006790402e-05,
"loss": 0.3101,
"step": 1090
},
{
"epoch": 0.0498143284122815,
"grad_norm": 0.7113918662071228,
"learning_rate": 9.950203712086917e-05,
"loss": 0.3164,
"step": 1100
},
{
"epoch": 0.050267185943302234,
"grad_norm": 0.8249057531356812,
"learning_rate": 0.00010040742417383431,
"loss": 0.3567,
"step": 1110
},
{
"epoch": 0.050720043474322976,
"grad_norm": 0.636049211025238,
"learning_rate": 0.00010131281122679945,
"loss": 0.3037,
"step": 1120
},
{
"epoch": 0.05117290100534372,
"grad_norm": 0.7211443185806274,
"learning_rate": 0.0001022181982797646,
"loss": 0.3284,
"step": 1130
},
{
"epoch": 0.05162575853636446,
"grad_norm": 0.7342743277549744,
"learning_rate": 0.00010312358533272975,
"loss": 0.3229,
"step": 1140
},
{
"epoch": 0.0520786160673852,
"grad_norm": 0.7419087886810303,
"learning_rate": 0.00010402897238569489,
"loss": 0.3253,
"step": 1150
},
{
"epoch": 0.05253147359840594,
"grad_norm": 0.8502224087715149,
"learning_rate": 0.00010493435943866004,
"loss": 0.3233,
"step": 1160
},
{
"epoch": 0.05298433112942668,
"grad_norm": 0.5899739861488342,
"learning_rate": 0.00010583974649162517,
"loss": 0.3287,
"step": 1170
},
{
"epoch": 0.05343718866044742,
"grad_norm": 0.7512941360473633,
"learning_rate": 0.00010674513354459033,
"loss": 0.3311,
"step": 1180
},
{
"epoch": 0.053890046191468165,
"grad_norm": 1.5729906558990479,
"learning_rate": 0.00010765052059755548,
"loss": 0.3218,
"step": 1190
},
{
"epoch": 0.05434290372248891,
"grad_norm": 0.8467279076576233,
"learning_rate": 0.0001085559076505206,
"loss": 0.3308,
"step": 1200
},
{
"epoch": 0.05479576125350965,
"grad_norm": 0.7849757671356201,
"learning_rate": 0.00010946129470348574,
"loss": 0.3203,
"step": 1210
},
{
"epoch": 0.055248618784530384,
"grad_norm": 0.8003832697868347,
"learning_rate": 0.00011036668175645089,
"loss": 0.3489,
"step": 1220
},
{
"epoch": 0.05570147631555113,
"grad_norm": 0.8363836407661438,
"learning_rate": 0.00011127206880941602,
"loss": 0.3116,
"step": 1230
},
{
"epoch": 0.05615433384657187,
"grad_norm": 0.6694333553314209,
"learning_rate": 0.00011217745586238118,
"loss": 0.3133,
"step": 1240
},
{
"epoch": 0.05660719137759261,
"grad_norm": 0.6896982789039612,
"learning_rate": 0.00011308284291534633,
"loss": 0.3219,
"step": 1250
},
{
"epoch": 0.05706004890861335,
"grad_norm": 0.631035566329956,
"learning_rate": 0.00011398822996831146,
"loss": 0.3516,
"step": 1260
},
{
"epoch": 0.05751290643963409,
"grad_norm": 0.7124539017677307,
"learning_rate": 0.00011489361702127661,
"loss": 0.3234,
"step": 1270
},
{
"epoch": 0.05796576397065483,
"grad_norm": 0.6392337083816528,
"learning_rate": 0.00011579900407424174,
"loss": 0.3552,
"step": 1280
},
{
"epoch": 0.05841862150167557,
"grad_norm": 0.5689034461975098,
"learning_rate": 0.00011670439112720688,
"loss": 0.3237,
"step": 1290
},
{
"epoch": 0.058871479032696315,
"grad_norm": 0.7601428031921387,
"learning_rate": 0.00011760977818017203,
"loss": 0.3261,
"step": 1300
},
{
"epoch": 0.05932433656371706,
"grad_norm": 0.8485261797904968,
"learning_rate": 0.00011851516523313718,
"loss": 0.3211,
"step": 1310
},
{
"epoch": 0.05977719409473779,
"grad_norm": 0.616400957107544,
"learning_rate": 0.00011942055228610231,
"loss": 0.3439,
"step": 1320
},
{
"epoch": 0.060230051625758535,
"grad_norm": 0.8092880845069885,
"learning_rate": 0.00012032593933906746,
"loss": 0.3118,
"step": 1330
},
{
"epoch": 0.06068290915677928,
"grad_norm": 0.7503390908241272,
"learning_rate": 0.0001212313263920326,
"loss": 0.3077,
"step": 1340
},
{
"epoch": 0.06113576668780002,
"grad_norm": 0.7002999186515808,
"learning_rate": 0.00012213671344499775,
"loss": 0.2747,
"step": 1350
},
{
"epoch": 0.06158862421882076,
"grad_norm": 0.7599196434020996,
"learning_rate": 0.00012304210049796286,
"loss": 0.3072,
"step": 1360
},
{
"epoch": 0.0620414817498415,
"grad_norm": 0.7310253381729126,
"learning_rate": 0.00012394748755092803,
"loss": 0.3286,
"step": 1370
},
{
"epoch": 0.06249433928086224,
"grad_norm": 0.8364066481590271,
"learning_rate": 0.00012485287460389316,
"loss": 0.3164,
"step": 1380
},
{
"epoch": 0.06294719681188299,
"grad_norm": 0.6382613182067871,
"learning_rate": 0.0001257582616568583,
"loss": 0.2996,
"step": 1390
},
{
"epoch": 0.06340005434290372,
"grad_norm": 0.8997701406478882,
"learning_rate": 0.00012666364870982347,
"loss": 0.3191,
"step": 1400
},
{
"epoch": 0.06385291187392446,
"grad_norm": 0.8146569728851318,
"learning_rate": 0.0001275690357627886,
"loss": 0.3146,
"step": 1410
},
{
"epoch": 0.0643057694049452,
"grad_norm": 0.6676255464553833,
"learning_rate": 0.00012847442281575374,
"loss": 0.3181,
"step": 1420
},
{
"epoch": 0.06475862693596594,
"grad_norm": 0.5385376811027527,
"learning_rate": 0.0001293798098687189,
"loss": 0.2957,
"step": 1430
},
{
"epoch": 0.06521148446698669,
"grad_norm": 0.8492608070373535,
"learning_rate": 0.00013028519692168401,
"loss": 0.3162,
"step": 1440
},
{
"epoch": 0.06566434199800743,
"grad_norm": 0.9273693561553955,
"learning_rate": 0.00013119058397464915,
"loss": 0.3402,
"step": 1450
},
{
"epoch": 0.06611719952902817,
"grad_norm": 0.5114811658859253,
"learning_rate": 0.00013209597102761432,
"loss": 0.3088,
"step": 1460
},
{
"epoch": 0.06657005706004891,
"grad_norm": 0.6595302224159241,
"learning_rate": 0.00013300135808057945,
"loss": 0.3436,
"step": 1470
},
{
"epoch": 0.06702291459106965,
"grad_norm": 0.7126308679580688,
"learning_rate": 0.0001339067451335446,
"loss": 0.3141,
"step": 1480
},
{
"epoch": 0.0674757721220904,
"grad_norm": 0.6146643161773682,
"learning_rate": 0.00013481213218650975,
"loss": 0.2798,
"step": 1490
},
{
"epoch": 0.06792862965311112,
"grad_norm": 0.8765645623207092,
"learning_rate": 0.0001357175192394749,
"loss": 0.3122,
"step": 1500
},
{
"epoch": 0.06838148718413187,
"grad_norm": 0.6217523217201233,
"learning_rate": 0.00013662290629244003,
"loss": 0.3218,
"step": 1510
},
{
"epoch": 0.06883434471515261,
"grad_norm": 0.5067332983016968,
"learning_rate": 0.00013752829334540517,
"loss": 0.3046,
"step": 1520
},
{
"epoch": 0.06928720224617335,
"grad_norm": 0.6011443734169006,
"learning_rate": 0.0001384336803983703,
"loss": 0.3341,
"step": 1530
},
{
"epoch": 0.0697400597771941,
"grad_norm": 0.5284327268600464,
"learning_rate": 0.00013933906745133544,
"loss": 0.3042,
"step": 1540
},
{
"epoch": 0.07019291730821484,
"grad_norm": 0.5794896483421326,
"learning_rate": 0.0001402444545043006,
"loss": 0.3021,
"step": 1550
},
{
"epoch": 0.07064577483923558,
"grad_norm": 0.5913949012756348,
"learning_rate": 0.00014114984155726574,
"loss": 0.2878,
"step": 1560
},
{
"epoch": 0.07109863237025632,
"grad_norm": 0.8013747334480286,
"learning_rate": 0.00014205522861023088,
"loss": 0.2926,
"step": 1570
},
{
"epoch": 0.07155148990127706,
"grad_norm": 0.4837814271450043,
"learning_rate": 0.00014296061566319604,
"loss": 0.2954,
"step": 1580
},
{
"epoch": 0.0720043474322978,
"grad_norm": 0.6365007162094116,
"learning_rate": 0.00014386600271616118,
"loss": 0.3182,
"step": 1590
},
{
"epoch": 0.07245720496331853,
"grad_norm": 0.6202572584152222,
"learning_rate": 0.0001447713897691263,
"loss": 0.3072,
"step": 1600
},
{
"epoch": 0.07291006249433928,
"grad_norm": 0.7837294936180115,
"learning_rate": 0.00014567677682209145,
"loss": 0.3225,
"step": 1610
},
{
"epoch": 0.07336292002536002,
"grad_norm": 0.5507187843322754,
"learning_rate": 0.0001465821638750566,
"loss": 0.2954,
"step": 1620
},
{
"epoch": 0.07381577755638076,
"grad_norm": 0.5234072804450989,
"learning_rate": 0.00014748755092802173,
"loss": 0.3037,
"step": 1630
},
{
"epoch": 0.0742686350874015,
"grad_norm": 0.6388247013092041,
"learning_rate": 0.00014839293798098687,
"loss": 0.3214,
"step": 1640
},
{
"epoch": 0.07472149261842224,
"grad_norm": 0.5577211380004883,
"learning_rate": 0.00014929832503395203,
"loss": 0.296,
"step": 1650
},
{
"epoch": 0.07517435014944299,
"grad_norm": 0.5393535494804382,
"learning_rate": 0.00015020371208691717,
"loss": 0.2965,
"step": 1660
},
{
"epoch": 0.07562720768046373,
"grad_norm": 0.6293423771858215,
"learning_rate": 0.0001511090991398823,
"loss": 0.2917,
"step": 1670
},
{
"epoch": 0.07608006521148447,
"grad_norm": 0.49211928248405457,
"learning_rate": 0.00015201448619284747,
"loss": 0.284,
"step": 1680
},
{
"epoch": 0.07653292274250521,
"grad_norm": 0.5974680185317993,
"learning_rate": 0.00015291987324581258,
"loss": 0.2852,
"step": 1690
},
{
"epoch": 0.07698578027352596,
"grad_norm": 0.6545078754425049,
"learning_rate": 0.00015382526029877772,
"loss": 0.2782,
"step": 1700
},
{
"epoch": 0.07743863780454668,
"grad_norm": 0.5832185745239258,
"learning_rate": 0.00015473064735174288,
"loss": 0.3127,
"step": 1710
},
{
"epoch": 0.07789149533556743,
"grad_norm": 0.5393053293228149,
"learning_rate": 0.00015563603440470802,
"loss": 0.303,
"step": 1720
},
{
"epoch": 0.07834435286658817,
"grad_norm": 0.6128617525100708,
"learning_rate": 0.00015654142145767315,
"loss": 0.3028,
"step": 1730
},
{
"epoch": 0.07879721039760891,
"grad_norm": 0.6199638843536377,
"learning_rate": 0.00015744680851063832,
"loss": 0.2893,
"step": 1740
},
{
"epoch": 0.07925006792862965,
"grad_norm": 0.5525530576705933,
"learning_rate": 0.00015835219556360346,
"loss": 0.2994,
"step": 1750
},
{
"epoch": 0.0797029254596504,
"grad_norm": 0.5762396454811096,
"learning_rate": 0.0001592575826165686,
"loss": 0.3125,
"step": 1760
},
{
"epoch": 0.08015578299067114,
"grad_norm": 0.5460029244422913,
"learning_rate": 0.00016016296966953373,
"loss": 0.2752,
"step": 1770
},
{
"epoch": 0.08060864052169188,
"grad_norm": 0.5735179781913757,
"learning_rate": 0.00016106835672249887,
"loss": 0.2883,
"step": 1780
},
{
"epoch": 0.08106149805271262,
"grad_norm": 0.4762778878211975,
"learning_rate": 0.000161973743775464,
"loss": 0.2747,
"step": 1790
},
{
"epoch": 0.08151435558373336,
"grad_norm": 0.7053053975105286,
"learning_rate": 0.00016287913082842917,
"loss": 0.2996,
"step": 1800
},
{
"epoch": 0.08196721311475409,
"grad_norm": 0.5661677718162537,
"learning_rate": 0.0001637845178813943,
"loss": 0.2828,
"step": 1810
},
{
"epoch": 0.08242007064577483,
"grad_norm": 0.7437049150466919,
"learning_rate": 0.00016468990493435944,
"loss": 0.2991,
"step": 1820
},
{
"epoch": 0.08287292817679558,
"grad_norm": 0.6861435770988464,
"learning_rate": 0.0001655952919873246,
"loss": 0.2893,
"step": 1830
},
{
"epoch": 0.08332578570781632,
"grad_norm": 0.49107825756073,
"learning_rate": 0.00016650067904028974,
"loss": 0.2813,
"step": 1840
},
{
"epoch": 0.08377864323883706,
"grad_norm": 0.6133277416229248,
"learning_rate": 0.00016740606609325485,
"loss": 0.2687,
"step": 1850
},
{
"epoch": 0.0842315007698578,
"grad_norm": 0.5676704049110413,
"learning_rate": 0.00016831145314622002,
"loss": 0.2734,
"step": 1860
},
{
"epoch": 0.08468435830087855,
"grad_norm": 0.6072496175765991,
"learning_rate": 0.00016921684019918516,
"loss": 0.2867,
"step": 1870
},
{
"epoch": 0.08513721583189929,
"grad_norm": 0.5643963813781738,
"learning_rate": 0.0001701222272521503,
"loss": 0.2812,
"step": 1880
},
{
"epoch": 0.08559007336292003,
"grad_norm": 0.5513191223144531,
"learning_rate": 0.00017102761430511546,
"loss": 0.2674,
"step": 1890
},
{
"epoch": 0.08604293089394077,
"grad_norm": 0.6839463114738464,
"learning_rate": 0.0001719330013580806,
"loss": 0.2905,
"step": 1900
},
{
"epoch": 0.0864957884249615,
"grad_norm": 0.6099688410758972,
"learning_rate": 0.00017283838841104573,
"loss": 0.2887,
"step": 1910
},
{
"epoch": 0.08694864595598224,
"grad_norm": 0.611600399017334,
"learning_rate": 0.0001737437754640109,
"loss": 0.2977,
"step": 1920
},
{
"epoch": 0.08740150348700298,
"grad_norm": 0.5446760654449463,
"learning_rate": 0.000174649162516976,
"loss": 0.3095,
"step": 1930
},
{
"epoch": 0.08785436101802373,
"grad_norm": 0.5957568883895874,
"learning_rate": 0.00017555454956994114,
"loss": 0.275,
"step": 1940
},
{
"epoch": 0.08830721854904447,
"grad_norm": 0.4382849633693695,
"learning_rate": 0.0001764599366229063,
"loss": 0.2902,
"step": 1950
},
{
"epoch": 0.08876007608006521,
"grad_norm": 0.6389471292495728,
"learning_rate": 0.00017736532367587144,
"loss": 0.3022,
"step": 1960
},
{
"epoch": 0.08921293361108595,
"grad_norm": 0.5382727980613708,
"learning_rate": 0.00017827071072883658,
"loss": 0.2796,
"step": 1970
},
{
"epoch": 0.0896657911421067,
"grad_norm": 0.5546545386314392,
"learning_rate": 0.00017917609778180175,
"loss": 0.3119,
"step": 1980
},
{
"epoch": 0.09011864867312744,
"grad_norm": 0.5750550627708435,
"learning_rate": 0.00018008148483476688,
"loss": 0.2573,
"step": 1990
},
{
"epoch": 0.09057150620414818,
"grad_norm": 0.5644318461418152,
"learning_rate": 0.00018098687188773202,
"loss": 0.2822,
"step": 2000
},
{
"epoch": 0.09057150620414818,
"eval_chrf": 51.4571346316286,
"eval_loss": 0.29403626918792725,
"eval_runtime": 26.2584,
"eval_samples_per_second": 0.381,
"eval_steps_per_second": 0.038,
"step": 2000
},
{
"epoch": 0.09102436373516892,
"grad_norm": 0.5000986456871033,
"learning_rate": 0.00018189225894069716,
"loss": 0.2905,
"step": 2010
},
{
"epoch": 0.09147722126618965,
"grad_norm": 0.5213611125946045,
"learning_rate": 0.0001827976459936623,
"loss": 0.2849,
"step": 2020
},
{
"epoch": 0.0919300787972104,
"grad_norm": 0.6767968535423279,
"learning_rate": 0.00018370303304662743,
"loss": 0.2831,
"step": 2030
},
{
"epoch": 0.09238293632823114,
"grad_norm": 0.4952644109725952,
"learning_rate": 0.00018460842009959257,
"loss": 0.2778,
"step": 2040
},
{
"epoch": 0.09283579385925188,
"grad_norm": 0.6272353529930115,
"learning_rate": 0.00018551380715255773,
"loss": 0.3121,
"step": 2050
},
{
"epoch": 0.09328865139027262,
"grad_norm": 0.5801631808280945,
"learning_rate": 0.00018641919420552287,
"loss": 0.2918,
"step": 2060
},
{
"epoch": 0.09374150892129336,
"grad_norm": 0.47861814498901367,
"learning_rate": 0.000187324581258488,
"loss": 0.2693,
"step": 2070
},
{
"epoch": 0.0941943664523141,
"grad_norm": 0.5323045253753662,
"learning_rate": 0.00018822996831145317,
"loss": 0.2807,
"step": 2080
},
{
"epoch": 0.09464722398333485,
"grad_norm": 0.7477088570594788,
"learning_rate": 0.00018913535536441828,
"loss": 0.2658,
"step": 2090
},
{
"epoch": 0.09510008151435559,
"grad_norm": 0.5277721881866455,
"learning_rate": 0.00019004074241738342,
"loss": 0.2895,
"step": 2100
},
{
"epoch": 0.09555293904537633,
"grad_norm": 0.781351625919342,
"learning_rate": 0.00019094612947034858,
"loss": 0.2667,
"step": 2110
},
{
"epoch": 0.09600579657639706,
"grad_norm": 0.4823305010795593,
"learning_rate": 0.00019185151652331372,
"loss": 0.2772,
"step": 2120
},
{
"epoch": 0.0964586541074178,
"grad_norm": 0.5646785497665405,
"learning_rate": 0.00019275690357627886,
"loss": 0.2634,
"step": 2130
},
{
"epoch": 0.09691151163843854,
"grad_norm": 0.5562229156494141,
"learning_rate": 0.00019366229062924402,
"loss": 0.2482,
"step": 2140
},
{
"epoch": 0.09736436916945929,
"grad_norm": 0.3854808509349823,
"learning_rate": 0.00019456767768220916,
"loss": 0.2549,
"step": 2150
},
{
"epoch": 0.09781722670048003,
"grad_norm": 0.4851052165031433,
"learning_rate": 0.0001954730647351743,
"loss": 0.3053,
"step": 2160
},
{
"epoch": 0.09827008423150077,
"grad_norm": 0.6279167532920837,
"learning_rate": 0.00019637845178813943,
"loss": 0.2529,
"step": 2170
},
{
"epoch": 0.09872294176252151,
"grad_norm": 0.5062315464019775,
"learning_rate": 0.00019728383884110457,
"loss": 0.2813,
"step": 2180
},
{
"epoch": 0.09917579929354225,
"grad_norm": 0.7650270462036133,
"learning_rate": 0.0001981892258940697,
"loss": 0.2782,
"step": 2190
},
{
"epoch": 0.099628656824563,
"grad_norm": 0.684183657169342,
"learning_rate": 0.00019909461294703487,
"loss": 0.2933,
"step": 2200
},
{
"epoch": 0.10008151435558374,
"grad_norm": 0.4574642479419708,
"learning_rate": 0.0002,
"loss": 0.2812,
"step": 2210
},
{
"epoch": 0.10053437188660447,
"grad_norm": 0.45532846450805664,
"learning_rate": 0.0001999999719648891,
"loss": 0.277,
"step": 2220
},
{
"epoch": 0.10098722941762521,
"grad_norm": 0.5651437640190125,
"learning_rate": 0.0001999998878595721,
"loss": 0.2575,
"step": 2230
},
{
"epoch": 0.10144008694864595,
"grad_norm": 0.5053766369819641,
"learning_rate": 0.00019999974768409614,
"loss": 0.3048,
"step": 2240
},
{
"epoch": 0.1018929444796667,
"grad_norm": 0.6162516474723816,
"learning_rate": 0.00019999955143853982,
"loss": 0.2515,
"step": 2250
},
{
"epoch": 0.10234580201068744,
"grad_norm": 0.5970934629440308,
"learning_rate": 0.00019999929912301322,
"loss": 0.3024,
"step": 2260
},
{
"epoch": 0.10279865954170818,
"grad_norm": 0.6110081672668457,
"learning_rate": 0.00019999899073765774,
"loss": 0.2766,
"step": 2270
},
{
"epoch": 0.10325151707272892,
"grad_norm": 0.5295965075492859,
"learning_rate": 0.00019999862628264633,
"loss": 0.228,
"step": 2280
},
{
"epoch": 0.10370437460374966,
"grad_norm": 0.41794171929359436,
"learning_rate": 0.00019999820575818336,
"loss": 0.2703,
"step": 2290
},
{
"epoch": 0.1041572321347704,
"grad_norm": 0.5175167918205261,
"learning_rate": 0.00019999772916450464,
"loss": 0.2896,
"step": 2300
},
{
"epoch": 0.10461008966579115,
"grad_norm": 0.6496297121047974,
"learning_rate": 0.00019999719650187729,
"loss": 0.2708,
"step": 2310
},
{
"epoch": 0.10506294719681188,
"grad_norm": 0.5578742027282715,
"learning_rate": 0.0001999966077706001,
"loss": 0.2749,
"step": 2320
},
{
"epoch": 0.10551580472783262,
"grad_norm": 0.5760526657104492,
"learning_rate": 0.00019999596297100308,
"loss": 0.2867,
"step": 2330
},
{
"epoch": 0.10596866225885336,
"grad_norm": 0.5972622632980347,
"learning_rate": 0.00019999526210344784,
"loss": 0.2513,
"step": 2340
},
{
"epoch": 0.1064215197898741,
"grad_norm": 0.6074303388595581,
"learning_rate": 0.0001999945051683273,
"loss": 0.2624,
"step": 2350
},
{
"epoch": 0.10687437732089484,
"grad_norm": 0.460184782743454,
"learning_rate": 0.00019999369216606593,
"loss": 0.2671,
"step": 2360
},
{
"epoch": 0.10732723485191559,
"grad_norm": 0.5181593298912048,
"learning_rate": 0.00019999282309711953,
"loss": 0.2701,
"step": 2370
},
{
"epoch": 0.10778009238293633,
"grad_norm": 0.600666880607605,
"learning_rate": 0.00019999189796197545,
"loss": 0.2891,
"step": 2380
},
{
"epoch": 0.10823294991395707,
"grad_norm": 0.49728748202323914,
"learning_rate": 0.0001999909167611523,
"loss": 0.2754,
"step": 2390
},
{
"epoch": 0.10868580744497781,
"grad_norm": 0.5172849297523499,
"learning_rate": 0.00019998987949520038,
"loss": 0.2507,
"step": 2400
},
{
"epoch": 0.10913866497599856,
"grad_norm": 0.43115153908729553,
"learning_rate": 0.00019998878616470122,
"loss": 0.2827,
"step": 2410
},
{
"epoch": 0.1095915225070193,
"grad_norm": 0.49532899260520935,
"learning_rate": 0.00019998763677026786,
"loss": 0.2568,
"step": 2420
},
{
"epoch": 0.11004438003804003,
"grad_norm": 0.9006966352462769,
"learning_rate": 0.00019998643131254474,
"loss": 0.2636,
"step": 2430
},
{
"epoch": 0.11049723756906077,
"grad_norm": 0.40261346101760864,
"learning_rate": 0.00019998516979220782,
"loss": 0.238,
"step": 2440
},
{
"epoch": 0.11095009510008151,
"grad_norm": 0.5829101204872131,
"learning_rate": 0.0001999838522099644,
"loss": 0.2649,
"step": 2450
},
{
"epoch": 0.11140295263110225,
"grad_norm": 0.5074893236160278,
"learning_rate": 0.00019998247856655325,
"loss": 0.2833,
"step": 2460
},
{
"epoch": 0.111855810162123,
"grad_norm": 0.544731616973877,
"learning_rate": 0.00019998104886274462,
"loss": 0.2562,
"step": 2470
},
{
"epoch": 0.11230866769314374,
"grad_norm": 0.5525861978530884,
"learning_rate": 0.00019997956309934006,
"loss": 0.2751,
"step": 2480
},
{
"epoch": 0.11276152522416448,
"grad_norm": 0.5456953644752502,
"learning_rate": 0.00019997802127717272,
"loss": 0.2757,
"step": 2490
},
{
"epoch": 0.11321438275518522,
"grad_norm": 0.4306289553642273,
"learning_rate": 0.00019997642339710707,
"loss": 0.2539,
"step": 2500
},
{
"epoch": 0.11366724028620596,
"grad_norm": 0.4044501781463623,
"learning_rate": 0.00019997476946003907,
"loss": 0.2585,
"step": 2510
},
{
"epoch": 0.1141200978172267,
"grad_norm": 0.4749915599822998,
"learning_rate": 0.00019997305946689605,
"loss": 0.2666,
"step": 2520
},
{
"epoch": 0.11457295534824743,
"grad_norm": 0.4891221523284912,
"learning_rate": 0.0001999712934186368,
"loss": 0.2489,
"step": 2530
},
{
"epoch": 0.11502581287926818,
"grad_norm": 0.6200125813484192,
"learning_rate": 0.0001999694713162516,
"loss": 0.2787,
"step": 2540
},
{
"epoch": 0.11547867041028892,
"grad_norm": 0.49395912885665894,
"learning_rate": 0.00019996759316076208,
"loss": 0.2397,
"step": 2550
},
{
"epoch": 0.11593152794130966,
"grad_norm": 0.5160639882087708,
"learning_rate": 0.0001999656589532213,
"loss": 0.2616,
"step": 2560
},
{
"epoch": 0.1163843854723304,
"grad_norm": 0.473092257976532,
"learning_rate": 0.0001999636686947138,
"loss": 0.2634,
"step": 2570
},
{
"epoch": 0.11683724300335115,
"grad_norm": 0.6124957799911499,
"learning_rate": 0.00019996162238635555,
"loss": 0.2649,
"step": 2580
},
{
"epoch": 0.11729010053437189,
"grad_norm": 0.36893054842948914,
"learning_rate": 0.00019995952002929387,
"loss": 0.2634,
"step": 2590
},
{
"epoch": 0.11774295806539263,
"grad_norm": 0.5636609196662903,
"learning_rate": 0.0001999573616247076,
"loss": 0.2693,
"step": 2600
},
{
"epoch": 0.11819581559641337,
"grad_norm": 0.539794921875,
"learning_rate": 0.0001999551471738069,
"loss": 0.2305,
"step": 2610
},
{
"epoch": 0.11864867312743411,
"grad_norm": 0.3865772485733032,
"learning_rate": 0.00019995287667783348,
"loss": 0.2461,
"step": 2620
},
{
"epoch": 0.11910153065845484,
"grad_norm": 0.4933566451072693,
"learning_rate": 0.00019995055013806039,
"loss": 0.2805,
"step": 2630
},
{
"epoch": 0.11955438818947559,
"grad_norm": 0.492123544216156,
"learning_rate": 0.0001999481675557921,
"loss": 0.256,
"step": 2640
},
{
"epoch": 0.12000724572049633,
"grad_norm": 0.5840746760368347,
"learning_rate": 0.0001999457289323646,
"loss": 0.2478,
"step": 2650
},
{
"epoch": 0.12046010325151707,
"grad_norm": 0.47510668635368347,
"learning_rate": 0.00019994323426914513,
"loss": 0.2809,
"step": 2660
},
{
"epoch": 0.12091296078253781,
"grad_norm": 0.45924919843673706,
"learning_rate": 0.00019994068356753256,
"loss": 0.2281,
"step": 2670
},
{
"epoch": 0.12136581831355855,
"grad_norm": 0.5342031121253967,
"learning_rate": 0.00019993807682895697,
"loss": 0.2509,
"step": 2680
},
{
"epoch": 0.1218186758445793,
"grad_norm": 0.6540825963020325,
"learning_rate": 0.00019993541405488005,
"loss": 0.2748,
"step": 2690
},
{
"epoch": 0.12227153337560004,
"grad_norm": 0.7478682398796082,
"learning_rate": 0.00019993269524679478,
"loss": 0.258,
"step": 2700
},
{
"epoch": 0.12272439090662078,
"grad_norm": 0.477021723985672,
"learning_rate": 0.00019992992040622563,
"loss": 0.2692,
"step": 2710
},
{
"epoch": 0.12317724843764152,
"grad_norm": 0.39322835206985474,
"learning_rate": 0.0001999270895347284,
"loss": 0.2497,
"step": 2720
},
{
"epoch": 0.12363010596866227,
"grad_norm": 0.5442454218864441,
"learning_rate": 0.00019992420263389047,
"loss": 0.2625,
"step": 2730
},
{
"epoch": 0.124082963499683,
"grad_norm": 0.6335331797599792,
"learning_rate": 0.00019992125970533041,
"loss": 0.2844,
"step": 2740
},
{
"epoch": 0.12453582103070374,
"grad_norm": 0.5638910531997681,
"learning_rate": 0.00019991826075069842,
"loss": 0.2569,
"step": 2750
},
{
"epoch": 0.12498867856172448,
"grad_norm": 0.4456355571746826,
"learning_rate": 0.00019991520577167596,
"loss": 0.2745,
"step": 2760
},
{
"epoch": 0.12544153609274522,
"grad_norm": 0.40395674109458923,
"learning_rate": 0.000199912094769976,
"loss": 0.2483,
"step": 2770
},
{
"epoch": 0.12589439362376598,
"grad_norm": 0.48974549770355225,
"learning_rate": 0.0001999089277473429,
"loss": 0.2388,
"step": 2780
},
{
"epoch": 0.1263472511547867,
"grad_norm": 0.5266743302345276,
"learning_rate": 0.00019990570470555235,
"loss": 0.2382,
"step": 2790
},
{
"epoch": 0.12680010868580743,
"grad_norm": 0.5559818744659424,
"learning_rate": 0.00019990242564641157,
"loss": 0.2503,
"step": 2800
},
{
"epoch": 0.1272529662168282,
"grad_norm": 0.47886645793914795,
"learning_rate": 0.00019989909057175912,
"loss": 0.2432,
"step": 2810
},
{
"epoch": 0.12770582374784892,
"grad_norm": 0.5434776544570923,
"learning_rate": 0.000199895699483465,
"loss": 0.2484,
"step": 2820
},
{
"epoch": 0.12815868127886967,
"grad_norm": 0.5014550089836121,
"learning_rate": 0.00019989225238343058,
"loss": 0.2514,
"step": 2830
},
{
"epoch": 0.1286115388098904,
"grad_norm": 0.5822556018829346,
"learning_rate": 0.00019988874927358868,
"loss": 0.265,
"step": 2840
},
{
"epoch": 0.12906439634091116,
"grad_norm": 0.4825868606567383,
"learning_rate": 0.00019988519015590346,
"loss": 0.2876,
"step": 2850
},
{
"epoch": 0.1295172538719319,
"grad_norm": 0.4180533289909363,
"learning_rate": 0.00019988157503237058,
"loss": 0.2546,
"step": 2860
},
{
"epoch": 0.12997011140295264,
"grad_norm": 0.3579380512237549,
"learning_rate": 0.000199877903905017,
"loss": 0.2413,
"step": 2870
},
{
"epoch": 0.13042296893397337,
"grad_norm": 0.4857385456562042,
"learning_rate": 0.00019987417677590113,
"loss": 0.2704,
"step": 2880
},
{
"epoch": 0.1308758264649941,
"grad_norm": 0.5797598958015442,
"learning_rate": 0.0001998703936471128,
"loss": 0.2314,
"step": 2890
},
{
"epoch": 0.13132868399601486,
"grad_norm": 0.6053803563117981,
"learning_rate": 0.00019986655452077328,
"loss": 0.2407,
"step": 2900
},
{
"epoch": 0.13178154152703558,
"grad_norm": 0.5609238743782043,
"learning_rate": 0.00019986265939903505,
"loss": 0.2219,
"step": 2910
},
{
"epoch": 0.13223439905805634,
"grad_norm": 0.6816825270652771,
"learning_rate": 0.0001998587082840822,
"loss": 0.2516,
"step": 2920
},
{
"epoch": 0.13268725658907707,
"grad_norm": 0.6507164239883423,
"learning_rate": 0.0001998547011781301,
"loss": 0.2366,
"step": 2930
},
{
"epoch": 0.13314011412009782,
"grad_norm": 0.4358994662761688,
"learning_rate": 0.00019985063808342557,
"loss": 0.2484,
"step": 2940
},
{
"epoch": 0.13359297165111855,
"grad_norm": 0.5335454940795898,
"learning_rate": 0.00019984651900224675,
"loss": 0.2438,
"step": 2950
},
{
"epoch": 0.1340458291821393,
"grad_norm": 0.45652270317077637,
"learning_rate": 0.00019984234393690325,
"loss": 0.2508,
"step": 2960
},
{
"epoch": 0.13449868671316004,
"grad_norm": 0.4294167160987854,
"learning_rate": 0.00019983811288973603,
"loss": 0.2393,
"step": 2970
},
{
"epoch": 0.1349515442441808,
"grad_norm": 0.548907995223999,
"learning_rate": 0.00019983382586311746,
"loss": 0.237,
"step": 2980
},
{
"epoch": 0.13540440177520152,
"grad_norm": 0.48326581716537476,
"learning_rate": 0.00019982948285945126,
"loss": 0.2604,
"step": 2990
},
{
"epoch": 0.13585725930622225,
"grad_norm": 0.5535202622413635,
"learning_rate": 0.0001998250838811726,
"loss": 0.2646,
"step": 3000
},
{
"epoch": 0.13585725930622225,
"eval_chrf": 56.01199333702116,
"eval_loss": 0.23407021164894104,
"eval_runtime": 26.3619,
"eval_samples_per_second": 0.379,
"eval_steps_per_second": 0.038,
"step": 3000
},
{
"epoch": 0.136310116837243,
"grad_norm": 0.46452951431274414,
"learning_rate": 0.00019982062893074794,
"loss": 0.2292,
"step": 3010
},
{
"epoch": 0.13676297436826373,
"grad_norm": 0.6923738121986389,
"learning_rate": 0.00019981611801067525,
"loss": 0.2519,
"step": 3020
},
{
"epoch": 0.1372158318992845,
"grad_norm": 0.4062545895576477,
"learning_rate": 0.0001998115511234837,
"loss": 0.2558,
"step": 3030
},
{
"epoch": 0.13766868943030522,
"grad_norm": 0.5292578339576721,
"learning_rate": 0.0001998069282717341,
"loss": 0.2708,
"step": 3040
},
{
"epoch": 0.13812154696132597,
"grad_norm": 0.5739614963531494,
"learning_rate": 0.00019980224945801843,
"loss": 0.2355,
"step": 3050
},
{
"epoch": 0.1385744044923467,
"grad_norm": 0.6753970384597778,
"learning_rate": 0.00019979751468496005,
"loss": 0.2308,
"step": 3060
},
{
"epoch": 0.13902726202336746,
"grad_norm": 0.5382646322250366,
"learning_rate": 0.00019979272395521388,
"loss": 0.2623,
"step": 3070
},
{
"epoch": 0.1394801195543882,
"grad_norm": 0.5424644351005554,
"learning_rate": 0.00019978787727146596,
"loss": 0.2346,
"step": 3080
},
{
"epoch": 0.13993297708540894,
"grad_norm": 0.4321116507053375,
"learning_rate": 0.00019978297463643394,
"loss": 0.2457,
"step": 3090
},
{
"epoch": 0.14038583461642967,
"grad_norm": 0.4374447762966156,
"learning_rate": 0.0001997780160528667,
"loss": 0.2509,
"step": 3100
},
{
"epoch": 0.1408386921474504,
"grad_norm": 0.46546927094459534,
"learning_rate": 0.00019977300152354451,
"loss": 0.258,
"step": 3110
},
{
"epoch": 0.14129154967847116,
"grad_norm": 0.5650977492332458,
"learning_rate": 0.00019976793105127904,
"loss": 0.2502,
"step": 3120
},
{
"epoch": 0.14174440720949188,
"grad_norm": 0.39362677931785583,
"learning_rate": 0.00019976280463891336,
"loss": 0.2315,
"step": 3130
},
{
"epoch": 0.14219726474051264,
"grad_norm": 0.4962209463119507,
"learning_rate": 0.0001997576222893218,
"loss": 0.2504,
"step": 3140
},
{
"epoch": 0.14265012227153337,
"grad_norm": 0.44957464933395386,
"learning_rate": 0.00019975238400541013,
"loss": 0.2581,
"step": 3150
},
{
"epoch": 0.14310297980255413,
"grad_norm": 0.50096595287323,
"learning_rate": 0.00019974708979011548,
"loss": 0.2487,
"step": 3160
},
{
"epoch": 0.14355583733357485,
"grad_norm": 0.47329550981521606,
"learning_rate": 0.00019974173964640632,
"loss": 0.2276,
"step": 3170
},
{
"epoch": 0.1440086948645956,
"grad_norm": 0.36653953790664673,
"learning_rate": 0.0001997363335772825,
"loss": 0.2566,
"step": 3180
},
{
"epoch": 0.14446155239561634,
"grad_norm": 0.5317615866661072,
"learning_rate": 0.00019973087158577522,
"loss": 0.2684,
"step": 3190
},
{
"epoch": 0.14491440992663707,
"grad_norm": 0.5648652911186218,
"learning_rate": 0.00019972535367494698,
"loss": 0.226,
"step": 3200
},
{
"epoch": 0.14536726745765782,
"grad_norm": 0.4121849834918976,
"learning_rate": 0.00019971977984789172,
"loss": 0.257,
"step": 3210
},
{
"epoch": 0.14582012498867855,
"grad_norm": 0.7073838114738464,
"learning_rate": 0.00019971415010773473,
"loss": 0.2362,
"step": 3220
},
{
"epoch": 0.1462729825196993,
"grad_norm": 0.5668673515319824,
"learning_rate": 0.00019970846445763258,
"loss": 0.2478,
"step": 3230
},
{
"epoch": 0.14672584005072004,
"grad_norm": 0.5943505764007568,
"learning_rate": 0.0001997027229007732,
"loss": 0.2309,
"step": 3240
},
{
"epoch": 0.1471786975817408,
"grad_norm": 0.6767163872718811,
"learning_rate": 0.00019969692544037594,
"loss": 0.2482,
"step": 3250
},
{
"epoch": 0.14763155511276152,
"grad_norm": 0.5584532022476196,
"learning_rate": 0.00019969107207969144,
"loss": 0.2383,
"step": 3260
},
{
"epoch": 0.14808441264378228,
"grad_norm": 0.5313519239425659,
"learning_rate": 0.00019968516282200171,
"loss": 0.2211,
"step": 3270
},
{
"epoch": 0.148537270174803,
"grad_norm": 0.5229864120483398,
"learning_rate": 0.00019967919767062005,
"loss": 0.2616,
"step": 3280
},
{
"epoch": 0.14899012770582376,
"grad_norm": 0.4079675078392029,
"learning_rate": 0.00019967317662889113,
"loss": 0.2674,
"step": 3290
},
{
"epoch": 0.1494429852368445,
"grad_norm": 0.44019877910614014,
"learning_rate": 0.000199667099700191,
"loss": 0.241,
"step": 3300
},
{
"epoch": 0.14989584276786522,
"grad_norm": 0.6310319304466248,
"learning_rate": 0.00019966096688792695,
"loss": 0.245,
"step": 3310
},
{
"epoch": 0.15034870029888597,
"grad_norm": 0.5826947093009949,
"learning_rate": 0.0001996547781955377,
"loss": 0.2588,
"step": 3320
},
{
"epoch": 0.1508015578299067,
"grad_norm": 0.5099973678588867,
"learning_rate": 0.00019964853362649328,
"loss": 0.2528,
"step": 3330
},
{
"epoch": 0.15125441536092746,
"grad_norm": 0.5420626401901245,
"learning_rate": 0.000199642233184295,
"loss": 0.2732,
"step": 3340
},
{
"epoch": 0.15170727289194819,
"grad_norm": 0.5267677903175354,
"learning_rate": 0.00019963587687247555,
"loss": 0.2206,
"step": 3350
},
{
"epoch": 0.15216013042296894,
"grad_norm": 0.46923214197158813,
"learning_rate": 0.0001996294646945989,
"loss": 0.2431,
"step": 3360
},
{
"epoch": 0.15261298795398967,
"grad_norm": 0.5497370362281799,
"learning_rate": 0.00019962299665426042,
"loss": 0.2626,
"step": 3370
},
{
"epoch": 0.15306584548501043,
"grad_norm": 0.4924209713935852,
"learning_rate": 0.00019961647275508674,
"loss": 0.2491,
"step": 3380
},
{
"epoch": 0.15351870301603115,
"grad_norm": 0.4276602268218994,
"learning_rate": 0.00019960989300073578,
"loss": 0.261,
"step": 3390
},
{
"epoch": 0.1539715605470519,
"grad_norm": 0.45627403259277344,
"learning_rate": 0.0001996032573948969,
"loss": 0.2398,
"step": 3400
},
{
"epoch": 0.15442441807807264,
"grad_norm": 0.5323420166969299,
"learning_rate": 0.00019959656594129062,
"loss": 0.232,
"step": 3410
},
{
"epoch": 0.15487727560909337,
"grad_norm": 0.4109100103378296,
"learning_rate": 0.0001995898186436689,
"loss": 0.2539,
"step": 3420
},
{
"epoch": 0.15533013314011412,
"grad_norm": 0.7459518313407898,
"learning_rate": 0.00019958301550581495,
"loss": 0.2421,
"step": 3430
},
{
"epoch": 0.15578299067113485,
"grad_norm": 0.3366219103336334,
"learning_rate": 0.00019957615653154332,
"loss": 0.2295,
"step": 3440
},
{
"epoch": 0.1562358482021556,
"grad_norm": 0.5722688436508179,
"learning_rate": 0.00019956924172469983,
"loss": 0.2135,
"step": 3450
},
{
"epoch": 0.15668870573317634,
"grad_norm": 0.47314661741256714,
"learning_rate": 0.00019956227108916164,
"loss": 0.2399,
"step": 3460
},
{
"epoch": 0.1571415632641971,
"grad_norm": 0.40694594383239746,
"learning_rate": 0.0001995552446288372,
"loss": 0.2349,
"step": 3470
},
{
"epoch": 0.15759442079521782,
"grad_norm": 0.44671499729156494,
"learning_rate": 0.00019954816234766626,
"loss": 0.2225,
"step": 3480
},
{
"epoch": 0.15804727832623858,
"grad_norm": 0.4138961434364319,
"learning_rate": 0.00019954102424961986,
"loss": 0.241,
"step": 3490
},
{
"epoch": 0.1585001358572593,
"grad_norm": 0.40024974942207336,
"learning_rate": 0.00019953383033870033,
"loss": 0.2592,
"step": 3500
},
{
"epoch": 0.15895299338828003,
"grad_norm": 0.43061670660972595,
"learning_rate": 0.0001995265806189414,
"loss": 0.2329,
"step": 3510
},
{
"epoch": 0.1594058509193008,
"grad_norm": 0.47086796164512634,
"learning_rate": 0.00019951927509440792,
"loss": 0.2208,
"step": 3520
},
{
"epoch": 0.15985870845032152,
"grad_norm": 0.4552053213119507,
"learning_rate": 0.00019951191376919614,
"loss": 0.2148,
"step": 3530
},
{
"epoch": 0.16031156598134227,
"grad_norm": 0.5751559138298035,
"learning_rate": 0.00019950449664743358,
"loss": 0.2623,
"step": 3540
},
{
"epoch": 0.160764423512363,
"grad_norm": 0.4023667573928833,
"learning_rate": 0.000199497023733279,
"loss": 0.2547,
"step": 3550
},
{
"epoch": 0.16121728104338376,
"grad_norm": 0.37084415555000305,
"learning_rate": 0.00019948949503092252,
"loss": 0.2386,
"step": 3560
},
{
"epoch": 0.1616701385744045,
"grad_norm": 0.46690812706947327,
"learning_rate": 0.0001994819105445855,
"loss": 0.2482,
"step": 3570
},
{
"epoch": 0.16212299610542524,
"grad_norm": 0.3902026414871216,
"learning_rate": 0.00019947427027852053,
"loss": 0.2395,
"step": 3580
},
{
"epoch": 0.16257585363644597,
"grad_norm": 0.4588131010532379,
"learning_rate": 0.00019946657423701157,
"loss": 0.2341,
"step": 3590
},
{
"epoch": 0.16302871116746673,
"grad_norm": 0.6853991150856018,
"learning_rate": 0.00019945882242437382,
"loss": 0.237,
"step": 3600
},
{
"epoch": 0.16348156869848746,
"grad_norm": 0.493629515171051,
"learning_rate": 0.0001994510148449537,
"loss": 0.2584,
"step": 3610
},
{
"epoch": 0.16393442622950818,
"grad_norm": 0.33976632356643677,
"learning_rate": 0.00019944315150312894,
"loss": 0.2475,
"step": 3620
},
{
"epoch": 0.16438728376052894,
"grad_norm": 0.4430384635925293,
"learning_rate": 0.00019943523240330854,
"loss": 0.2257,
"step": 3630
},
{
"epoch": 0.16484014129154967,
"grad_norm": 0.5202488303184509,
"learning_rate": 0.00019942725754993278,
"loss": 0.2005,
"step": 3640
},
{
"epoch": 0.16529299882257042,
"grad_norm": 0.4460958242416382,
"learning_rate": 0.00019941922694747318,
"loss": 0.231,
"step": 3650
},
{
"epoch": 0.16574585635359115,
"grad_norm": 0.46459195017814636,
"learning_rate": 0.00019941114060043248,
"loss": 0.1931,
"step": 3660
},
{
"epoch": 0.1661987138846119,
"grad_norm": 0.4212459325790405,
"learning_rate": 0.0001994029985133447,
"loss": 0.2344,
"step": 3670
},
{
"epoch": 0.16665157141563264,
"grad_norm": 0.4669646620750427,
"learning_rate": 0.00019939480069077517,
"loss": 0.1844,
"step": 3680
},
{
"epoch": 0.1671044289466534,
"grad_norm": 0.40480050444602966,
"learning_rate": 0.0001993865471373204,
"loss": 0.234,
"step": 3690
},
{
"epoch": 0.16755728647767412,
"grad_norm": 0.433941513299942,
"learning_rate": 0.0001993782378576082,
"loss": 0.2179,
"step": 3700
},
{
"epoch": 0.16801014400869488,
"grad_norm": 0.39012041687965393,
"learning_rate": 0.0001993698728562976,
"loss": 0.227,
"step": 3710
},
{
"epoch": 0.1684630015397156,
"grad_norm": 0.5125910639762878,
"learning_rate": 0.00019936145213807885,
"loss": 0.2222,
"step": 3720
},
{
"epoch": 0.16891585907073633,
"grad_norm": 0.3955945074558258,
"learning_rate": 0.0001993529757076735,
"loss": 0.2464,
"step": 3730
},
{
"epoch": 0.1693687166017571,
"grad_norm": 0.5229743123054504,
"learning_rate": 0.00019934444356983427,
"loss": 0.2408,
"step": 3740
},
{
"epoch": 0.16982157413277782,
"grad_norm": 0.35481345653533936,
"learning_rate": 0.00019933585572934516,
"loss": 0.2252,
"step": 3750
},
{
"epoch": 0.17027443166379858,
"grad_norm": 0.3383258581161499,
"learning_rate": 0.0001993272121910214,
"loss": 0.2363,
"step": 3760
},
{
"epoch": 0.1707272891948193,
"grad_norm": 0.5481933951377869,
"learning_rate": 0.00019931851295970944,
"loss": 0.2396,
"step": 3770
},
{
"epoch": 0.17118014672584006,
"grad_norm": 0.603874921798706,
"learning_rate": 0.00019930975804028693,
"loss": 0.2247,
"step": 3780
},
{
"epoch": 0.1716330042568608,
"grad_norm": 0.44633063673973083,
"learning_rate": 0.00019930094743766282,
"loss": 0.2293,
"step": 3790
},
{
"epoch": 0.17208586178788154,
"grad_norm": 0.4310886561870575,
"learning_rate": 0.0001992920811567772,
"loss": 0.2277,
"step": 3800
},
{
"epoch": 0.17253871931890227,
"grad_norm": 0.4108830988407135,
"learning_rate": 0.0001992831592026014,
"loss": 0.2671,
"step": 3810
},
{
"epoch": 0.172991576849923,
"grad_norm": 0.4433537423610687,
"learning_rate": 0.00019927418158013801,
"loss": 0.2579,
"step": 3820
},
{
"epoch": 0.17344443438094376,
"grad_norm": 0.49078381061553955,
"learning_rate": 0.00019926514829442083,
"loss": 0.2291,
"step": 3830
},
{
"epoch": 0.17389729191196449,
"grad_norm": 0.32983776926994324,
"learning_rate": 0.0001992560593505148,
"loss": 0.2263,
"step": 3840
},
{
"epoch": 0.17435014944298524,
"grad_norm": 0.5339885354042053,
"learning_rate": 0.00019924691475351608,
"loss": 0.271,
"step": 3850
},
{
"epoch": 0.17480300697400597,
"grad_norm": 0.5582554936408997,
"learning_rate": 0.00019923771450855213,
"loss": 0.2433,
"step": 3860
},
{
"epoch": 0.17525586450502673,
"grad_norm": 0.5500646233558655,
"learning_rate": 0.0001992284586207815,
"loss": 0.2539,
"step": 3870
},
{
"epoch": 0.17570872203604745,
"grad_norm": 0.40870654582977295,
"learning_rate": 0.00019921914709539404,
"loss": 0.2474,
"step": 3880
},
{
"epoch": 0.1761615795670682,
"grad_norm": 0.5105723142623901,
"learning_rate": 0.00019920977993761072,
"loss": 0.2718,
"step": 3890
},
{
"epoch": 0.17661443709808894,
"grad_norm": 0.5596238970756531,
"learning_rate": 0.0001992003571526837,
"loss": 0.2524,
"step": 3900
},
{
"epoch": 0.1770672946291097,
"grad_norm": 0.4464496374130249,
"learning_rate": 0.00019919087874589638,
"loss": 0.2168,
"step": 3910
},
{
"epoch": 0.17752015216013042,
"grad_norm": 0.5252407789230347,
"learning_rate": 0.0001991813447225633,
"loss": 0.2328,
"step": 3920
},
{
"epoch": 0.17797300969115115,
"grad_norm": 0.45272043347358704,
"learning_rate": 0.00019917175508803026,
"loss": 0.2425,
"step": 3930
},
{
"epoch": 0.1784258672221719,
"grad_norm": 0.4741479158401489,
"learning_rate": 0.00019916210984767413,
"loss": 0.2529,
"step": 3940
},
{
"epoch": 0.17887872475319264,
"grad_norm": 0.5444139838218689,
"learning_rate": 0.00019915240900690307,
"loss": 0.2566,
"step": 3950
},
{
"epoch": 0.1793315822842134,
"grad_norm": 0.5768086314201355,
"learning_rate": 0.00019914265257115634,
"loss": 0.251,
"step": 3960
},
{
"epoch": 0.17978443981523412,
"grad_norm": 0.5692720413208008,
"learning_rate": 0.00019913284054590434,
"loss": 0.271,
"step": 3970
},
{
"epoch": 0.18023729734625488,
"grad_norm": 0.5857642292976379,
"learning_rate": 0.0001991229729366488,
"loss": 0.2225,
"step": 3980
},
{
"epoch": 0.1806901548772756,
"grad_norm": 0.3476020395755768,
"learning_rate": 0.00019911304974892243,
"loss": 0.2377,
"step": 3990
},
{
"epoch": 0.18114301240829636,
"grad_norm": 0.4105261564254761,
"learning_rate": 0.00019910307098828923,
"loss": 0.2168,
"step": 4000
},
{
"epoch": 0.18114301240829636,
"eval_chrf": 76.70440143862535,
"eval_loss": 0.1951996386051178,
"eval_runtime": 13.6389,
"eval_samples_per_second": 0.733,
"eval_steps_per_second": 0.073,
"step": 4000
},
{
"epoch": 0.1815958699393171,
"grad_norm": 0.44274958968162537,
"learning_rate": 0.0001990930366603443,
"loss": 0.2442,
"step": 4010
},
{
"epoch": 0.18204872747033785,
"grad_norm": 0.4935576617717743,
"learning_rate": 0.00019908294677071386,
"loss": 0.2184,
"step": 4020
},
{
"epoch": 0.18250158500135857,
"grad_norm": 0.4292125403881073,
"learning_rate": 0.00019907280132505542,
"loss": 0.2155,
"step": 4030
},
{
"epoch": 0.1829544425323793,
"grad_norm": 0.49460700154304504,
"learning_rate": 0.00019906260032905747,
"loss": 0.2028,
"step": 4040
},
{
"epoch": 0.18340730006340006,
"grad_norm": 0.4970226585865021,
"learning_rate": 0.00019905234378843982,
"loss": 0.2134,
"step": 4050
},
{
"epoch": 0.1838601575944208,
"grad_norm": 0.4835842251777649,
"learning_rate": 0.00019904203170895325,
"loss": 0.2337,
"step": 4060
},
{
"epoch": 0.18431301512544154,
"grad_norm": 0.47777554392814636,
"learning_rate": 0.00019903166409637982,
"loss": 0.2006,
"step": 4070
},
{
"epoch": 0.18476587265646227,
"grad_norm": 0.39269423484802246,
"learning_rate": 0.00019902124095653268,
"loss": 0.2084,
"step": 4080
},
{
"epoch": 0.18521873018748303,
"grad_norm": 0.4256626069545746,
"learning_rate": 0.00019901076229525603,
"loss": 0.227,
"step": 4090
},
{
"epoch": 0.18567158771850376,
"grad_norm": 0.4103119373321533,
"learning_rate": 0.00019900022811842537,
"loss": 0.2563,
"step": 4100
},
{
"epoch": 0.1861244452495245,
"grad_norm": 0.4316677749156952,
"learning_rate": 0.00019898963843194718,
"loss": 0.2257,
"step": 4110
},
{
"epoch": 0.18657730278054524,
"grad_norm": 0.41959917545318604,
"learning_rate": 0.00019897899324175915,
"loss": 0.2369,
"step": 4120
},
{
"epoch": 0.18703016031156597,
"grad_norm": 0.5151909589767456,
"learning_rate": 0.00019896829255383006,
"loss": 0.2132,
"step": 4130
},
{
"epoch": 0.18748301784258672,
"grad_norm": 0.5552547574043274,
"learning_rate": 0.00019895753637415978,
"loss": 0.2321,
"step": 4140
},
{
"epoch": 0.18793587537360745,
"grad_norm": 0.6295343637466431,
"learning_rate": 0.00019894672470877934,
"loss": 0.2465,
"step": 4150
},
{
"epoch": 0.1883887329046282,
"grad_norm": 0.4910910427570343,
"learning_rate": 0.0001989358575637509,
"loss": 0.2113,
"step": 4160
},
{
"epoch": 0.18884159043564894,
"grad_norm": 0.6068474054336548,
"learning_rate": 0.00019892493494516765,
"loss": 0.2362,
"step": 4170
},
{
"epoch": 0.1892944479666697,
"grad_norm": 0.49513566493988037,
"learning_rate": 0.0001989139568591539,
"loss": 0.2156,
"step": 4180
},
{
"epoch": 0.18974730549769042,
"grad_norm": 0.5288516879081726,
"learning_rate": 0.00019890292331186518,
"loss": 0.2071,
"step": 4190
},
{
"epoch": 0.19020016302871118,
"grad_norm": 0.4332873225212097,
"learning_rate": 0.00019889183430948795,
"loss": 0.24,
"step": 4200
},
{
"epoch": 0.1906530205597319,
"grad_norm": 0.4727018177509308,
"learning_rate": 0.00019888068985823986,
"loss": 0.2216,
"step": 4210
},
{
"epoch": 0.19110587809075266,
"grad_norm": 0.4223858118057251,
"learning_rate": 0.00019886948996436962,
"loss": 0.2167,
"step": 4220
},
{
"epoch": 0.1915587356217734,
"grad_norm": 0.5059617161750793,
"learning_rate": 0.00019885823463415702,
"loss": 0.218,
"step": 4230
},
{
"epoch": 0.19201159315279412,
"grad_norm": 0.38767218589782715,
"learning_rate": 0.00019884692387391303,
"loss": 0.2405,
"step": 4240
},
{
"epoch": 0.19246445068381487,
"grad_norm": 0.4692607522010803,
"learning_rate": 0.0001988355576899795,
"loss": 0.214,
"step": 4250
},
{
"epoch": 0.1929173082148356,
"grad_norm": 0.5011858344078064,
"learning_rate": 0.00019882413608872957,
"loss": 0.2359,
"step": 4260
},
{
"epoch": 0.19337016574585636,
"grad_norm": 0.3452933132648468,
"learning_rate": 0.0001988126590765673,
"loss": 0.2068,
"step": 4270
},
{
"epoch": 0.1938230232768771,
"grad_norm": 0.5500070452690125,
"learning_rate": 0.0001988011266599279,
"loss": 0.2388,
"step": 4280
},
{
"epoch": 0.19427588080789784,
"grad_norm": 0.4085341989994049,
"learning_rate": 0.0001987895388452776,
"loss": 0.2222,
"step": 4290
},
{
"epoch": 0.19472873833891857,
"grad_norm": 0.41355833411216736,
"learning_rate": 0.0001987778956391138,
"loss": 0.2117,
"step": 4300
},
{
"epoch": 0.19518159586993933,
"grad_norm": 0.4159574806690216,
"learning_rate": 0.00019876619704796474,
"loss": 0.2261,
"step": 4310
},
{
"epoch": 0.19563445340096006,
"grad_norm": 0.5774794220924377,
"learning_rate": 0.0001987544430783899,
"loss": 0.2134,
"step": 4320
},
{
"epoch": 0.19608731093198078,
"grad_norm": 0.5641900300979614,
"learning_rate": 0.00019874263373697982,
"loss": 0.2382,
"step": 4330
},
{
"epoch": 0.19654016846300154,
"grad_norm": 0.4016419053077698,
"learning_rate": 0.00019873076903035593,
"loss": 0.2298,
"step": 4340
},
{
"epoch": 0.19699302599402227,
"grad_norm": 0.3750913739204407,
"learning_rate": 0.00019871884896517086,
"loss": 0.2013,
"step": 4350
},
{
"epoch": 0.19744588352504303,
"grad_norm": 0.5473160743713379,
"learning_rate": 0.00019870687354810816,
"loss": 0.2275,
"step": 4360
},
{
"epoch": 0.19789874105606375,
"grad_norm": 0.4866637885570526,
"learning_rate": 0.00019869484278588255,
"loss": 0.2438,
"step": 4370
},
{
"epoch": 0.1983515985870845,
"grad_norm": 0.43724608421325684,
"learning_rate": 0.00019868275668523963,
"loss": 0.2374,
"step": 4380
},
{
"epoch": 0.19880445611810524,
"grad_norm": 0.4235040843486786,
"learning_rate": 0.00019867061525295616,
"loss": 0.2229,
"step": 4390
},
{
"epoch": 0.199257313649126,
"grad_norm": 0.5863602757453918,
"learning_rate": 0.0001986584184958398,
"loss": 0.2257,
"step": 4400
},
{
"epoch": 0.19971017118014672,
"grad_norm": 0.4537600576877594,
"learning_rate": 0.0001986461664207294,
"loss": 0.2389,
"step": 4410
},
{
"epoch": 0.20016302871116748,
"grad_norm": 0.4225648045539856,
"learning_rate": 0.00019863385903449464,
"loss": 0.2107,
"step": 4420
},
{
"epoch": 0.2006158862421882,
"grad_norm": 0.49402421712875366,
"learning_rate": 0.00019862149634403635,
"loss": 0.1887,
"step": 4430
},
{
"epoch": 0.20106874377320894,
"grad_norm": 0.5091655850410461,
"learning_rate": 0.00019860907835628626,
"loss": 0.2358,
"step": 4440
},
{
"epoch": 0.2015216013042297,
"grad_norm": 0.5644922852516174,
"learning_rate": 0.0001985966050782072,
"loss": 0.2566,
"step": 4450
},
{
"epoch": 0.20197445883525042,
"grad_norm": 0.4026089608669281,
"learning_rate": 0.00019858407651679298,
"loss": 0.239,
"step": 4460
},
{
"epoch": 0.20242731636627118,
"grad_norm": 0.46105530858039856,
"learning_rate": 0.00019857149267906837,
"loss": 0.2266,
"step": 4470
},
{
"epoch": 0.2028801738972919,
"grad_norm": 0.3332107663154602,
"learning_rate": 0.00019855885357208917,
"loss": 0.2049,
"step": 4480
},
{
"epoch": 0.20333303142831266,
"grad_norm": 0.491138219833374,
"learning_rate": 0.00019854615920294214,
"loss": 0.2296,
"step": 4490
},
{
"epoch": 0.2037858889593334,
"grad_norm": 0.4568822979927063,
"learning_rate": 0.00019853340957874506,
"loss": 0.225,
"step": 4500
},
{
"epoch": 0.20423874649035414,
"grad_norm": 0.5473847985267639,
"learning_rate": 0.00019852060470664662,
"loss": 0.2509,
"step": 4510
},
{
"epoch": 0.20469160402137487,
"grad_norm": 0.40623512864112854,
"learning_rate": 0.0001985077445938266,
"loss": 0.2033,
"step": 4520
},
{
"epoch": 0.20514446155239563,
"grad_norm": 0.44612765312194824,
"learning_rate": 0.0001984948292474957,
"loss": 0.2458,
"step": 4530
},
{
"epoch": 0.20559731908341636,
"grad_norm": 0.4710107147693634,
"learning_rate": 0.0001984818586748955,
"loss": 0.2106,
"step": 4540
},
{
"epoch": 0.20605017661443709,
"grad_norm": 0.4278642237186432,
"learning_rate": 0.00019846883288329874,
"loss": 0.2121,
"step": 4550
},
{
"epoch": 0.20650303414545784,
"grad_norm": 0.48352622985839844,
"learning_rate": 0.00019845575188000893,
"loss": 0.2213,
"step": 4560
},
{
"epoch": 0.20695589167647857,
"grad_norm": 0.5382823944091797,
"learning_rate": 0.00019844261567236065,
"loss": 0.233,
"step": 4570
},
{
"epoch": 0.20740874920749933,
"grad_norm": 0.38672780990600586,
"learning_rate": 0.00019842942426771937,
"loss": 0.2258,
"step": 4580
},
{
"epoch": 0.20786160673852005,
"grad_norm": 0.45074501633644104,
"learning_rate": 0.0001984161776734816,
"loss": 0.2112,
"step": 4590
},
{
"epoch": 0.2083144642695408,
"grad_norm": 0.37882986664772034,
"learning_rate": 0.00019840287589707468,
"loss": 0.219,
"step": 4600
},
{
"epoch": 0.20876732180056154,
"grad_norm": 0.4186610281467438,
"learning_rate": 0.000198389518945957,
"loss": 0.2079,
"step": 4610
},
{
"epoch": 0.2092201793315823,
"grad_norm": 0.35181114077568054,
"learning_rate": 0.00019837610682761777,
"loss": 0.2062,
"step": 4620
},
{
"epoch": 0.20967303686260302,
"grad_norm": 0.38587722182273865,
"learning_rate": 0.0001983626395495772,
"loss": 0.2467,
"step": 4630
},
{
"epoch": 0.21012589439362375,
"grad_norm": 0.7229722738265991,
"learning_rate": 0.00019834911711938644,
"loss": 0.2219,
"step": 4640
},
{
"epoch": 0.2105787519246445,
"grad_norm": 0.5380842685699463,
"learning_rate": 0.00019833553954462757,
"loss": 0.2421,
"step": 4650
},
{
"epoch": 0.21103160945566524,
"grad_norm": 0.4756489396095276,
"learning_rate": 0.00019832190683291356,
"loss": 0.2093,
"step": 4660
},
{
"epoch": 0.211484466986686,
"grad_norm": 0.5300980806350708,
"learning_rate": 0.0001983082189918883,
"loss": 0.2367,
"step": 4670
},
{
"epoch": 0.21193732451770672,
"grad_norm": 0.49144670367240906,
"learning_rate": 0.00019829447602922654,
"loss": 0.2001,
"step": 4680
},
{
"epoch": 0.21239018204872748,
"grad_norm": 0.45265018939971924,
"learning_rate": 0.00019828067795263406,
"loss": 0.23,
"step": 4690
},
{
"epoch": 0.2128430395797482,
"grad_norm": 0.4063917398452759,
"learning_rate": 0.00019826682476984742,
"loss": 0.21,
"step": 4700
},
{
"epoch": 0.21329589711076896,
"grad_norm": 0.36459800601005554,
"learning_rate": 0.00019825291648863414,
"loss": 0.2116,
"step": 4710
},
{
"epoch": 0.2137487546417897,
"grad_norm": 0.4962182641029358,
"learning_rate": 0.00019823895311679268,
"loss": 0.2137,
"step": 4720
},
{
"epoch": 0.21420161217281045,
"grad_norm": 0.5544801354408264,
"learning_rate": 0.00019822493466215227,
"loss": 0.2377,
"step": 4730
},
{
"epoch": 0.21465446970383117,
"grad_norm": 0.4859352111816406,
"learning_rate": 0.00019821086113257311,
"loss": 0.2308,
"step": 4740
},
{
"epoch": 0.2151073272348519,
"grad_norm": 0.8023104667663574,
"learning_rate": 0.00019819673253594627,
"loss": 0.2164,
"step": 4750
},
{
"epoch": 0.21556018476587266,
"grad_norm": 0.37945619225502014,
"learning_rate": 0.00019818254888019366,
"loss": 0.2158,
"step": 4760
},
{
"epoch": 0.2160130422968934,
"grad_norm": 0.375491738319397,
"learning_rate": 0.00019816831017326812,
"loss": 0.2258,
"step": 4770
},
{
"epoch": 0.21646589982791414,
"grad_norm": 0.66087806224823,
"learning_rate": 0.00019815401642315328,
"loss": 0.2385,
"step": 4780
},
{
"epoch": 0.21691875735893487,
"grad_norm": 0.5332721471786499,
"learning_rate": 0.0001981396676378637,
"loss": 0.1996,
"step": 4790
},
{
"epoch": 0.21737161488995563,
"grad_norm": 0.37998682260513306,
"learning_rate": 0.0001981252638254448,
"loss": 0.1871,
"step": 4800
},
{
"epoch": 0.21782447242097636,
"grad_norm": 0.3764188587665558,
"learning_rate": 0.00019811080499397283,
"loss": 0.2142,
"step": 4810
},
{
"epoch": 0.2182773299519971,
"grad_norm": 0.48812979459762573,
"learning_rate": 0.00019809629115155483,
"loss": 0.2152,
"step": 4820
},
{
"epoch": 0.21873018748301784,
"grad_norm": 0.4503139853477478,
"learning_rate": 0.0001980817223063288,
"loss": 0.241,
"step": 4830
},
{
"epoch": 0.2191830450140386,
"grad_norm": 0.4879043400287628,
"learning_rate": 0.00019806709846646348,
"loss": 0.2272,
"step": 4840
},
{
"epoch": 0.21963590254505932,
"grad_norm": 0.4850088655948639,
"learning_rate": 0.00019805241964015853,
"loss": 0.2142,
"step": 4850
},
{
"epoch": 0.22008876007608005,
"grad_norm": 0.44978779554367065,
"learning_rate": 0.00019803768583564438,
"loss": 0.2161,
"step": 4860
},
{
"epoch": 0.2205416176071008,
"grad_norm": 0.4910193383693695,
"learning_rate": 0.00019802289706118234,
"loss": 0.2432,
"step": 4870
},
{
"epoch": 0.22099447513812154,
"grad_norm": 0.40853068232536316,
"learning_rate": 0.00019800805332506448,
"loss": 0.2263,
"step": 4880
},
{
"epoch": 0.2214473326691423,
"grad_norm": 0.4698120951652527,
"learning_rate": 0.00019799315463561368,
"loss": 0.2118,
"step": 4890
},
{
"epoch": 0.22190019020016302,
"grad_norm": 0.5427727103233337,
"learning_rate": 0.0001979782010011837,
"loss": 0.1948,
"step": 4900
},
{
"epoch": 0.22235304773118378,
"grad_norm": 0.3942580223083496,
"learning_rate": 0.00019796319243015913,
"loss": 0.203,
"step": 4910
},
{
"epoch": 0.2228059052622045,
"grad_norm": 0.4781436622142792,
"learning_rate": 0.00019794812893095522,
"loss": 0.2003,
"step": 4920
},
{
"epoch": 0.22325876279322526,
"grad_norm": 0.4551750719547272,
"learning_rate": 0.00019793301051201817,
"loss": 0.2366,
"step": 4930
},
{
"epoch": 0.223711620324246,
"grad_norm": 0.3415755033493042,
"learning_rate": 0.00019791783718182485,
"loss": 0.2003,
"step": 4940
},
{
"epoch": 0.22416447785526672,
"grad_norm": 0.47352227568626404,
"learning_rate": 0.00019790260894888306,
"loss": 0.217,
"step": 4950
},
{
"epoch": 0.22461733538628748,
"grad_norm": 0.6360595226287842,
"learning_rate": 0.00019788732582173127,
"loss": 0.2358,
"step": 4960
},
{
"epoch": 0.2250701929173082,
"grad_norm": 0.48768046498298645,
"learning_rate": 0.0001978719878089387,
"loss": 0.2213,
"step": 4970
},
{
"epoch": 0.22552305044832896,
"grad_norm": 0.4766073226928711,
"learning_rate": 0.00019785659491910549,
"loss": 0.208,
"step": 4980
},
{
"epoch": 0.2259759079793497,
"grad_norm": 0.44524306058883667,
"learning_rate": 0.00019784114716086247,
"loss": 0.2219,
"step": 4990
},
{
"epoch": 0.22642876551037044,
"grad_norm": 0.44435834884643555,
"learning_rate": 0.00019782564454287117,
"loss": 0.2297,
"step": 5000
},
{
"epoch": 0.22642876551037044,
"eval_chrf": 72.84821817627648,
"eval_loss": 0.17898276448249817,
"eval_runtime": 26.7337,
"eval_samples_per_second": 0.374,
"eval_steps_per_second": 0.037,
"step": 5000
},
{
"epoch": 0.22688162304139117,
"grad_norm": 0.3903729319572449,
"learning_rate": 0.00019781008707382399,
"loss": 0.2442,
"step": 5010
},
{
"epoch": 0.22733448057241193,
"grad_norm": 0.3812226355075836,
"learning_rate": 0.000197794474762444,
"loss": 0.2457,
"step": 5020
},
{
"epoch": 0.22778733810343266,
"grad_norm": 0.47868451476097107,
"learning_rate": 0.00019777880761748508,
"loss": 0.235,
"step": 5030
},
{
"epoch": 0.2282401956344534,
"grad_norm": 0.49971717596054077,
"learning_rate": 0.00019776308564773188,
"loss": 0.2117,
"step": 5040
},
{
"epoch": 0.22869305316547414,
"grad_norm": 0.30920127034187317,
"learning_rate": 0.00019774730886199966,
"loss": 0.2015,
"step": 5050
},
{
"epoch": 0.22914591069649487,
"grad_norm": 0.40997618436813354,
"learning_rate": 0.00019773147726913454,
"loss": 0.2125,
"step": 5060
},
{
"epoch": 0.22959876822751563,
"grad_norm": 0.5081835985183716,
"learning_rate": 0.00019771559087801332,
"loss": 0.2154,
"step": 5070
},
{
"epoch": 0.23005162575853635,
"grad_norm": 0.5434297919273376,
"learning_rate": 0.00019769964969754353,
"loss": 0.2285,
"step": 5080
},
{
"epoch": 0.2305044832895571,
"grad_norm": 0.5638498663902283,
"learning_rate": 0.00019768365373666347,
"loss": 0.2377,
"step": 5090
},
{
"epoch": 0.23095734082057784,
"grad_norm": 0.4750683307647705,
"learning_rate": 0.00019766760300434207,
"loss": 0.2125,
"step": 5100
},
{
"epoch": 0.2314101983515986,
"grad_norm": 0.6645839810371399,
"learning_rate": 0.000197651497509579,
"loss": 0.2027,
"step": 5110
},
{
"epoch": 0.23186305588261932,
"grad_norm": 0.6112692952156067,
"learning_rate": 0.00019763533726140464,
"loss": 0.2238,
"step": 5120
},
{
"epoch": 0.23231591341364008,
"grad_norm": 0.8351989984512329,
"learning_rate": 0.00019761912226888014,
"loss": 0.2138,
"step": 5130
},
{
"epoch": 0.2327687709446608,
"grad_norm": 0.5615606904029846,
"learning_rate": 0.00019760285254109722,
"loss": 0.2152,
"step": 5140
},
{
"epoch": 0.23322162847568156,
"grad_norm": 0.5717998743057251,
"learning_rate": 0.00019758652808717837,
"loss": 0.2377,
"step": 5150
},
{
"epoch": 0.2336744860067023,
"grad_norm": 0.7914870381355286,
"learning_rate": 0.00019757014891627676,
"loss": 0.2319,
"step": 5160
},
{
"epoch": 0.23412734353772302,
"grad_norm": 0.43396610021591187,
"learning_rate": 0.00019755371503757624,
"loss": 0.2221,
"step": 5170
},
{
"epoch": 0.23458020106874378,
"grad_norm": 0.42183613777160645,
"learning_rate": 0.00019753722646029126,
"loss": 0.19,
"step": 5180
},
{
"epoch": 0.2350330585997645,
"grad_norm": 0.373030424118042,
"learning_rate": 0.00019752068319366708,
"loss": 0.2196,
"step": 5190
},
{
"epoch": 0.23548591613078526,
"grad_norm": 0.4341379702091217,
"learning_rate": 0.00019750408524697946,
"loss": 0.2207,
"step": 5200
},
{
"epoch": 0.235938773661806,
"grad_norm": 0.3972753882408142,
"learning_rate": 0.00019748743262953502,
"loss": 0.2316,
"step": 5210
},
{
"epoch": 0.23639163119282675,
"grad_norm": 0.3169364631175995,
"learning_rate": 0.00019747072535067082,
"loss": 0.2222,
"step": 5220
},
{
"epoch": 0.23684448872384747,
"grad_norm": 0.4205416440963745,
"learning_rate": 0.00019745396341975472,
"loss": 0.2303,
"step": 5230
},
{
"epoch": 0.23729734625486823,
"grad_norm": 0.4518096148967743,
"learning_rate": 0.00019743714684618516,
"loss": 0.257,
"step": 5240
},
{
"epoch": 0.23775020378588896,
"grad_norm": 0.5455726385116577,
"learning_rate": 0.0001974202756393912,
"loss": 0.2335,
"step": 5250
},
{
"epoch": 0.2382030613169097,
"grad_norm": 0.5465617179870605,
"learning_rate": 0.0001974033498088326,
"loss": 0.2136,
"step": 5260
},
{
"epoch": 0.23865591884793044,
"grad_norm": 0.4863675832748413,
"learning_rate": 0.0001973863693639997,
"loss": 0.2272,
"step": 5270
},
{
"epoch": 0.23910877637895117,
"grad_norm": 0.6372243165969849,
"learning_rate": 0.0001973693343144135,
"loss": 0.221,
"step": 5280
},
{
"epoch": 0.23956163390997193,
"grad_norm": 0.4402875304222107,
"learning_rate": 0.00019735224466962552,
"loss": 0.2056,
"step": 5290
},
{
"epoch": 0.24001449144099266,
"grad_norm": 0.4618695080280304,
"learning_rate": 0.000197335100439218,
"loss": 0.2339,
"step": 5300
},
{
"epoch": 0.2404673489720134,
"grad_norm": 0.5031725764274597,
"learning_rate": 0.00019731790163280376,
"loss": 0.2137,
"step": 5310
},
{
"epoch": 0.24092020650303414,
"grad_norm": 0.34283286333084106,
"learning_rate": 0.00019730064826002622,
"loss": 0.2295,
"step": 5320
},
{
"epoch": 0.2413730640340549,
"grad_norm": 0.5465356707572937,
"learning_rate": 0.00019728334033055936,
"loss": 0.2232,
"step": 5330
},
{
"epoch": 0.24182592156507562,
"grad_norm": 0.34146648645401,
"learning_rate": 0.00019726597785410782,
"loss": 0.2163,
"step": 5340
},
{
"epoch": 0.24227877909609638,
"grad_norm": 0.41163069009780884,
"learning_rate": 0.00019724856084040666,
"loss": 0.2093,
"step": 5350
},
{
"epoch": 0.2427316366271171,
"grad_norm": 0.30183130502700806,
"learning_rate": 0.00019723108929922177,
"loss": 0.2015,
"step": 5360
},
{
"epoch": 0.24318449415813784,
"grad_norm": 0.5386610627174377,
"learning_rate": 0.00019721356324034942,
"loss": 0.208,
"step": 5370
},
{
"epoch": 0.2436373516891586,
"grad_norm": 0.4824809730052948,
"learning_rate": 0.0001971959826736165,
"loss": 0.1962,
"step": 5380
},
{
"epoch": 0.24409020922017932,
"grad_norm": 0.40670323371887207,
"learning_rate": 0.0001971783476088805,
"loss": 0.2423,
"step": 5390
},
{
"epoch": 0.24454306675120008,
"grad_norm": 0.8714826703071594,
"learning_rate": 0.00019716065805602947,
"loss": 0.2203,
"step": 5400
},
{
"epoch": 0.2449959242822208,
"grad_norm": 0.43796902894973755,
"learning_rate": 0.00019714291402498187,
"loss": 0.2068,
"step": 5410
},
{
"epoch": 0.24544878181324156,
"grad_norm": 0.4488067924976349,
"learning_rate": 0.00019712511552568692,
"loss": 0.1948,
"step": 5420
},
{
"epoch": 0.2459016393442623,
"grad_norm": 0.3524841070175171,
"learning_rate": 0.00019710726256812427,
"loss": 0.1947,
"step": 5430
},
{
"epoch": 0.24635449687528305,
"grad_norm": 0.4532184898853302,
"learning_rate": 0.0001970893551623041,
"loss": 0.2044,
"step": 5440
},
{
"epoch": 0.24680735440630377,
"grad_norm": 0.49360191822052,
"learning_rate": 0.0001970713933182671,
"loss": 0.2134,
"step": 5450
},
{
"epoch": 0.24726021193732453,
"grad_norm": 0.3581199645996094,
"learning_rate": 0.00019705337704608455,
"loss": 0.2654,
"step": 5460
},
{
"epoch": 0.24771306946834526,
"grad_norm": 0.47226762771606445,
"learning_rate": 0.0001970353063558582,
"loss": 0.2431,
"step": 5470
},
{
"epoch": 0.248165926999366,
"grad_norm": 0.5614981055259705,
"learning_rate": 0.00019701718125772033,
"loss": 0.2333,
"step": 5480
},
{
"epoch": 0.24861878453038674,
"grad_norm": 0.3719375729560852,
"learning_rate": 0.00019699900176183374,
"loss": 0.213,
"step": 5490
},
{
"epoch": 0.24907164206140747,
"grad_norm": 0.4844624698162079,
"learning_rate": 0.00019698076787839165,
"loss": 0.2224,
"step": 5500
},
{
"epoch": 0.24952449959242823,
"grad_norm": 0.44172951579093933,
"learning_rate": 0.0001969624796176179,
"loss": 0.2274,
"step": 5510
},
{
"epoch": 0.24997735712344896,
"grad_norm": 0.5217725038528442,
"learning_rate": 0.00019694413698976678,
"loss": 0.2247,
"step": 5520
},
{
"epoch": 0.2504302146544697,
"grad_norm": 0.47444963455200195,
"learning_rate": 0.000196925740005123,
"loss": 0.2103,
"step": 5530
},
{
"epoch": 0.25088307218549044,
"grad_norm": 0.5168370008468628,
"learning_rate": 0.00019690728867400177,
"loss": 0.1976,
"step": 5540
},
{
"epoch": 0.2513359297165112,
"grad_norm": 0.4875032603740692,
"learning_rate": 0.0001968887830067488,
"loss": 0.2089,
"step": 5550
},
{
"epoch": 0.25178878724753195,
"grad_norm": 0.486665278673172,
"learning_rate": 0.0001968702230137403,
"loss": 0.2067,
"step": 5560
},
{
"epoch": 0.25224164477855265,
"grad_norm": 0.5231538414955139,
"learning_rate": 0.0001968516087053829,
"loss": 0.2264,
"step": 5570
},
{
"epoch": 0.2526945023095734,
"grad_norm": 0.5574133992195129,
"learning_rate": 0.0001968329400921136,
"loss": 0.2344,
"step": 5580
},
{
"epoch": 0.25314735984059417,
"grad_norm": 0.322848379611969,
"learning_rate": 0.00019681421718440004,
"loss": 0.2059,
"step": 5590
},
{
"epoch": 0.25360021737161487,
"grad_norm": 0.31106603145599365,
"learning_rate": 0.00019679543999274015,
"loss": 0.2006,
"step": 5600
},
{
"epoch": 0.2540530749026356,
"grad_norm": 0.453117311000824,
"learning_rate": 0.00019677660852766233,
"loss": 0.2292,
"step": 5610
},
{
"epoch": 0.2545059324336564,
"grad_norm": 0.4585261940956116,
"learning_rate": 0.00019675772279972544,
"loss": 0.2288,
"step": 5620
},
{
"epoch": 0.25495878996467713,
"grad_norm": 0.4391018748283386,
"learning_rate": 0.00019673878281951875,
"loss": 0.2196,
"step": 5630
},
{
"epoch": 0.25541164749569784,
"grad_norm": 0.5377073884010315,
"learning_rate": 0.00019671978859766193,
"loss": 0.2516,
"step": 5640
},
{
"epoch": 0.2558645050267186,
"grad_norm": 0.4374406635761261,
"learning_rate": 0.0001967007401448051,
"loss": 0.2289,
"step": 5650
},
{
"epoch": 0.25631736255773935,
"grad_norm": 0.46719449758529663,
"learning_rate": 0.00019668163747162874,
"loss": 0.2231,
"step": 5660
},
{
"epoch": 0.25677022008876005,
"grad_norm": 0.38420817255973816,
"learning_rate": 0.00019666248058884385,
"loss": 0.2164,
"step": 5670
},
{
"epoch": 0.2572230776197808,
"grad_norm": 0.4629984498023987,
"learning_rate": 0.00019664326950719164,
"loss": 0.214,
"step": 5680
},
{
"epoch": 0.25767593515080156,
"grad_norm": 0.4738280773162842,
"learning_rate": 0.00019662400423744382,
"loss": 0.1882,
"step": 5690
},
{
"epoch": 0.2581287926818223,
"grad_norm": 0.5236591696739197,
"learning_rate": 0.00019660468479040251,
"loss": 0.2007,
"step": 5700
},
{
"epoch": 0.258581650212843,
"grad_norm": 0.449022114276886,
"learning_rate": 0.00019658531117690018,
"loss": 0.2009,
"step": 5710
},
{
"epoch": 0.2590345077438638,
"grad_norm": 0.4621582329273224,
"learning_rate": 0.00019656588340779958,
"loss": 0.191,
"step": 5720
},
{
"epoch": 0.25948736527488453,
"grad_norm": 0.3730362057685852,
"learning_rate": 0.00019654640149399397,
"loss": 0.2061,
"step": 5730
},
{
"epoch": 0.2599402228059053,
"grad_norm": 0.4392858147621155,
"learning_rate": 0.00019652686544640685,
"loss": 0.2019,
"step": 5740
},
{
"epoch": 0.260393080336926,
"grad_norm": 0.41935303807258606,
"learning_rate": 0.0001965072752759922,
"loss": 0.225,
"step": 5750
},
{
"epoch": 0.26084593786794674,
"grad_norm": 0.39728057384490967,
"learning_rate": 0.0001964876309937342,
"loss": 0.2143,
"step": 5760
},
{
"epoch": 0.2612987953989675,
"grad_norm": 0.38051551580429077,
"learning_rate": 0.00019646793261064746,
"loss": 0.2104,
"step": 5770
},
{
"epoch": 0.2617516529299882,
"grad_norm": 0.45690861344337463,
"learning_rate": 0.00019644818013777693,
"loss": 0.2265,
"step": 5780
},
{
"epoch": 0.26220451046100895,
"grad_norm": 2.40419864654541,
"learning_rate": 0.00019642837358619785,
"loss": 0.191,
"step": 5790
},
{
"epoch": 0.2626573679920297,
"grad_norm": 0.42658358812332153,
"learning_rate": 0.0001964085129670158,
"loss": 0.2098,
"step": 5800
},
{
"epoch": 0.26311022552305047,
"grad_norm": 0.3833671808242798,
"learning_rate": 0.00019638859829136668,
"loss": 0.2034,
"step": 5810
},
{
"epoch": 0.26356308305407117,
"grad_norm": 0.4116675853729248,
"learning_rate": 0.0001963686295704167,
"loss": 0.2212,
"step": 5820
},
{
"epoch": 0.2640159405850919,
"grad_norm": 0.39625871181488037,
"learning_rate": 0.00019634860681536233,
"loss": 0.2281,
"step": 5830
},
{
"epoch": 0.2644687981161127,
"grad_norm": 0.4207947552204132,
"learning_rate": 0.0001963285300374304,
"loss": 0.2353,
"step": 5840
},
{
"epoch": 0.26492165564713344,
"grad_norm": 0.3432312607765198,
"learning_rate": 0.000196308399247878,
"loss": 0.2049,
"step": 5850
},
{
"epoch": 0.26537451317815414,
"grad_norm": 0.43867117166519165,
"learning_rate": 0.0001962882144579925,
"loss": 0.2138,
"step": 5860
},
{
"epoch": 0.2658273707091749,
"grad_norm": 0.39972978830337524,
"learning_rate": 0.00019626797567909158,
"loss": 0.2073,
"step": 5870
},
{
"epoch": 0.26628022824019565,
"grad_norm": 0.46662867069244385,
"learning_rate": 0.00019624768292252314,
"loss": 0.1944,
"step": 5880
},
{
"epoch": 0.26673308577121635,
"grad_norm": 0.47923582792282104,
"learning_rate": 0.0001962273361996654,
"loss": 0.2099,
"step": 5890
},
{
"epoch": 0.2671859433022371,
"grad_norm": 0.45202603936195374,
"learning_rate": 0.00019620693552192678,
"loss": 0.2042,
"step": 5900
},
{
"epoch": 0.26763880083325786,
"grad_norm": 0.36669155955314636,
"learning_rate": 0.00019618648090074603,
"loss": 0.1883,
"step": 5910
},
{
"epoch": 0.2680916583642786,
"grad_norm": 0.3664119243621826,
"learning_rate": 0.00019616597234759205,
"loss": 0.2061,
"step": 5920
},
{
"epoch": 0.2685445158952993,
"grad_norm": 0.2816719114780426,
"learning_rate": 0.0001961454098739641,
"loss": 0.2334,
"step": 5930
},
{
"epoch": 0.2689973734263201,
"grad_norm": 0.38767072558403015,
"learning_rate": 0.0001961247934913915,
"loss": 0.1829,
"step": 5940
},
{
"epoch": 0.26945023095734083,
"grad_norm": 0.45452436804771423,
"learning_rate": 0.00019610412321143398,
"loss": 0.256,
"step": 5950
},
{
"epoch": 0.2699030884883616,
"grad_norm": 0.448044091463089,
"learning_rate": 0.0001960833990456814,
"loss": 0.2153,
"step": 5960
},
{
"epoch": 0.2703559460193823,
"grad_norm": 0.4571818709373474,
"learning_rate": 0.00019606262100575387,
"loss": 0.2267,
"step": 5970
},
{
"epoch": 0.27080880355040304,
"grad_norm": 0.5397394895553589,
"learning_rate": 0.0001960417891033016,
"loss": 0.2056,
"step": 5980
},
{
"epoch": 0.2712616610814238,
"grad_norm": 1.0641297101974487,
"learning_rate": 0.00019602090335000516,
"loss": 0.2006,
"step": 5990
},
{
"epoch": 0.2717145186124445,
"grad_norm": 0.3743475079536438,
"learning_rate": 0.00019599996375757522,
"loss": 0.2114,
"step": 6000
},
{
"epoch": 0.2717145186124445,
"eval_chrf": 85.75058405488659,
"eval_loss": 0.19780972599983215,
"eval_runtime": 8.1051,
"eval_samples_per_second": 1.234,
"eval_steps_per_second": 0.123,
"step": 6000
},
{
"epoch": 0.27216737614346526,
"grad_norm": 0.40974077582359314,
"learning_rate": 0.00019597897033775267,
"loss": 0.2187,
"step": 6010
},
{
"epoch": 0.272620233674486,
"grad_norm": 0.4024125933647156,
"learning_rate": 0.0001959579231023085,
"loss": 0.2057,
"step": 6020
},
{
"epoch": 0.27307309120550677,
"grad_norm": 0.3904462456703186,
"learning_rate": 0.00019593682206304404,
"loss": 0.2154,
"step": 6030
},
{
"epoch": 0.27352594873652747,
"grad_norm": 0.4456275403499603,
"learning_rate": 0.0001959156672317906,
"loss": 0.1945,
"step": 6040
},
{
"epoch": 0.2739788062675482,
"grad_norm": 0.47315770387649536,
"learning_rate": 0.0001958944586204098,
"loss": 0.2118,
"step": 6050
},
{
"epoch": 0.274431663798569,
"grad_norm": 0.3774961829185486,
"learning_rate": 0.00019587319624079334,
"loss": 0.1844,
"step": 6060
},
{
"epoch": 0.27488452132958974,
"grad_norm": 0.38651296496391296,
"learning_rate": 0.00019585188010486307,
"loss": 0.1957,
"step": 6070
},
{
"epoch": 0.27533737886061044,
"grad_norm": 0.41263914108276367,
"learning_rate": 0.000195830510224571,
"loss": 0.1949,
"step": 6080
},
{
"epoch": 0.2757902363916312,
"grad_norm": 0.3857577443122864,
"learning_rate": 0.0001958090866118993,
"loss": 0.2078,
"step": 6090
},
{
"epoch": 0.27624309392265195,
"grad_norm": 0.6576055288314819,
"learning_rate": 0.00019578760927886017,
"loss": 0.2255,
"step": 6100
},
{
"epoch": 0.27669595145367265,
"grad_norm": 0.472508043050766,
"learning_rate": 0.00019576607823749607,
"loss": 0.2043,
"step": 6110
},
{
"epoch": 0.2771488089846934,
"grad_norm": 0.39050430059432983,
"learning_rate": 0.00019574449349987947,
"loss": 0.2083,
"step": 6120
},
{
"epoch": 0.27760166651571416,
"grad_norm": 0.4212459921836853,
"learning_rate": 0.00019572285507811295,
"loss": 0.2012,
"step": 6130
},
{
"epoch": 0.2780545240467349,
"grad_norm": 0.44424203038215637,
"learning_rate": 0.00019570116298432927,
"loss": 0.1881,
"step": 6140
},
{
"epoch": 0.2785073815777556,
"grad_norm": 0.43899261951446533,
"learning_rate": 0.00019567941723069122,
"loss": 0.1974,
"step": 6150
},
{
"epoch": 0.2789602391087764,
"grad_norm": 0.40080156922340393,
"learning_rate": 0.0001956576178293917,
"loss": 0.1923,
"step": 6160
},
{
"epoch": 0.27941309663979713,
"grad_norm": 0.49086445569992065,
"learning_rate": 0.00019563576479265365,
"loss": 0.2112,
"step": 6170
},
{
"epoch": 0.2798659541708179,
"grad_norm": 0.4632461667060852,
"learning_rate": 0.00019561385813273016,
"loss": 0.2087,
"step": 6180
},
{
"epoch": 0.2803188117018386,
"grad_norm": 0.3927869200706482,
"learning_rate": 0.00019559189786190432,
"loss": 0.1968,
"step": 6190
},
{
"epoch": 0.28077166923285934,
"grad_norm": 0.513033926486969,
"learning_rate": 0.00019556988399248927,
"loss": 0.2206,
"step": 6200
},
{
"epoch": 0.2812245267638801,
"grad_norm": 0.4554494321346283,
"learning_rate": 0.00019554781653682832,
"loss": 0.2253,
"step": 6210
},
{
"epoch": 0.2816773842949008,
"grad_norm": 0.3585559129714966,
"learning_rate": 0.00019552569550729468,
"loss": 0.1924,
"step": 6220
},
{
"epoch": 0.28213024182592156,
"grad_norm": 0.48740342259407043,
"learning_rate": 0.00019550352091629166,
"loss": 0.1948,
"step": 6230
},
{
"epoch": 0.2825830993569423,
"grad_norm": 0.4652673006057739,
"learning_rate": 0.0001954812927762526,
"loss": 0.2103,
"step": 6240
},
{
"epoch": 0.28303595688796307,
"grad_norm": 0.4237286150455475,
"learning_rate": 0.0001954590110996409,
"loss": 0.2297,
"step": 6250
},
{
"epoch": 0.28348881441898377,
"grad_norm": 0.6610928177833557,
"learning_rate": 0.0001954366758989499,
"loss": 0.2187,
"step": 6260
},
{
"epoch": 0.2839416719500045,
"grad_norm": 0.4797596037387848,
"learning_rate": 0.00019541428718670306,
"loss": 0.2045,
"step": 6270
},
{
"epoch": 0.2843945294810253,
"grad_norm": 0.3981485962867737,
"learning_rate": 0.0001953918449754537,
"loss": 0.1904,
"step": 6280
},
{
"epoch": 0.284847387012046,
"grad_norm": 0.7103779315948486,
"learning_rate": 0.00019536934927778533,
"loss": 0.1914,
"step": 6290
},
{
"epoch": 0.28530024454306674,
"grad_norm": 0.3921561539173126,
"learning_rate": 0.0001953468001063112,
"loss": 0.2028,
"step": 6300
},
{
"epoch": 0.2857531020740875,
"grad_norm": 0.539473295211792,
"learning_rate": 0.00019532419747367478,
"loss": 0.2178,
"step": 6310
},
{
"epoch": 0.28620595960510825,
"grad_norm": 0.5404443144798279,
"learning_rate": 0.00019530154139254938,
"loss": 0.223,
"step": 6320
},
{
"epoch": 0.28665881713612895,
"grad_norm": 0.4464082717895508,
"learning_rate": 0.00019527883187563832,
"loss": 0.213,
"step": 6330
},
{
"epoch": 0.2871116746671497,
"grad_norm": 0.4774690270423889,
"learning_rate": 0.00019525606893567487,
"loss": 0.2224,
"step": 6340
},
{
"epoch": 0.28756453219817046,
"grad_norm": 0.5096843838691711,
"learning_rate": 0.00019523325258542228,
"loss": 0.2159,
"step": 6350
},
{
"epoch": 0.2880173897291912,
"grad_norm": 0.48254311084747314,
"learning_rate": 0.00019521038283767372,
"loss": 0.1867,
"step": 6360
},
{
"epoch": 0.2884702472602119,
"grad_norm": 0.5749514698982239,
"learning_rate": 0.0001951874597052523,
"loss": 0.2121,
"step": 6370
},
{
"epoch": 0.2889231047912327,
"grad_norm": 0.5011195540428162,
"learning_rate": 0.00019516448320101105,
"loss": 0.2435,
"step": 6380
},
{
"epoch": 0.28937596232225343,
"grad_norm": 0.5004845857620239,
"learning_rate": 0.00019514145333783303,
"loss": 0.2068,
"step": 6390
},
{
"epoch": 0.28982881985327413,
"grad_norm": 0.29733824729919434,
"learning_rate": 0.00019511837012863104,
"loss": 0.1963,
"step": 6400
},
{
"epoch": 0.2902816773842949,
"grad_norm": 0.5419613122940063,
"learning_rate": 0.00019509523358634794,
"loss": 0.217,
"step": 6410
},
{
"epoch": 0.29073453491531565,
"grad_norm": 0.36856991052627563,
"learning_rate": 0.00019507204372395639,
"loss": 0.2054,
"step": 6420
},
{
"epoch": 0.2911873924463364,
"grad_norm": 0.5289191007614136,
"learning_rate": 0.00019504880055445906,
"loss": 0.2295,
"step": 6430
},
{
"epoch": 0.2916402499773571,
"grad_norm": 0.3745858669281006,
"learning_rate": 0.00019502550409088843,
"loss": 0.1861,
"step": 6440
},
{
"epoch": 0.29209310750837786,
"grad_norm": 0.6232544183731079,
"learning_rate": 0.00019500215434630684,
"loss": 0.203,
"step": 6450
},
{
"epoch": 0.2925459650393986,
"grad_norm": 0.45420029759407043,
"learning_rate": 0.0001949787513338066,
"loss": 0.1683,
"step": 6460
},
{
"epoch": 0.29299882257041937,
"grad_norm": 0.5476380586624146,
"learning_rate": 0.0001949552950665098,
"loss": 0.2168,
"step": 6470
},
{
"epoch": 0.29345168010144007,
"grad_norm": 0.4741978347301483,
"learning_rate": 0.0001949317855575684,
"loss": 0.2132,
"step": 6480
},
{
"epoch": 0.2939045376324608,
"grad_norm": 0.5809348821640015,
"learning_rate": 0.00019490822282016427,
"loss": 0.2454,
"step": 6490
},
{
"epoch": 0.2943573951634816,
"grad_norm": 0.5414433479309082,
"learning_rate": 0.00019488460686750906,
"loss": 0.1964,
"step": 6500
},
{
"epoch": 0.2948102526945023,
"grad_norm": 0.36005979776382446,
"learning_rate": 0.00019486093771284432,
"loss": 0.2165,
"step": 6510
},
{
"epoch": 0.29526311022552304,
"grad_norm": 0.385918527841568,
"learning_rate": 0.00019483721536944136,
"loss": 0.2145,
"step": 6520
},
{
"epoch": 0.2957159677565438,
"grad_norm": 0.49405747652053833,
"learning_rate": 0.0001948134398506014,
"loss": 0.2389,
"step": 6530
},
{
"epoch": 0.29616882528756455,
"grad_norm": 0.4467755854129791,
"learning_rate": 0.0001947896111696554,
"loss": 0.2311,
"step": 6540
},
{
"epoch": 0.29662168281858525,
"grad_norm": 0.6420004367828369,
"learning_rate": 0.00019476572933996416,
"loss": 0.2129,
"step": 6550
},
{
"epoch": 0.297074540349606,
"grad_norm": 0.3505796492099762,
"learning_rate": 0.0001947417943749182,
"loss": 0.2156,
"step": 6560
},
{
"epoch": 0.29752739788062676,
"grad_norm": 0.3485499620437622,
"learning_rate": 0.00019471780628793807,
"loss": 0.2009,
"step": 6570
},
{
"epoch": 0.2979802554116475,
"grad_norm": 0.5055803060531616,
"learning_rate": 0.0001946937650924738,
"loss": 0.2169,
"step": 6580
},
{
"epoch": 0.2984331129426682,
"grad_norm": 0.5099272727966309,
"learning_rate": 0.00019466967080200538,
"loss": 0.2276,
"step": 6590
},
{
"epoch": 0.298885970473689,
"grad_norm": 0.38270193338394165,
"learning_rate": 0.00019464552343004257,
"loss": 0.2193,
"step": 6600
},
{
"epoch": 0.29933882800470973,
"grad_norm": 0.4830116927623749,
"learning_rate": 0.00019462132299012482,
"loss": 0.1965,
"step": 6610
},
{
"epoch": 0.29979168553573043,
"grad_norm": 0.4647078812122345,
"learning_rate": 0.00019459706949582134,
"loss": 0.1906,
"step": 6620
},
{
"epoch": 0.3002445430667512,
"grad_norm": 0.6252137422561646,
"learning_rate": 0.0001945727629607312,
"loss": 0.209,
"step": 6630
},
{
"epoch": 0.30069740059777195,
"grad_norm": 0.4478525221347809,
"learning_rate": 0.00019454840339848306,
"loss": 0.225,
"step": 6640
},
{
"epoch": 0.3011502581287927,
"grad_norm": 0.4418219029903412,
"learning_rate": 0.00019452399082273543,
"loss": 0.2185,
"step": 6650
},
{
"epoch": 0.3016031156598134,
"grad_norm": 0.4830974042415619,
"learning_rate": 0.00019449952524717645,
"loss": 0.2212,
"step": 6660
},
{
"epoch": 0.30205597319083416,
"grad_norm": 0.4741996228694916,
"learning_rate": 0.00019447500668552404,
"loss": 0.196,
"step": 6670
},
{
"epoch": 0.3025088307218549,
"grad_norm": 0.36549901962280273,
"learning_rate": 0.0001944504351515258,
"loss": 0.2089,
"step": 6680
},
{
"epoch": 0.30296168825287567,
"grad_norm": 0.37369242310523987,
"learning_rate": 0.0001944258106589591,
"loss": 0.225,
"step": 6690
},
{
"epoch": 0.30341454578389637,
"grad_norm": 0.32829204201698303,
"learning_rate": 0.00019440113322163088,
"loss": 0.191,
"step": 6700
},
{
"epoch": 0.30386740331491713,
"grad_norm": 0.359739750623703,
"learning_rate": 0.00019437640285337786,
"loss": 0.175,
"step": 6710
},
{
"epoch": 0.3043202608459379,
"grad_norm": 0.48949897289276123,
"learning_rate": 0.0001943516195680664,
"loss": 0.2158,
"step": 6720
},
{
"epoch": 0.3047731183769586,
"grad_norm": 0.32513946294784546,
"learning_rate": 0.00019432678337959257,
"loss": 0.1945,
"step": 6730
},
{
"epoch": 0.30522597590797934,
"grad_norm": 0.5650992393493652,
"learning_rate": 0.00019430189430188208,
"loss": 0.2059,
"step": 6740
},
{
"epoch": 0.3056788334390001,
"grad_norm": 0.487543523311615,
"learning_rate": 0.00019427695234889024,
"loss": 0.1989,
"step": 6750
},
{
"epoch": 0.30613169097002085,
"grad_norm": 0.521586000919342,
"learning_rate": 0.0001942519575346021,
"loss": 0.2127,
"step": 6760
},
{
"epoch": 0.30658454850104155,
"grad_norm": 0.5865873694419861,
"learning_rate": 0.0001942269098730323,
"loss": 0.2336,
"step": 6770
},
{
"epoch": 0.3070374060320623,
"grad_norm": 0.49344274401664734,
"learning_rate": 0.0001942018093782251,
"loss": 0.2272,
"step": 6780
},
{
"epoch": 0.30749026356308307,
"grad_norm": 0.5003451704978943,
"learning_rate": 0.00019417665606425447,
"loss": 0.2008,
"step": 6790
},
{
"epoch": 0.3079431210941038,
"grad_norm": 0.4872739613056183,
"learning_rate": 0.00019415144994522384,
"loss": 0.1807,
"step": 6800
},
{
"epoch": 0.3083959786251245,
"grad_norm": 0.40256235003471375,
"learning_rate": 0.00019412619103526636,
"loss": 0.2155,
"step": 6810
},
{
"epoch": 0.3088488361561453,
"grad_norm": 0.4837823212146759,
"learning_rate": 0.00019410087934854483,
"loss": 0.1899,
"step": 6820
},
{
"epoch": 0.30930169368716603,
"grad_norm": 0.3010696470737457,
"learning_rate": 0.00019407551489925145,
"loss": 0.2095,
"step": 6830
},
{
"epoch": 0.30975455121818674,
"grad_norm": 0.4583839476108551,
"learning_rate": 0.0001940500977016082,
"loss": 0.1954,
"step": 6840
},
{
"epoch": 0.3102074087492075,
"grad_norm": 0.3842408061027527,
"learning_rate": 0.00019402462776986655,
"loss": 0.1968,
"step": 6850
},
{
"epoch": 0.31066026628022825,
"grad_norm": 0.4319639503955841,
"learning_rate": 0.00019399910511830757,
"loss": 0.2249,
"step": 6860
},
{
"epoch": 0.311113123811249,
"grad_norm": 0.4340662956237793,
"learning_rate": 0.0001939735297612418,
"loss": 0.1955,
"step": 6870
},
{
"epoch": 0.3115659813422697,
"grad_norm": 0.561085045337677,
"learning_rate": 0.00019394790171300947,
"loss": 0.1808,
"step": 6880
},
{
"epoch": 0.31201883887329046,
"grad_norm": 0.5028398633003235,
"learning_rate": 0.00019392222098798024,
"loss": 0.2016,
"step": 6890
},
{
"epoch": 0.3124716964043112,
"grad_norm": 0.37517672777175903,
"learning_rate": 0.00019389648760055335,
"loss": 0.2457,
"step": 6900
},
{
"epoch": 0.3129245539353319,
"grad_norm": 0.5868512988090515,
"learning_rate": 0.0001938707015651576,
"loss": 0.2184,
"step": 6910
},
{
"epoch": 0.3133774114663527,
"grad_norm": 0.5316442847251892,
"learning_rate": 0.00019384486289625123,
"loss": 0.1874,
"step": 6920
},
{
"epoch": 0.31383026899737343,
"grad_norm": 0.33138179779052734,
"learning_rate": 0.00019381897160832207,
"loss": 0.208,
"step": 6930
},
{
"epoch": 0.3142831265283942,
"grad_norm": 0.49291670322418213,
"learning_rate": 0.00019379302771588744,
"loss": 0.2225,
"step": 6940
},
{
"epoch": 0.3147359840594149,
"grad_norm": 0.41960468888282776,
"learning_rate": 0.00019376703123349408,
"loss": 0.2071,
"step": 6950
},
{
"epoch": 0.31518884159043564,
"grad_norm": 0.4372851252555847,
"learning_rate": 0.00019374098217571833,
"loss": 0.197,
"step": 6960
},
{
"epoch": 0.3156416991214564,
"grad_norm": 0.479920357465744,
"learning_rate": 0.00019371488055716594,
"loss": 0.2175,
"step": 6970
},
{
"epoch": 0.31609455665247715,
"grad_norm": 0.585425615310669,
"learning_rate": 0.00019368872639247213,
"loss": 0.2128,
"step": 6980
},
{
"epoch": 0.31654741418349785,
"grad_norm": 0.5114132165908813,
"learning_rate": 0.00019366251969630164,
"loss": 0.2282,
"step": 6990
},
{
"epoch": 0.3170002717145186,
"grad_norm": 0.3805500864982605,
"learning_rate": 0.00019363626048334856,
"loss": 0.2257,
"step": 7000
},
{
"epoch": 0.3170002717145186,
"eval_chrf": 82.99971755536923,
"eval_loss": 0.1683083176612854,
"eval_runtime": 8.3569,
"eval_samples_per_second": 1.197,
"eval_steps_per_second": 0.12,
"step": 7000
},
{
"epoch": 0.31745312924553937,
"grad_norm": 0.40227165818214417,
"learning_rate": 0.00019360994876833651,
"loss": 0.2096,
"step": 7010
},
{
"epoch": 0.31790598677656007,
"grad_norm": 0.4601973295211792,
"learning_rate": 0.00019358358456601855,
"loss": 0.1969,
"step": 7020
},
{
"epoch": 0.3183588443075808,
"grad_norm": 0.42821553349494934,
"learning_rate": 0.00019355716789117716,
"loss": 0.1847,
"step": 7030
},
{
"epoch": 0.3188117018386016,
"grad_norm": 0.5266753435134888,
"learning_rate": 0.00019353069875862415,
"loss": 0.1914,
"step": 7040
},
{
"epoch": 0.31926455936962234,
"grad_norm": 0.3582618236541748,
"learning_rate": 0.00019350417718320091,
"loss": 0.2137,
"step": 7050
},
{
"epoch": 0.31971741690064304,
"grad_norm": 0.38131266832351685,
"learning_rate": 0.00019347760317977813,
"loss": 0.2159,
"step": 7060
},
{
"epoch": 0.3201702744316638,
"grad_norm": 0.4729970395565033,
"learning_rate": 0.00019345097676325582,
"loss": 0.2093,
"step": 7070
},
{
"epoch": 0.32062313196268455,
"grad_norm": 0.5034728050231934,
"learning_rate": 0.0001934242979485636,
"loss": 0.2536,
"step": 7080
},
{
"epoch": 0.3210759894937053,
"grad_norm": 0.3000277280807495,
"learning_rate": 0.00019339756675066028,
"loss": 0.1886,
"step": 7090
},
{
"epoch": 0.321528847024726,
"grad_norm": 0.45927393436431885,
"learning_rate": 0.0001933707831845341,
"loss": 0.2117,
"step": 7100
},
{
"epoch": 0.32198170455574676,
"grad_norm": 0.4469738304615021,
"learning_rate": 0.00019334394726520267,
"loss": 0.2165,
"step": 7110
},
{
"epoch": 0.3224345620867675,
"grad_norm": 0.44823434948921204,
"learning_rate": 0.00019331705900771295,
"loss": 0.2168,
"step": 7120
},
{
"epoch": 0.3228874196177882,
"grad_norm": 0.4014623165130615,
"learning_rate": 0.00019329011842714124,
"loss": 0.183,
"step": 7130
},
{
"epoch": 0.323340277148809,
"grad_norm": 0.46763408184051514,
"learning_rate": 0.0001932631255385932,
"loss": 0.1927,
"step": 7140
},
{
"epoch": 0.32379313467982973,
"grad_norm": 0.375232070684433,
"learning_rate": 0.00019323608035720378,
"loss": 0.2059,
"step": 7150
},
{
"epoch": 0.3242459922108505,
"grad_norm": 0.5370981097221375,
"learning_rate": 0.00019320898289813728,
"loss": 0.1961,
"step": 7160
},
{
"epoch": 0.3246988497418712,
"grad_norm": 1.2785454988479614,
"learning_rate": 0.00019318183317658733,
"loss": 0.1853,
"step": 7170
},
{
"epoch": 0.32515170727289194,
"grad_norm": 0.5735570788383484,
"learning_rate": 0.00019315463120777682,
"loss": 0.2038,
"step": 7180
},
{
"epoch": 0.3256045648039127,
"grad_norm": 0.6372143030166626,
"learning_rate": 0.00019312737700695793,
"loss": 0.2146,
"step": 7190
},
{
"epoch": 0.32605742233493346,
"grad_norm": 0.5218742489814758,
"learning_rate": 0.00019310007058941217,
"loss": 0.2102,
"step": 7200
},
{
"epoch": 0.32651027986595416,
"grad_norm": 0.40725770592689514,
"learning_rate": 0.00019307271197045034,
"loss": 0.2103,
"step": 7210
},
{
"epoch": 0.3269631373969749,
"grad_norm": 0.4945192039012909,
"learning_rate": 0.00019304530116541244,
"loss": 0.2056,
"step": 7220
},
{
"epoch": 0.32741599492799567,
"grad_norm": 0.48213791847229004,
"learning_rate": 0.0001930178381896678,
"loss": 0.2302,
"step": 7230
},
{
"epoch": 0.32786885245901637,
"grad_norm": 0.3266017436981201,
"learning_rate": 0.00019299032305861494,
"loss": 0.1773,
"step": 7240
},
{
"epoch": 0.3283217099900371,
"grad_norm": 0.466962069272995,
"learning_rate": 0.00019296275578768163,
"loss": 0.2195,
"step": 7250
},
{
"epoch": 0.3287745675210579,
"grad_norm": 0.5370128750801086,
"learning_rate": 0.000192935136392325,
"loss": 0.2086,
"step": 7260
},
{
"epoch": 0.32922742505207864,
"grad_norm": 0.4896990954875946,
"learning_rate": 0.00019290746488803118,
"loss": 0.228,
"step": 7270
},
{
"epoch": 0.32968028258309934,
"grad_norm": 0.37790390849113464,
"learning_rate": 0.00019287974129031575,
"loss": 0.1924,
"step": 7280
},
{
"epoch": 0.3301331401141201,
"grad_norm": 0.42240995168685913,
"learning_rate": 0.00019285196561472334,
"loss": 0.1959,
"step": 7290
},
{
"epoch": 0.33058599764514085,
"grad_norm": 0.48036807775497437,
"learning_rate": 0.00019282413787682784,
"loss": 0.2039,
"step": 7300
},
{
"epoch": 0.3310388551761616,
"grad_norm": 0.4346061944961548,
"learning_rate": 0.0001927962580922323,
"loss": 0.2166,
"step": 7310
},
{
"epoch": 0.3314917127071823,
"grad_norm": 0.36262384057044983,
"learning_rate": 0.00019276832627656906,
"loss": 0.2028,
"step": 7320
},
{
"epoch": 0.33194457023820306,
"grad_norm": 0.36481937766075134,
"learning_rate": 0.00019274034244549948,
"loss": 0.2198,
"step": 7330
},
{
"epoch": 0.3323974277692238,
"grad_norm": 0.5148301124572754,
"learning_rate": 0.00019271230661471416,
"loss": 0.1921,
"step": 7340
},
{
"epoch": 0.3328502853002445,
"grad_norm": 0.6357602477073669,
"learning_rate": 0.00019268421879993286,
"loss": 0.2142,
"step": 7350
},
{
"epoch": 0.3333031428312653,
"grad_norm": 0.42315781116485596,
"learning_rate": 0.0001926560790169045,
"loss": 0.2087,
"step": 7360
},
{
"epoch": 0.33375600036228603,
"grad_norm": 0.36354297399520874,
"learning_rate": 0.00019262788728140708,
"loss": 0.2045,
"step": 7370
},
{
"epoch": 0.3342088578933068,
"grad_norm": 0.4218559265136719,
"learning_rate": 0.00019259964360924777,
"loss": 0.2149,
"step": 7380
},
{
"epoch": 0.3346617154243275,
"grad_norm": 0.389312744140625,
"learning_rate": 0.00019257134801626294,
"loss": 0.1924,
"step": 7390
},
{
"epoch": 0.33511457295534824,
"grad_norm": 0.4661923348903656,
"learning_rate": 0.0001925430005183179,
"loss": 0.2324,
"step": 7400
},
{
"epoch": 0.335567430486369,
"grad_norm": 0.6705445647239685,
"learning_rate": 0.00019251460113130721,
"loss": 0.2186,
"step": 7410
},
{
"epoch": 0.33602028801738976,
"grad_norm": 0.34467417001724243,
"learning_rate": 0.0001924861498711544,
"loss": 0.2131,
"step": 7420
},
{
"epoch": 0.33647314554841046,
"grad_norm": 0.5210064053535461,
"learning_rate": 0.00019245764675381225,
"loss": 0.1881,
"step": 7430
},
{
"epoch": 0.3369260030794312,
"grad_norm": 0.5148203372955322,
"learning_rate": 0.00019242909179526248,
"loss": 0.2057,
"step": 7440
},
{
"epoch": 0.33737886061045197,
"grad_norm": 0.4705396592617035,
"learning_rate": 0.00019240048501151588,
"loss": 0.2316,
"step": 7450
},
{
"epoch": 0.33783171814147267,
"grad_norm": 0.4516450762748718,
"learning_rate": 0.00019237182641861238,
"loss": 0.1875,
"step": 7460
},
{
"epoch": 0.3382845756724934,
"grad_norm": 0.36377689242362976,
"learning_rate": 0.00019234311603262086,
"loss": 0.2088,
"step": 7470
},
{
"epoch": 0.3387374332035142,
"grad_norm": 0.775227963924408,
"learning_rate": 0.00019231435386963942,
"loss": 0.1892,
"step": 7480
},
{
"epoch": 0.33919029073453494,
"grad_norm": 0.45915624499320984,
"learning_rate": 0.00019228553994579494,
"loss": 0.2033,
"step": 7490
},
{
"epoch": 0.33964314826555564,
"grad_norm": 0.5621431469917297,
"learning_rate": 0.00019225667427724352,
"loss": 0.1782,
"step": 7500
},
{
"epoch": 0.3400960057965764,
"grad_norm": 4.9242682456970215,
"learning_rate": 0.0001922277568801702,
"loss": 0.1944,
"step": 7510
},
{
"epoch": 0.34054886332759715,
"grad_norm": 0.41579318046569824,
"learning_rate": 0.00019219878777078896,
"loss": 0.196,
"step": 7520
},
{
"epoch": 0.34100172085861785,
"grad_norm": 0.5250774025917053,
"learning_rate": 0.00019216976696534297,
"loss": 0.2063,
"step": 7530
},
{
"epoch": 0.3414545783896386,
"grad_norm": 0.5255563855171204,
"learning_rate": 0.00019214069448010413,
"loss": 0.2061,
"step": 7540
},
{
"epoch": 0.34190743592065936,
"grad_norm": 0.41017070412635803,
"learning_rate": 0.00019211157033137354,
"loss": 0.1757,
"step": 7550
},
{
"epoch": 0.3423602934516801,
"grad_norm": 0.41834238171577454,
"learning_rate": 0.00019208239453548113,
"loss": 0.2022,
"step": 7560
},
{
"epoch": 0.3428131509827008,
"grad_norm": 0.4785829782485962,
"learning_rate": 0.00019205316710878587,
"loss": 0.224,
"step": 7570
},
{
"epoch": 0.3432660085137216,
"grad_norm": 0.3674987554550171,
"learning_rate": 0.0001920238880676756,
"loss": 0.2083,
"step": 7580
},
{
"epoch": 0.34371886604474233,
"grad_norm": 0.421902596950531,
"learning_rate": 0.00019199455742856714,
"loss": 0.2169,
"step": 7590
},
{
"epoch": 0.3441717235757631,
"grad_norm": 0.7003633379936218,
"learning_rate": 0.00019196517520790626,
"loss": 0.2069,
"step": 7600
},
{
"epoch": 0.3446245811067838,
"grad_norm": 0.40210533142089844,
"learning_rate": 0.00019193574142216768,
"loss": 0.1985,
"step": 7610
},
{
"epoch": 0.34507743863780455,
"grad_norm": 0.44373050332069397,
"learning_rate": 0.00019190625608785493,
"loss": 0.1962,
"step": 7620
},
{
"epoch": 0.3455302961688253,
"grad_norm": 0.49582087993621826,
"learning_rate": 0.00019187671922150053,
"loss": 0.2132,
"step": 7630
},
{
"epoch": 0.345983153699846,
"grad_norm": 0.5912686586380005,
"learning_rate": 0.00019184713083966582,
"loss": 0.2022,
"step": 7640
},
{
"epoch": 0.34643601123086676,
"grad_norm": 0.4800059199333191,
"learning_rate": 0.00019181749095894114,
"loss": 0.1992,
"step": 7650
},
{
"epoch": 0.3468888687618875,
"grad_norm": 0.45192936062812805,
"learning_rate": 0.00019178779959594562,
"loss": 0.2036,
"step": 7660
},
{
"epoch": 0.34734172629290827,
"grad_norm": 0.37126028537750244,
"learning_rate": 0.00019175805676732726,
"loss": 0.1606,
"step": 7670
},
{
"epoch": 0.34779458382392897,
"grad_norm": 0.4179832637310028,
"learning_rate": 0.0001917282624897629,
"loss": 0.194,
"step": 7680
},
{
"epoch": 0.3482474413549497,
"grad_norm": 0.46051672101020813,
"learning_rate": 0.00019169841677995833,
"loss": 0.2031,
"step": 7690
},
{
"epoch": 0.3487002988859705,
"grad_norm": 0.5491907596588135,
"learning_rate": 0.00019166851965464802,
"loss": 0.206,
"step": 7700
},
{
"epoch": 0.34915315641699124,
"grad_norm": 0.32158762216567993,
"learning_rate": 0.00019163857113059542,
"loss": 0.2098,
"step": 7710
},
{
"epoch": 0.34960601394801194,
"grad_norm": 0.4981878101825714,
"learning_rate": 0.0001916085712245927,
"loss": 0.1966,
"step": 7720
},
{
"epoch": 0.3500588714790327,
"grad_norm": 0.4609077274799347,
"learning_rate": 0.0001915785199534609,
"loss": 0.1932,
"step": 7730
},
{
"epoch": 0.35051172901005345,
"grad_norm": 0.4244095981121063,
"learning_rate": 0.00019154841733404982,
"loss": 0.1958,
"step": 7740
},
{
"epoch": 0.35096458654107415,
"grad_norm": 0.40494847297668457,
"learning_rate": 0.00019151826338323805,
"loss": 0.1977,
"step": 7750
},
{
"epoch": 0.3514174440720949,
"grad_norm": 0.3514610528945923,
"learning_rate": 0.000191488058117933,
"loss": 0.1984,
"step": 7760
},
{
"epoch": 0.35187030160311566,
"grad_norm": 0.4453369975090027,
"learning_rate": 0.00019145780155507085,
"loss": 0.2102,
"step": 7770
},
{
"epoch": 0.3523231591341364,
"grad_norm": 0.5334694385528564,
"learning_rate": 0.00019142749371161647,
"loss": 0.1982,
"step": 7780
},
{
"epoch": 0.3527760166651571,
"grad_norm": 0.39791110157966614,
"learning_rate": 0.00019139713460456355,
"loss": 0.2014,
"step": 7790
},
{
"epoch": 0.3532288741961779,
"grad_norm": 0.4057285785675049,
"learning_rate": 0.00019136672425093453,
"loss": 0.213,
"step": 7800
},
{
"epoch": 0.35368173172719863,
"grad_norm": 0.45790570974349976,
"learning_rate": 0.00019133626266778055,
"loss": 0.2204,
"step": 7810
},
{
"epoch": 0.3541345892582194,
"grad_norm": 0.4071456789970398,
"learning_rate": 0.00019130574987218148,
"loss": 0.2166,
"step": 7820
},
{
"epoch": 0.3545874467892401,
"grad_norm": 0.4177386164665222,
"learning_rate": 0.00019127518588124592,
"loss": 0.198,
"step": 7830
},
{
"epoch": 0.35504030432026085,
"grad_norm": 0.3865101635456085,
"learning_rate": 0.00019124457071211117,
"loss": 0.1916,
"step": 7840
},
{
"epoch": 0.3554931618512816,
"grad_norm": 0.41384661197662354,
"learning_rate": 0.0001912139043819432,
"loss": 0.2182,
"step": 7850
},
{
"epoch": 0.3559460193823023,
"grad_norm": 0.34812772274017334,
"learning_rate": 0.00019118318690793676,
"loss": 0.2093,
"step": 7860
},
{
"epoch": 0.35639887691332306,
"grad_norm": 0.31512027978897095,
"learning_rate": 0.0001911524183073151,
"loss": 0.2207,
"step": 7870
},
{
"epoch": 0.3568517344443438,
"grad_norm": 0.3776790201663971,
"learning_rate": 0.00019112159859733033,
"loss": 0.1826,
"step": 7880
},
{
"epoch": 0.35730459197536457,
"grad_norm": 0.3415994346141815,
"learning_rate": 0.0001910907277952631,
"loss": 0.1881,
"step": 7890
},
{
"epoch": 0.35775744950638527,
"grad_norm": 0.4983496367931366,
"learning_rate": 0.00019105980591842272,
"loss": 0.1851,
"step": 7900
},
{
"epoch": 0.35821030703740603,
"grad_norm": 0.48957979679107666,
"learning_rate": 0.00019102883298414717,
"loss": 0.2,
"step": 7910
},
{
"epoch": 0.3586631645684268,
"grad_norm": 0.3085726499557495,
"learning_rate": 0.00019099780900980307,
"loss": 0.1909,
"step": 7920
},
{
"epoch": 0.35911602209944754,
"grad_norm": 0.41775602102279663,
"learning_rate": 0.00019096673401278557,
"loss": 0.1929,
"step": 7930
},
{
"epoch": 0.35956887963046824,
"grad_norm": 0.4147847890853882,
"learning_rate": 0.00019093560801051855,
"loss": 0.172,
"step": 7940
},
{
"epoch": 0.360021737161489,
"grad_norm": 0.3972959518432617,
"learning_rate": 0.00019090443102045437,
"loss": 0.2018,
"step": 7950
},
{
"epoch": 0.36047459469250975,
"grad_norm": 0.3908996880054474,
"learning_rate": 0.0001908732030600741,
"loss": 0.1776,
"step": 7960
},
{
"epoch": 0.36092745222353045,
"grad_norm": 0.48856571316719055,
"learning_rate": 0.0001908419241468873,
"loss": 0.2169,
"step": 7970
},
{
"epoch": 0.3613803097545512,
"grad_norm": 0.5449142456054688,
"learning_rate": 0.00019081059429843208,
"loss": 0.2059,
"step": 7980
},
{
"epoch": 0.36183316728557197,
"grad_norm": 0.40314939618110657,
"learning_rate": 0.00019077921353227525,
"loss": 0.2215,
"step": 7990
},
{
"epoch": 0.3622860248165927,
"grad_norm": 0.4393227994441986,
"learning_rate": 0.000190747781866012,
"loss": 0.2043,
"step": 8000
},
{
"epoch": 0.3622860248165927,
"eval_chrf": 86.15658047675439,
"eval_loss": 0.17065152525901794,
"eval_runtime": 7.5439,
"eval_samples_per_second": 1.326,
"eval_steps_per_second": 0.133,
"step": 8000
},
{
"epoch": 0.3627388823476134,
"grad_norm": 0.4438968598842621,
"learning_rate": 0.00019071629931726616,
"loss": 0.201,
"step": 8010
},
{
"epoch": 0.3631917398786342,
"grad_norm": 0.4053437411785126,
"learning_rate": 0.00019068476590369007,
"loss": 0.1967,
"step": 8020
},
{
"epoch": 0.36364459740965493,
"grad_norm": 0.4435258209705353,
"learning_rate": 0.00019065318164296455,
"loss": 0.2073,
"step": 8030
},
{
"epoch": 0.3640974549406757,
"grad_norm": 0.44856691360473633,
"learning_rate": 0.000190621546552799,
"loss": 0.2382,
"step": 8040
},
{
"epoch": 0.3645503124716964,
"grad_norm": 0.42579466104507446,
"learning_rate": 0.00019058986065093131,
"loss": 0.1648,
"step": 8050
},
{
"epoch": 0.36500317000271715,
"grad_norm": 0.41863974928855896,
"learning_rate": 0.00019055812395512778,
"loss": 0.2206,
"step": 8060
},
{
"epoch": 0.3654560275337379,
"grad_norm": 0.5849347114562988,
"learning_rate": 0.00019052633648318327,
"loss": 0.2036,
"step": 8070
},
{
"epoch": 0.3659088850647586,
"grad_norm": 0.4172811806201935,
"learning_rate": 0.00019049449825292108,
"loss": 0.2059,
"step": 8080
},
{
"epoch": 0.36636174259577936,
"grad_norm": 0.42305058240890503,
"learning_rate": 0.00019046260928219302,
"loss": 0.2533,
"step": 8090
},
{
"epoch": 0.3668146001268001,
"grad_norm": 0.3714189827442169,
"learning_rate": 0.00019043066958887925,
"loss": 0.1827,
"step": 8100
},
{
"epoch": 0.3672674576578209,
"grad_norm": 0.40903255343437195,
"learning_rate": 0.00019039867919088844,
"loss": 0.1811,
"step": 8110
},
{
"epoch": 0.3677203151888416,
"grad_norm": 0.3701893389225006,
"learning_rate": 0.00019036663810615768,
"loss": 0.1738,
"step": 8120
},
{
"epoch": 0.36817317271986233,
"grad_norm": 0.5263366103172302,
"learning_rate": 0.00019033454635265248,
"loss": 0.1978,
"step": 8130
},
{
"epoch": 0.3686260302508831,
"grad_norm": 0.42517194151878357,
"learning_rate": 0.00019030240394836675,
"loss": 0.1896,
"step": 8140
},
{
"epoch": 0.3690788877819038,
"grad_norm": 0.5339916944503784,
"learning_rate": 0.0001902702109113228,
"loss": 0.223,
"step": 8150
},
{
"epoch": 0.36953174531292454,
"grad_norm": 0.49596965312957764,
"learning_rate": 0.0001902379672595714,
"loss": 0.2212,
"step": 8160
},
{
"epoch": 0.3699846028439453,
"grad_norm": 0.47744324803352356,
"learning_rate": 0.00019020567301119155,
"loss": 0.1985,
"step": 8170
},
{
"epoch": 0.37043746037496605,
"grad_norm": 0.543395459651947,
"learning_rate": 0.00019017332818429078,
"loss": 0.2011,
"step": 8180
},
{
"epoch": 0.37089031790598675,
"grad_norm": 0.48390671610832214,
"learning_rate": 0.00019014093279700483,
"loss": 0.2018,
"step": 8190
},
{
"epoch": 0.3713431754370075,
"grad_norm": 0.4345707297325134,
"learning_rate": 0.00019010848686749793,
"loss": 0.2122,
"step": 8200
},
{
"epoch": 0.37179603296802827,
"grad_norm": 0.3869522213935852,
"learning_rate": 0.00019007599041396257,
"loss": 0.1941,
"step": 8210
},
{
"epoch": 0.372248890499049,
"grad_norm": 0.5442917346954346,
"learning_rate": 0.00019004344345461958,
"loss": 0.2046,
"step": 8220
},
{
"epoch": 0.3727017480300697,
"grad_norm": 0.41071373224258423,
"learning_rate": 0.00019001084600771807,
"loss": 0.2044,
"step": 8230
},
{
"epoch": 0.3731546055610905,
"grad_norm": 0.4944802522659302,
"learning_rate": 0.00018997819809153557,
"loss": 0.1961,
"step": 8240
},
{
"epoch": 0.37360746309211124,
"grad_norm": 0.45529964566230774,
"learning_rate": 0.0001899454997243778,
"loss": 0.2038,
"step": 8250
},
{
"epoch": 0.37406032062313194,
"grad_norm": 0.3635946214199066,
"learning_rate": 0.00018991275092457882,
"loss": 0.2083,
"step": 8260
},
{
"epoch": 0.3745131781541527,
"grad_norm": 0.3817839026451111,
"learning_rate": 0.00018987995171050094,
"loss": 0.1883,
"step": 8270
},
{
"epoch": 0.37496603568517345,
"grad_norm": 0.38155823945999146,
"learning_rate": 0.00018984710210053477,
"loss": 0.1938,
"step": 8280
},
{
"epoch": 0.3754188932161942,
"grad_norm": 0.557369589805603,
"learning_rate": 0.00018981420211309916,
"loss": 0.217,
"step": 8290
},
{
"epoch": 0.3758717507472149,
"grad_norm": 0.389067143201828,
"learning_rate": 0.00018978125176664118,
"loss": 0.1791,
"step": 8300
},
{
"epoch": 0.37632460827823566,
"grad_norm": 0.5227335691452026,
"learning_rate": 0.0001897482510796362,
"loss": 0.1886,
"step": 8310
},
{
"epoch": 0.3767774658092564,
"grad_norm": 0.7860269546508789,
"learning_rate": 0.00018971520007058775,
"loss": 0.2142,
"step": 8320
},
{
"epoch": 0.3772303233402772,
"grad_norm": 0.6494739055633545,
"learning_rate": 0.00018968209875802757,
"loss": 0.2166,
"step": 8330
},
{
"epoch": 0.3776831808712979,
"grad_norm": 0.37337711453437805,
"learning_rate": 0.0001896489471605157,
"loss": 0.1673,
"step": 8340
},
{
"epoch": 0.37813603840231863,
"grad_norm": 0.7072011828422546,
"learning_rate": 0.00018961574529664031,
"loss": 0.1797,
"step": 8350
},
{
"epoch": 0.3785888959333394,
"grad_norm": 0.3572746813297272,
"learning_rate": 0.00018958249318501777,
"loss": 0.2033,
"step": 8360
},
{
"epoch": 0.3790417534643601,
"grad_norm": 0.40508168935775757,
"learning_rate": 0.00018954919084429256,
"loss": 0.2167,
"step": 8370
},
{
"epoch": 0.37949461099538084,
"grad_norm": 0.4418649673461914,
"learning_rate": 0.00018951583829313742,
"loss": 0.2197,
"step": 8380
},
{
"epoch": 0.3799474685264016,
"grad_norm": 0.40917450189590454,
"learning_rate": 0.00018948243555025313,
"loss": 0.183,
"step": 8390
},
{
"epoch": 0.38040032605742236,
"grad_norm": 0.5492053031921387,
"learning_rate": 0.00018944898263436878,
"loss": 0.2283,
"step": 8400
},
{
"epoch": 0.38085318358844306,
"grad_norm": 0.34713467955589294,
"learning_rate": 0.00018941547956424144,
"loss": 0.1689,
"step": 8410
},
{
"epoch": 0.3813060411194638,
"grad_norm": 0.6434493660926819,
"learning_rate": 0.00018938192635865634,
"loss": 0.2149,
"step": 8420
},
{
"epoch": 0.38175889865048457,
"grad_norm": 0.4369926154613495,
"learning_rate": 0.00018934832303642692,
"loss": 0.1864,
"step": 8430
},
{
"epoch": 0.3822117561815053,
"grad_norm": 0.436750590801239,
"learning_rate": 0.00018931466961639456,
"loss": 0.1991,
"step": 8440
},
{
"epoch": 0.382664613712526,
"grad_norm": 0.5193954706192017,
"learning_rate": 0.0001892809661174288,
"loss": 0.1964,
"step": 8450
},
{
"epoch": 0.3831174712435468,
"grad_norm": 0.4910030961036682,
"learning_rate": 0.00018924721255842727,
"loss": 0.2151,
"step": 8460
},
{
"epoch": 0.38357032877456754,
"grad_norm": 0.47311288118362427,
"learning_rate": 0.00018921340895831573,
"loss": 0.2132,
"step": 8470
},
{
"epoch": 0.38402318630558824,
"grad_norm": 0.45840010046958923,
"learning_rate": 0.00018917955533604788,
"loss": 0.1868,
"step": 8480
},
{
"epoch": 0.384476043836609,
"grad_norm": 0.5156082510948181,
"learning_rate": 0.00018914565171060553,
"loss": 0.2066,
"step": 8490
},
{
"epoch": 0.38492890136762975,
"grad_norm": 0.3248344659805298,
"learning_rate": 0.0001891116981009985,
"loss": 0.1923,
"step": 8500
},
{
"epoch": 0.3853817588986505,
"grad_norm": 0.3991844058036804,
"learning_rate": 0.0001890776945262647,
"loss": 0.1988,
"step": 8510
},
{
"epoch": 0.3858346164296712,
"grad_norm": 0.39894551038742065,
"learning_rate": 0.00018904364100547,
"loss": 0.1933,
"step": 8520
},
{
"epoch": 0.38628747396069196,
"grad_norm": 0.5236116647720337,
"learning_rate": 0.00018900953755770825,
"loss": 0.2022,
"step": 8530
},
{
"epoch": 0.3867403314917127,
"grad_norm": 0.45928722620010376,
"learning_rate": 0.00018897538420210134,
"loss": 0.2107,
"step": 8540
},
{
"epoch": 0.3871931890227335,
"grad_norm": 0.5236533284187317,
"learning_rate": 0.00018894118095779915,
"loss": 0.207,
"step": 8550
},
{
"epoch": 0.3876460465537542,
"grad_norm": 0.48914825916290283,
"learning_rate": 0.0001889069278439795,
"loss": 0.1956,
"step": 8560
},
{
"epoch": 0.38809890408477493,
"grad_norm": 0.4775371253490448,
"learning_rate": 0.0001888726248798482,
"loss": 0.2077,
"step": 8570
},
{
"epoch": 0.3885517616157957,
"grad_norm": 0.4409538209438324,
"learning_rate": 0.00018883827208463898,
"loss": 0.2194,
"step": 8580
},
{
"epoch": 0.3890046191468164,
"grad_norm": 0.35968494415283203,
"learning_rate": 0.00018880386947761355,
"loss": 0.185,
"step": 8590
},
{
"epoch": 0.38945747667783714,
"grad_norm": 0.4133264422416687,
"learning_rate": 0.00018876941707806152,
"loss": 0.2145,
"step": 8600
},
{
"epoch": 0.3899103342088579,
"grad_norm": 0.3894209563732147,
"learning_rate": 0.00018873491490530042,
"loss": 0.1673,
"step": 8610
},
{
"epoch": 0.39036319173987866,
"grad_norm": 0.43155336380004883,
"learning_rate": 0.00018870036297867566,
"loss": 0.223,
"step": 8620
},
{
"epoch": 0.39081604927089936,
"grad_norm": 0.4406642019748688,
"learning_rate": 0.0001886657613175607,
"loss": 0.1923,
"step": 8630
},
{
"epoch": 0.3912689068019201,
"grad_norm": 0.404340922832489,
"learning_rate": 0.0001886311099413566,
"loss": 0.21,
"step": 8640
},
{
"epoch": 0.39172176433294087,
"grad_norm": 0.3692395091056824,
"learning_rate": 0.00018859640886949262,
"loss": 0.2009,
"step": 8650
},
{
"epoch": 0.39217462186396157,
"grad_norm": 0.3608049750328064,
"learning_rate": 0.00018856165812142561,
"loss": 0.1878,
"step": 8660
},
{
"epoch": 0.3926274793949823,
"grad_norm": 0.3195638656616211,
"learning_rate": 0.00018852685771664047,
"loss": 0.1795,
"step": 8670
},
{
"epoch": 0.3930803369260031,
"grad_norm": 0.4515351951122284,
"learning_rate": 0.00018849200767464983,
"loss": 0.1884,
"step": 8680
},
{
"epoch": 0.39353319445702384,
"grad_norm": 0.44284364581108093,
"learning_rate": 0.0001884571080149942,
"loss": 0.1938,
"step": 8690
},
{
"epoch": 0.39398605198804454,
"grad_norm": 0.49626269936561584,
"learning_rate": 0.00018842215875724188,
"loss": 0.1971,
"step": 8700
},
{
"epoch": 0.3944389095190653,
"grad_norm": 0.5126382112503052,
"learning_rate": 0.00018838715992098898,
"loss": 0.1893,
"step": 8710
},
{
"epoch": 0.39489176705008605,
"grad_norm": 0.4352719187736511,
"learning_rate": 0.00018835211152585949,
"loss": 0.1821,
"step": 8720
},
{
"epoch": 0.3953446245811068,
"grad_norm": 0.4839010238647461,
"learning_rate": 0.00018831701359150506,
"loss": 0.2254,
"step": 8730
},
{
"epoch": 0.3957974821121275,
"grad_norm": 0.3657974302768707,
"learning_rate": 0.0001882818661376052,
"loss": 0.1896,
"step": 8740
},
{
"epoch": 0.39625033964314826,
"grad_norm": 0.3730844259262085,
"learning_rate": 0.0001882466691838672,
"loss": 0.1738,
"step": 8750
},
{
"epoch": 0.396703197174169,
"grad_norm": 0.5131645202636719,
"learning_rate": 0.00018821142275002596,
"loss": 0.2009,
"step": 8760
},
{
"epoch": 0.3971560547051897,
"grad_norm": 0.5660125017166138,
"learning_rate": 0.00018817612685584437,
"loss": 0.2141,
"step": 8770
},
{
"epoch": 0.3976089122362105,
"grad_norm": 0.5042424201965332,
"learning_rate": 0.00018814078152111288,
"loss": 0.2224,
"step": 8780
},
{
"epoch": 0.39806176976723123,
"grad_norm": 0.4598681628704071,
"learning_rate": 0.0001881053867656496,
"loss": 0.2008,
"step": 8790
},
{
"epoch": 0.398514627298252,
"grad_norm": 0.49776187539100647,
"learning_rate": 0.00018806994260930058,
"loss": 0.226,
"step": 8800
},
{
"epoch": 0.3989674848292727,
"grad_norm": 0.34232836961746216,
"learning_rate": 0.00018803444907193937,
"loss": 0.1985,
"step": 8810
},
{
"epoch": 0.39942034236029345,
"grad_norm": 0.5097756385803223,
"learning_rate": 0.00018799890617346728,
"loss": 0.2321,
"step": 8820
},
{
"epoch": 0.3998731998913142,
"grad_norm": 0.3807264566421509,
"learning_rate": 0.0001879633139338133,
"loss": 0.1831,
"step": 8830
},
{
"epoch": 0.40032605742233496,
"grad_norm": 0.4587901830673218,
"learning_rate": 0.00018792767237293408,
"loss": 0.1976,
"step": 8840
},
{
"epoch": 0.40077891495335566,
"grad_norm": 0.6649393439292908,
"learning_rate": 0.0001878919815108139,
"loss": 0.1715,
"step": 8850
},
{
"epoch": 0.4012317724843764,
"grad_norm": 0.426932156085968,
"learning_rate": 0.00018785624136746472,
"loss": 0.1879,
"step": 8860
},
{
"epoch": 0.40168463001539717,
"grad_norm": 0.38488268852233887,
"learning_rate": 0.00018782045196292612,
"loss": 0.1992,
"step": 8870
},
{
"epoch": 0.40213748754641787,
"grad_norm": 0.5162119269371033,
"learning_rate": 0.00018778461331726533,
"loss": 0.2185,
"step": 8880
},
{
"epoch": 0.4025903450774386,
"grad_norm": 0.4168258011341095,
"learning_rate": 0.0001877487254505771,
"loss": 0.191,
"step": 8890
},
{
"epoch": 0.4030432026084594,
"grad_norm": 0.3788599669933319,
"learning_rate": 0.00018771278838298388,
"loss": 0.1824,
"step": 8900
},
{
"epoch": 0.40349606013948014,
"grad_norm": 0.38353031873703003,
"learning_rate": 0.0001876768021346356,
"loss": 0.1922,
"step": 8910
},
{
"epoch": 0.40394891767050084,
"grad_norm": 0.42297571897506714,
"learning_rate": 0.00018764076672570993,
"loss": 0.1845,
"step": 8920
},
{
"epoch": 0.4044017752015216,
"grad_norm": 0.5288705825805664,
"learning_rate": 0.00018760468217641195,
"loss": 0.2182,
"step": 8930
},
{
"epoch": 0.40485463273254235,
"grad_norm": 0.40291109681129456,
"learning_rate": 0.00018756854850697431,
"loss": 0.2134,
"step": 8940
},
{
"epoch": 0.4053074902635631,
"grad_norm": 0.37385135889053345,
"learning_rate": 0.0001875323657376573,
"loss": 0.1942,
"step": 8950
},
{
"epoch": 0.4057603477945838,
"grad_norm": 0.36549118161201477,
"learning_rate": 0.00018749613388874866,
"loss": 0.2172,
"step": 8960
},
{
"epoch": 0.40621320532560456,
"grad_norm": 0.4215755760669708,
"learning_rate": 0.00018745985298056363,
"loss": 0.1837,
"step": 8970
},
{
"epoch": 0.4066660628566253,
"grad_norm": 0.32497653365135193,
"learning_rate": 0.00018742352303344504,
"loss": 0.1762,
"step": 8980
},
{
"epoch": 0.407118920387646,
"grad_norm": 0.5675212740898132,
"learning_rate": 0.00018738714406776316,
"loss": 0.1878,
"step": 8990
},
{
"epoch": 0.4075717779186668,
"grad_norm": 0.5199867486953735,
"learning_rate": 0.00018735071610391578,
"loss": 0.2148,
"step": 9000
},
{
"epoch": 0.4075717779186668,
"eval_chrf": 78.88053316914994,
"eval_loss": 0.16049259901046753,
"eval_runtime": 26.3188,
"eval_samples_per_second": 0.38,
"eval_steps_per_second": 0.038,
"step": 9000
},
{
"epoch": 0.40802463544968753,
"grad_norm": 0.4164709150791168,
"learning_rate": 0.0001873142391623281,
"loss": 0.16,
"step": 9010
},
{
"epoch": 0.4084774929807083,
"grad_norm": 0.6372416615486145,
"learning_rate": 0.00018727771326345282,
"loss": 0.1991,
"step": 9020
},
{
"epoch": 0.408930350511729,
"grad_norm": 0.378200888633728,
"learning_rate": 0.00018724113842777013,
"loss": 0.2072,
"step": 9030
},
{
"epoch": 0.40938320804274975,
"grad_norm": 0.4380868375301361,
"learning_rate": 0.00018720451467578762,
"loss": 0.2091,
"step": 9040
},
{
"epoch": 0.4098360655737705,
"grad_norm": 0.5316906571388245,
"learning_rate": 0.00018716784202804028,
"loss": 0.1966,
"step": 9050
},
{
"epoch": 0.41028892310479126,
"grad_norm": 0.4007183611392975,
"learning_rate": 0.00018713112050509055,
"loss": 0.1774,
"step": 9060
},
{
"epoch": 0.41074178063581196,
"grad_norm": 0.45667392015457153,
"learning_rate": 0.00018709435012752827,
"loss": 0.2131,
"step": 9070
},
{
"epoch": 0.4111946381668327,
"grad_norm": 0.5240411758422852,
"learning_rate": 0.0001870575309159707,
"loss": 0.2055,
"step": 9080
},
{
"epoch": 0.41164749569785347,
"grad_norm": 0.3834088444709778,
"learning_rate": 0.0001870206628910624,
"loss": 0.204,
"step": 9090
},
{
"epoch": 0.41210035322887417,
"grad_norm": 0.428633451461792,
"learning_rate": 0.0001869837460734754,
"loss": 0.2158,
"step": 9100
},
{
"epoch": 0.41255321075989493,
"grad_norm": 0.5074917078018188,
"learning_rate": 0.00018694678048390904,
"loss": 0.2004,
"step": 9110
},
{
"epoch": 0.4130060682909157,
"grad_norm": 0.42477914690971375,
"learning_rate": 0.00018690976614308996,
"loss": 0.2096,
"step": 9120
},
{
"epoch": 0.41345892582193644,
"grad_norm": 0.3913014233112335,
"learning_rate": 0.00018687270307177225,
"loss": 0.1851,
"step": 9130
},
{
"epoch": 0.41391178335295714,
"grad_norm": 0.517967700958252,
"learning_rate": 0.00018683559129073723,
"loss": 0.1933,
"step": 9140
},
{
"epoch": 0.4143646408839779,
"grad_norm": 0.5146768093109131,
"learning_rate": 0.00018679843082079352,
"loss": 0.2077,
"step": 9150
},
{
"epoch": 0.41481749841499865,
"grad_norm": 0.43314865231513977,
"learning_rate": 0.0001867612216827771,
"loss": 0.1918,
"step": 9160
},
{
"epoch": 0.4152703559460194,
"grad_norm": 0.4207741618156433,
"learning_rate": 0.00018672396389755126,
"loss": 0.1985,
"step": 9170
},
{
"epoch": 0.4157232134770401,
"grad_norm": 0.3719622790813446,
"learning_rate": 0.00018668665748600648,
"loss": 0.1795,
"step": 9180
},
{
"epoch": 0.41617607100806087,
"grad_norm": 0.5668209791183472,
"learning_rate": 0.0001866493024690606,
"loss": 0.2179,
"step": 9190
},
{
"epoch": 0.4166289285390816,
"grad_norm": 0.42645466327667236,
"learning_rate": 0.00018661189886765855,
"loss": 0.1831,
"step": 9200
},
{
"epoch": 0.4170817860701023,
"grad_norm": 0.5149989724159241,
"learning_rate": 0.0001865744467027727,
"loss": 0.1948,
"step": 9210
},
{
"epoch": 0.4175346436011231,
"grad_norm": 0.4551143944263458,
"learning_rate": 0.00018653694599540254,
"loss": 0.1969,
"step": 9220
},
{
"epoch": 0.41798750113214383,
"grad_norm": 0.32381895184516907,
"learning_rate": 0.00018649939676657483,
"loss": 0.2078,
"step": 9230
},
{
"epoch": 0.4184403586631646,
"grad_norm": 0.537377119064331,
"learning_rate": 0.00018646179903734344,
"loss": 0.1839,
"step": 9240
},
{
"epoch": 0.4188932161941853,
"grad_norm": 0.45596960186958313,
"learning_rate": 0.00018642415282878958,
"loss": 0.1687,
"step": 9250
},
{
"epoch": 0.41934607372520605,
"grad_norm": 0.3386632204055786,
"learning_rate": 0.00018638645816202148,
"loss": 0.1995,
"step": 9260
},
{
"epoch": 0.4197989312562268,
"grad_norm": 0.5089104771614075,
"learning_rate": 0.00018634871505817467,
"loss": 0.1969,
"step": 9270
},
{
"epoch": 0.4202517887872475,
"grad_norm": 0.34959906339645386,
"learning_rate": 0.0001863109235384118,
"loss": 0.1791,
"step": 9280
},
{
"epoch": 0.42070464631826826,
"grad_norm": 0.42316970229148865,
"learning_rate": 0.00018627308362392263,
"loss": 0.1821,
"step": 9290
},
{
"epoch": 0.421157503849289,
"grad_norm": 0.5111418962478638,
"learning_rate": 0.00018623519533592412,
"loss": 0.2211,
"step": 9300
},
{
"epoch": 0.4216103613803098,
"grad_norm": 0.3825642764568329,
"learning_rate": 0.0001861972586956603,
"loss": 0.1862,
"step": 9310
},
{
"epoch": 0.4220632189113305,
"grad_norm": 0.5130252838134766,
"learning_rate": 0.0001861592737244023,
"loss": 0.2011,
"step": 9320
},
{
"epoch": 0.42251607644235123,
"grad_norm": 0.435468465089798,
"learning_rate": 0.0001861212404434484,
"loss": 0.2097,
"step": 9330
},
{
"epoch": 0.422968933973372,
"grad_norm": 0.5458192825317383,
"learning_rate": 0.00018608315887412395,
"loss": 0.2038,
"step": 9340
},
{
"epoch": 0.42342179150439274,
"grad_norm": 0.3725493550300598,
"learning_rate": 0.0001860450290377814,
"loss": 0.1823,
"step": 9350
},
{
"epoch": 0.42387464903541344,
"grad_norm": 0.4673443138599396,
"learning_rate": 0.00018600685095580016,
"loss": 0.1862,
"step": 9360
},
{
"epoch": 0.4243275065664342,
"grad_norm": 0.40920791029930115,
"learning_rate": 0.0001859686246495868,
"loss": 0.2102,
"step": 9370
},
{
"epoch": 0.42478036409745495,
"grad_norm": 0.5569631457328796,
"learning_rate": 0.00018593035014057493,
"loss": 0.1948,
"step": 9380
},
{
"epoch": 0.42523322162847565,
"grad_norm": 0.4123372435569763,
"learning_rate": 0.00018589202745022512,
"loss": 0.1904,
"step": 9390
},
{
"epoch": 0.4256860791594964,
"grad_norm": 0.5805758237838745,
"learning_rate": 0.00018585365660002499,
"loss": 0.222,
"step": 9400
},
{
"epoch": 0.42613893669051717,
"grad_norm": 0.38345110416412354,
"learning_rate": 0.00018581523761148917,
"loss": 0.1959,
"step": 9410
},
{
"epoch": 0.4265917942215379,
"grad_norm": 0.4751555025577545,
"learning_rate": 0.00018577677050615922,
"loss": 0.1794,
"step": 9420
},
{
"epoch": 0.4270446517525586,
"grad_norm": 0.32770711183547974,
"learning_rate": 0.00018573825530560382,
"loss": 0.1994,
"step": 9430
},
{
"epoch": 0.4274975092835794,
"grad_norm": 0.3502885699272156,
"learning_rate": 0.00018569969203141847,
"loss": 0.2047,
"step": 9440
},
{
"epoch": 0.42795036681460014,
"grad_norm": 0.4478504955768585,
"learning_rate": 0.0001856610807052257,
"loss": 0.2055,
"step": 9450
},
{
"epoch": 0.4284032243456209,
"grad_norm": 0.31034862995147705,
"learning_rate": 0.00018562242134867492,
"loss": 0.1664,
"step": 9460
},
{
"epoch": 0.4288560818766416,
"grad_norm": 0.4220707416534424,
"learning_rate": 0.00018558371398344264,
"loss": 0.1919,
"step": 9470
},
{
"epoch": 0.42930893940766235,
"grad_norm": 0.46567854285240173,
"learning_rate": 0.00018554495863123204,
"loss": 0.211,
"step": 9480
},
{
"epoch": 0.4297617969386831,
"grad_norm": 0.47741076350212097,
"learning_rate": 0.00018550615531377337,
"loss": 0.2074,
"step": 9490
},
{
"epoch": 0.4302146544697038,
"grad_norm": 0.5314249396324158,
"learning_rate": 0.00018546730405282372,
"loss": 0.1915,
"step": 9500
},
{
"epoch": 0.43066751200072456,
"grad_norm": 0.40051180124282837,
"learning_rate": 0.00018542840487016714,
"loss": 0.1915,
"step": 9510
},
{
"epoch": 0.4311203695317453,
"grad_norm": 0.39250391721725464,
"learning_rate": 0.00018538945778761448,
"loss": 0.2155,
"step": 9520
},
{
"epoch": 0.4315732270627661,
"grad_norm": 0.35448992252349854,
"learning_rate": 0.00018535046282700337,
"loss": 0.2018,
"step": 9530
},
{
"epoch": 0.4320260845937868,
"grad_norm": 0.7133687734603882,
"learning_rate": 0.00018531142001019846,
"loss": 0.1711,
"step": 9540
},
{
"epoch": 0.43247894212480753,
"grad_norm": 0.5354053974151611,
"learning_rate": 0.00018527232935909108,
"loss": 0.1958,
"step": 9550
},
{
"epoch": 0.4329317996558283,
"grad_norm": 0.4301791191101074,
"learning_rate": 0.0001852331908955995,
"loss": 0.2147,
"step": 9560
},
{
"epoch": 0.43338465718684904,
"grad_norm": 0.2932838499546051,
"learning_rate": 0.00018519400464166868,
"loss": 0.2106,
"step": 9570
},
{
"epoch": 0.43383751471786974,
"grad_norm": 0.384493887424469,
"learning_rate": 0.0001851547706192705,
"loss": 0.1827,
"step": 9580
},
{
"epoch": 0.4342903722488905,
"grad_norm": 0.4205734431743622,
"learning_rate": 0.00018511548885040356,
"loss": 0.1799,
"step": 9590
},
{
"epoch": 0.43474322977991126,
"grad_norm": 0.3769165575504303,
"learning_rate": 0.0001850761593570932,
"loss": 0.1914,
"step": 9600
},
{
"epoch": 0.43519608731093196,
"grad_norm": 0.5843045711517334,
"learning_rate": 0.00018503678216139159,
"loss": 0.2098,
"step": 9610
},
{
"epoch": 0.4356489448419527,
"grad_norm": 0.591116726398468,
"learning_rate": 0.00018499735728537756,
"loss": 0.1849,
"step": 9620
},
{
"epoch": 0.43610180237297347,
"grad_norm": 0.39489904046058655,
"learning_rate": 0.00018495788475115678,
"loss": 0.1955,
"step": 9630
},
{
"epoch": 0.4365546599039942,
"grad_norm": 0.38326331973075867,
"learning_rate": 0.00018491836458086155,
"loss": 0.194,
"step": 9640
},
{
"epoch": 0.4370075174350149,
"grad_norm": 0.49937698245048523,
"learning_rate": 0.00018487879679665093,
"loss": 0.1861,
"step": 9650
},
{
"epoch": 0.4374603749660357,
"grad_norm": 0.46309980750083923,
"learning_rate": 0.00018483918142071066,
"loss": 0.1935,
"step": 9660
},
{
"epoch": 0.43791323249705644,
"grad_norm": 0.4150969386100769,
"learning_rate": 0.00018479951847525319,
"loss": 0.2049,
"step": 9670
},
{
"epoch": 0.4383660900280772,
"grad_norm": 0.41933974623680115,
"learning_rate": 0.0001847598079825176,
"loss": 0.1918,
"step": 9680
},
{
"epoch": 0.4388189475590979,
"grad_norm": 0.4882836639881134,
"learning_rate": 0.00018472004996476966,
"loss": 0.2102,
"step": 9690
},
{
"epoch": 0.43927180509011865,
"grad_norm": 0.5054790377616882,
"learning_rate": 0.0001846802444443018,
"loss": 0.2131,
"step": 9700
},
{
"epoch": 0.4397246626211394,
"grad_norm": 0.45476034283638,
"learning_rate": 0.00018464039144343297,
"loss": 0.1929,
"step": 9710
},
{
"epoch": 0.4401775201521601,
"grad_norm": 0.4609181582927704,
"learning_rate": 0.00018460049098450898,
"loss": 0.2106,
"step": 9720
},
{
"epoch": 0.44063037768318086,
"grad_norm": 0.4659000337123871,
"learning_rate": 0.00018456054308990197,
"loss": 0.212,
"step": 9730
},
{
"epoch": 0.4410832352142016,
"grad_norm": 0.6165845394134521,
"learning_rate": 0.00018452054778201094,
"loss": 0.2109,
"step": 9740
},
{
"epoch": 0.4415360927452224,
"grad_norm": 0.4262254536151886,
"learning_rate": 0.00018448050508326124,
"loss": 0.2149,
"step": 9750
},
{
"epoch": 0.4419889502762431,
"grad_norm": 0.39887556433677673,
"learning_rate": 0.00018444041501610494,
"loss": 0.2128,
"step": 9760
},
{
"epoch": 0.44244180780726383,
"grad_norm": 0.45499277114868164,
"learning_rate": 0.00018440027760302066,
"loss": 0.1714,
"step": 9770
},
{
"epoch": 0.4428946653382846,
"grad_norm": 0.3185460865497589,
"learning_rate": 0.00018436009286651347,
"loss": 0.1933,
"step": 9780
},
{
"epoch": 0.44334752286930534,
"grad_norm": 0.4237852394580841,
"learning_rate": 0.0001843198608291151,
"loss": 0.2142,
"step": 9790
},
{
"epoch": 0.44380038040032604,
"grad_norm": 0.5233890414237976,
"learning_rate": 0.00018427958151338373,
"loss": 0.2019,
"step": 9800
},
{
"epoch": 0.4442532379313468,
"grad_norm": 0.4304122030735016,
"learning_rate": 0.00018423925494190406,
"loss": 0.2197,
"step": 9810
},
{
"epoch": 0.44470609546236756,
"grad_norm": 0.33954623341560364,
"learning_rate": 0.00018419888113728727,
"loss": 0.1886,
"step": 9820
},
{
"epoch": 0.44515895299338826,
"grad_norm": 0.47928377985954285,
"learning_rate": 0.00018415846012217104,
"loss": 0.1713,
"step": 9830
},
{
"epoch": 0.445611810524409,
"grad_norm": 0.4934302270412445,
"learning_rate": 0.00018411799191921956,
"loss": 0.2239,
"step": 9840
},
{
"epoch": 0.44606466805542977,
"grad_norm": 0.4325009882450104,
"learning_rate": 0.00018407747655112343,
"loss": 0.1843,
"step": 9850
},
{
"epoch": 0.4465175255864505,
"grad_norm": 0.5995849967002869,
"learning_rate": 0.00018403691404059966,
"loss": 0.2361,
"step": 9860
},
{
"epoch": 0.4469703831174712,
"grad_norm": 0.38609248399734497,
"learning_rate": 0.0001839963044103918,
"loss": 0.1931,
"step": 9870
},
{
"epoch": 0.447423240648492,
"grad_norm": 0.48337194323539734,
"learning_rate": 0.00018395564768326972,
"loss": 0.19,
"step": 9880
},
{
"epoch": 0.44787609817951274,
"grad_norm": 0.45147621631622314,
"learning_rate": 0.00018391494388202975,
"loss": 0.2236,
"step": 9890
},
{
"epoch": 0.44832895571053344,
"grad_norm": 0.47226592898368835,
"learning_rate": 0.0001838741930294946,
"loss": 0.2025,
"step": 9900
},
{
"epoch": 0.4487818132415542,
"grad_norm": 0.47736138105392456,
"learning_rate": 0.00018383339514851338,
"loss": 0.1986,
"step": 9910
},
{
"epoch": 0.44923467077257495,
"grad_norm": 0.49481838941574097,
"learning_rate": 0.00018379255026196152,
"loss": 0.2069,
"step": 9920
},
{
"epoch": 0.4496875283035957,
"grad_norm": 0.41190800070762634,
"learning_rate": 0.00018375165839274086,
"loss": 0.1831,
"step": 9930
},
{
"epoch": 0.4501403858346164,
"grad_norm": 0.433465838432312,
"learning_rate": 0.0001837107195637796,
"loss": 0.2219,
"step": 9940
},
{
"epoch": 0.45059324336563716,
"grad_norm": 0.44359904527664185,
"learning_rate": 0.00018366973379803215,
"loss": 0.1933,
"step": 9950
},
{
"epoch": 0.4510461008966579,
"grad_norm": 0.5206522941589355,
"learning_rate": 0.00018362870111847935,
"loss": 0.1892,
"step": 9960
},
{
"epoch": 0.4514989584276787,
"grad_norm": 0.3605899512767792,
"learning_rate": 0.00018358762154812834,
"loss": 0.1936,
"step": 9970
},
{
"epoch": 0.4519518159586994,
"grad_norm": 0.34897580742836,
"learning_rate": 0.00018354649511001254,
"loss": 0.2013,
"step": 9980
},
{
"epoch": 0.45240467348972013,
"grad_norm": 0.36002519726753235,
"learning_rate": 0.0001835053218271916,
"loss": 0.1795,
"step": 9990
},
{
"epoch": 0.4528575310207409,
"grad_norm": 0.4565688371658325,
"learning_rate": 0.00018346410172275144,
"loss": 0.1725,
"step": 10000
},
{
"epoch": 0.4528575310207409,
"eval_chrf": 76.88655918784474,
"eval_loss": 0.15447305142879486,
"eval_runtime": 12.1525,
"eval_samples_per_second": 0.823,
"eval_steps_per_second": 0.082,
"step": 10000
},
{
"epoch": 0.4533103885517616,
"grad_norm": 0.4288187325000763,
"learning_rate": 0.0001834228348198043,
"loss": 0.1634,
"step": 10010
},
{
"epoch": 0.45376324608278235,
"grad_norm": 0.41219136118888855,
"learning_rate": 0.00018338152114148864,
"loss": 0.1954,
"step": 10020
},
{
"epoch": 0.4542161036138031,
"grad_norm": 0.32993972301483154,
"learning_rate": 0.00018334016071096915,
"loss": 0.1897,
"step": 10030
},
{
"epoch": 0.45466896114482386,
"grad_norm": 0.4461534917354584,
"learning_rate": 0.00018329875355143667,
"loss": 0.1948,
"step": 10040
},
{
"epoch": 0.45512181867584456,
"grad_norm": 0.38866010308265686,
"learning_rate": 0.00018325729968610824,
"loss": 0.1903,
"step": 10050
},
{
"epoch": 0.4555746762068653,
"grad_norm": 0.4280669689178467,
"learning_rate": 0.00018321579913822727,
"loss": 0.1966,
"step": 10060
},
{
"epoch": 0.45602753373788607,
"grad_norm": 0.5818290710449219,
"learning_rate": 0.00018317425193106307,
"loss": 0.1909,
"step": 10070
},
{
"epoch": 0.4564803912689068,
"grad_norm": 0.5169594287872314,
"learning_rate": 0.00018313265808791136,
"loss": 0.2037,
"step": 10080
},
{
"epoch": 0.4569332487999275,
"grad_norm": 0.4597228467464447,
"learning_rate": 0.00018309101763209384,
"loss": 0.1969,
"step": 10090
},
{
"epoch": 0.4573861063309483,
"grad_norm": 0.4891718327999115,
"learning_rate": 0.0001830493305869584,
"loss": 0.1946,
"step": 10100
},
{
"epoch": 0.45783896386196904,
"grad_norm": 0.49224698543548584,
"learning_rate": 0.0001830075969758791,
"loss": 0.1781,
"step": 10110
},
{
"epoch": 0.45829182139298974,
"grad_norm": 0.5037763118743896,
"learning_rate": 0.000182965816822256,
"loss": 0.1981,
"step": 10120
},
{
"epoch": 0.4587446789240105,
"grad_norm": 0.44621360301971436,
"learning_rate": 0.00018292399014951545,
"loss": 0.1762,
"step": 10130
},
{
"epoch": 0.45919753645503125,
"grad_norm": 0.38351958990097046,
"learning_rate": 0.00018288211698110963,
"loss": 0.1818,
"step": 10140
},
{
"epoch": 0.459650393986052,
"grad_norm": 0.4175410270690918,
"learning_rate": 0.00018284019734051695,
"loss": 0.1524,
"step": 10150
},
{
"epoch": 0.4601032515170727,
"grad_norm": 0.6031002402305603,
"learning_rate": 0.00018279823125124192,
"loss": 0.1992,
"step": 10160
},
{
"epoch": 0.46055610904809346,
"grad_norm": 0.3932390809059143,
"learning_rate": 0.0001827562187368149,
"loss": 0.1872,
"step": 10170
},
{
"epoch": 0.4610089665791142,
"grad_norm": 0.3468590974807739,
"learning_rate": 0.0001827141598207925,
"loss": 0.1883,
"step": 10180
},
{
"epoch": 0.461461824110135,
"grad_norm": 0.5273679494857788,
"learning_rate": 0.0001826720545267572,
"loss": 0.1855,
"step": 10190
},
{
"epoch": 0.4619146816411557,
"grad_norm": 0.31331586837768555,
"learning_rate": 0.0001826299028783175,
"loss": 0.1936,
"step": 10200
},
{
"epoch": 0.46236753917217643,
"grad_norm": 0.4715229272842407,
"learning_rate": 0.00018258770489910803,
"loss": 0.205,
"step": 10210
},
{
"epoch": 0.4628203967031972,
"grad_norm": 0.6142929196357727,
"learning_rate": 0.0001825454606127892,
"loss": 0.2015,
"step": 10220
},
{
"epoch": 0.4632732542342179,
"grad_norm": 0.4637449383735657,
"learning_rate": 0.0001825031700430475,
"loss": 0.201,
"step": 10230
},
{
"epoch": 0.46372611176523865,
"grad_norm": 0.37245267629623413,
"learning_rate": 0.00018246083321359535,
"loss": 0.1939,
"step": 10240
},
{
"epoch": 0.4641789692962594,
"grad_norm": 0.4161212146282196,
"learning_rate": 0.0001824184501481711,
"loss": 0.1987,
"step": 10250
},
{
"epoch": 0.46463182682728016,
"grad_norm": 0.5343114137649536,
"learning_rate": 0.000182376020870539,
"loss": 0.1745,
"step": 10260
},
{
"epoch": 0.46508468435830086,
"grad_norm": 0.39276260137557983,
"learning_rate": 0.0001823335454044893,
"loss": 0.205,
"step": 10270
},
{
"epoch": 0.4655375418893216,
"grad_norm": 0.36505231261253357,
"learning_rate": 0.00018229102377383805,
"loss": 0.1628,
"step": 10280
},
{
"epoch": 0.46599039942034237,
"grad_norm": 0.4490553140640259,
"learning_rate": 0.0001822484560024272,
"loss": 0.1943,
"step": 10290
},
{
"epoch": 0.4664432569513631,
"grad_norm": 0.42624643445014954,
"learning_rate": 0.00018220584211412467,
"loss": 0.2038,
"step": 10300
},
{
"epoch": 0.46689611448238383,
"grad_norm": 0.4004163146018982,
"learning_rate": 0.00018216318213282412,
"loss": 0.1981,
"step": 10310
},
{
"epoch": 0.4673489720134046,
"grad_norm": 0.39912936091423035,
"learning_rate": 0.00018212047608244506,
"loss": 0.1918,
"step": 10320
},
{
"epoch": 0.46780182954442534,
"grad_norm": 0.5702713131904602,
"learning_rate": 0.0001820777239869329,
"loss": 0.2164,
"step": 10330
},
{
"epoch": 0.46825468707544604,
"grad_norm": 0.3886665403842926,
"learning_rate": 0.00018203492587025885,
"loss": 0.2134,
"step": 10340
},
{
"epoch": 0.4687075446064668,
"grad_norm": 0.6027638912200928,
"learning_rate": 0.00018199208175641987,
"loss": 0.196,
"step": 10350
},
{
"epoch": 0.46916040213748755,
"grad_norm": 0.4351930022239685,
"learning_rate": 0.00018194919166943877,
"loss": 0.1891,
"step": 10360
},
{
"epoch": 0.4696132596685083,
"grad_norm": 0.29797980189323425,
"learning_rate": 0.00018190625563336416,
"loss": 0.1905,
"step": 10370
},
{
"epoch": 0.470066117199529,
"grad_norm": 0.5475037097930908,
"learning_rate": 0.0001818632736722703,
"loss": 0.1911,
"step": 10380
},
{
"epoch": 0.47051897473054977,
"grad_norm": 0.4412976801395416,
"learning_rate": 0.0001818202458102573,
"loss": 0.184,
"step": 10390
},
{
"epoch": 0.4709718322615705,
"grad_norm": 0.4919610619544983,
"learning_rate": 0.000181777172071451,
"loss": 0.1872,
"step": 10400
},
{
"epoch": 0.4714246897925913,
"grad_norm": 0.32885703444480896,
"learning_rate": 0.0001817340524800029,
"loss": 0.1685,
"step": 10410
},
{
"epoch": 0.471877547323612,
"grad_norm": 0.43138033151626587,
"learning_rate": 0.00018169088706009026,
"loss": 0.1606,
"step": 10420
},
{
"epoch": 0.47233040485463273,
"grad_norm": 0.36871063709259033,
"learning_rate": 0.00018164767583591604,
"loss": 0.2123,
"step": 10430
},
{
"epoch": 0.4727832623856535,
"grad_norm": 0.41590359807014465,
"learning_rate": 0.00018160441883170888,
"loss": 0.1947,
"step": 10440
},
{
"epoch": 0.4732361199166742,
"grad_norm": 0.47074031829833984,
"learning_rate": 0.00018156111607172304,
"loss": 0.1742,
"step": 10450
},
{
"epoch": 0.47368897744769495,
"grad_norm": 0.386139452457428,
"learning_rate": 0.0001815177675802385,
"loss": 0.1847,
"step": 10460
},
{
"epoch": 0.4741418349787157,
"grad_norm": 0.5042863488197327,
"learning_rate": 0.00018147437338156088,
"loss": 0.1685,
"step": 10470
},
{
"epoch": 0.47459469250973646,
"grad_norm": 0.4570608139038086,
"learning_rate": 0.00018143093350002137,
"loss": 0.187,
"step": 10480
},
{
"epoch": 0.47504755004075716,
"grad_norm": 0.4626786708831787,
"learning_rate": 0.00018138744795997681,
"loss": 0.2201,
"step": 10490
},
{
"epoch": 0.4755004075717779,
"grad_norm": 0.5135970711708069,
"learning_rate": 0.00018134391678580964,
"loss": 0.1854,
"step": 10500
},
{
"epoch": 0.4759532651027987,
"grad_norm": 0.34417203068733215,
"learning_rate": 0.0001813003400019279,
"loss": 0.1852,
"step": 10510
},
{
"epoch": 0.4764061226338194,
"grad_norm": 0.3838786780834198,
"learning_rate": 0.00018125671763276517,
"loss": 0.1947,
"step": 10520
},
{
"epoch": 0.47685898016484013,
"grad_norm": 0.5423650145530701,
"learning_rate": 0.0001812130497027806,
"loss": 0.194,
"step": 10530
},
{
"epoch": 0.4773118376958609,
"grad_norm": 0.3431115746498108,
"learning_rate": 0.0001811693362364589,
"loss": 0.2074,
"step": 10540
},
{
"epoch": 0.47776469522688164,
"grad_norm": 0.38044998049736023,
"learning_rate": 0.00018112557725831035,
"loss": 0.1829,
"step": 10550
},
{
"epoch": 0.47821755275790234,
"grad_norm": 0.4877161383628845,
"learning_rate": 0.0001810817727928707,
"loss": 0.2014,
"step": 10560
},
{
"epoch": 0.4786704102889231,
"grad_norm": 0.553992748260498,
"learning_rate": 0.00018103792286470113,
"loss": 0.1937,
"step": 10570
},
{
"epoch": 0.47912326781994385,
"grad_norm": 0.3857057988643646,
"learning_rate": 0.0001809940274983885,
"loss": 0.1928,
"step": 10580
},
{
"epoch": 0.4795761253509646,
"grad_norm": 0.7242947816848755,
"learning_rate": 0.00018095008671854498,
"loss": 0.1709,
"step": 10590
},
{
"epoch": 0.4800289828819853,
"grad_norm": 0.47653719782829285,
"learning_rate": 0.00018090610054980824,
"loss": 0.1756,
"step": 10600
},
{
"epoch": 0.48048184041300607,
"grad_norm": 0.38235634565353394,
"learning_rate": 0.00018086206901684148,
"loss": 0.181,
"step": 10610
},
{
"epoch": 0.4809346979440268,
"grad_norm": 0.5229091048240662,
"learning_rate": 0.00018081799214433324,
"loss": 0.2015,
"step": 10620
},
{
"epoch": 0.4813875554750475,
"grad_norm": 0.46029898524284363,
"learning_rate": 0.0001807738699569975,
"loss": 0.1974,
"step": 10630
},
{
"epoch": 0.4818404130060683,
"grad_norm": 0.4247789978981018,
"learning_rate": 0.00018072970247957378,
"loss": 0.1822,
"step": 10640
},
{
"epoch": 0.48229327053708904,
"grad_norm": 0.32685187458992004,
"learning_rate": 0.00018068548973682673,
"loss": 0.199,
"step": 10650
},
{
"epoch": 0.4827461280681098,
"grad_norm": 0.5261408686637878,
"learning_rate": 0.00018064123175354663,
"loss": 0.1965,
"step": 10660
},
{
"epoch": 0.4831989855991305,
"grad_norm": 0.33152681589126587,
"learning_rate": 0.00018059692855454902,
"loss": 0.1649,
"step": 10670
},
{
"epoch": 0.48365184313015125,
"grad_norm": 0.31257107853889465,
"learning_rate": 0.00018055258016467477,
"loss": 0.1896,
"step": 10680
},
{
"epoch": 0.484104700661172,
"grad_norm": 0.3735596835613251,
"learning_rate": 0.00018050818660879015,
"loss": 0.1835,
"step": 10690
},
{
"epoch": 0.48455755819219276,
"grad_norm": 0.45948630571365356,
"learning_rate": 0.0001804637479117867,
"loss": 0.1785,
"step": 10700
},
{
"epoch": 0.48501041572321346,
"grad_norm": 0.34303784370422363,
"learning_rate": 0.00018041926409858132,
"loss": 0.1989,
"step": 10710
},
{
"epoch": 0.4854632732542342,
"grad_norm": 0.5830017328262329,
"learning_rate": 0.00018037473519411616,
"loss": 0.2055,
"step": 10720
},
{
"epoch": 0.485916130785255,
"grad_norm": 0.4883866608142853,
"learning_rate": 0.0001803301612233587,
"loss": 0.2011,
"step": 10730
},
{
"epoch": 0.4863689883162757,
"grad_norm": 0.5013921856880188,
"learning_rate": 0.00018028554221130163,
"loss": 0.1803,
"step": 10740
},
{
"epoch": 0.48682184584729643,
"grad_norm": 0.6551631689071655,
"learning_rate": 0.00018024087818296298,
"loss": 0.2131,
"step": 10750
},
{
"epoch": 0.4872747033783172,
"grad_norm": 0.35842567682266235,
"learning_rate": 0.00018019616916338594,
"loss": 0.2094,
"step": 10760
},
{
"epoch": 0.48772756090933794,
"grad_norm": 0.4328503906726837,
"learning_rate": 0.00018015141517763894,
"loss": 0.1568,
"step": 10770
},
{
"epoch": 0.48818041844035864,
"grad_norm": 0.4990096092224121,
"learning_rate": 0.00018010661625081565,
"loss": 0.1851,
"step": 10780
},
{
"epoch": 0.4886332759713794,
"grad_norm": 0.36216849088668823,
"learning_rate": 0.00018006177240803494,
"loss": 0.2044,
"step": 10790
},
{
"epoch": 0.48908613350240016,
"grad_norm": 0.45498183369636536,
"learning_rate": 0.00018001688367444082,
"loss": 0.1827,
"step": 10800
},
{
"epoch": 0.4895389910334209,
"grad_norm": 0.3285827040672302,
"learning_rate": 0.00017997195007520258,
"loss": 0.1884,
"step": 10810
},
{
"epoch": 0.4899918485644416,
"grad_norm": 0.3710488975048065,
"learning_rate": 0.00017992697163551452,
"loss": 0.1743,
"step": 10820
},
{
"epoch": 0.49044470609546237,
"grad_norm": 0.4138637185096741,
"learning_rate": 0.00017988194838059615,
"loss": 0.177,
"step": 10830
},
{
"epoch": 0.4908975636264831,
"grad_norm": 0.34731653332710266,
"learning_rate": 0.00017983688033569212,
"loss": 0.1972,
"step": 10840
},
{
"epoch": 0.4913504211575038,
"grad_norm": 0.3208175003528595,
"learning_rate": 0.0001797917675260722,
"loss": 0.1735,
"step": 10850
},
{
"epoch": 0.4918032786885246,
"grad_norm": 0.354033887386322,
"learning_rate": 0.00017974660997703126,
"loss": 0.1965,
"step": 10860
},
{
"epoch": 0.49225613621954534,
"grad_norm": 0.4648807942867279,
"learning_rate": 0.00017970140771388917,
"loss": 0.2096,
"step": 10870
},
{
"epoch": 0.4927089937505661,
"grad_norm": 0.4400973618030548,
"learning_rate": 0.000179656160761991,
"loss": 0.1879,
"step": 10880
},
{
"epoch": 0.4931618512815868,
"grad_norm": 0.4962785840034485,
"learning_rate": 0.0001796108691467068,
"loss": 0.1716,
"step": 10890
},
{
"epoch": 0.49361470881260755,
"grad_norm": 0.4156644940376282,
"learning_rate": 0.00017956553289343166,
"loss": 0.1932,
"step": 10900
},
{
"epoch": 0.4940675663436283,
"grad_norm": 0.4051489233970642,
"learning_rate": 0.00017952015202758574,
"loss": 0.1956,
"step": 10910
},
{
"epoch": 0.49452042387464906,
"grad_norm": 0.4099380671977997,
"learning_rate": 0.00017947472657461416,
"loss": 0.1833,
"step": 10920
},
{
"epoch": 0.49497328140566976,
"grad_norm": 0.39209482073783875,
"learning_rate": 0.00017942925655998711,
"loss": 0.2349,
"step": 10930
},
{
"epoch": 0.4954261389366905,
"grad_norm": 0.37333500385284424,
"learning_rate": 0.00017938374200919974,
"loss": 0.1655,
"step": 10940
},
{
"epoch": 0.4958789964677113,
"grad_norm": 0.38342049717903137,
"learning_rate": 0.0001793381829477721,
"loss": 0.1969,
"step": 10950
},
{
"epoch": 0.496331853998732,
"grad_norm": 0.3624749779701233,
"learning_rate": 0.00017929257940124934,
"loss": 0.1828,
"step": 10960
},
{
"epoch": 0.49678471152975273,
"grad_norm": 0.36027488112449646,
"learning_rate": 0.00017924693139520137,
"loss": 0.2038,
"step": 10970
},
{
"epoch": 0.4972375690607735,
"grad_norm": 0.34053680300712585,
"learning_rate": 0.0001792012389552232,
"loss": 0.1776,
"step": 10980
},
{
"epoch": 0.49769042659179424,
"grad_norm": 0.2984725832939148,
"learning_rate": 0.00017915550210693466,
"loss": 0.2036,
"step": 10990
},
{
"epoch": 0.49814328412281494,
"grad_norm": 0.3361246883869171,
"learning_rate": 0.00017910972087598051,
"loss": 0.1645,
"step": 11000
},
{
"epoch": 0.49814328412281494,
"eval_chrf": 75.57746990852579,
"eval_loss": 0.14612668752670288,
"eval_runtime": 26.7541,
"eval_samples_per_second": 0.374,
"eval_steps_per_second": 0.037,
"step": 11000
},
{
"epoch": 0.4985961416538357,
"grad_norm": 0.41162702441215515,
"learning_rate": 0.00017906389528803035,
"loss": 0.2009,
"step": 11010
},
{
"epoch": 0.49904899918485646,
"grad_norm": 0.3144682049751282,
"learning_rate": 0.00017901802536877877,
"loss": 0.1899,
"step": 11020
},
{
"epoch": 0.4995018567158772,
"grad_norm": 0.47382402420043945,
"learning_rate": 0.00017897211114394502,
"loss": 0.2115,
"step": 11030
},
{
"epoch": 0.4999547142468979,
"grad_norm": 0.2976755201816559,
"learning_rate": 0.00017892615263927344,
"loss": 0.1869,
"step": 11040
},
{
"epoch": 0.5004075717779186,
"grad_norm": 0.35733887553215027,
"learning_rate": 0.00017888014988053297,
"loss": 0.1705,
"step": 11050
},
{
"epoch": 0.5008604293089394,
"grad_norm": 0.36196479201316833,
"learning_rate": 0.00017883410289351747,
"loss": 0.1966,
"step": 11060
},
{
"epoch": 0.5013132868399601,
"grad_norm": 0.4164937734603882,
"learning_rate": 0.00017878801170404565,
"loss": 0.1725,
"step": 11070
},
{
"epoch": 0.5017661443709809,
"grad_norm": 0.3591492772102356,
"learning_rate": 0.00017874187633796086,
"loss": 0.1992,
"step": 11080
},
{
"epoch": 0.5022190019020016,
"grad_norm": 0.33890897035598755,
"learning_rate": 0.00017869569682113135,
"loss": 0.1709,
"step": 11090
},
{
"epoch": 0.5026718594330224,
"grad_norm": 0.36221665143966675,
"learning_rate": 0.00017864947317945007,
"loss": 0.1734,
"step": 11100
},
{
"epoch": 0.5031247169640432,
"grad_norm": 0.49572357535362244,
"learning_rate": 0.0001786032054388347,
"loss": 0.1893,
"step": 11110
},
{
"epoch": 0.5035775744950639,
"grad_norm": 0.40498650074005127,
"learning_rate": 0.0001785568936252277,
"loss": 0.1887,
"step": 11120
},
{
"epoch": 0.5040304320260846,
"grad_norm": 0.3324230909347534,
"learning_rate": 0.0001785105377645962,
"loss": 0.1993,
"step": 11130
},
{
"epoch": 0.5044832895571053,
"grad_norm": 0.35598695278167725,
"learning_rate": 0.000178464137882932,
"loss": 0.1983,
"step": 11140
},
{
"epoch": 0.5049361470881261,
"grad_norm": 0.3882586658000946,
"learning_rate": 0.00017841769400625163,
"loss": 0.1666,
"step": 11150
},
{
"epoch": 0.5053890046191468,
"grad_norm": 0.42131948471069336,
"learning_rate": 0.0001783712061605963,
"loss": 0.2014,
"step": 11160
},
{
"epoch": 0.5058418621501676,
"grad_norm": 0.40716439485549927,
"learning_rate": 0.0001783246743720318,
"loss": 0.2202,
"step": 11170
},
{
"epoch": 0.5062947196811883,
"grad_norm": 0.30183571577072144,
"learning_rate": 0.00017827809866664867,
"loss": 0.1717,
"step": 11180
},
{
"epoch": 0.5067475772122091,
"grad_norm": 0.4751286208629608,
"learning_rate": 0.00017823147907056197,
"loss": 0.1711,
"step": 11190
},
{
"epoch": 0.5072004347432297,
"grad_norm": 0.37397274374961853,
"learning_rate": 0.00017818481560991144,
"loss": 0.1883,
"step": 11200
},
{
"epoch": 0.5076532922742505,
"grad_norm": 0.4874507486820221,
"learning_rate": 0.00017813810831086133,
"loss": 0.206,
"step": 11210
},
{
"epoch": 0.5081061498052712,
"grad_norm": 0.31040722131729126,
"learning_rate": 0.00017809135719960056,
"loss": 0.1902,
"step": 11220
},
{
"epoch": 0.508559007336292,
"grad_norm": 0.49066388607025146,
"learning_rate": 0.0001780445623023426,
"loss": 0.2025,
"step": 11230
},
{
"epoch": 0.5090118648673128,
"grad_norm": 0.4019545614719391,
"learning_rate": 0.00017799772364532546,
"loss": 0.175,
"step": 11240
},
{
"epoch": 0.5094647223983335,
"grad_norm": 0.33892253041267395,
"learning_rate": 0.00017795084125481162,
"loss": 0.1881,
"step": 11250
},
{
"epoch": 0.5099175799293543,
"grad_norm": 0.43992340564727783,
"learning_rate": 0.0001779039151570882,
"loss": 0.1854,
"step": 11260
},
{
"epoch": 0.5103704374603749,
"grad_norm": 0.4423786401748657,
"learning_rate": 0.00017785694537846673,
"loss": 0.2154,
"step": 11270
},
{
"epoch": 0.5108232949913957,
"grad_norm": 0.54453444480896,
"learning_rate": 0.00017780993194528328,
"loss": 0.186,
"step": 11280
},
{
"epoch": 0.5112761525224164,
"grad_norm": 0.3905886709690094,
"learning_rate": 0.00017776287488389838,
"loss": 0.202,
"step": 11290
},
{
"epoch": 0.5117290100534372,
"grad_norm": 0.3799689710140228,
"learning_rate": 0.00017771577422069705,
"loss": 0.1575,
"step": 11300
},
{
"epoch": 0.5121818675844579,
"grad_norm": 0.5496554374694824,
"learning_rate": 0.00017766862998208873,
"loss": 0.1695,
"step": 11310
},
{
"epoch": 0.5126347251154787,
"grad_norm": 0.34472495317459106,
"learning_rate": 0.00017762144219450726,
"loss": 0.185,
"step": 11320
},
{
"epoch": 0.5130875826464995,
"grad_norm": 0.4271334111690521,
"learning_rate": 0.000177574210884411,
"loss": 0.2074,
"step": 11330
},
{
"epoch": 0.5135404401775201,
"grad_norm": 0.37587055563926697,
"learning_rate": 0.0001775269360782826,
"loss": 0.1664,
"step": 11340
},
{
"epoch": 0.5139932977085409,
"grad_norm": 0.31624364852905273,
"learning_rate": 0.0001774796178026292,
"loss": 0.186,
"step": 11350
},
{
"epoch": 0.5144461552395616,
"grad_norm": 0.5031947493553162,
"learning_rate": 0.0001774322560839822,
"loss": 0.1635,
"step": 11360
},
{
"epoch": 0.5148990127705824,
"grad_norm": 0.39671581983566284,
"learning_rate": 0.00017738485094889747,
"loss": 0.1945,
"step": 11370
},
{
"epoch": 0.5153518703016031,
"grad_norm": 0.3413325548171997,
"learning_rate": 0.00017733740242395515,
"loss": 0.1745,
"step": 11380
},
{
"epoch": 0.5158047278326239,
"grad_norm": 0.3880815804004669,
"learning_rate": 0.00017728991053575974,
"loss": 0.189,
"step": 11390
},
{
"epoch": 0.5162575853636446,
"grad_norm": 0.40092733502388,
"learning_rate": 0.00017724237531094002,
"loss": 0.1689,
"step": 11400
},
{
"epoch": 0.5167104428946654,
"grad_norm": 0.3720043897628784,
"learning_rate": 0.00017719479677614917,
"loss": 0.194,
"step": 11410
},
{
"epoch": 0.517163300425686,
"grad_norm": 0.3282621502876282,
"learning_rate": 0.0001771471749580645,
"loss": 0.1609,
"step": 11420
},
{
"epoch": 0.5176161579567068,
"grad_norm": 0.4283618927001953,
"learning_rate": 0.00017709950988338772,
"loss": 0.1654,
"step": 11430
},
{
"epoch": 0.5180690154877275,
"grad_norm": 0.480474591255188,
"learning_rate": 0.0001770518015788447,
"loss": 0.1965,
"step": 11440
},
{
"epoch": 0.5185218730187483,
"grad_norm": 0.4488031566143036,
"learning_rate": 0.00017700405007118564,
"loss": 0.1829,
"step": 11450
},
{
"epoch": 0.5189747305497691,
"grad_norm": 0.4382071793079376,
"learning_rate": 0.00017695625538718485,
"loss": 0.1626,
"step": 11460
},
{
"epoch": 0.5194275880807898,
"grad_norm": 0.38167351484298706,
"learning_rate": 0.000176908417553641,
"loss": 0.1858,
"step": 11470
},
{
"epoch": 0.5198804456118106,
"grad_norm": 0.320625364780426,
"learning_rate": 0.0001768605365973768,
"loss": 0.1794,
"step": 11480
},
{
"epoch": 0.5203333031428312,
"grad_norm": 0.39186790585517883,
"learning_rate": 0.00017681261254523926,
"loss": 0.1769,
"step": 11490
},
{
"epoch": 0.520786160673852,
"grad_norm": 0.34316763281822205,
"learning_rate": 0.00017676464542409946,
"loss": 0.1764,
"step": 11500
},
{
"epoch": 0.5212390182048727,
"grad_norm": 0.39774084091186523,
"learning_rate": 0.0001767166352608527,
"loss": 0.1957,
"step": 11510
},
{
"epoch": 0.5216918757358935,
"grad_norm": 0.4764063358306885,
"learning_rate": 0.0001766685820824184,
"loss": 0.1894,
"step": 11520
},
{
"epoch": 0.5221447332669142,
"grad_norm": 0.41005027294158936,
"learning_rate": 0.00017662048591574002,
"loss": 0.1895,
"step": 11530
},
{
"epoch": 0.522597590797935,
"grad_norm": 0.4711003601551056,
"learning_rate": 0.00017657234678778523,
"loss": 0.2117,
"step": 11540
},
{
"epoch": 0.5230504483289558,
"grad_norm": 0.3926088511943817,
"learning_rate": 0.00017652416472554574,
"loss": 0.1927,
"step": 11550
},
{
"epoch": 0.5235033058599764,
"grad_norm": 0.2955372631549835,
"learning_rate": 0.00017647593975603736,
"loss": 0.1925,
"step": 11560
},
{
"epoch": 0.5239561633909972,
"grad_norm": 0.3705880045890808,
"learning_rate": 0.0001764276719062999,
"loss": 0.1835,
"step": 11570
},
{
"epoch": 0.5244090209220179,
"grad_norm": 0.37469375133514404,
"learning_rate": 0.00017637936120339727,
"loss": 0.1819,
"step": 11580
},
{
"epoch": 0.5248618784530387,
"grad_norm": 0.4436321258544922,
"learning_rate": 0.0001763310076744174,
"loss": 0.2017,
"step": 11590
},
{
"epoch": 0.5253147359840594,
"grad_norm": 0.5750246644020081,
"learning_rate": 0.00017628261134647216,
"loss": 0.1968,
"step": 11600
},
{
"epoch": 0.5257675935150802,
"grad_norm": 0.3641575872898102,
"learning_rate": 0.00017623417224669758,
"loss": 0.2034,
"step": 11610
},
{
"epoch": 0.5262204510461009,
"grad_norm": 0.4717412292957306,
"learning_rate": 0.0001761856904022535,
"loss": 0.2,
"step": 11620
},
{
"epoch": 0.5266733085771217,
"grad_norm": 0.447290301322937,
"learning_rate": 0.00017613716584032383,
"loss": 0.1757,
"step": 11630
},
{
"epoch": 0.5271261661081423,
"grad_norm": 0.5311118960380554,
"learning_rate": 0.00017608859858811636,
"loss": 0.1872,
"step": 11640
},
{
"epoch": 0.5275790236391631,
"grad_norm": 0.508751630783081,
"learning_rate": 0.00017603998867286286,
"loss": 0.1916,
"step": 11650
},
{
"epoch": 0.5280318811701838,
"grad_norm": 0.3642581105232239,
"learning_rate": 0.00017599133612181906,
"loss": 0.1885,
"step": 11660
},
{
"epoch": 0.5284847387012046,
"grad_norm": 0.415244460105896,
"learning_rate": 0.00017594264096226455,
"loss": 0.1916,
"step": 11670
},
{
"epoch": 0.5289375962322254,
"grad_norm": 0.3395320475101471,
"learning_rate": 0.00017589390322150276,
"loss": 0.1935,
"step": 11680
},
{
"epoch": 0.5293904537632461,
"grad_norm": 0.3394661843776703,
"learning_rate": 0.00017584512292686112,
"loss": 0.2148,
"step": 11690
},
{
"epoch": 0.5298433112942669,
"grad_norm": 0.3732159733772278,
"learning_rate": 0.00017579630010569077,
"loss": 0.1955,
"step": 11700
},
{
"epoch": 0.5302961688252875,
"grad_norm": 0.3915143311023712,
"learning_rate": 0.00017574743478536686,
"loss": 0.1916,
"step": 11710
},
{
"epoch": 0.5307490263563083,
"grad_norm": 0.44478586316108704,
"learning_rate": 0.0001756985269932882,
"loss": 0.2156,
"step": 11720
},
{
"epoch": 0.531201883887329,
"grad_norm": 0.394619345664978,
"learning_rate": 0.00017564957675687758,
"loss": 0.178,
"step": 11730
},
{
"epoch": 0.5316547414183498,
"grad_norm": 0.4088374674320221,
"learning_rate": 0.00017560058410358143,
"loss": 0.1767,
"step": 11740
},
{
"epoch": 0.5321075989493705,
"grad_norm": 0.46319580078125,
"learning_rate": 0.0001755515490608701,
"loss": 0.2001,
"step": 11750
},
{
"epoch": 0.5325604564803913,
"grad_norm": 0.5034335255622864,
"learning_rate": 0.0001755024716562376,
"loss": 0.1801,
"step": 11760
},
{
"epoch": 0.533013314011412,
"grad_norm": 0.4826143682003021,
"learning_rate": 0.0001754533519172018,
"loss": 0.1964,
"step": 11770
},
{
"epoch": 0.5334661715424327,
"grad_norm": 0.3931207060813904,
"learning_rate": 0.00017540418987130413,
"loss": 0.173,
"step": 11780
},
{
"epoch": 0.5339190290734535,
"grad_norm": 0.4404812157154083,
"learning_rate": 0.00017535498554611,
"loss": 0.2032,
"step": 11790
},
{
"epoch": 0.5343718866044742,
"grad_norm": 0.47916218638420105,
"learning_rate": 0.0001753057389692083,
"loss": 0.1845,
"step": 11800
},
{
"epoch": 0.534824744135495,
"grad_norm": 0.3846713900566101,
"learning_rate": 0.00017525645016821173,
"loss": 0.1753,
"step": 11810
},
{
"epoch": 0.5352776016665157,
"grad_norm": 0.4517151415348053,
"learning_rate": 0.00017520711917075657,
"loss": 0.1724,
"step": 11820
},
{
"epoch": 0.5357304591975365,
"grad_norm": 0.3133130669593811,
"learning_rate": 0.00017515774600450289,
"loss": 0.2,
"step": 11830
},
{
"epoch": 0.5361833167285572,
"grad_norm": 0.4651758670806885,
"learning_rate": 0.00017510833069713435,
"loss": 0.1812,
"step": 11840
},
{
"epoch": 0.536636174259578,
"grad_norm": 0.38983821868896484,
"learning_rate": 0.00017505887327635812,
"loss": 0.1898,
"step": 11850
},
{
"epoch": 0.5370890317905986,
"grad_norm": 0.49756869673728943,
"learning_rate": 0.0001750093737699052,
"loss": 0.1697,
"step": 11860
},
{
"epoch": 0.5375418893216194,
"grad_norm": 0.22499801218509674,
"learning_rate": 0.00017495983220552997,
"loss": 0.1763,
"step": 11870
},
{
"epoch": 0.5379947468526401,
"grad_norm": 0.4487616717815399,
"learning_rate": 0.00017491024861101054,
"loss": 0.1887,
"step": 11880
},
{
"epoch": 0.5384476043836609,
"grad_norm": 0.4188464879989624,
"learning_rate": 0.00017486062301414862,
"loss": 0.1763,
"step": 11890
},
{
"epoch": 0.5389004619146817,
"grad_norm": 0.33243581652641296,
"learning_rate": 0.00017481095544276921,
"loss": 0.2061,
"step": 11900
},
{
"epoch": 0.5393533194457024,
"grad_norm": 0.380401074886322,
"learning_rate": 0.0001747612459247212,
"loss": 0.1743,
"step": 11910
},
{
"epoch": 0.5398061769767232,
"grad_norm": 0.4662674367427826,
"learning_rate": 0.00017471149448787675,
"loss": 0.1879,
"step": 11920
},
{
"epoch": 0.5402590345077438,
"grad_norm": 0.3613015115261078,
"learning_rate": 0.00017466170116013166,
"loss": 0.1565,
"step": 11930
},
{
"epoch": 0.5407118920387646,
"grad_norm": 0.29406189918518066,
"learning_rate": 0.00017461186596940507,
"loss": 0.1927,
"step": 11940
},
{
"epoch": 0.5411647495697853,
"grad_norm": 0.6028956174850464,
"learning_rate": 0.0001745619889436397,
"loss": 0.1967,
"step": 11950
},
{
"epoch": 0.5416176071008061,
"grad_norm": 0.4816293716430664,
"learning_rate": 0.00017451207011080178,
"loss": 0.1964,
"step": 11960
},
{
"epoch": 0.5420704646318268,
"grad_norm": 0.41138580441474915,
"learning_rate": 0.00017446210949888084,
"loss": 0.2273,
"step": 11970
},
{
"epoch": 0.5425233221628476,
"grad_norm": 0.418501615524292,
"learning_rate": 0.0001744121071358899,
"loss": 0.1986,
"step": 11980
},
{
"epoch": 0.5429761796938684,
"grad_norm": 0.5979639887809753,
"learning_rate": 0.0001743620630498655,
"loss": 0.2006,
"step": 11990
},
{
"epoch": 0.543429037224889,
"grad_norm": 0.4319825768470764,
"learning_rate": 0.00017431197726886733,
"loss": 0.1867,
"step": 12000
},
{
"epoch": 0.543429037224889,
"eval_chrf": 75.72498462085531,
"eval_loss": 0.1541338711977005,
"eval_runtime": 27.1149,
"eval_samples_per_second": 0.369,
"eval_steps_per_second": 0.037,
"step": 12000
},
{
"epoch": 0.5438818947559098,
"grad_norm": 0.378240168094635,
"learning_rate": 0.00017426184982097872,
"loss": 0.1939,
"step": 12010
},
{
"epoch": 0.5443347522869305,
"grad_norm": 0.45126333832740784,
"learning_rate": 0.00017421168073430613,
"loss": 0.1846,
"step": 12020
},
{
"epoch": 0.5447876098179513,
"grad_norm": 0.3531319797039032,
"learning_rate": 0.00017416147003697957,
"loss": 0.1472,
"step": 12030
},
{
"epoch": 0.545240467348972,
"grad_norm": 0.5536248087882996,
"learning_rate": 0.00017411121775715222,
"loss": 0.1972,
"step": 12040
},
{
"epoch": 0.5456933248799928,
"grad_norm": 0.5452578067779541,
"learning_rate": 0.0001740609239230007,
"loss": 0.1789,
"step": 12050
},
{
"epoch": 0.5461461824110135,
"grad_norm": 0.5128986835479736,
"learning_rate": 0.00017401058856272487,
"loss": 0.2148,
"step": 12060
},
{
"epoch": 0.5465990399420342,
"grad_norm": 0.43081146478652954,
"learning_rate": 0.00017396021170454785,
"loss": 0.2226,
"step": 12070
},
{
"epoch": 0.5470518974730549,
"grad_norm": 0.298042893409729,
"learning_rate": 0.00017390979337671608,
"loss": 0.1724,
"step": 12080
},
{
"epoch": 0.5475047550040757,
"grad_norm": 0.42853906750679016,
"learning_rate": 0.0001738593336074992,
"loss": 0.1845,
"step": 12090
},
{
"epoch": 0.5479576125350964,
"grad_norm": 0.45352569222450256,
"learning_rate": 0.00017380883242519007,
"loss": 0.1822,
"step": 12100
},
{
"epoch": 0.5484104700661172,
"grad_norm": 0.49138566851615906,
"learning_rate": 0.00017375828985810495,
"loss": 0.1735,
"step": 12110
},
{
"epoch": 0.548863327597138,
"grad_norm": 0.4722447395324707,
"learning_rate": 0.00017370770593458308,
"loss": 0.1804,
"step": 12120
},
{
"epoch": 0.5493161851281587,
"grad_norm": 0.3948294520378113,
"learning_rate": 0.00017365708068298695,
"loss": 0.1744,
"step": 12130
},
{
"epoch": 0.5497690426591795,
"grad_norm": 0.35122090578079224,
"learning_rate": 0.00017360641413170232,
"loss": 0.1713,
"step": 12140
},
{
"epoch": 0.5502219001902001,
"grad_norm": 0.4121938645839691,
"learning_rate": 0.00017355570630913804,
"loss": 0.1915,
"step": 12150
},
{
"epoch": 0.5506747577212209,
"grad_norm": 0.4639427959918976,
"learning_rate": 0.00017350495724372604,
"loss": 0.1607,
"step": 12160
},
{
"epoch": 0.5511276152522416,
"grad_norm": 0.39819326996803284,
"learning_rate": 0.0001734541669639215,
"loss": 0.1722,
"step": 12170
},
{
"epoch": 0.5515804727832624,
"grad_norm": 0.6044564843177795,
"learning_rate": 0.00017340333549820255,
"loss": 0.1663,
"step": 12180
},
{
"epoch": 0.5520333303142831,
"grad_norm": 0.37291619181632996,
"learning_rate": 0.0001733524628750706,
"loss": 0.1584,
"step": 12190
},
{
"epoch": 0.5524861878453039,
"grad_norm": 0.42959266901016235,
"learning_rate": 0.00017330154912304998,
"loss": 0.2168,
"step": 12200
},
{
"epoch": 0.5529390453763247,
"grad_norm": 0.35410502552986145,
"learning_rate": 0.00017325059427068817,
"loss": 0.1891,
"step": 12210
},
{
"epoch": 0.5533919029073453,
"grad_norm": 0.4390549063682556,
"learning_rate": 0.00017319959834655565,
"loss": 0.1763,
"step": 12220
},
{
"epoch": 0.5538447604383661,
"grad_norm": 0.5032073855400085,
"learning_rate": 0.00017314856137924602,
"loss": 0.1728,
"step": 12230
},
{
"epoch": 0.5542976179693868,
"grad_norm": 0.49394169449806213,
"learning_rate": 0.00017309748339737572,
"loss": 0.2135,
"step": 12240
},
{
"epoch": 0.5547504755004076,
"grad_norm": 0.4199583828449249,
"learning_rate": 0.00017304636442958432,
"loss": 0.2059,
"step": 12250
},
{
"epoch": 0.5552033330314283,
"grad_norm": 0.521976888179779,
"learning_rate": 0.00017299520450453438,
"loss": 0.1986,
"step": 12260
},
{
"epoch": 0.5556561905624491,
"grad_norm": 0.45586416125297546,
"learning_rate": 0.00017294400365091135,
"loss": 0.1876,
"step": 12270
},
{
"epoch": 0.5561090480934698,
"grad_norm": 0.31665554642677307,
"learning_rate": 0.00017289276189742366,
"loss": 0.1909,
"step": 12280
},
{
"epoch": 0.5565619056244905,
"grad_norm": 0.25845375657081604,
"learning_rate": 0.00017284147927280267,
"loss": 0.2069,
"step": 12290
},
{
"epoch": 0.5570147631555112,
"grad_norm": 0.39719051122665405,
"learning_rate": 0.0001727901558058027,
"loss": 0.1956,
"step": 12300
},
{
"epoch": 0.557467620686532,
"grad_norm": 0.41454482078552246,
"learning_rate": 0.0001727387915252009,
"loss": 0.1884,
"step": 12310
},
{
"epoch": 0.5579204782175528,
"grad_norm": 0.36447224020957947,
"learning_rate": 0.0001726873864597973,
"loss": 0.1692,
"step": 12320
},
{
"epoch": 0.5583733357485735,
"grad_norm": 0.40834981203079224,
"learning_rate": 0.00017263594063841493,
"loss": 0.2028,
"step": 12330
},
{
"epoch": 0.5588261932795943,
"grad_norm": 0.43426138162612915,
"learning_rate": 0.00017258445408989948,
"loss": 0.1887,
"step": 12340
},
{
"epoch": 0.559279050810615,
"grad_norm": 0.415056049823761,
"learning_rate": 0.00017253292684311965,
"loss": 0.1886,
"step": 12350
},
{
"epoch": 0.5597319083416358,
"grad_norm": 0.3616989850997925,
"learning_rate": 0.0001724813589269668,
"loss": 0.2022,
"step": 12360
},
{
"epoch": 0.5601847658726564,
"grad_norm": 0.454166442155838,
"learning_rate": 0.00017242975037035527,
"loss": 0.1972,
"step": 12370
},
{
"epoch": 0.5606376234036772,
"grad_norm": 0.29079487919807434,
"learning_rate": 0.000172378101202222,
"loss": 0.1778,
"step": 12380
},
{
"epoch": 0.5610904809346979,
"grad_norm": 0.4357644319534302,
"learning_rate": 0.00017232641145152686,
"loss": 0.197,
"step": 12390
},
{
"epoch": 0.5615433384657187,
"grad_norm": 0.34741294384002686,
"learning_rate": 0.00017227468114725238,
"loss": 0.1714,
"step": 12400
},
{
"epoch": 0.5619961959967394,
"grad_norm": 0.4908086061477661,
"learning_rate": 0.00017222291031840383,
"loss": 0.1976,
"step": 12410
},
{
"epoch": 0.5624490535277602,
"grad_norm": 0.49520352482795715,
"learning_rate": 0.0001721710989940093,
"loss": 0.178,
"step": 12420
},
{
"epoch": 0.562901911058781,
"grad_norm": 0.47621747851371765,
"learning_rate": 0.00017211924720311945,
"loss": 0.1798,
"step": 12430
},
{
"epoch": 0.5633547685898016,
"grad_norm": 0.41770318150520325,
"learning_rate": 0.0001720673549748077,
"loss": 0.1894,
"step": 12440
},
{
"epoch": 0.5638076261208224,
"grad_norm": 0.39249005913734436,
"learning_rate": 0.00017201542233817016,
"loss": 0.1858,
"step": 12450
},
{
"epoch": 0.5642604836518431,
"grad_norm": 0.43662548065185547,
"learning_rate": 0.0001719634493223256,
"loss": 0.1754,
"step": 12460
},
{
"epoch": 0.5647133411828639,
"grad_norm": 0.3656141459941864,
"learning_rate": 0.00017191143595641535,
"loss": 0.184,
"step": 12470
},
{
"epoch": 0.5651661987138846,
"grad_norm": 0.3354056477546692,
"learning_rate": 0.00017185938226960346,
"loss": 0.2049,
"step": 12480
},
{
"epoch": 0.5656190562449054,
"grad_norm": 0.329375296831131,
"learning_rate": 0.0001718072882910765,
"loss": 0.1919,
"step": 12490
},
{
"epoch": 0.5660719137759261,
"grad_norm": 0.5593803524971008,
"learning_rate": 0.00017175515405004372,
"loss": 0.2334,
"step": 12500
},
{
"epoch": 0.5665247713069468,
"grad_norm": 0.3757700026035309,
"learning_rate": 0.0001717029795757369,
"loss": 0.2013,
"step": 12510
},
{
"epoch": 0.5669776288379675,
"grad_norm": 0.3514690101146698,
"learning_rate": 0.00017165076489741038,
"loss": 0.1819,
"step": 12520
},
{
"epoch": 0.5674304863689883,
"grad_norm": 0.513963520526886,
"learning_rate": 0.00017159851004434104,
"loss": 0.2205,
"step": 12530
},
{
"epoch": 0.567883343900009,
"grad_norm": 0.3796728849411011,
"learning_rate": 0.00017154621504582833,
"loss": 0.2084,
"step": 12540
},
{
"epoch": 0.5683362014310298,
"grad_norm": 0.3902125656604767,
"learning_rate": 0.0001714938799311941,
"loss": 0.2021,
"step": 12550
},
{
"epoch": 0.5687890589620506,
"grad_norm": 0.48895615339279175,
"learning_rate": 0.00017144150472978283,
"loss": 0.1698,
"step": 12560
},
{
"epoch": 0.5692419164930713,
"grad_norm": 0.4550088346004486,
"learning_rate": 0.00017138908947096133,
"loss": 0.1822,
"step": 12570
},
{
"epoch": 0.569694774024092,
"grad_norm": 0.36123690009117126,
"learning_rate": 0.00017133663418411908,
"loss": 0.1781,
"step": 12580
},
{
"epoch": 0.5701476315551127,
"grad_norm": 0.3133276104927063,
"learning_rate": 0.00017128413889866772,
"loss": 0.1918,
"step": 12590
},
{
"epoch": 0.5706004890861335,
"grad_norm": 0.3202573359012604,
"learning_rate": 0.00017123160364404161,
"loss": 0.1895,
"step": 12600
},
{
"epoch": 0.5710533466171542,
"grad_norm": 0.31997188925743103,
"learning_rate": 0.00017117902844969733,
"loss": 0.1823,
"step": 12610
},
{
"epoch": 0.571506204148175,
"grad_norm": 0.4120926558971405,
"learning_rate": 0.0001711264133451139,
"loss": 0.1867,
"step": 12620
},
{
"epoch": 0.5719590616791957,
"grad_norm": 0.47499316930770874,
"learning_rate": 0.0001710737583597927,
"loss": 0.1824,
"step": 12630
},
{
"epoch": 0.5724119192102165,
"grad_norm": 0.363459974527359,
"learning_rate": 0.00017102106352325758,
"loss": 0.1869,
"step": 12640
},
{
"epoch": 0.5728647767412373,
"grad_norm": 0.3851202726364136,
"learning_rate": 0.0001709683288650546,
"loss": 0.1846,
"step": 12650
},
{
"epoch": 0.5733176342722579,
"grad_norm": 0.5470035672187805,
"learning_rate": 0.00017091555441475214,
"loss": 0.1697,
"step": 12660
},
{
"epoch": 0.5737704918032787,
"grad_norm": 0.4892798066139221,
"learning_rate": 0.00017086274020194106,
"loss": 0.1849,
"step": 12670
},
{
"epoch": 0.5742233493342994,
"grad_norm": 0.38861966133117676,
"learning_rate": 0.00017080988625623435,
"loss": 0.1936,
"step": 12680
},
{
"epoch": 0.5746762068653202,
"grad_norm": 0.43364793062210083,
"learning_rate": 0.00017075699260726737,
"loss": 0.1713,
"step": 12690
},
{
"epoch": 0.5751290643963409,
"grad_norm": 0.42354050278663635,
"learning_rate": 0.00017070405928469763,
"loss": 0.1698,
"step": 12700
},
{
"epoch": 0.5755819219273617,
"grad_norm": 0.37953200936317444,
"learning_rate": 0.00017065108631820507,
"loss": 0.199,
"step": 12710
},
{
"epoch": 0.5760347794583824,
"grad_norm": 0.46639448404312134,
"learning_rate": 0.0001705980737374916,
"loss": 0.1974,
"step": 12720
},
{
"epoch": 0.5764876369894031,
"grad_norm": 0.3444575369358063,
"learning_rate": 0.00017054502157228164,
"loss": 0.1922,
"step": 12730
},
{
"epoch": 0.5769404945204238,
"grad_norm": 0.429169625043869,
"learning_rate": 0.00017049192985232162,
"loss": 0.1849,
"step": 12740
},
{
"epoch": 0.5773933520514446,
"grad_norm": 0.4769784212112427,
"learning_rate": 0.00017043879860738015,
"loss": 0.1912,
"step": 12750
},
{
"epoch": 0.5778462095824654,
"grad_norm": 0.33860528469085693,
"learning_rate": 0.00017038562786724802,
"loss": 0.161,
"step": 12760
},
{
"epoch": 0.5782990671134861,
"grad_norm": 0.2882649898529053,
"learning_rate": 0.00017033241766173826,
"loss": 0.1619,
"step": 12770
},
{
"epoch": 0.5787519246445069,
"grad_norm": 0.4699576497077942,
"learning_rate": 0.0001702791680206859,
"loss": 0.2025,
"step": 12780
},
{
"epoch": 0.5792047821755276,
"grad_norm": 0.4863574504852295,
"learning_rate": 0.0001702258789739481,
"loss": 0.1661,
"step": 12790
},
{
"epoch": 0.5796576397065483,
"grad_norm": 0.32957029342651367,
"learning_rate": 0.0001701725505514042,
"loss": 0.2129,
"step": 12800
},
{
"epoch": 0.580110497237569,
"grad_norm": 0.5269854664802551,
"learning_rate": 0.00017011918278295553,
"loss": 0.1843,
"step": 12810
},
{
"epoch": 0.5805633547685898,
"grad_norm": 0.36829832196235657,
"learning_rate": 0.00017006577569852555,
"loss": 0.1828,
"step": 12820
},
{
"epoch": 0.5810162122996105,
"grad_norm": 0.40970858931541443,
"learning_rate": 0.00017001232932805973,
"loss": 0.1959,
"step": 12830
},
{
"epoch": 0.5814690698306313,
"grad_norm": 0.33761903643608093,
"learning_rate": 0.00016995884370152556,
"loss": 0.1903,
"step": 12840
},
{
"epoch": 0.581921927361652,
"grad_norm": 0.4831819534301758,
"learning_rate": 0.0001699053188489125,
"loss": 0.1869,
"step": 12850
},
{
"epoch": 0.5823747848926728,
"grad_norm": 0.41735371947288513,
"learning_rate": 0.00016985175480023213,
"loss": 0.1593,
"step": 12860
},
{
"epoch": 0.5828276424236936,
"grad_norm": 0.5282895565032959,
"learning_rate": 0.00016979815158551785,
"loss": 0.1907,
"step": 12870
},
{
"epoch": 0.5832804999547142,
"grad_norm": 0.387832909822464,
"learning_rate": 0.0001697445092348252,
"loss": 0.1765,
"step": 12880
},
{
"epoch": 0.583733357485735,
"grad_norm": 0.4405990242958069,
"learning_rate": 0.0001696908277782315,
"loss": 0.188,
"step": 12890
},
{
"epoch": 0.5841862150167557,
"grad_norm": 0.45045939087867737,
"learning_rate": 0.00016963710724583606,
"loss": 0.2001,
"step": 12900
},
{
"epoch": 0.5846390725477765,
"grad_norm": 0.42628213763237,
"learning_rate": 0.0001695833476677601,
"loss": 0.1865,
"step": 12910
},
{
"epoch": 0.5850919300787972,
"grad_norm": 0.49529409408569336,
"learning_rate": 0.00016952954907414677,
"loss": 0.16,
"step": 12920
},
{
"epoch": 0.585544787609818,
"grad_norm": 0.40902742743492126,
"learning_rate": 0.00016947571149516106,
"loss": 0.163,
"step": 12930
},
{
"epoch": 0.5859976451408387,
"grad_norm": 0.42194312810897827,
"learning_rate": 0.00016942183496098978,
"loss": 0.1794,
"step": 12940
},
{
"epoch": 0.5864505026718594,
"grad_norm": 0.5004866123199463,
"learning_rate": 0.00016936791950184166,
"loss": 0.1669,
"step": 12950
},
{
"epoch": 0.5869033602028801,
"grad_norm": 0.4472070038318634,
"learning_rate": 0.00016931396514794717,
"loss": 0.2021,
"step": 12960
},
{
"epoch": 0.5873562177339009,
"grad_norm": 0.3932659924030304,
"learning_rate": 0.00016925997192955873,
"loss": 0.1887,
"step": 12970
},
{
"epoch": 0.5878090752649217,
"grad_norm": 0.3474467098712921,
"learning_rate": 0.00016920593987695032,
"loss": 0.1698,
"step": 12980
},
{
"epoch": 0.5882619327959424,
"grad_norm": 0.4554367661476135,
"learning_rate": 0.00016915186902041794,
"loss": 0.1921,
"step": 12990
},
{
"epoch": 0.5887147903269632,
"grad_norm": 0.33510255813598633,
"learning_rate": 0.0001690977593902792,
"loss": 0.1704,
"step": 13000
},
{
"epoch": 0.5887147903269632,
"eval_chrf": 80.09006919939789,
"eval_loss": 0.1506444215774536,
"eval_runtime": 9.1339,
"eval_samples_per_second": 1.095,
"eval_steps_per_second": 0.109,
"step": 13000
},
{
"epoch": 0.5891676478579839,
"grad_norm": 0.43937554955482483,
"learning_rate": 0.0001690436110168735,
"loss": 0.17,
"step": 13010
},
{
"epoch": 0.5896205053890046,
"grad_norm": 0.3773411512374878,
"learning_rate": 0.00016898942393056196,
"loss": 0.1759,
"step": 13020
},
{
"epoch": 0.5900733629200253,
"grad_norm": 0.3168924152851105,
"learning_rate": 0.00016893519816172736,
"loss": 0.1591,
"step": 13030
},
{
"epoch": 0.5905262204510461,
"grad_norm": 0.4434441328048706,
"learning_rate": 0.00016888093374077429,
"loss": 0.1874,
"step": 13040
},
{
"epoch": 0.5909790779820668,
"grad_norm": 0.45755648612976074,
"learning_rate": 0.0001688266306981288,
"loss": 0.1773,
"step": 13050
},
{
"epoch": 0.5914319355130876,
"grad_norm": 0.3861162066459656,
"learning_rate": 0.00016877228906423888,
"loss": 0.1668,
"step": 13060
},
{
"epoch": 0.5918847930441083,
"grad_norm": 0.4664711058139801,
"learning_rate": 0.00016871790886957387,
"loss": 0.1837,
"step": 13070
},
{
"epoch": 0.5923376505751291,
"grad_norm": 0.3919123709201813,
"learning_rate": 0.00016866349014462494,
"loss": 0.1925,
"step": 13080
},
{
"epoch": 0.5927905081061499,
"grad_norm": 0.4407183825969696,
"learning_rate": 0.0001686090329199048,
"loss": 0.1594,
"step": 13090
},
{
"epoch": 0.5932433656371705,
"grad_norm": 0.44760841131210327,
"learning_rate": 0.0001685545372259477,
"loss": 0.2046,
"step": 13100
},
{
"epoch": 0.5936962231681913,
"grad_norm": 0.3986581861972809,
"learning_rate": 0.0001685000030933095,
"loss": 0.1637,
"step": 13110
},
{
"epoch": 0.594149080699212,
"grad_norm": 0.38675206899642944,
"learning_rate": 0.00016844543055256762,
"loss": 0.1896,
"step": 13120
},
{
"epoch": 0.5946019382302328,
"grad_norm": 0.4637945890426636,
"learning_rate": 0.000168390819634321,
"loss": 0.1778,
"step": 13130
},
{
"epoch": 0.5950547957612535,
"grad_norm": 0.41805940866470337,
"learning_rate": 0.0001683361703691901,
"loss": 0.1874,
"step": 13140
},
{
"epoch": 0.5955076532922743,
"grad_norm": 0.3749958574771881,
"learning_rate": 0.00016828148278781688,
"loss": 0.1922,
"step": 13150
},
{
"epoch": 0.595960510823295,
"grad_norm": 0.6044149994850159,
"learning_rate": 0.00016822675692086482,
"loss": 0.1661,
"step": 13160
},
{
"epoch": 0.5964133683543157,
"grad_norm": 0.38552409410476685,
"learning_rate": 0.0001681719927990188,
"loss": 0.1799,
"step": 13170
},
{
"epoch": 0.5968662258853364,
"grad_norm": 0.34303176403045654,
"learning_rate": 0.0001681171904529852,
"loss": 0.1908,
"step": 13180
},
{
"epoch": 0.5973190834163572,
"grad_norm": 0.4143158197402954,
"learning_rate": 0.0001680623499134918,
"loss": 0.2099,
"step": 13190
},
{
"epoch": 0.597771940947378,
"grad_norm": 0.46413254737854004,
"learning_rate": 0.00016800747121128784,
"loss": 0.188,
"step": 13200
},
{
"epoch": 0.5982247984783987,
"grad_norm": 0.47489702701568604,
"learning_rate": 0.00016795255437714396,
"loss": 0.1704,
"step": 13210
},
{
"epoch": 0.5986776560094195,
"grad_norm": 0.36207646131515503,
"learning_rate": 0.00016789759944185203,
"loss": 0.1859,
"step": 13220
},
{
"epoch": 0.5991305135404402,
"grad_norm": 0.4927097260951996,
"learning_rate": 0.0001678426064362255,
"loss": 0.1767,
"step": 13230
},
{
"epoch": 0.5995833710714609,
"grad_norm": 0.42713093757629395,
"learning_rate": 0.00016778757539109908,
"loss": 0.1907,
"step": 13240
},
{
"epoch": 0.6000362286024816,
"grad_norm": 0.4796130657196045,
"learning_rate": 0.00016773250633732875,
"loss": 0.1916,
"step": 13250
},
{
"epoch": 0.6004890861335024,
"grad_norm": 0.5012636184692383,
"learning_rate": 0.00016767739930579188,
"loss": 0.1873,
"step": 13260
},
{
"epoch": 0.6009419436645231,
"grad_norm": 0.45663386583328247,
"learning_rate": 0.0001676222543273871,
"loss": 0.1987,
"step": 13270
},
{
"epoch": 0.6013948011955439,
"grad_norm": 0.4900738298892975,
"learning_rate": 0.00016756707143303427,
"loss": 0.1848,
"step": 13280
},
{
"epoch": 0.6018476587265646,
"grad_norm": 0.3995307683944702,
"learning_rate": 0.00016751185065367466,
"loss": 0.1692,
"step": 13290
},
{
"epoch": 0.6023005162575854,
"grad_norm": 0.3223266899585724,
"learning_rate": 0.0001674565920202706,
"loss": 0.1792,
"step": 13300
},
{
"epoch": 0.602753373788606,
"grad_norm": 0.3668299615383148,
"learning_rate": 0.0001674012955638058,
"loss": 0.2157,
"step": 13310
},
{
"epoch": 0.6032062313196268,
"grad_norm": 0.2830699384212494,
"learning_rate": 0.000167345961315285,
"loss": 0.1748,
"step": 13320
},
{
"epoch": 0.6036590888506476,
"grad_norm": 0.493255615234375,
"learning_rate": 0.00016729058930573437,
"loss": 0.184,
"step": 13330
},
{
"epoch": 0.6041119463816683,
"grad_norm": 0.41153353452682495,
"learning_rate": 0.000167235179566201,
"loss": 0.1774,
"step": 13340
},
{
"epoch": 0.6045648039126891,
"grad_norm": 0.4964105784893036,
"learning_rate": 0.00016717973212775333,
"loss": 0.1733,
"step": 13350
},
{
"epoch": 0.6050176614437098,
"grad_norm": 0.3415859043598175,
"learning_rate": 0.00016712424702148085,
"loss": 0.1691,
"step": 13360
},
{
"epoch": 0.6054705189747306,
"grad_norm": 0.41315367817878723,
"learning_rate": 0.00016706872427849415,
"loss": 0.1889,
"step": 13370
},
{
"epoch": 0.6059233765057513,
"grad_norm": 0.36277538537979126,
"learning_rate": 0.00016701316392992495,
"loss": 0.17,
"step": 13380
},
{
"epoch": 0.606376234036772,
"grad_norm": 0.4528109133243561,
"learning_rate": 0.0001669575660069261,
"loss": 0.1808,
"step": 13390
},
{
"epoch": 0.6068290915677927,
"grad_norm": 0.36657941341400146,
"learning_rate": 0.0001669019305406715,
"loss": 0.2003,
"step": 13400
},
{
"epoch": 0.6072819490988135,
"grad_norm": 0.4129428267478943,
"learning_rate": 0.000166846257562356,
"loss": 0.1891,
"step": 13410
},
{
"epoch": 0.6077348066298343,
"grad_norm": 0.4617420434951782,
"learning_rate": 0.00016679054710319564,
"loss": 0.1801,
"step": 13420
},
{
"epoch": 0.608187664160855,
"grad_norm": 0.3708018362522125,
"learning_rate": 0.00016673479919442733,
"loss": 0.1781,
"step": 13430
},
{
"epoch": 0.6086405216918758,
"grad_norm": 0.4533080458641052,
"learning_rate": 0.00016667901386730908,
"loss": 0.1928,
"step": 13440
},
{
"epoch": 0.6090933792228965,
"grad_norm": 0.29118624329566956,
"learning_rate": 0.00016662319115311986,
"loss": 0.1776,
"step": 13450
},
{
"epoch": 0.6095462367539172,
"grad_norm": 0.5299730896949768,
"learning_rate": 0.00016656733108315957,
"loss": 0.1915,
"step": 13460
},
{
"epoch": 0.6099990942849379,
"grad_norm": 0.46744877099990845,
"learning_rate": 0.00016651143368874908,
"loss": 0.1553,
"step": 13470
},
{
"epoch": 0.6104519518159587,
"grad_norm": 0.3915580213069916,
"learning_rate": 0.00016645549900123018,
"loss": 0.2021,
"step": 13480
},
{
"epoch": 0.6109048093469794,
"grad_norm": 0.32643139362335205,
"learning_rate": 0.00016639952705196556,
"loss": 0.1998,
"step": 13490
},
{
"epoch": 0.6113576668780002,
"grad_norm": 0.37357577681541443,
"learning_rate": 0.00016634351787233887,
"loss": 0.1631,
"step": 13500
},
{
"epoch": 0.611810524409021,
"grad_norm": 0.288700670003891,
"learning_rate": 0.0001662874714937545,
"loss": 0.2021,
"step": 13510
},
{
"epoch": 0.6122633819400417,
"grad_norm": 0.3211473822593689,
"learning_rate": 0.00016623138794763786,
"loss": 0.1686,
"step": 13520
},
{
"epoch": 0.6127162394710624,
"grad_norm": 0.4806463420391083,
"learning_rate": 0.00016617526726543508,
"loss": 0.1893,
"step": 13530
},
{
"epoch": 0.6131690970020831,
"grad_norm": 0.3648808002471924,
"learning_rate": 0.00016611910947861316,
"loss": 0.156,
"step": 13540
},
{
"epoch": 0.6136219545331039,
"grad_norm": 0.4847327768802643,
"learning_rate": 0.00016606291461865986,
"loss": 0.2123,
"step": 13550
},
{
"epoch": 0.6140748120641246,
"grad_norm": 0.4586341083049774,
"learning_rate": 0.0001660066827170838,
"loss": 0.1851,
"step": 13560
},
{
"epoch": 0.6145276695951454,
"grad_norm": 0.46938562393188477,
"learning_rate": 0.0001659504138054143,
"loss": 0.1598,
"step": 13570
},
{
"epoch": 0.6149805271261661,
"grad_norm": 0.43038567900657654,
"learning_rate": 0.00016589410791520152,
"loss": 0.1872,
"step": 13580
},
{
"epoch": 0.6154333846571869,
"grad_norm": 0.4367424249649048,
"learning_rate": 0.00016583776507801624,
"loss": 0.1878,
"step": 13590
},
{
"epoch": 0.6158862421882076,
"grad_norm": 0.5961470603942871,
"learning_rate": 0.00016578138532545003,
"loss": 0.2002,
"step": 13600
},
{
"epoch": 0.6163390997192283,
"grad_norm": 0.4500272572040558,
"learning_rate": 0.00016572496868911518,
"loss": 0.2165,
"step": 13610
},
{
"epoch": 0.616791957250249,
"grad_norm": 0.4046367406845093,
"learning_rate": 0.00016566851520064462,
"loss": 0.164,
"step": 13620
},
{
"epoch": 0.6172448147812698,
"grad_norm": 0.4983348250389099,
"learning_rate": 0.00016561202489169186,
"loss": 0.1654,
"step": 13630
},
{
"epoch": 0.6176976723122906,
"grad_norm": 0.43825361132621765,
"learning_rate": 0.00016555549779393122,
"loss": 0.1803,
"step": 13640
},
{
"epoch": 0.6181505298433113,
"grad_norm": 0.39608263969421387,
"learning_rate": 0.00016549893393905755,
"loss": 0.1712,
"step": 13650
},
{
"epoch": 0.6186033873743321,
"grad_norm": 0.3408968448638916,
"learning_rate": 0.00016544233335878634,
"loss": 0.1707,
"step": 13660
},
{
"epoch": 0.6190562449053528,
"grad_norm": 0.48537546396255493,
"learning_rate": 0.00016538569608485365,
"loss": 0.1879,
"step": 13670
},
{
"epoch": 0.6195091024363735,
"grad_norm": 0.4846585690975189,
"learning_rate": 0.0001653290221490161,
"loss": 0.1791,
"step": 13680
},
{
"epoch": 0.6199619599673942,
"grad_norm": 0.4026714861392975,
"learning_rate": 0.00016527231158305092,
"loss": 0.1861,
"step": 13690
},
{
"epoch": 0.620414817498415,
"grad_norm": 0.5842049717903137,
"learning_rate": 0.00016521556441875584,
"loss": 0.1913,
"step": 13700
},
{
"epoch": 0.6208676750294357,
"grad_norm": 0.417464941740036,
"learning_rate": 0.00016515878068794918,
"loss": 0.1638,
"step": 13710
},
{
"epoch": 0.6213205325604565,
"grad_norm": 0.32036682963371277,
"learning_rate": 0.00016510196042246963,
"loss": 0.1441,
"step": 13720
},
{
"epoch": 0.6217733900914773,
"grad_norm": 0.4793328046798706,
"learning_rate": 0.00016504510365417642,
"loss": 0.1806,
"step": 13730
},
{
"epoch": 0.622226247622498,
"grad_norm": 0.3982367515563965,
"learning_rate": 0.00016498821041494935,
"loss": 0.1668,
"step": 13740
},
{
"epoch": 0.6226791051535187,
"grad_norm": 0.2917279005050659,
"learning_rate": 0.00016493128073668853,
"loss": 0.1669,
"step": 13750
},
{
"epoch": 0.6231319626845394,
"grad_norm": 0.38130876421928406,
"learning_rate": 0.00016487431465131455,
"loss": 0.1964,
"step": 13760
},
{
"epoch": 0.6235848202155602,
"grad_norm": 0.24941422045230865,
"learning_rate": 0.00016481731219076843,
"loss": 0.1919,
"step": 13770
},
{
"epoch": 0.6240376777465809,
"grad_norm": 0.35797667503356934,
"learning_rate": 0.00016476027338701164,
"loss": 0.1757,
"step": 13780
},
{
"epoch": 0.6244905352776017,
"grad_norm": 0.3741672933101654,
"learning_rate": 0.00016470319827202587,
"loss": 0.1733,
"step": 13790
},
{
"epoch": 0.6249433928086224,
"grad_norm": 0.36127394437789917,
"learning_rate": 0.00016464608687781327,
"loss": 0.1693,
"step": 13800
},
{
"epoch": 0.6253962503396432,
"grad_norm": 0.3201135993003845,
"learning_rate": 0.00016458893923639644,
"loss": 0.1658,
"step": 13810
},
{
"epoch": 0.6258491078706638,
"grad_norm": 0.4127233922481537,
"learning_rate": 0.00016453175537981806,
"loss": 0.1792,
"step": 13820
},
{
"epoch": 0.6263019654016846,
"grad_norm": 0.43056994676589966,
"learning_rate": 0.0001644745353401413,
"loss": 0.1803,
"step": 13830
},
{
"epoch": 0.6267548229327053,
"grad_norm": 0.3311462700366974,
"learning_rate": 0.00016441727914944955,
"loss": 0.1862,
"step": 13840
},
{
"epoch": 0.6272076804637261,
"grad_norm": 0.4950120151042938,
"learning_rate": 0.00016435998683984648,
"loss": 0.1782,
"step": 13850
},
{
"epoch": 0.6276605379947469,
"grad_norm": 0.4820355176925659,
"learning_rate": 0.00016430265844345602,
"loss": 0.194,
"step": 13860
},
{
"epoch": 0.6281133955257676,
"grad_norm": 0.30587446689605713,
"learning_rate": 0.00016424529399242235,
"loss": 0.2033,
"step": 13870
},
{
"epoch": 0.6285662530567884,
"grad_norm": 0.42574718594551086,
"learning_rate": 0.00016418789351890984,
"loss": 0.1769,
"step": 13880
},
{
"epoch": 0.6290191105878091,
"grad_norm": 0.3411298394203186,
"learning_rate": 0.00016413045705510304,
"loss": 0.1806,
"step": 13890
},
{
"epoch": 0.6294719681188298,
"grad_norm": 0.43649446964263916,
"learning_rate": 0.0001640729846332067,
"loss": 0.1939,
"step": 13900
},
{
"epoch": 0.6299248256498505,
"grad_norm": 0.4125385582447052,
"learning_rate": 0.00016401547628544573,
"loss": 0.1664,
"step": 13910
},
{
"epoch": 0.6303776831808713,
"grad_norm": 0.3264622986316681,
"learning_rate": 0.00016395793204406527,
"loss": 0.1784,
"step": 13920
},
{
"epoch": 0.630830540711892,
"grad_norm": 0.4634540379047394,
"learning_rate": 0.00016390035194133038,
"loss": 0.1744,
"step": 13930
},
{
"epoch": 0.6312833982429128,
"grad_norm": 0.2661249339580536,
"learning_rate": 0.00016384273600952645,
"loss": 0.1654,
"step": 13940
},
{
"epoch": 0.6317362557739336,
"grad_norm": 0.3725392818450928,
"learning_rate": 0.0001637850842809588,
"loss": 0.1874,
"step": 13950
},
{
"epoch": 0.6321891133049543,
"grad_norm": 0.38668251037597656,
"learning_rate": 0.00016372739678795288,
"loss": 0.1789,
"step": 13960
},
{
"epoch": 0.632641970835975,
"grad_norm": 0.3284059464931488,
"learning_rate": 0.00016366967356285422,
"loss": 0.1799,
"step": 13970
},
{
"epoch": 0.6330948283669957,
"grad_norm": 0.2830691933631897,
"learning_rate": 0.00016361191463802843,
"loss": 0.1689,
"step": 13980
},
{
"epoch": 0.6335476858980165,
"grad_norm": 0.40212684869766235,
"learning_rate": 0.00016355412004586092,
"loss": 0.1841,
"step": 13990
},
{
"epoch": 0.6340005434290372,
"grad_norm": 0.36592209339141846,
"learning_rate": 0.00016349628981875738,
"loss": 0.197,
"step": 14000
},
{
"epoch": 0.6340005434290372,
"eval_chrf": 56.30788920587864,
"eval_loss": 0.14252474904060364,
"eval_runtime": 26.7741,
"eval_samples_per_second": 0.373,
"eval_steps_per_second": 0.037,
"step": 14000
},
{
"epoch": 0.634453400960058,
"grad_norm": 0.48045670986175537,
"learning_rate": 0.00016343842398914324,
"loss": 0.1697,
"step": 14010
},
{
"epoch": 0.6349062584910787,
"grad_norm": 0.34194836020469666,
"learning_rate": 0.0001633805225894641,
"loss": 0.1616,
"step": 14020
},
{
"epoch": 0.6353591160220995,
"grad_norm": 0.40786996483802795,
"learning_rate": 0.0001633225856521853,
"loss": 0.1897,
"step": 14030
},
{
"epoch": 0.6358119735531201,
"grad_norm": 0.5029662251472473,
"learning_rate": 0.0001632646132097923,
"loss": 0.2056,
"step": 14040
},
{
"epoch": 0.6362648310841409,
"grad_norm": 0.419687956571579,
"learning_rate": 0.00016320660529479033,
"loss": 0.1913,
"step": 14050
},
{
"epoch": 0.6367176886151616,
"grad_norm": 0.4122810661792755,
"learning_rate": 0.00016314856193970454,
"loss": 0.1918,
"step": 14060
},
{
"epoch": 0.6371705461461824,
"grad_norm": 0.3298819959163666,
"learning_rate": 0.00016309048317708,
"loss": 0.1654,
"step": 14070
},
{
"epoch": 0.6376234036772032,
"grad_norm": 0.6079479455947876,
"learning_rate": 0.0001630323690394816,
"loss": 0.1895,
"step": 14080
},
{
"epoch": 0.6380762612082239,
"grad_norm": 0.3452903628349304,
"learning_rate": 0.00016297421955949407,
"loss": 0.1766,
"step": 14090
},
{
"epoch": 0.6385291187392447,
"grad_norm": 0.3545880615711212,
"learning_rate": 0.00016291603476972192,
"loss": 0.1755,
"step": 14100
},
{
"epoch": 0.6389819762702654,
"grad_norm": 0.46532562375068665,
"learning_rate": 0.00016285781470278953,
"loss": 0.1937,
"step": 14110
},
{
"epoch": 0.6394348338012861,
"grad_norm": 0.32557931542396545,
"learning_rate": 0.000162799559391341,
"loss": 0.178,
"step": 14120
},
{
"epoch": 0.6398876913323068,
"grad_norm": 0.44921159744262695,
"learning_rate": 0.0001627412688680402,
"loss": 0.1896,
"step": 14130
},
{
"epoch": 0.6403405488633276,
"grad_norm": 0.5234042406082153,
"learning_rate": 0.00016268294316557083,
"loss": 0.1662,
"step": 14140
},
{
"epoch": 0.6407934063943483,
"grad_norm": 0.46567443013191223,
"learning_rate": 0.00016262458231663612,
"loss": 0.1814,
"step": 14150
},
{
"epoch": 0.6412462639253691,
"grad_norm": 0.446133017539978,
"learning_rate": 0.0001625661863539592,
"loss": 0.1998,
"step": 14160
},
{
"epoch": 0.6416991214563899,
"grad_norm": 0.4532266855239868,
"learning_rate": 0.00016250775531028282,
"loss": 0.1725,
"step": 14170
},
{
"epoch": 0.6421519789874106,
"grad_norm": 0.4015495181083679,
"learning_rate": 0.00016244928921836936,
"loss": 0.1588,
"step": 14180
},
{
"epoch": 0.6426048365184313,
"grad_norm": 0.3836342990398407,
"learning_rate": 0.00016239078811100093,
"loss": 0.1604,
"step": 14190
},
{
"epoch": 0.643057694049452,
"grad_norm": 0.252178817987442,
"learning_rate": 0.0001623322520209792,
"loss": 0.1878,
"step": 14200
},
{
"epoch": 0.6435105515804728,
"grad_norm": 0.3731275796890259,
"learning_rate": 0.00016227368098112552,
"loss": 0.1735,
"step": 14210
},
{
"epoch": 0.6439634091114935,
"grad_norm": 0.42285943031311035,
"learning_rate": 0.00016221507502428074,
"loss": 0.1809,
"step": 14220
},
{
"epoch": 0.6444162666425143,
"grad_norm": 0.4480184018611908,
"learning_rate": 0.0001621564341833054,
"loss": 0.1863,
"step": 14230
},
{
"epoch": 0.644869124173535,
"grad_norm": 0.3354874849319458,
"learning_rate": 0.00016209775849107953,
"loss": 0.168,
"step": 14240
},
{
"epoch": 0.6453219817045558,
"grad_norm": 0.44964078068733215,
"learning_rate": 0.00016203904798050273,
"loss": 0.1726,
"step": 14250
},
{
"epoch": 0.6457748392355764,
"grad_norm": 0.5185778141021729,
"learning_rate": 0.0001619803026844941,
"loss": 0.1824,
"step": 14260
},
{
"epoch": 0.6462276967665972,
"grad_norm": 0.40909233689308167,
"learning_rate": 0.0001619215226359923,
"loss": 0.1797,
"step": 14270
},
{
"epoch": 0.646680554297618,
"grad_norm": 0.4697762727737427,
"learning_rate": 0.00016186270786795536,
"loss": 0.1731,
"step": 14280
},
{
"epoch": 0.6471334118286387,
"grad_norm": 0.48620936274528503,
"learning_rate": 0.00016180385841336095,
"loss": 0.1896,
"step": 14290
},
{
"epoch": 0.6475862693596595,
"grad_norm": 0.37995168566703796,
"learning_rate": 0.00016174497430520597,
"loss": 0.1731,
"step": 14300
},
{
"epoch": 0.6480391268906802,
"grad_norm": 0.4603124260902405,
"learning_rate": 0.000161686055576507,
"loss": 0.1811,
"step": 14310
},
{
"epoch": 0.648491984421701,
"grad_norm": 0.28444549441337585,
"learning_rate": 0.00016162710226029975,
"loss": 0.1875,
"step": 14320
},
{
"epoch": 0.6489448419527216,
"grad_norm": 0.6212883591651917,
"learning_rate": 0.00016156811438963964,
"loss": 0.1638,
"step": 14330
},
{
"epoch": 0.6493976994837424,
"grad_norm": 0.5507875680923462,
"learning_rate": 0.0001615090919976012,
"loss": 0.1671,
"step": 14340
},
{
"epoch": 0.6498505570147631,
"grad_norm": 0.3705752491950989,
"learning_rate": 0.00016145003511727838,
"loss": 0.1837,
"step": 14350
},
{
"epoch": 0.6503034145457839,
"grad_norm": 0.32453569769859314,
"learning_rate": 0.0001613909437817846,
"loss": 0.1866,
"step": 14360
},
{
"epoch": 0.6507562720768046,
"grad_norm": 0.5403971672058105,
"learning_rate": 0.00016133181802425247,
"loss": 0.1729,
"step": 14370
},
{
"epoch": 0.6512091296078254,
"grad_norm": 0.36369985342025757,
"learning_rate": 0.00016127265787783393,
"loss": 0.1754,
"step": 14380
},
{
"epoch": 0.6516619871388462,
"grad_norm": 0.44516465067863464,
"learning_rate": 0.00016121346337570017,
"loss": 0.1771,
"step": 14390
},
{
"epoch": 0.6521148446698669,
"grad_norm": 0.40263697504997253,
"learning_rate": 0.00016115423455104172,
"loss": 0.1536,
"step": 14400
},
{
"epoch": 0.6525677022008876,
"grad_norm": 0.5482146143913269,
"learning_rate": 0.00016109497143706832,
"loss": 0.1701,
"step": 14410
},
{
"epoch": 0.6530205597319083,
"grad_norm": 0.48209378123283386,
"learning_rate": 0.00016103567406700894,
"loss": 0.1801,
"step": 14420
},
{
"epoch": 0.6534734172629291,
"grad_norm": 0.5684372782707214,
"learning_rate": 0.00016097634247411166,
"loss": 0.1688,
"step": 14430
},
{
"epoch": 0.6539262747939498,
"grad_norm": 0.45816928148269653,
"learning_rate": 0.0001609169766916439,
"loss": 0.1962,
"step": 14440
},
{
"epoch": 0.6543791323249706,
"grad_norm": 0.3339681923389435,
"learning_rate": 0.0001608575767528922,
"loss": 0.1811,
"step": 14450
},
{
"epoch": 0.6548319898559913,
"grad_norm": 0.5153873562812805,
"learning_rate": 0.00016079814269116222,
"loss": 0.1774,
"step": 14460
},
{
"epoch": 0.6552848473870121,
"grad_norm": 0.35115787386894226,
"learning_rate": 0.00016073867453977877,
"loss": 0.1919,
"step": 14470
},
{
"epoch": 0.6557377049180327,
"grad_norm": 0.3883000314235687,
"learning_rate": 0.00016067917233208577,
"loss": 0.1746,
"step": 14480
},
{
"epoch": 0.6561905624490535,
"grad_norm": 0.6237671375274658,
"learning_rate": 0.00016061963610144626,
"loss": 0.1955,
"step": 14490
},
{
"epoch": 0.6566434199800742,
"grad_norm": 0.3687939941883087,
"learning_rate": 0.0001605600658812423,
"loss": 0.1543,
"step": 14500
},
{
"epoch": 0.657096277511095,
"grad_norm": 0.3738263249397278,
"learning_rate": 0.00016050046170487507,
"loss": 0.1709,
"step": 14510
},
{
"epoch": 0.6575491350421158,
"grad_norm": 0.5553914308547974,
"learning_rate": 0.00016044082360576475,
"loss": 0.1835,
"step": 14520
},
{
"epoch": 0.6580019925731365,
"grad_norm": 0.31449252367019653,
"learning_rate": 0.00016038115161735056,
"loss": 0.1818,
"step": 14530
},
{
"epoch": 0.6584548501041573,
"grad_norm": 0.36781567335128784,
"learning_rate": 0.00016032144577309075,
"loss": 0.183,
"step": 14540
},
{
"epoch": 0.6589077076351779,
"grad_norm": 0.35801130533218384,
"learning_rate": 0.00016026170610646245,
"loss": 0.1771,
"step": 14550
},
{
"epoch": 0.6593605651661987,
"grad_norm": 0.3693104386329651,
"learning_rate": 0.0001602019326509619,
"loss": 0.1763,
"step": 14560
},
{
"epoch": 0.6598134226972194,
"grad_norm": 0.3711768090724945,
"learning_rate": 0.00016014212544010413,
"loss": 0.1707,
"step": 14570
},
{
"epoch": 0.6602662802282402,
"grad_norm": 0.42132726311683655,
"learning_rate": 0.00016008228450742324,
"loss": 0.1832,
"step": 14580
},
{
"epoch": 0.6607191377592609,
"grad_norm": 0.3945630192756653,
"learning_rate": 0.00016002240988647213,
"loss": 0.152,
"step": 14590
},
{
"epoch": 0.6611719952902817,
"grad_norm": 0.4569098949432373,
"learning_rate": 0.0001599625016108227,
"loss": 0.2033,
"step": 14600
},
{
"epoch": 0.6616248528213025,
"grad_norm": 0.42940595746040344,
"learning_rate": 0.00015990255971406556,
"loss": 0.1824,
"step": 14610
},
{
"epoch": 0.6620777103523232,
"grad_norm": 0.4168236553668976,
"learning_rate": 0.00015984258422981033,
"loss": 0.1647,
"step": 14620
},
{
"epoch": 0.6625305678833439,
"grad_norm": 0.43679308891296387,
"learning_rate": 0.00015978257519168535,
"loss": 0.1796,
"step": 14630
},
{
"epoch": 0.6629834254143646,
"grad_norm": 0.4291830062866211,
"learning_rate": 0.0001597225326333379,
"loss": 0.1682,
"step": 14640
},
{
"epoch": 0.6634362829453854,
"grad_norm": 0.46512332558631897,
"learning_rate": 0.0001596624565884339,
"loss": 0.1599,
"step": 14650
},
{
"epoch": 0.6638891404764061,
"grad_norm": 0.441057950258255,
"learning_rate": 0.00015960234709065812,
"loss": 0.1759,
"step": 14660
},
{
"epoch": 0.6643419980074269,
"grad_norm": 0.3182176947593689,
"learning_rate": 0.00015954220417371416,
"loss": 0.1502,
"step": 14670
},
{
"epoch": 0.6647948555384476,
"grad_norm": 0.364551305770874,
"learning_rate": 0.00015948202787132421,
"loss": 0.181,
"step": 14680
},
{
"epoch": 0.6652477130694684,
"grad_norm": 0.4026900827884674,
"learning_rate": 0.0001594218182172293,
"loss": 0.188,
"step": 14690
},
{
"epoch": 0.665700570600489,
"grad_norm": 0.5440531969070435,
"learning_rate": 0.00015936157524518908,
"loss": 0.1913,
"step": 14700
},
{
"epoch": 0.6661534281315098,
"grad_norm": 0.3313021659851074,
"learning_rate": 0.000159301298988982,
"loss": 0.1691,
"step": 14710
},
{
"epoch": 0.6666062856625306,
"grad_norm": 0.3782554268836975,
"learning_rate": 0.00015924098948240497,
"loss": 0.2081,
"step": 14720
},
{
"epoch": 0.6670591431935513,
"grad_norm": 0.41381847858428955,
"learning_rate": 0.00015918064675927375,
"loss": 0.17,
"step": 14730
},
{
"epoch": 0.6675120007245721,
"grad_norm": 0.3430550694465637,
"learning_rate": 0.0001591202708534226,
"loss": 0.1712,
"step": 14740
},
{
"epoch": 0.6679648582555928,
"grad_norm": 0.43353360891342163,
"learning_rate": 0.00015905986179870447,
"loss": 0.187,
"step": 14750
},
{
"epoch": 0.6684177157866136,
"grad_norm": 0.38335633277893066,
"learning_rate": 0.00015899941962899084,
"loss": 0.1688,
"step": 14760
},
{
"epoch": 0.6688705733176342,
"grad_norm": 0.3117828369140625,
"learning_rate": 0.0001589389443781717,
"loss": 0.1886,
"step": 14770
},
{
"epoch": 0.669323430848655,
"grad_norm": 0.492303729057312,
"learning_rate": 0.00015887843608015573,
"loss": 0.1917,
"step": 14780
},
{
"epoch": 0.6697762883796757,
"grad_norm": 0.412275105714798,
"learning_rate": 0.00015881789476887006,
"loss": 0.2031,
"step": 14790
},
{
"epoch": 0.6702291459106965,
"grad_norm": 0.5670540928840637,
"learning_rate": 0.0001587573204782603,
"loss": 0.1911,
"step": 14800
},
{
"epoch": 0.6706820034417172,
"grad_norm": 0.7483437061309814,
"learning_rate": 0.00015869671324229061,
"loss": 0.1807,
"step": 14810
},
{
"epoch": 0.671134860972738,
"grad_norm": 0.4051755964756012,
"learning_rate": 0.00015863607309494362,
"loss": 0.1651,
"step": 14820
},
{
"epoch": 0.6715877185037588,
"grad_norm": 0.38806095719337463,
"learning_rate": 0.00015857540007022033,
"loss": 0.1711,
"step": 14830
},
{
"epoch": 0.6720405760347795,
"grad_norm": 0.45691871643066406,
"learning_rate": 0.00015851469420214033,
"loss": 0.1823,
"step": 14840
},
{
"epoch": 0.6724934335658002,
"grad_norm": 0.41940930485725403,
"learning_rate": 0.00015845395552474147,
"loss": 0.1643,
"step": 14850
},
{
"epoch": 0.6729462910968209,
"grad_norm": 0.41174575686454773,
"learning_rate": 0.00015839318407208011,
"loss": 0.1715,
"step": 14860
},
{
"epoch": 0.6733991486278417,
"grad_norm": 0.36043575406074524,
"learning_rate": 0.00015833237987823088,
"loss": 0.1951,
"step": 14870
},
{
"epoch": 0.6738520061588624,
"grad_norm": 0.3869519531726837,
"learning_rate": 0.0001582715429772869,
"loss": 0.1683,
"step": 14880
},
{
"epoch": 0.6743048636898832,
"grad_norm": 0.503814697265625,
"learning_rate": 0.0001582106734033595,
"loss": 0.158,
"step": 14890
},
{
"epoch": 0.6747577212209039,
"grad_norm": 0.5015385746955872,
"learning_rate": 0.00015814977119057836,
"loss": 0.1788,
"step": 14900
},
{
"epoch": 0.6752105787519247,
"grad_norm": 0.6002166271209717,
"learning_rate": 0.00015808883637309155,
"loss": 0.1733,
"step": 14910
},
{
"epoch": 0.6756634362829453,
"grad_norm": 0.4434871971607208,
"learning_rate": 0.00015802786898506534,
"loss": 0.2155,
"step": 14920
},
{
"epoch": 0.6761162938139661,
"grad_norm": 0.36393222212791443,
"learning_rate": 0.00015796686906068425,
"loss": 0.149,
"step": 14930
},
{
"epoch": 0.6765691513449869,
"grad_norm": 0.5228678584098816,
"learning_rate": 0.0001579058366341511,
"loss": 0.1782,
"step": 14940
},
{
"epoch": 0.6770220088760076,
"grad_norm": 0.37390783429145813,
"learning_rate": 0.00015784477173968691,
"loss": 0.1822,
"step": 14950
},
{
"epoch": 0.6774748664070284,
"grad_norm": 0.4303264319896698,
"learning_rate": 0.0001577836744115309,
"loss": 0.1965,
"step": 14960
},
{
"epoch": 0.6779277239380491,
"grad_norm": 0.38123011589050293,
"learning_rate": 0.00015772254468394045,
"loss": 0.1838,
"step": 14970
},
{
"epoch": 0.6783805814690699,
"grad_norm": 0.4034271240234375,
"learning_rate": 0.0001576613825911912,
"loss": 0.2019,
"step": 14980
},
{
"epoch": 0.6788334390000905,
"grad_norm": 0.47110286355018616,
"learning_rate": 0.00015760018816757674,
"loss": 0.189,
"step": 14990
},
{
"epoch": 0.6792862965311113,
"grad_norm": 0.3800531029701233,
"learning_rate": 0.00015753896144740908,
"loss": 0.1712,
"step": 15000
},
{
"epoch": 0.6792862965311113,
"eval_chrf": 79.72314247218827,
"eval_loss": 0.16112208366394043,
"eval_runtime": 10.9363,
"eval_samples_per_second": 0.914,
"eval_steps_per_second": 0.091,
"step": 15000
},
{
"epoch": 0.679739154062132,
"grad_norm": 0.43706080317497253,
"learning_rate": 0.00015747770246501806,
"loss": 0.1939,
"step": 15010
},
{
"epoch": 0.6801920115931528,
"grad_norm": 0.4327431619167328,
"learning_rate": 0.00015741641125475178,
"loss": 0.19,
"step": 15020
},
{
"epoch": 0.6806448691241735,
"grad_norm": 0.34704381227493286,
"learning_rate": 0.00015735508785097636,
"loss": 0.1595,
"step": 15030
},
{
"epoch": 0.6810977266551943,
"grad_norm": 0.45255932211875916,
"learning_rate": 0.00015729373228807593,
"loss": 0.2069,
"step": 15040
},
{
"epoch": 0.6815505841862151,
"grad_norm": 0.4494575560092926,
"learning_rate": 0.00015723234460045273,
"loss": 0.1864,
"step": 15050
},
{
"epoch": 0.6820034417172357,
"grad_norm": 0.43747246265411377,
"learning_rate": 0.00015717092482252695,
"loss": 0.175,
"step": 15060
},
{
"epoch": 0.6824562992482565,
"grad_norm": 0.30457955598831177,
"learning_rate": 0.00015710947298873683,
"loss": 0.1757,
"step": 15070
},
{
"epoch": 0.6829091567792772,
"grad_norm": 0.3637182414531708,
"learning_rate": 0.0001570479891335385,
"loss": 0.2001,
"step": 15080
},
{
"epoch": 0.683362014310298,
"grad_norm": 0.47846782207489014,
"learning_rate": 0.00015698647329140614,
"loss": 0.1855,
"step": 15090
},
{
"epoch": 0.6838148718413187,
"grad_norm": 0.3498048484325409,
"learning_rate": 0.00015692492549683178,
"loss": 0.1709,
"step": 15100
},
{
"epoch": 0.6842677293723395,
"grad_norm": 0.49282166361808777,
"learning_rate": 0.00015686334578432541,
"loss": 0.1888,
"step": 15110
},
{
"epoch": 0.6847205869033602,
"grad_norm": 0.35319894552230835,
"learning_rate": 0.00015680173418841493,
"loss": 0.1775,
"step": 15120
},
{
"epoch": 0.685173444434381,
"grad_norm": 0.4341161251068115,
"learning_rate": 0.00015674009074364607,
"loss": 0.2043,
"step": 15130
},
{
"epoch": 0.6856263019654016,
"grad_norm": 0.3676917552947998,
"learning_rate": 0.00015667841548458252,
"loss": 0.1753,
"step": 15140
},
{
"epoch": 0.6860791594964224,
"grad_norm": 0.364055335521698,
"learning_rate": 0.00015661670844580567,
"loss": 0.1563,
"step": 15150
},
{
"epoch": 0.6865320170274432,
"grad_norm": 0.5195023417472839,
"learning_rate": 0.00015655496966191477,
"loss": 0.1876,
"step": 15160
},
{
"epoch": 0.6869848745584639,
"grad_norm": 0.4854353070259094,
"learning_rate": 0.00015649319916752696,
"loss": 0.1817,
"step": 15170
},
{
"epoch": 0.6874377320894847,
"grad_norm": 0.47016629576683044,
"learning_rate": 0.00015643139699727705,
"loss": 0.1921,
"step": 15180
},
{
"epoch": 0.6878905896205054,
"grad_norm": 0.367403507232666,
"learning_rate": 0.00015636956318581765,
"loss": 0.1758,
"step": 15190
},
{
"epoch": 0.6883434471515262,
"grad_norm": 0.3292784094810486,
"learning_rate": 0.00015630769776781914,
"loss": 0.1727,
"step": 15200
},
{
"epoch": 0.6887963046825468,
"grad_norm": 0.3884672224521637,
"learning_rate": 0.0001562458007779696,
"loss": 0.1761,
"step": 15210
},
{
"epoch": 0.6892491622135676,
"grad_norm": 0.3499252200126648,
"learning_rate": 0.0001561838722509748,
"loss": 0.1887,
"step": 15220
},
{
"epoch": 0.6897020197445883,
"grad_norm": 1.7124887704849243,
"learning_rate": 0.0001561219122215582,
"loss": 0.1469,
"step": 15230
},
{
"epoch": 0.6901548772756091,
"grad_norm": 0.4366299510002136,
"learning_rate": 0.00015605992072446092,
"loss": 0.1706,
"step": 15240
},
{
"epoch": 0.6906077348066298,
"grad_norm": 0.29201626777648926,
"learning_rate": 0.00015599789779444174,
"loss": 0.1959,
"step": 15250
},
{
"epoch": 0.6910605923376506,
"grad_norm": 0.3731488883495331,
"learning_rate": 0.00015593584346627702,
"loss": 0.194,
"step": 15260
},
{
"epoch": 0.6915134498686714,
"grad_norm": 0.43735894560813904,
"learning_rate": 0.00015587375777476083,
"loss": 0.1679,
"step": 15270
},
{
"epoch": 0.691966307399692,
"grad_norm": 0.5219751596450806,
"learning_rate": 0.00015581164075470472,
"loss": 0.1818,
"step": 15280
},
{
"epoch": 0.6924191649307128,
"grad_norm": 0.38412991166114807,
"learning_rate": 0.00015574949244093783,
"loss": 0.1735,
"step": 15290
},
{
"epoch": 0.6928720224617335,
"grad_norm": 0.4602055251598358,
"learning_rate": 0.0001556873128683068,
"loss": 0.1802,
"step": 15300
},
{
"epoch": 0.6933248799927543,
"grad_norm": 0.5174024701118469,
"learning_rate": 0.000155625102071676,
"loss": 0.1584,
"step": 15310
},
{
"epoch": 0.693777737523775,
"grad_norm": 0.47438669204711914,
"learning_rate": 0.00015556286008592705,
"loss": 0.1802,
"step": 15320
},
{
"epoch": 0.6942305950547958,
"grad_norm": 0.3923074007034302,
"learning_rate": 0.0001555005869459592,
"loss": 0.1661,
"step": 15330
},
{
"epoch": 0.6946834525858165,
"grad_norm": 0.4011114537715912,
"learning_rate": 0.00015543828268668915,
"loss": 0.166,
"step": 15340
},
{
"epoch": 0.6951363101168373,
"grad_norm": 0.5458866357803345,
"learning_rate": 0.00015537594734305098,
"loss": 0.1884,
"step": 15350
},
{
"epoch": 0.6955891676478579,
"grad_norm": 0.5292759537696838,
"learning_rate": 0.00015531358094999633,
"loss": 0.1781,
"step": 15360
},
{
"epoch": 0.6960420251788787,
"grad_norm": 0.46571633219718933,
"learning_rate": 0.00015525118354249413,
"loss": 0.1807,
"step": 15370
},
{
"epoch": 0.6964948827098995,
"grad_norm": 0.4802543520927429,
"learning_rate": 0.00015518875515553075,
"loss": 0.1805,
"step": 15380
},
{
"epoch": 0.6969477402409202,
"grad_norm": 0.37716320157051086,
"learning_rate": 0.00015512629582410998,
"loss": 0.1844,
"step": 15390
},
{
"epoch": 0.697400597771941,
"grad_norm": 0.39970386028289795,
"learning_rate": 0.0001550638055832528,
"loss": 0.1482,
"step": 15400
},
{
"epoch": 0.6978534553029617,
"grad_norm": 0.4422559142112732,
"learning_rate": 0.0001550012844679977,
"loss": 0.199,
"step": 15410
},
{
"epoch": 0.6983063128339825,
"grad_norm": 0.3976079523563385,
"learning_rate": 0.00015493873251340041,
"loss": 0.1693,
"step": 15420
},
{
"epoch": 0.6987591703650031,
"grad_norm": 0.42097848653793335,
"learning_rate": 0.00015487614975453394,
"loss": 0.1636,
"step": 15430
},
{
"epoch": 0.6992120278960239,
"grad_norm": 0.3382587432861328,
"learning_rate": 0.00015481353622648854,
"loss": 0.1626,
"step": 15440
},
{
"epoch": 0.6996648854270446,
"grad_norm": 0.6046080589294434,
"learning_rate": 0.00015475089196437182,
"loss": 0.2045,
"step": 15450
},
{
"epoch": 0.7001177429580654,
"grad_norm": 0.36402109265327454,
"learning_rate": 0.00015468821700330855,
"loss": 0.1881,
"step": 15460
},
{
"epoch": 0.7005706004890861,
"grad_norm": 0.4470110237598419,
"learning_rate": 0.00015462551137844063,
"loss": 0.1714,
"step": 15470
},
{
"epoch": 0.7010234580201069,
"grad_norm": 0.369165301322937,
"learning_rate": 0.00015456277512492737,
"loss": 0.1648,
"step": 15480
},
{
"epoch": 0.7014763155511277,
"grad_norm": 0.26493504643440247,
"learning_rate": 0.00015450000827794505,
"loss": 0.1724,
"step": 15490
},
{
"epoch": 0.7019291730821483,
"grad_norm": 0.44206854701042175,
"learning_rate": 0.00015443721087268715,
"loss": 0.1534,
"step": 15500
},
{
"epoch": 0.7023820306131691,
"grad_norm": 0.4720167815685272,
"learning_rate": 0.00015437438294436438,
"loss": 0.1945,
"step": 15510
},
{
"epoch": 0.7028348881441898,
"grad_norm": 0.42412590980529785,
"learning_rate": 0.0001543115245282045,
"loss": 0.1993,
"step": 15520
},
{
"epoch": 0.7032877456752106,
"grad_norm": 0.3611489534378052,
"learning_rate": 0.0001542486356594523,
"loss": 0.1725,
"step": 15530
},
{
"epoch": 0.7037406032062313,
"grad_norm": 0.467637836933136,
"learning_rate": 0.0001541857163733698,
"loss": 0.1662,
"step": 15540
},
{
"epoch": 0.7041934607372521,
"grad_norm": 0.44230884313583374,
"learning_rate": 0.00015412276670523592,
"loss": 0.1812,
"step": 15550
},
{
"epoch": 0.7046463182682728,
"grad_norm": 0.40242454409599304,
"learning_rate": 0.0001540597866903467,
"loss": 0.1596,
"step": 15560
},
{
"epoch": 0.7050991757992935,
"grad_norm": 0.4991750121116638,
"learning_rate": 0.00015399677636401514,
"loss": 0.2045,
"step": 15570
},
{
"epoch": 0.7055520333303142,
"grad_norm": 0.38457340002059937,
"learning_rate": 0.0001539337357615713,
"loss": 0.158,
"step": 15580
},
{
"epoch": 0.706004890861335,
"grad_norm": 0.5183397531509399,
"learning_rate": 0.00015387066491836217,
"loss": 0.1964,
"step": 15590
},
{
"epoch": 0.7064577483923558,
"grad_norm": 0.37787193059921265,
"learning_rate": 0.00015380756386975178,
"loss": 0.1738,
"step": 15600
},
{
"epoch": 0.7069106059233765,
"grad_norm": 0.41050177812576294,
"learning_rate": 0.00015374443265112091,
"loss": 0.171,
"step": 15610
},
{
"epoch": 0.7073634634543973,
"grad_norm": 0.45450904965400696,
"learning_rate": 0.00015368127129786744,
"loss": 0.1597,
"step": 15620
},
{
"epoch": 0.707816320985418,
"grad_norm": 0.3817354440689087,
"learning_rate": 0.00015361807984540612,
"loss": 0.1904,
"step": 15630
},
{
"epoch": 0.7082691785164388,
"grad_norm": 0.47394871711730957,
"learning_rate": 0.00015355485832916846,
"loss": 0.199,
"step": 15640
},
{
"epoch": 0.7087220360474594,
"grad_norm": 0.41219305992126465,
"learning_rate": 0.00015349160678460294,
"loss": 0.1688,
"step": 15650
},
{
"epoch": 0.7091748935784802,
"grad_norm": 0.4790438413619995,
"learning_rate": 0.00015342832524717484,
"loss": 0.1725,
"step": 15660
},
{
"epoch": 0.7096277511095009,
"grad_norm": 0.37097597122192383,
"learning_rate": 0.00015336501375236625,
"loss": 0.179,
"step": 15670
},
{
"epoch": 0.7100806086405217,
"grad_norm": 0.4001186192035675,
"learning_rate": 0.00015330167233567607,
"loss": 0.1846,
"step": 15680
},
{
"epoch": 0.7105334661715424,
"grad_norm": 0.2776915431022644,
"learning_rate": 0.00015323830103262,
"loss": 0.1459,
"step": 15690
},
{
"epoch": 0.7109863237025632,
"grad_norm": 0.45224013924598694,
"learning_rate": 0.00015317489987873043,
"loss": 0.1904,
"step": 15700
},
{
"epoch": 0.711439181233584,
"grad_norm": 0.5740782618522644,
"learning_rate": 0.00015311146890955655,
"loss": 0.1683,
"step": 15710
},
{
"epoch": 0.7118920387646046,
"grad_norm": 0.4492044448852539,
"learning_rate": 0.00015304800816066426,
"loss": 0.1652,
"step": 15720
},
{
"epoch": 0.7123448962956254,
"grad_norm": 0.36929264664649963,
"learning_rate": 0.00015298451766763608,
"loss": 0.1718,
"step": 15730
},
{
"epoch": 0.7127977538266461,
"grad_norm": 0.4240851402282715,
"learning_rate": 0.00015292099746607135,
"loss": 0.2047,
"step": 15740
},
{
"epoch": 0.7132506113576669,
"grad_norm": 0.42248156666755676,
"learning_rate": 0.00015285744759158592,
"loss": 0.205,
"step": 15750
},
{
"epoch": 0.7137034688886876,
"grad_norm": 0.35184019804000854,
"learning_rate": 0.0001527938680798124,
"loss": 0.1894,
"step": 15760
},
{
"epoch": 0.7141563264197084,
"grad_norm": 0.45526647567749023,
"learning_rate": 0.00015273025896639993,
"loss": 0.1753,
"step": 15770
},
{
"epoch": 0.7146091839507291,
"grad_norm": 0.3351684510707855,
"learning_rate": 0.00015266662028701425,
"loss": 0.1756,
"step": 15780
},
{
"epoch": 0.7150620414817498,
"grad_norm": 0.36095982789993286,
"learning_rate": 0.0001526029520773378,
"loss": 0.1781,
"step": 15790
},
{
"epoch": 0.7155148990127705,
"grad_norm": 0.40348491072654724,
"learning_rate": 0.00015253925437306939,
"loss": 0.2053,
"step": 15800
},
{
"epoch": 0.7159677565437913,
"grad_norm": 0.6563317775726318,
"learning_rate": 0.00015247552720992454,
"loss": 0.2023,
"step": 15810
},
{
"epoch": 0.7164206140748121,
"grad_norm": 0.5342344641685486,
"learning_rate": 0.00015241177062363522,
"loss": 0.1682,
"step": 15820
},
{
"epoch": 0.7168734716058328,
"grad_norm": 0.4724293053150177,
"learning_rate": 0.00015234798464994976,
"loss": 0.1865,
"step": 15830
},
{
"epoch": 0.7173263291368536,
"grad_norm": 0.3945305347442627,
"learning_rate": 0.00015228416932463326,
"loss": 0.1545,
"step": 15840
},
{
"epoch": 0.7177791866678743,
"grad_norm": 0.4521254003047943,
"learning_rate": 0.00015222032468346702,
"loss": 0.1825,
"step": 15850
},
{
"epoch": 0.7182320441988951,
"grad_norm": 0.38483917713165283,
"learning_rate": 0.0001521564507622489,
"loss": 0.1686,
"step": 15860
},
{
"epoch": 0.7186849017299157,
"grad_norm": 0.40425315499305725,
"learning_rate": 0.00015209254759679313,
"loss": 0.1505,
"step": 15870
},
{
"epoch": 0.7191377592609365,
"grad_norm": 0.34470799565315247,
"learning_rate": 0.00015202861522293034,
"loss": 0.1855,
"step": 15880
},
{
"epoch": 0.7195906167919572,
"grad_norm": 0.3882667124271393,
"learning_rate": 0.00015196465367650763,
"loss": 0.1721,
"step": 15890
},
{
"epoch": 0.720043474322978,
"grad_norm": 0.46202096343040466,
"learning_rate": 0.00015190066299338833,
"loss": 0.1869,
"step": 15900
},
{
"epoch": 0.7204963318539988,
"grad_norm": 0.444326788187027,
"learning_rate": 0.00015183664320945215,
"loss": 0.1734,
"step": 15910
},
{
"epoch": 0.7209491893850195,
"grad_norm": 0.3723192811012268,
"learning_rate": 0.00015177259436059513,
"loss": 0.172,
"step": 15920
},
{
"epoch": 0.7214020469160403,
"grad_norm": 0.4908646047115326,
"learning_rate": 0.00015170851648272962,
"loss": 0.1834,
"step": 15930
},
{
"epoch": 0.7218549044470609,
"grad_norm": 0.6243703961372375,
"learning_rate": 0.0001516444096117842,
"loss": 0.2002,
"step": 15940
},
{
"epoch": 0.7223077619780817,
"grad_norm": 0.3855245113372803,
"learning_rate": 0.0001515802737837038,
"loss": 0.1922,
"step": 15950
},
{
"epoch": 0.7227606195091024,
"grad_norm": 0.3904649019241333,
"learning_rate": 0.00015151610903444942,
"loss": 0.185,
"step": 15960
},
{
"epoch": 0.7232134770401232,
"grad_norm": 0.28631916642189026,
"learning_rate": 0.00015145191539999846,
"loss": 0.1685,
"step": 15970
},
{
"epoch": 0.7236663345711439,
"grad_norm": 0.38608241081237793,
"learning_rate": 0.00015138769291634437,
"loss": 0.2114,
"step": 15980
},
{
"epoch": 0.7241191921021647,
"grad_norm": 0.4355546534061432,
"learning_rate": 0.00015132344161949689,
"loss": 0.1499,
"step": 15990
},
{
"epoch": 0.7245720496331854,
"grad_norm": 0.4393002986907959,
"learning_rate": 0.00015125916154548185,
"loss": 0.1834,
"step": 16000
},
{
"epoch": 0.7245720496331854,
"eval_chrf": 80.97540239216912,
"eval_loss": 0.16372597217559814,
"eval_runtime": 16.3583,
"eval_samples_per_second": 0.611,
"eval_steps_per_second": 0.061,
"step": 16000
},
{
"epoch": 0.7250249071642061,
"grad_norm": 0.413165807723999,
"learning_rate": 0.00015119485273034123,
"loss": 0.1591,
"step": 16010
},
{
"epoch": 0.7254777646952268,
"grad_norm": 0.5554599761962891,
"learning_rate": 0.00015113051521013313,
"loss": 0.1754,
"step": 16020
},
{
"epoch": 0.7259306222262476,
"grad_norm": 0.41952240467071533,
"learning_rate": 0.00015106614902093174,
"loss": 0.1743,
"step": 16030
},
{
"epoch": 0.7263834797572684,
"grad_norm": 0.5459387302398682,
"learning_rate": 0.00015100175419882728,
"loss": 0.2083,
"step": 16040
},
{
"epoch": 0.7268363372882891,
"grad_norm": 0.5478861331939697,
"learning_rate": 0.00015093733077992612,
"loss": 0.1837,
"step": 16050
},
{
"epoch": 0.7272891948193099,
"grad_norm": 0.3802858889102936,
"learning_rate": 0.0001508728788003506,
"loss": 0.1839,
"step": 16060
},
{
"epoch": 0.7277420523503306,
"grad_norm": 0.39728957414627075,
"learning_rate": 0.00015080839829623909,
"loss": 0.1603,
"step": 16070
},
{
"epoch": 0.7281949098813514,
"grad_norm": 0.4951670467853546,
"learning_rate": 0.00015074388930374596,
"loss": 0.1769,
"step": 16080
},
{
"epoch": 0.728647767412372,
"grad_norm": 0.37879490852355957,
"learning_rate": 0.00015067935185904152,
"loss": 0.1603,
"step": 16090
},
{
"epoch": 0.7291006249433928,
"grad_norm": 0.33771833777427673,
"learning_rate": 0.00015061478599831212,
"loss": 0.1876,
"step": 16100
},
{
"epoch": 0.7295534824744135,
"grad_norm": 0.5644478797912598,
"learning_rate": 0.0001505501917577599,
"loss": 0.177,
"step": 16110
},
{
"epoch": 0.7300063400054343,
"grad_norm": 0.4218476116657257,
"learning_rate": 0.000150485569173603,
"loss": 0.1801,
"step": 16120
},
{
"epoch": 0.730459197536455,
"grad_norm": 0.2640566825866699,
"learning_rate": 0.00015042091828207553,
"loss": 0.1885,
"step": 16130
},
{
"epoch": 0.7309120550674758,
"grad_norm": 0.5227727293968201,
"learning_rate": 0.0001503562391194273,
"loss": 0.1689,
"step": 16140
},
{
"epoch": 0.7313649125984966,
"grad_norm": 0.5304290056228638,
"learning_rate": 0.00015029153172192413,
"loss": 0.203,
"step": 16150
},
{
"epoch": 0.7318177701295172,
"grad_norm": 0.35998350381851196,
"learning_rate": 0.00015022679612584753,
"loss": 0.1614,
"step": 16160
},
{
"epoch": 0.732270627660538,
"grad_norm": 0.3472217321395874,
"learning_rate": 0.0001501620323674949,
"loss": 0.1797,
"step": 16170
},
{
"epoch": 0.7327234851915587,
"grad_norm": 0.30541810393333435,
"learning_rate": 0.00015009724048317954,
"loss": 0.1527,
"step": 16180
},
{
"epoch": 0.7331763427225795,
"grad_norm": 0.4038911759853363,
"learning_rate": 0.00015003242050923022,
"loss": 0.2105,
"step": 16190
},
{
"epoch": 0.7336292002536002,
"grad_norm": 0.4016546308994293,
"learning_rate": 0.0001499675724819918,
"loss": 0.1821,
"step": 16200
},
{
"epoch": 0.734082057784621,
"grad_norm": 0.37570664286613464,
"learning_rate": 0.0001499026964378246,
"loss": 0.2005,
"step": 16210
},
{
"epoch": 0.7345349153156417,
"grad_norm": 0.35822850465774536,
"learning_rate": 0.00014983779241310488,
"loss": 0.2041,
"step": 16220
},
{
"epoch": 0.7349877728466624,
"grad_norm": 0.5933873653411865,
"learning_rate": 0.00014977286044422436,
"loss": 0.1714,
"step": 16230
},
{
"epoch": 0.7354406303776831,
"grad_norm": 0.35240015387535095,
"learning_rate": 0.0001497079005675906,
"loss": 0.1685,
"step": 16240
},
{
"epoch": 0.7358934879087039,
"grad_norm": 0.5472621321678162,
"learning_rate": 0.0001496429128196267,
"loss": 0.1909,
"step": 16250
},
{
"epoch": 0.7363463454397247,
"grad_norm": 0.3498932421207428,
"learning_rate": 0.00014957789723677147,
"loss": 0.1505,
"step": 16260
},
{
"epoch": 0.7367992029707454,
"grad_norm": 0.4525830149650574,
"learning_rate": 0.0001495128538554793,
"loss": 0.1731,
"step": 16270
},
{
"epoch": 0.7372520605017662,
"grad_norm": 0.4489855170249939,
"learning_rate": 0.00014944778271222013,
"loss": 0.1724,
"step": 16280
},
{
"epoch": 0.7377049180327869,
"grad_norm": 0.401915967464447,
"learning_rate": 0.0001493826838434795,
"loss": 0.1678,
"step": 16290
},
{
"epoch": 0.7381577755638076,
"grad_norm": 0.30955928564071655,
"learning_rate": 0.00014931755728575852,
"loss": 0.1657,
"step": 16300
},
{
"epoch": 0.7386106330948283,
"grad_norm": 0.43423953652381897,
"learning_rate": 0.00014925240307557376,
"loss": 0.1949,
"step": 16310
},
{
"epoch": 0.7390634906258491,
"grad_norm": 0.3125792443752289,
"learning_rate": 0.00014918722124945735,
"loss": 0.1707,
"step": 16320
},
{
"epoch": 0.7395163481568698,
"grad_norm": 0.6532090306282043,
"learning_rate": 0.00014912201184395685,
"loss": 0.1801,
"step": 16330
},
{
"epoch": 0.7399692056878906,
"grad_norm": 0.3583991825580597,
"learning_rate": 0.00014905677489563537,
"loss": 0.1661,
"step": 16340
},
{
"epoch": 0.7404220632189114,
"grad_norm": 0.3103122413158417,
"learning_rate": 0.0001489915104410714,
"loss": 0.1585,
"step": 16350
},
{
"epoch": 0.7408749207499321,
"grad_norm": 0.5210757851600647,
"learning_rate": 0.00014892621851685879,
"loss": 0.167,
"step": 16360
},
{
"epoch": 0.7413277782809529,
"grad_norm": 0.3990892767906189,
"learning_rate": 0.000148860899159607,
"loss": 0.1672,
"step": 16370
},
{
"epoch": 0.7417806358119735,
"grad_norm": 0.3420082926750183,
"learning_rate": 0.00014879555240594064,
"loss": 0.163,
"step": 16380
},
{
"epoch": 0.7422334933429943,
"grad_norm": 0.39129629731178284,
"learning_rate": 0.0001487301782924998,
"loss": 0.1637,
"step": 16390
},
{
"epoch": 0.742686350874015,
"grad_norm": 0.4083895981311798,
"learning_rate": 0.00014866477685593989,
"loss": 0.1748,
"step": 16400
},
{
"epoch": 0.7431392084050358,
"grad_norm": 0.5733197927474976,
"learning_rate": 0.00014859934813293165,
"loss": 0.167,
"step": 16410
},
{
"epoch": 0.7435920659360565,
"grad_norm": 0.45333850383758545,
"learning_rate": 0.0001485338921601611,
"loss": 0.1763,
"step": 16420
},
{
"epoch": 0.7440449234670773,
"grad_norm": 0.4153973162174225,
"learning_rate": 0.0001484684089743296,
"loss": 0.1784,
"step": 16430
},
{
"epoch": 0.744497780998098,
"grad_norm": 0.4648512899875641,
"learning_rate": 0.00014840289861215363,
"loss": 0.1816,
"step": 16440
},
{
"epoch": 0.7449506385291187,
"grad_norm": 0.3321559727191925,
"learning_rate": 0.00014833736111036507,
"loss": 0.179,
"step": 16450
},
{
"epoch": 0.7454034960601394,
"grad_norm": 0.4481167793273926,
"learning_rate": 0.0001482717965057109,
"loss": 0.1517,
"step": 16460
},
{
"epoch": 0.7458563535911602,
"grad_norm": 0.3410244584083557,
"learning_rate": 0.00014820620483495332,
"loss": 0.1651,
"step": 16470
},
{
"epoch": 0.746309211122181,
"grad_norm": 0.40615367889404297,
"learning_rate": 0.00014814058613486978,
"loss": 0.1855,
"step": 16480
},
{
"epoch": 0.7467620686532017,
"grad_norm": 0.5035942792892456,
"learning_rate": 0.00014807494044225282,
"loss": 0.1768,
"step": 16490
},
{
"epoch": 0.7472149261842225,
"grad_norm": 0.6195945739746094,
"learning_rate": 0.00014800926779391012,
"loss": 0.1635,
"step": 16500
},
{
"epoch": 0.7476677837152432,
"grad_norm": 0.43557047843933105,
"learning_rate": 0.00014794356822666444,
"loss": 0.1511,
"step": 16510
},
{
"epoch": 0.7481206412462639,
"grad_norm": 0.3755180537700653,
"learning_rate": 0.00014787784177735372,
"loss": 0.184,
"step": 16520
},
{
"epoch": 0.7485734987772846,
"grad_norm": 0.3500405251979828,
"learning_rate": 0.0001478120884828309,
"loss": 0.1753,
"step": 16530
},
{
"epoch": 0.7490263563083054,
"grad_norm": 0.43246424198150635,
"learning_rate": 0.00014774630837996404,
"loss": 0.1919,
"step": 16540
},
{
"epoch": 0.7494792138393261,
"grad_norm": 0.3369980752468109,
"learning_rate": 0.00014768050150563611,
"loss": 0.1831,
"step": 16550
},
{
"epoch": 0.7499320713703469,
"grad_norm": 0.3230205774307251,
"learning_rate": 0.0001476146678967453,
"loss": 0.1981,
"step": 16560
},
{
"epoch": 0.7503849289013677,
"grad_norm": 0.5379915237426758,
"learning_rate": 0.0001475488075902045,
"loss": 0.1676,
"step": 16570
},
{
"epoch": 0.7508377864323884,
"grad_norm": 0.3465295433998108,
"learning_rate": 0.00014748292062294183,
"loss": 0.1706,
"step": 16580
},
{
"epoch": 0.7512906439634092,
"grad_norm": 0.39468634128570557,
"learning_rate": 0.00014741700703190026,
"loss": 0.1481,
"step": 16590
},
{
"epoch": 0.7517435014944298,
"grad_norm": 0.40046387910842896,
"learning_rate": 0.00014735106685403768,
"loss": 0.1889,
"step": 16600
},
{
"epoch": 0.7521963590254506,
"grad_norm": 0.3478832542896271,
"learning_rate": 0.00014728510012632686,
"loss": 0.197,
"step": 16610
},
{
"epoch": 0.7526492165564713,
"grad_norm": 0.4016125202178955,
"learning_rate": 0.00014721910688575548,
"loss": 0.1549,
"step": 16620
},
{
"epoch": 0.7531020740874921,
"grad_norm": 0.415632039308548,
"learning_rate": 0.00014715308716932617,
"loss": 0.1722,
"step": 16630
},
{
"epoch": 0.7535549316185128,
"grad_norm": 0.4447801411151886,
"learning_rate": 0.00014708704101405626,
"loss": 0.1649,
"step": 16640
},
{
"epoch": 0.7540077891495336,
"grad_norm": 0.3012316823005676,
"learning_rate": 0.000147020968456978,
"loss": 0.1813,
"step": 16650
},
{
"epoch": 0.7544606466805543,
"grad_norm": 0.3829558789730072,
"learning_rate": 0.00014695486953513845,
"loss": 0.1678,
"step": 16660
},
{
"epoch": 0.754913504211575,
"grad_norm": 0.4798243045806885,
"learning_rate": 0.00014688874428559937,
"loss": 0.1805,
"step": 16670
},
{
"epoch": 0.7553663617425957,
"grad_norm": 0.40611493587493896,
"learning_rate": 0.00014682259274543738,
"loss": 0.1735,
"step": 16680
},
{
"epoch": 0.7558192192736165,
"grad_norm": 0.43578040599823,
"learning_rate": 0.00014675641495174376,
"loss": 0.1868,
"step": 16690
},
{
"epoch": 0.7562720768046373,
"grad_norm": 0.325115442276001,
"learning_rate": 0.00014669021094162457,
"loss": 0.1977,
"step": 16700
},
{
"epoch": 0.756724934335658,
"grad_norm": 0.32389017939567566,
"learning_rate": 0.00014662398075220053,
"loss": 0.1472,
"step": 16710
},
{
"epoch": 0.7571777918666788,
"grad_norm": 0.46646058559417725,
"learning_rate": 0.00014655772442060706,
"loss": 0.1864,
"step": 16720
},
{
"epoch": 0.7576306493976995,
"grad_norm": 0.3650246560573578,
"learning_rate": 0.00014649144198399422,
"loss": 0.1564,
"step": 16730
},
{
"epoch": 0.7580835069287202,
"grad_norm": 0.3701794147491455,
"learning_rate": 0.00014642513347952674,
"loss": 0.1871,
"step": 16740
},
{
"epoch": 0.7585363644597409,
"grad_norm": 0.2885875999927521,
"learning_rate": 0.00014635879894438395,
"loss": 0.1663,
"step": 16750
},
{
"epoch": 0.7589892219907617,
"grad_norm": 0.5447561144828796,
"learning_rate": 0.00014629243841575974,
"loss": 0.2039,
"step": 16760
},
{
"epoch": 0.7594420795217824,
"grad_norm": 0.36518388986587524,
"learning_rate": 0.00014622605193086264,
"loss": 0.1666,
"step": 16770
},
{
"epoch": 0.7598949370528032,
"grad_norm": 0.4391559958457947,
"learning_rate": 0.00014615963952691567,
"loss": 0.176,
"step": 16780
},
{
"epoch": 0.760347794583824,
"grad_norm": 0.3832821846008301,
"learning_rate": 0.00014609320124115641,
"loss": 0.1987,
"step": 16790
},
{
"epoch": 0.7608006521148447,
"grad_norm": 0.3192236125469208,
"learning_rate": 0.000146026737110837,
"loss": 0.1716,
"step": 16800
},
{
"epoch": 0.7612535096458654,
"grad_norm": 0.4159684479236603,
"learning_rate": 0.000145960247173224,
"loss": 0.1624,
"step": 16810
},
{
"epoch": 0.7617063671768861,
"grad_norm": 0.35425105690956116,
"learning_rate": 0.00014589373146559843,
"loss": 0.1704,
"step": 16820
},
{
"epoch": 0.7621592247079069,
"grad_norm": 0.3028491139411926,
"learning_rate": 0.00014582719002525582,
"loss": 0.1564,
"step": 16830
},
{
"epoch": 0.7626120822389276,
"grad_norm": 0.5623031258583069,
"learning_rate": 0.00014576062288950613,
"loss": 0.146,
"step": 16840
},
{
"epoch": 0.7630649397699484,
"grad_norm": 0.49645712971687317,
"learning_rate": 0.00014569403009567365,
"loss": 0.1825,
"step": 16850
},
{
"epoch": 0.7635177973009691,
"grad_norm": 0.36377304792404175,
"learning_rate": 0.00014562741168109715,
"loss": 0.1858,
"step": 16860
},
{
"epoch": 0.7639706548319899,
"grad_norm": 0.4291059672832489,
"learning_rate": 0.00014556076768312975,
"loss": 0.1638,
"step": 16870
},
{
"epoch": 0.7644235123630106,
"grad_norm": 0.5143736004829407,
"learning_rate": 0.0001454940981391388,
"loss": 0.2007,
"step": 16880
},
{
"epoch": 0.7648763698940313,
"grad_norm": 0.36746224761009216,
"learning_rate": 0.0001454274030865061,
"loss": 0.1696,
"step": 16890
},
{
"epoch": 0.765329227425052,
"grad_norm": 0.33395445346832275,
"learning_rate": 0.00014536068256262773,
"loss": 0.1762,
"step": 16900
},
{
"epoch": 0.7657820849560728,
"grad_norm": 0.24888546764850616,
"learning_rate": 0.00014529393660491405,
"loss": 0.1755,
"step": 16910
},
{
"epoch": 0.7662349424870936,
"grad_norm": 0.3185586929321289,
"learning_rate": 0.00014522716525078964,
"loss": 0.1797,
"step": 16920
},
{
"epoch": 0.7666878000181143,
"grad_norm": 0.5242403745651245,
"learning_rate": 0.00014516036853769334,
"loss": 0.1675,
"step": 16930
},
{
"epoch": 0.7671406575491351,
"grad_norm": 0.555658221244812,
"learning_rate": 0.00014509354650307817,
"loss": 0.1782,
"step": 16940
},
{
"epoch": 0.7675935150801558,
"grad_norm": 0.4107116162776947,
"learning_rate": 0.00014502669918441152,
"loss": 0.1678,
"step": 16950
},
{
"epoch": 0.7680463726111765,
"grad_norm": 0.27505549788475037,
"learning_rate": 0.00014495982661917473,
"loss": 0.1538,
"step": 16960
},
{
"epoch": 0.7684992301421972,
"grad_norm": 0.5204653143882751,
"learning_rate": 0.00014489292884486342,
"loss": 0.1879,
"step": 16970
},
{
"epoch": 0.768952087673218,
"grad_norm": 0.35485538840293884,
"learning_rate": 0.00014482600589898732,
"loss": 0.1701,
"step": 16980
},
{
"epoch": 0.7694049452042387,
"grad_norm": 0.3793945014476776,
"learning_rate": 0.00014475905781907033,
"loss": 0.1726,
"step": 16990
},
{
"epoch": 0.7698578027352595,
"grad_norm": 0.4990425109863281,
"learning_rate": 0.0001446920846426503,
"loss": 0.1692,
"step": 17000
},
{
"epoch": 0.7698578027352595,
"eval_chrf": 81.35576111494969,
"eval_loss": 0.13622412085533142,
"eval_runtime": 7.5338,
"eval_samples_per_second": 1.327,
"eval_steps_per_second": 0.133,
"step": 17000
},
{
"epoch": 0.7703106602662803,
"grad_norm": 0.47874826192855835,
"learning_rate": 0.00014462508640727926,
"loss": 0.1742,
"step": 17010
},
{
"epoch": 0.770763517797301,
"grad_norm": 0.34694787859916687,
"learning_rate": 0.00014455806315052328,
"loss": 0.1803,
"step": 17020
},
{
"epoch": 0.7712163753283217,
"grad_norm": 0.3238365650177002,
"learning_rate": 0.0001444910149099625,
"loss": 0.1822,
"step": 17030
},
{
"epoch": 0.7716692328593424,
"grad_norm": 0.4790160357952118,
"learning_rate": 0.0001444239417231909,
"loss": 0.1539,
"step": 17040
},
{
"epoch": 0.7721220903903632,
"grad_norm": 0.5026448369026184,
"learning_rate": 0.00014435684362781666,
"loss": 0.1889,
"step": 17050
},
{
"epoch": 0.7725749479213839,
"grad_norm": 0.3439713418483734,
"learning_rate": 0.00014428972066146185,
"loss": 0.1631,
"step": 17060
},
{
"epoch": 0.7730278054524047,
"grad_norm": 0.42995843291282654,
"learning_rate": 0.00014422257286176237,
"loss": 0.1802,
"step": 17070
},
{
"epoch": 0.7734806629834254,
"grad_norm": 0.33130544424057007,
"learning_rate": 0.0001441554002663682,
"loss": 0.1446,
"step": 17080
},
{
"epoch": 0.7739335205144462,
"grad_norm": 0.40167292952537537,
"learning_rate": 0.00014408820291294316,
"loss": 0.1611,
"step": 17090
},
{
"epoch": 0.774386378045467,
"grad_norm": 0.3448004126548767,
"learning_rate": 0.00014402098083916493,
"loss": 0.1491,
"step": 17100
},
{
"epoch": 0.7748392355764876,
"grad_norm": 0.29201722145080566,
"learning_rate": 0.00014395373408272516,
"loss": 0.1777,
"step": 17110
},
{
"epoch": 0.7752920931075084,
"grad_norm": 0.4543125033378601,
"learning_rate": 0.0001438864626813291,
"loss": 0.1725,
"step": 17120
},
{
"epoch": 0.7757449506385291,
"grad_norm": 0.37582921981811523,
"learning_rate": 0.0001438191666726961,
"loss": 0.1894,
"step": 17130
},
{
"epoch": 0.7761978081695499,
"grad_norm": 0.3258911371231079,
"learning_rate": 0.00014375184609455917,
"loss": 0.1624,
"step": 17140
},
{
"epoch": 0.7766506657005706,
"grad_norm": 0.508508563041687,
"learning_rate": 0.00014368450098466506,
"loss": 0.1739,
"step": 17150
},
{
"epoch": 0.7771035232315914,
"grad_norm": 0.35970497131347656,
"learning_rate": 0.00014361713138077433,
"loss": 0.1926,
"step": 17160
},
{
"epoch": 0.7775563807626121,
"grad_norm": 0.3001771867275238,
"learning_rate": 0.00014354973732066132,
"loss": 0.1668,
"step": 17170
},
{
"epoch": 0.7780092382936328,
"grad_norm": 0.38364094495773315,
"learning_rate": 0.00014348231884211399,
"loss": 0.1876,
"step": 17180
},
{
"epoch": 0.7784620958246535,
"grad_norm": 0.34611520171165466,
"learning_rate": 0.000143414875982934,
"loss": 0.185,
"step": 17190
},
{
"epoch": 0.7789149533556743,
"grad_norm": 0.3437662720680237,
"learning_rate": 0.00014334740878093675,
"loss": 0.1668,
"step": 17200
},
{
"epoch": 0.779367810886695,
"grad_norm": 0.34300467371940613,
"learning_rate": 0.00014327991727395125,
"loss": 0.1841,
"step": 17210
},
{
"epoch": 0.7798206684177158,
"grad_norm": 0.48151400685310364,
"learning_rate": 0.0001432124014998201,
"loss": 0.1598,
"step": 17220
},
{
"epoch": 0.7802735259487366,
"grad_norm": 0.41797640919685364,
"learning_rate": 0.00014314486149639963,
"loss": 0.1672,
"step": 17230
},
{
"epoch": 0.7807263834797573,
"grad_norm": 0.34387126564979553,
"learning_rate": 0.00014307729730155956,
"loss": 0.1826,
"step": 17240
},
{
"epoch": 0.781179241010778,
"grad_norm": 0.47516635060310364,
"learning_rate": 0.00014300970895318336,
"loss": 0.1627,
"step": 17250
},
{
"epoch": 0.7816320985417987,
"grad_norm": 0.4279870390892029,
"learning_rate": 0.000142942096489168,
"loss": 0.1616,
"step": 17260
},
{
"epoch": 0.7820849560728195,
"grad_norm": 0.31233519315719604,
"learning_rate": 0.00014287445994742382,
"loss": 0.1484,
"step": 17270
},
{
"epoch": 0.7825378136038402,
"grad_norm": 0.4351353645324707,
"learning_rate": 0.00014280679936587483,
"loss": 0.1739,
"step": 17280
},
{
"epoch": 0.782990671134861,
"grad_norm": 0.3079448640346527,
"learning_rate": 0.0001427391147824585,
"loss": 0.1645,
"step": 17290
},
{
"epoch": 0.7834435286658817,
"grad_norm": 0.542190670967102,
"learning_rate": 0.00014267140623512573,
"loss": 0.18,
"step": 17300
},
{
"epoch": 0.7838963861969025,
"grad_norm": 0.41900062561035156,
"learning_rate": 0.0001426036737618408,
"loss": 0.1575,
"step": 17310
},
{
"epoch": 0.7843492437279231,
"grad_norm": 0.4176177680492401,
"learning_rate": 0.00014253591740058148,
"loss": 0.147,
"step": 17320
},
{
"epoch": 0.7848021012589439,
"grad_norm": 0.3914748430252075,
"learning_rate": 0.00014246813718933897,
"loss": 0.1871,
"step": 17330
},
{
"epoch": 0.7852549587899647,
"grad_norm": 0.3752351403236389,
"learning_rate": 0.00014240033316611768,
"loss": 0.153,
"step": 17340
},
{
"epoch": 0.7857078163209854,
"grad_norm": 0.5136403441429138,
"learning_rate": 0.00014233250536893553,
"loss": 0.1738,
"step": 17350
},
{
"epoch": 0.7861606738520062,
"grad_norm": 0.6337378621101379,
"learning_rate": 0.00014226465383582375,
"loss": 0.1708,
"step": 17360
},
{
"epoch": 0.7866135313830269,
"grad_norm": 0.42952653765678406,
"learning_rate": 0.0001421967786048268,
"loss": 0.1901,
"step": 17370
},
{
"epoch": 0.7870663889140477,
"grad_norm": 0.352598637342453,
"learning_rate": 0.00014212887971400248,
"loss": 0.1735,
"step": 17380
},
{
"epoch": 0.7875192464450684,
"grad_norm": 0.4647071659564972,
"learning_rate": 0.00014206095720142186,
"loss": 0.17,
"step": 17390
},
{
"epoch": 0.7879721039760891,
"grad_norm": 0.5100772976875305,
"learning_rate": 0.00014199301110516923,
"loss": 0.1549,
"step": 17400
},
{
"epoch": 0.7884249615071098,
"grad_norm": 0.3351843059062958,
"learning_rate": 0.0001419250414633421,
"loss": 0.1597,
"step": 17410
},
{
"epoch": 0.7888778190381306,
"grad_norm": 0.45948684215545654,
"learning_rate": 0.00014185704831405125,
"loss": 0.211,
"step": 17420
},
{
"epoch": 0.7893306765691513,
"grad_norm": 0.4675339162349701,
"learning_rate": 0.00014178903169542056,
"loss": 0.1643,
"step": 17430
},
{
"epoch": 0.7897835341001721,
"grad_norm": 0.3438480496406555,
"learning_rate": 0.0001417209916455871,
"loss": 0.1509,
"step": 17440
},
{
"epoch": 0.7902363916311929,
"grad_norm": 0.399120569229126,
"learning_rate": 0.0001416529282027011,
"loss": 0.1437,
"step": 17450
},
{
"epoch": 0.7906892491622136,
"grad_norm": 0.4113789200782776,
"learning_rate": 0.00014158484140492584,
"loss": 0.1588,
"step": 17460
},
{
"epoch": 0.7911421066932343,
"grad_norm": 0.26559188961982727,
"learning_rate": 0.00014151673129043774,
"loss": 0.1866,
"step": 17470
},
{
"epoch": 0.791594964224255,
"grad_norm": 0.5179800391197205,
"learning_rate": 0.00014144859789742633,
"loss": 0.1728,
"step": 17480
},
{
"epoch": 0.7920478217552758,
"grad_norm": 0.3756614625453949,
"learning_rate": 0.00014138044126409414,
"loss": 0.1769,
"step": 17490
},
{
"epoch": 0.7925006792862965,
"grad_norm": 0.5483672618865967,
"learning_rate": 0.00014131226142865674,
"loss": 0.1763,
"step": 17500
},
{
"epoch": 0.7929535368173173,
"grad_norm": 0.36379554867744446,
"learning_rate": 0.0001412440584293427,
"loss": 0.1943,
"step": 17510
},
{
"epoch": 0.793406394348338,
"grad_norm": 0.39273983240127563,
"learning_rate": 0.00014117583230439365,
"loss": 0.1749,
"step": 17520
},
{
"epoch": 0.7938592518793588,
"grad_norm": 0.5314661264419556,
"learning_rate": 0.00014110758309206404,
"loss": 0.1744,
"step": 17530
},
{
"epoch": 0.7943121094103794,
"grad_norm": 0.4262920022010803,
"learning_rate": 0.00014103931083062142,
"loss": 0.1979,
"step": 17540
},
{
"epoch": 0.7947649669414002,
"grad_norm": 0.7184832096099854,
"learning_rate": 0.00014097101555834619,
"loss": 0.1747,
"step": 17550
},
{
"epoch": 0.795217824472421,
"grad_norm": 0.33851149678230286,
"learning_rate": 0.00014090269731353166,
"loss": 0.1597,
"step": 17560
},
{
"epoch": 0.7956706820034417,
"grad_norm": 0.42809876799583435,
"learning_rate": 0.00014083435613448402,
"loss": 0.1591,
"step": 17570
},
{
"epoch": 0.7961235395344625,
"grad_norm": 0.4343106150627136,
"learning_rate": 0.0001407659920595223,
"loss": 0.1664,
"step": 17580
},
{
"epoch": 0.7965763970654832,
"grad_norm": 0.2742310166358948,
"learning_rate": 0.0001406976051269784,
"loss": 0.1845,
"step": 17590
},
{
"epoch": 0.797029254596504,
"grad_norm": 0.3528534471988678,
"learning_rate": 0.00014062919537519703,
"loss": 0.1606,
"step": 17600
},
{
"epoch": 0.7974821121275247,
"grad_norm": 0.34193626046180725,
"learning_rate": 0.0001405607628425357,
"loss": 0.1507,
"step": 17610
},
{
"epoch": 0.7979349696585454,
"grad_norm": 0.39214470982551575,
"learning_rate": 0.00014049230756736468,
"loss": 0.2263,
"step": 17620
},
{
"epoch": 0.7983878271895661,
"grad_norm": 0.39479154348373413,
"learning_rate": 0.00014042382958806695,
"loss": 0.1968,
"step": 17630
},
{
"epoch": 0.7988406847205869,
"grad_norm": 0.4590633511543274,
"learning_rate": 0.00014035532894303833,
"loss": 0.1643,
"step": 17640
},
{
"epoch": 0.7992935422516076,
"grad_norm": 0.5160406827926636,
"learning_rate": 0.00014028680567068725,
"loss": 0.1696,
"step": 17650
},
{
"epoch": 0.7997463997826284,
"grad_norm": 0.3378937840461731,
"learning_rate": 0.00014021825980943485,
"loss": 0.1803,
"step": 17660
},
{
"epoch": 0.8001992573136492,
"grad_norm": 0.4125441014766693,
"learning_rate": 0.000140149691397715,
"loss": 0.1462,
"step": 17670
},
{
"epoch": 0.8006521148446699,
"grad_norm": 0.4796926975250244,
"learning_rate": 0.0001400811004739741,
"loss": 0.1605,
"step": 17680
},
{
"epoch": 0.8011049723756906,
"grad_norm": 0.31294503808021545,
"learning_rate": 0.00014001248707667122,
"loss": 0.1583,
"step": 17690
},
{
"epoch": 0.8015578299067113,
"grad_norm": 0.4358683228492737,
"learning_rate": 0.00013994385124427812,
"loss": 0.1648,
"step": 17700
},
{
"epoch": 0.8020106874377321,
"grad_norm": 0.4065668284893036,
"learning_rate": 0.000139875193015279,
"loss": 0.153,
"step": 17710
},
{
"epoch": 0.8024635449687528,
"grad_norm": 0.4194456934928894,
"learning_rate": 0.00013980651242817072,
"loss": 0.1767,
"step": 17720
},
{
"epoch": 0.8029164024997736,
"grad_norm": 0.343515008687973,
"learning_rate": 0.00013973780952146263,
"loss": 0.173,
"step": 17730
},
{
"epoch": 0.8033692600307943,
"grad_norm": 0.4212777614593506,
"learning_rate": 0.00013966908433367655,
"loss": 0.1804,
"step": 17740
},
{
"epoch": 0.8038221175618151,
"grad_norm": 0.5466294288635254,
"learning_rate": 0.00013960033690334694,
"loss": 0.1559,
"step": 17750
},
{
"epoch": 0.8042749750928357,
"grad_norm": 0.6396472454071045,
"learning_rate": 0.00013953156726902058,
"loss": 0.1683,
"step": 17760
},
{
"epoch": 0.8047278326238565,
"grad_norm": 0.3665732741355896,
"learning_rate": 0.00013946277546925673,
"loss": 0.1843,
"step": 17770
},
{
"epoch": 0.8051806901548773,
"grad_norm": 0.2784450054168701,
"learning_rate": 0.00013939396154262715,
"loss": 0.1835,
"step": 17780
},
{
"epoch": 0.805633547685898,
"grad_norm": 0.3693857789039612,
"learning_rate": 0.00013932512552771597,
"loss": 0.1727,
"step": 17790
},
{
"epoch": 0.8060864052169188,
"grad_norm": 0.3366953134536743,
"learning_rate": 0.00013925626746311967,
"loss": 0.1716,
"step": 17800
},
{
"epoch": 0.8065392627479395,
"grad_norm": 0.3310844898223877,
"learning_rate": 0.0001391873873874471,
"loss": 0.1738,
"step": 17810
},
{
"epoch": 0.8069921202789603,
"grad_norm": 0.31167569756507874,
"learning_rate": 0.0001391184853393195,
"loss": 0.1516,
"step": 17820
},
{
"epoch": 0.807444977809981,
"grad_norm": 0.3066498041152954,
"learning_rate": 0.00013904956135737042,
"loss": 0.1602,
"step": 17830
},
{
"epoch": 0.8078978353410017,
"grad_norm": 0.3827773928642273,
"learning_rate": 0.00013898061548024563,
"loss": 0.179,
"step": 17840
},
{
"epoch": 0.8083506928720224,
"grad_norm": 0.33546361327171326,
"learning_rate": 0.0001389116477466033,
"loss": 0.191,
"step": 17850
},
{
"epoch": 0.8088035504030432,
"grad_norm": 0.3183661699295044,
"learning_rate": 0.00013884265819511375,
"loss": 0.1717,
"step": 17860
},
{
"epoch": 0.809256407934064,
"grad_norm": 0.40386420488357544,
"learning_rate": 0.00013877364686445961,
"loss": 0.166,
"step": 17870
},
{
"epoch": 0.8097092654650847,
"grad_norm": 0.43872517347335815,
"learning_rate": 0.00013870461379333565,
"loss": 0.1755,
"step": 17880
},
{
"epoch": 0.8101621229961055,
"grad_norm": 0.41907742619514465,
"learning_rate": 0.00013863555902044884,
"loss": 0.1637,
"step": 17890
},
{
"epoch": 0.8106149805271262,
"grad_norm": 0.3590562343597412,
"learning_rate": 0.00013856648258451842,
"loss": 0.164,
"step": 17900
},
{
"epoch": 0.8110678380581469,
"grad_norm": 0.7209745049476624,
"learning_rate": 0.0001384973845242757,
"loss": 0.1882,
"step": 17910
},
{
"epoch": 0.8115206955891676,
"grad_norm": 0.4454377591609955,
"learning_rate": 0.00013842826487846406,
"loss": 0.2026,
"step": 17920
},
{
"epoch": 0.8119735531201884,
"grad_norm": 0.36102646589279175,
"learning_rate": 0.00013835912368583903,
"loss": 0.1707,
"step": 17930
},
{
"epoch": 0.8124264106512091,
"grad_norm": 0.42702364921569824,
"learning_rate": 0.00013828996098516828,
"loss": 0.1748,
"step": 17940
},
{
"epoch": 0.8128792681822299,
"grad_norm": 0.6162272691726685,
"learning_rate": 0.00013822077681523149,
"loss": 0.164,
"step": 17950
},
{
"epoch": 0.8133321257132506,
"grad_norm": 0.5108441114425659,
"learning_rate": 0.00013815157121482033,
"loss": 0.1623,
"step": 17960
},
{
"epoch": 0.8137849832442714,
"grad_norm": 0.84194016456604,
"learning_rate": 0.00013808234422273857,
"loss": 0.1859,
"step": 17970
},
{
"epoch": 0.814237840775292,
"grad_norm": 0.577024519443512,
"learning_rate": 0.0001380130958778019,
"loss": 0.1862,
"step": 17980
},
{
"epoch": 0.8146906983063128,
"grad_norm": 0.3528417944908142,
"learning_rate": 0.00013794382621883807,
"loss": 0.178,
"step": 17990
},
{
"epoch": 0.8151435558373336,
"grad_norm": 0.46698030829429626,
"learning_rate": 0.00013787453528468674,
"loss": 0.1615,
"step": 18000
},
{
"epoch": 0.8151435558373336,
"eval_chrf": 87.79281231152616,
"eval_loss": 0.14898350834846497,
"eval_runtime": 6.5857,
"eval_samples_per_second": 1.518,
"eval_steps_per_second": 0.152,
"step": 18000
},
{
"epoch": 0.8155964133683543,
"grad_norm": 0.3077142536640167,
"learning_rate": 0.00013780522311419944,
"loss": 0.1648,
"step": 18010
},
{
"epoch": 0.8160492708993751,
"grad_norm": 0.3573880195617676,
"learning_rate": 0.00013773588974623968,
"loss": 0.1724,
"step": 18020
},
{
"epoch": 0.8165021284303958,
"grad_norm": 0.27870693802833557,
"learning_rate": 0.00013766653521968283,
"loss": 0.1714,
"step": 18030
},
{
"epoch": 0.8169549859614166,
"grad_norm": 0.49699628353118896,
"learning_rate": 0.00013759715957341613,
"loss": 0.1805,
"step": 18040
},
{
"epoch": 0.8174078434924372,
"grad_norm": 0.281811386346817,
"learning_rate": 0.00013752776284633867,
"loss": 0.1554,
"step": 18050
},
{
"epoch": 0.817860701023458,
"grad_norm": 0.4690953195095062,
"learning_rate": 0.0001374583450773613,
"loss": 0.1842,
"step": 18060
},
{
"epoch": 0.8183135585544787,
"grad_norm": 0.3380923271179199,
"learning_rate": 0.0001373889063054068,
"loss": 0.1734,
"step": 18070
},
{
"epoch": 0.8187664160854995,
"grad_norm": 0.4177154302597046,
"learning_rate": 0.00013731944656940955,
"loss": 0.1864,
"step": 18080
},
{
"epoch": 0.8192192736165202,
"grad_norm": 0.3774815499782562,
"learning_rate": 0.00013724996590831586,
"loss": 0.1843,
"step": 18090
},
{
"epoch": 0.819672131147541,
"grad_norm": 0.4200029969215393,
"learning_rate": 0.00013718046436108364,
"loss": 0.1555,
"step": 18100
},
{
"epoch": 0.8201249886785618,
"grad_norm": 0.4777076840400696,
"learning_rate": 0.0001371109419666826,
"loss": 0.1599,
"step": 18110
},
{
"epoch": 0.8205778462095825,
"grad_norm": 0.3393424451351166,
"learning_rate": 0.00013704139876409406,
"loss": 0.1785,
"step": 18120
},
{
"epoch": 0.8210307037406032,
"grad_norm": 0.4316195249557495,
"learning_rate": 0.00013697183479231107,
"loss": 0.1823,
"step": 18130
},
{
"epoch": 0.8214835612716239,
"grad_norm": 0.3540068566799164,
"learning_rate": 0.0001369022500903383,
"loss": 0.1555,
"step": 18140
},
{
"epoch": 0.8219364188026447,
"grad_norm": 0.5195394158363342,
"learning_rate": 0.00013683264469719207,
"loss": 0.1789,
"step": 18150
},
{
"epoch": 0.8223892763336654,
"grad_norm": 0.5439714193344116,
"learning_rate": 0.00013676301865190023,
"loss": 0.1614,
"step": 18160
},
{
"epoch": 0.8228421338646862,
"grad_norm": 0.3994079530239105,
"learning_rate": 0.00013669337199350226,
"loss": 0.1764,
"step": 18170
},
{
"epoch": 0.8232949913957069,
"grad_norm": 0.40646135807037354,
"learning_rate": 0.00013662370476104924,
"loss": 0.1548,
"step": 18180
},
{
"epoch": 0.8237478489267277,
"grad_norm": 0.5411052703857422,
"learning_rate": 0.00013655401699360373,
"loss": 0.1716,
"step": 18190
},
{
"epoch": 0.8242007064577483,
"grad_norm": 0.35534271597862244,
"learning_rate": 0.0001364843087302398,
"loss": 0.1652,
"step": 18200
},
{
"epoch": 0.8246535639887691,
"grad_norm": 0.4019007384777069,
"learning_rate": 0.00013641458001004304,
"loss": 0.1776,
"step": 18210
},
{
"epoch": 0.8251064215197899,
"grad_norm": 0.6988927721977234,
"learning_rate": 0.0001363448308721105,
"loss": 0.1747,
"step": 18220
},
{
"epoch": 0.8255592790508106,
"grad_norm": 0.4218527674674988,
"learning_rate": 0.00013627506135555065,
"loss": 0.1903,
"step": 18230
},
{
"epoch": 0.8260121365818314,
"grad_norm": 0.321307510137558,
"learning_rate": 0.00013620527149948343,
"loss": 0.174,
"step": 18240
},
{
"epoch": 0.8264649941128521,
"grad_norm": 0.38911622762680054,
"learning_rate": 0.00013613546134304017,
"loss": 0.185,
"step": 18250
},
{
"epoch": 0.8269178516438729,
"grad_norm": 0.3723275363445282,
"learning_rate": 0.00013606563092536362,
"loss": 0.1894,
"step": 18260
},
{
"epoch": 0.8273707091748935,
"grad_norm": 0.4374692738056183,
"learning_rate": 0.00013599578028560778,
"loss": 0.1571,
"step": 18270
},
{
"epoch": 0.8278235667059143,
"grad_norm": 0.5195155143737793,
"learning_rate": 0.00013592590946293805,
"loss": 0.1629,
"step": 18280
},
{
"epoch": 0.828276424236935,
"grad_norm": 0.38242098689079285,
"learning_rate": 0.00013585601849653125,
"loss": 0.1831,
"step": 18290
},
{
"epoch": 0.8287292817679558,
"grad_norm": 0.43245652318000793,
"learning_rate": 0.0001357861074255753,
"loss": 0.1784,
"step": 18300
},
{
"epoch": 0.8291821392989766,
"grad_norm": 0.40913039445877075,
"learning_rate": 0.00013571617628926956,
"loss": 0.1589,
"step": 18310
},
{
"epoch": 0.8296349968299973,
"grad_norm": 0.3442098796367645,
"learning_rate": 0.00013564622512682453,
"loss": 0.1806,
"step": 18320
},
{
"epoch": 0.8300878543610181,
"grad_norm": 0.549633264541626,
"learning_rate": 0.00013557625397746202,
"loss": 0.1751,
"step": 18330
},
{
"epoch": 0.8305407118920388,
"grad_norm": 0.23728685081005096,
"learning_rate": 0.00013550626288041497,
"loss": 0.1571,
"step": 18340
},
{
"epoch": 0.8309935694230595,
"grad_norm": 0.4054383933544159,
"learning_rate": 0.00013543625187492755,
"loss": 0.1529,
"step": 18350
},
{
"epoch": 0.8314464269540802,
"grad_norm": 0.3019578754901886,
"learning_rate": 0.00013536622100025514,
"loss": 0.1614,
"step": 18360
},
{
"epoch": 0.831899284485101,
"grad_norm": 0.5014357566833496,
"learning_rate": 0.00013529617029566415,
"loss": 0.1703,
"step": 18370
},
{
"epoch": 0.8323521420161217,
"grad_norm": 0.4359058737754822,
"learning_rate": 0.00013522609980043214,
"loss": 0.2027,
"step": 18380
},
{
"epoch": 0.8328049995471425,
"grad_norm": 0.3514244556427002,
"learning_rate": 0.00013515600955384786,
"loss": 0.1515,
"step": 18390
},
{
"epoch": 0.8332578570781632,
"grad_norm": 0.4225936830043793,
"learning_rate": 0.00013508589959521105,
"loss": 0.1937,
"step": 18400
},
{
"epoch": 0.833710714609184,
"grad_norm": 0.5653186440467834,
"learning_rate": 0.0001350157699638325,
"loss": 0.1969,
"step": 18410
},
{
"epoch": 0.8341635721402046,
"grad_norm": 0.3789112865924835,
"learning_rate": 0.00013494562069903408,
"loss": 0.1673,
"step": 18420
},
{
"epoch": 0.8346164296712254,
"grad_norm": 0.3935513496398926,
"learning_rate": 0.0001348754518401486,
"loss": 0.1875,
"step": 18430
},
{
"epoch": 0.8350692872022462,
"grad_norm": 0.34713736176490784,
"learning_rate": 0.0001348052634265199,
"loss": 0.1584,
"step": 18440
},
{
"epoch": 0.8355221447332669,
"grad_norm": 0.395840048789978,
"learning_rate": 0.00013473505549750284,
"loss": 0.1761,
"step": 18450
},
{
"epoch": 0.8359750022642877,
"grad_norm": 0.47905436158180237,
"learning_rate": 0.00013466482809246303,
"loss": 0.1687,
"step": 18460
},
{
"epoch": 0.8364278597953084,
"grad_norm": 0.46361273527145386,
"learning_rate": 0.00013459458125077725,
"loss": 0.1912,
"step": 18470
},
{
"epoch": 0.8368807173263292,
"grad_norm": 0.37687766551971436,
"learning_rate": 0.00013452431501183304,
"loss": 0.1664,
"step": 18480
},
{
"epoch": 0.8373335748573498,
"grad_norm": 0.515882670879364,
"learning_rate": 0.00013445402941502881,
"loss": 0.1706,
"step": 18490
},
{
"epoch": 0.8377864323883706,
"grad_norm": 0.5330377817153931,
"learning_rate": 0.00013438372449977386,
"loss": 0.1863,
"step": 18500
},
{
"epoch": 0.8382392899193913,
"grad_norm": 0.2746626138687134,
"learning_rate": 0.0001343134003054883,
"loss": 0.1675,
"step": 18510
},
{
"epoch": 0.8386921474504121,
"grad_norm": 0.32701629400253296,
"learning_rate": 0.00013424305687160308,
"loss": 0.1727,
"step": 18520
},
{
"epoch": 0.8391450049814329,
"grad_norm": 0.3433604836463928,
"learning_rate": 0.00013417269423755994,
"loss": 0.1805,
"step": 18530
},
{
"epoch": 0.8395978625124536,
"grad_norm": 0.44374939799308777,
"learning_rate": 0.0001341023124428113,
"loss": 0.1723,
"step": 18540
},
{
"epoch": 0.8400507200434744,
"grad_norm": 0.2972099184989929,
"learning_rate": 0.00013403191152682045,
"loss": 0.1763,
"step": 18550
},
{
"epoch": 0.840503577574495,
"grad_norm": 0.4678293764591217,
"learning_rate": 0.0001339614915290613,
"loss": 0.1534,
"step": 18560
},
{
"epoch": 0.8409564351055158,
"grad_norm": 0.5384738445281982,
"learning_rate": 0.00013389105248901853,
"loss": 0.2031,
"step": 18570
},
{
"epoch": 0.8414092926365365,
"grad_norm": 0.4295203387737274,
"learning_rate": 0.00013382059444618744,
"loss": 0.1875,
"step": 18580
},
{
"epoch": 0.8418621501675573,
"grad_norm": 0.31987982988357544,
"learning_rate": 0.00013375011744007402,
"loss": 0.1756,
"step": 18590
},
{
"epoch": 0.842315007698578,
"grad_norm": 0.3537510931491852,
"learning_rate": 0.00013367962151019492,
"loss": 0.1782,
"step": 18600
},
{
"epoch": 0.8427678652295988,
"grad_norm": 0.4680691063404083,
"learning_rate": 0.0001336091066960773,
"loss": 0.1539,
"step": 18610
},
{
"epoch": 0.8432207227606195,
"grad_norm": 0.43693849444389343,
"learning_rate": 0.000133538573037259,
"loss": 0.1569,
"step": 18620
},
{
"epoch": 0.8436735802916403,
"grad_norm": 0.3837292790412903,
"learning_rate": 0.0001334680205732884,
"loss": 0.1864,
"step": 18630
},
{
"epoch": 0.844126437822661,
"grad_norm": 0.5856052041053772,
"learning_rate": 0.00013339744934372444,
"loss": 0.1763,
"step": 18640
},
{
"epoch": 0.8445792953536817,
"grad_norm": 0.2959698438644409,
"learning_rate": 0.00013332685938813657,
"loss": 0.1503,
"step": 18650
},
{
"epoch": 0.8450321528847025,
"grad_norm": 0.39290863275527954,
"learning_rate": 0.00013325625074610465,
"loss": 0.1512,
"step": 18660
},
{
"epoch": 0.8454850104157232,
"grad_norm": 0.400134414434433,
"learning_rate": 0.00013318562345721922,
"loss": 0.1758,
"step": 18670
},
{
"epoch": 0.845937867946744,
"grad_norm": 0.407697468996048,
"learning_rate": 0.0001331149775610811,
"loss": 0.1946,
"step": 18680
},
{
"epoch": 0.8463907254777647,
"grad_norm": 0.497427374124527,
"learning_rate": 0.00013304431309730158,
"loss": 0.2094,
"step": 18690
},
{
"epoch": 0.8468435830087855,
"grad_norm": 0.41305825114250183,
"learning_rate": 0.0001329736301055024,
"loss": 0.1808,
"step": 18700
},
{
"epoch": 0.8472964405398061,
"grad_norm": 0.39067190885543823,
"learning_rate": 0.0001329029286253157,
"loss": 0.1704,
"step": 18710
},
{
"epoch": 0.8477492980708269,
"grad_norm": 0.27752751111984253,
"learning_rate": 0.00013283220869638395,
"loss": 0.1722,
"step": 18720
},
{
"epoch": 0.8482021556018476,
"grad_norm": 0.3473440408706665,
"learning_rate": 0.00013276147035835994,
"loss": 0.165,
"step": 18730
},
{
"epoch": 0.8486550131328684,
"grad_norm": 0.3963128924369812,
"learning_rate": 0.00013269071365090682,
"loss": 0.1722,
"step": 18740
},
{
"epoch": 0.8491078706638892,
"grad_norm": 0.39070188999176025,
"learning_rate": 0.00013261993861369805,
"loss": 0.1658,
"step": 18750
},
{
"epoch": 0.8495607281949099,
"grad_norm": 0.3592500686645508,
"learning_rate": 0.00013254914528641732,
"loss": 0.1706,
"step": 18760
},
{
"epoch": 0.8500135857259307,
"grad_norm": 0.42990413308143616,
"learning_rate": 0.00013247833370875865,
"loss": 0.1676,
"step": 18770
},
{
"epoch": 0.8504664432569513,
"grad_norm": 0.41089728474617004,
"learning_rate": 0.00013240750392042621,
"loss": 0.1692,
"step": 18780
},
{
"epoch": 0.8509193007879721,
"grad_norm": 0.4188067615032196,
"learning_rate": 0.00013233665596113444,
"loss": 0.1655,
"step": 18790
},
{
"epoch": 0.8513721583189928,
"grad_norm": 0.41813355684280396,
"learning_rate": 0.00013226578987060795,
"loss": 0.1775,
"step": 18800
},
{
"epoch": 0.8518250158500136,
"grad_norm": 0.42807725071907043,
"learning_rate": 0.00013219490568858148,
"loss": 0.1607,
"step": 18810
},
{
"epoch": 0.8522778733810343,
"grad_norm": 0.4690854251384735,
"learning_rate": 0.00013212400345480001,
"loss": 0.1721,
"step": 18820
},
{
"epoch": 0.8527307309120551,
"grad_norm": 0.31033626198768616,
"learning_rate": 0.00013205308320901854,
"loss": 0.1679,
"step": 18830
},
{
"epoch": 0.8531835884430758,
"grad_norm": 0.5011230707168579,
"learning_rate": 0.0001319821449910022,
"loss": 0.1763,
"step": 18840
},
{
"epoch": 0.8536364459740966,
"grad_norm": 0.3080917000770569,
"learning_rate": 0.0001319111888405262,
"loss": 0.1638,
"step": 18850
},
{
"epoch": 0.8540893035051172,
"grad_norm": 0.39213791489601135,
"learning_rate": 0.00013184021479737586,
"loss": 0.1598,
"step": 18860
},
{
"epoch": 0.854542161036138,
"grad_norm": 0.38774001598358154,
"learning_rate": 0.00013176922290134645,
"loss": 0.1877,
"step": 18870
},
{
"epoch": 0.8549950185671588,
"grad_norm": 0.3466808497905731,
"learning_rate": 0.00013169821319224326,
"loss": 0.1871,
"step": 18880
},
{
"epoch": 0.8554478760981795,
"grad_norm": 0.2837083637714386,
"learning_rate": 0.00013162718570988166,
"loss": 0.1614,
"step": 18890
},
{
"epoch": 0.8559007336292003,
"grad_norm": 0.48217517137527466,
"learning_rate": 0.00013155614049408684,
"loss": 0.1823,
"step": 18900
},
{
"epoch": 0.856353591160221,
"grad_norm": 0.483885258436203,
"learning_rate": 0.00013148507758469407,
"loss": 0.1734,
"step": 18910
},
{
"epoch": 0.8568064486912418,
"grad_norm": 0.516879677772522,
"learning_rate": 0.00013141399702154848,
"loss": 0.1756,
"step": 18920
},
{
"epoch": 0.8572593062222624,
"grad_norm": 0.39171308279037476,
"learning_rate": 0.00013134289884450504,
"loss": 0.1522,
"step": 18930
},
{
"epoch": 0.8577121637532832,
"grad_norm": 0.4065245985984802,
"learning_rate": 0.0001312717830934287,
"loss": 0.1578,
"step": 18940
},
{
"epoch": 0.8581650212843039,
"grad_norm": 0.29680877923965454,
"learning_rate": 0.00013120064980819422,
"loss": 0.1882,
"step": 18950
},
{
"epoch": 0.8586178788153247,
"grad_norm": 0.29750049114227295,
"learning_rate": 0.00013112949902868617,
"loss": 0.1789,
"step": 18960
},
{
"epoch": 0.8590707363463455,
"grad_norm": 0.324982225894928,
"learning_rate": 0.00013105833079479897,
"loss": 0.1846,
"step": 18970
},
{
"epoch": 0.8595235938773662,
"grad_norm": 0.31095758080482483,
"learning_rate": 0.0001309871451464368,
"loss": 0.1584,
"step": 18980
},
{
"epoch": 0.859976451408387,
"grad_norm": 0.2947046458721161,
"learning_rate": 0.00013091594212351362,
"loss": 0.1834,
"step": 18990
},
{
"epoch": 0.8604293089394076,
"grad_norm": 0.40782859921455383,
"learning_rate": 0.00013084472176595307,
"loss": 0.1716,
"step": 19000
},
{
"epoch": 0.8604293089394076,
"eval_chrf": 75.47261980576427,
"eval_loss": 0.1526792198419571,
"eval_runtime": 26.5755,
"eval_samples_per_second": 0.376,
"eval_steps_per_second": 0.038,
"step": 19000
},
{
"epoch": 0.8608821664704284,
"grad_norm": 0.5305262804031372,
"learning_rate": 0.00013077348411368863,
"loss": 0.1745,
"step": 19010
},
{
"epoch": 0.8613350240014491,
"grad_norm": 0.3732976019382477,
"learning_rate": 0.0001307022292066634,
"loss": 0.1635,
"step": 19020
},
{
"epoch": 0.8617878815324699,
"grad_norm": 0.3454831838607788,
"learning_rate": 0.00013063095708483016,
"loss": 0.1764,
"step": 19030
},
{
"epoch": 0.8622407390634906,
"grad_norm": 0.4817901849746704,
"learning_rate": 0.00013055966778815128,
"loss": 0.1635,
"step": 19040
},
{
"epoch": 0.8626935965945114,
"grad_norm": 0.500036358833313,
"learning_rate": 0.00013048836135659891,
"loss": 0.154,
"step": 19050
},
{
"epoch": 0.8631464541255321,
"grad_norm": 0.2934643030166626,
"learning_rate": 0.00013041703783015472,
"loss": 0.1992,
"step": 19060
},
{
"epoch": 0.8635993116565529,
"grad_norm": 0.38243478536605835,
"learning_rate": 0.00013034569724880993,
"loss": 0.1561,
"step": 19070
},
{
"epoch": 0.8640521691875735,
"grad_norm": 0.25158563256263733,
"learning_rate": 0.0001302743396525654,
"loss": 0.1652,
"step": 19080
},
{
"epoch": 0.8645050267185943,
"grad_norm": 0.426620215177536,
"learning_rate": 0.00013020296508143143,
"loss": 0.1529,
"step": 19090
},
{
"epoch": 0.8649578842496151,
"grad_norm": 0.3732966184616089,
"learning_rate": 0.000130131573575428,
"loss": 0.1948,
"step": 19100
},
{
"epoch": 0.8654107417806358,
"grad_norm": 0.4865129590034485,
"learning_rate": 0.00013006016517458436,
"loss": 0.1756,
"step": 19110
},
{
"epoch": 0.8658635993116566,
"grad_norm": 0.3668878674507141,
"learning_rate": 0.00012998873991893948,
"loss": 0.1642,
"step": 19120
},
{
"epoch": 0.8663164568426773,
"grad_norm": 0.3702840209007263,
"learning_rate": 0.00012991729784854158,
"loss": 0.1598,
"step": 19130
},
{
"epoch": 0.8667693143736981,
"grad_norm": 0.40551936626434326,
"learning_rate": 0.0001298458390034484,
"loss": 0.1824,
"step": 19140
},
{
"epoch": 0.8672221719047187,
"grad_norm": 0.41427895426750183,
"learning_rate": 0.0001297743634237271,
"loss": 0.1862,
"step": 19150
},
{
"epoch": 0.8676750294357395,
"grad_norm": 0.42724162340164185,
"learning_rate": 0.00012970287114945418,
"loss": 0.1645,
"step": 19160
},
{
"epoch": 0.8681278869667602,
"grad_norm": 0.3212936818599701,
"learning_rate": 0.00012963136222071553,
"loss": 0.1619,
"step": 19170
},
{
"epoch": 0.868580744497781,
"grad_norm": 0.4648410975933075,
"learning_rate": 0.00012955983667760636,
"loss": 0.169,
"step": 19180
},
{
"epoch": 0.8690336020288018,
"grad_norm": 0.4471531808376312,
"learning_rate": 0.00012948829456023116,
"loss": 0.1645,
"step": 19190
},
{
"epoch": 0.8694864595598225,
"grad_norm": 0.518792986869812,
"learning_rate": 0.00012941673590870383,
"loss": 0.173,
"step": 19200
},
{
"epoch": 0.8699393170908433,
"grad_norm": 0.40794938802719116,
"learning_rate": 0.00012934516076314737,
"loss": 0.1732,
"step": 19210
},
{
"epoch": 0.8703921746218639,
"grad_norm": 0.37186163663864136,
"learning_rate": 0.00012927356916369424,
"loss": 0.1667,
"step": 19220
},
{
"epoch": 0.8708450321528847,
"grad_norm": 0.2797354757785797,
"learning_rate": 0.0001292019611504859,
"loss": 0.1723,
"step": 19230
},
{
"epoch": 0.8712978896839054,
"grad_norm": 0.43414950370788574,
"learning_rate": 0.00012913033676367316,
"loss": 0.179,
"step": 19240
},
{
"epoch": 0.8717507472149262,
"grad_norm": 0.41452449560165405,
"learning_rate": 0.000129058696043416,
"loss": 0.1482,
"step": 19250
},
{
"epoch": 0.8722036047459469,
"grad_norm": 0.3870978057384491,
"learning_rate": 0.00012898703902988352,
"loss": 0.1714,
"step": 19260
},
{
"epoch": 0.8726564622769677,
"grad_norm": 0.4780474007129669,
"learning_rate": 0.00012891536576325395,
"loss": 0.1803,
"step": 19270
},
{
"epoch": 0.8731093198079884,
"grad_norm": 0.3603726029396057,
"learning_rate": 0.00012884367628371464,
"loss": 0.1755,
"step": 19280
},
{
"epoch": 0.8735621773390091,
"grad_norm": 0.43098974227905273,
"learning_rate": 0.00012877197063146208,
"loss": 0.1659,
"step": 19290
},
{
"epoch": 0.8740150348700298,
"grad_norm": 0.27953872084617615,
"learning_rate": 0.00012870024884670172,
"loss": 0.1511,
"step": 19300
},
{
"epoch": 0.8744678924010506,
"grad_norm": 0.43370550870895386,
"learning_rate": 0.0001286285109696482,
"loss": 0.1984,
"step": 19310
},
{
"epoch": 0.8749207499320714,
"grad_norm": 0.45979931950569153,
"learning_rate": 0.00012855675704052507,
"loss": 0.2006,
"step": 19320
},
{
"epoch": 0.8753736074630921,
"grad_norm": 0.40239641070365906,
"learning_rate": 0.00012848498709956493,
"loss": 0.1909,
"step": 19330
},
{
"epoch": 0.8758264649941129,
"grad_norm": 0.3627179265022278,
"learning_rate": 0.00012841320118700932,
"loss": 0.1714,
"step": 19340
},
{
"epoch": 0.8762793225251336,
"grad_norm": 0.4519731104373932,
"learning_rate": 0.00012834139934310882,
"loss": 0.1651,
"step": 19350
},
{
"epoch": 0.8767321800561544,
"grad_norm": 0.43609437346458435,
"learning_rate": 0.00012826958160812278,
"loss": 0.18,
"step": 19360
},
{
"epoch": 0.877185037587175,
"grad_norm": 0.5031613111495972,
"learning_rate": 0.00012819774802231968,
"loss": 0.1814,
"step": 19370
},
{
"epoch": 0.8776378951181958,
"grad_norm": 0.32297253608703613,
"learning_rate": 0.00012812589862597665,
"loss": 0.162,
"step": 19380
},
{
"epoch": 0.8780907526492165,
"grad_norm": 0.507293701171875,
"learning_rate": 0.0001280540334593799,
"loss": 0.1762,
"step": 19390
},
{
"epoch": 0.8785436101802373,
"grad_norm": 0.24356497824192047,
"learning_rate": 0.0001279821525628244,
"loss": 0.1643,
"step": 19400
},
{
"epoch": 0.8789964677112581,
"grad_norm": 0.5529003739356995,
"learning_rate": 0.00012791025597661386,
"loss": 0.152,
"step": 19410
},
{
"epoch": 0.8794493252422788,
"grad_norm": 0.4408567249774933,
"learning_rate": 0.00012783834374106086,
"loss": 0.1831,
"step": 19420
},
{
"epoch": 0.8799021827732996,
"grad_norm": 0.5482146143913269,
"learning_rate": 0.00012776641589648676,
"loss": 0.1752,
"step": 19430
},
{
"epoch": 0.8803550403043202,
"grad_norm": 0.4384392499923706,
"learning_rate": 0.00012769447248322174,
"loss": 0.1712,
"step": 19440
},
{
"epoch": 0.880807897835341,
"grad_norm": 0.46668869256973267,
"learning_rate": 0.00012762251354160454,
"loss": 0.1736,
"step": 19450
},
{
"epoch": 0.8812607553663617,
"grad_norm": 0.2595170736312866,
"learning_rate": 0.0001275505391119827,
"loss": 0.1724,
"step": 19460
},
{
"epoch": 0.8817136128973825,
"grad_norm": 0.4810440242290497,
"learning_rate": 0.00012747854923471247,
"loss": 0.1842,
"step": 19470
},
{
"epoch": 0.8821664704284032,
"grad_norm": 0.43019989132881165,
"learning_rate": 0.00012740654395015875,
"loss": 0.1953,
"step": 19480
},
{
"epoch": 0.882619327959424,
"grad_norm": 0.4522167146205902,
"learning_rate": 0.00012733452329869504,
"loss": 0.1633,
"step": 19490
},
{
"epoch": 0.8830721854904447,
"grad_norm": 0.34009966254234314,
"learning_rate": 0.00012726248732070346,
"loss": 0.1387,
"step": 19500
},
{
"epoch": 0.8835250430214654,
"grad_norm": 0.3226310908794403,
"learning_rate": 0.00012719043605657478,
"loss": 0.1698,
"step": 19510
},
{
"epoch": 0.8839779005524862,
"grad_norm": 0.5143437385559082,
"learning_rate": 0.0001271183695467083,
"loss": 0.165,
"step": 19520
},
{
"epoch": 0.8844307580835069,
"grad_norm": 0.5247877836227417,
"learning_rate": 0.00012704628783151184,
"loss": 0.182,
"step": 19530
},
{
"epoch": 0.8848836156145277,
"grad_norm": 0.3131117522716522,
"learning_rate": 0.00012697419095140182,
"loss": 0.1618,
"step": 19540
},
{
"epoch": 0.8853364731455484,
"grad_norm": 0.4093931019306183,
"learning_rate": 0.00012690207894680306,
"loss": 0.1672,
"step": 19550
},
{
"epoch": 0.8857893306765692,
"grad_norm": 0.4105209708213806,
"learning_rate": 0.000126829951858149,
"loss": 0.186,
"step": 19560
},
{
"epoch": 0.8862421882075899,
"grad_norm": 0.4243008494377136,
"learning_rate": 0.00012675780972588142,
"loss": 0.1784,
"step": 19570
},
{
"epoch": 0.8866950457386107,
"grad_norm": 0.43030065298080444,
"learning_rate": 0.00012668565259045056,
"loss": 0.1774,
"step": 19580
},
{
"epoch": 0.8871479032696313,
"grad_norm": 0.42522814869880676,
"learning_rate": 0.00012661348049231507,
"loss": 0.1906,
"step": 19590
},
{
"epoch": 0.8876007608006521,
"grad_norm": 0.5001883506774902,
"learning_rate": 0.00012654129347194207,
"loss": 0.1915,
"step": 19600
},
{
"epoch": 0.8880536183316728,
"grad_norm": 0.31651952862739563,
"learning_rate": 0.00012646909156980692,
"loss": 0.1562,
"step": 19610
},
{
"epoch": 0.8885064758626936,
"grad_norm": 0.35776180028915405,
"learning_rate": 0.00012639687482639343,
"loss": 0.1728,
"step": 19620
},
{
"epoch": 0.8889593333937144,
"grad_norm": 0.39580416679382324,
"learning_rate": 0.00012632464328219363,
"loss": 0.1799,
"step": 19630
},
{
"epoch": 0.8894121909247351,
"grad_norm": 0.3365071713924408,
"learning_rate": 0.00012625239697770804,
"loss": 0.1586,
"step": 19640
},
{
"epoch": 0.8898650484557559,
"grad_norm": 0.3606979548931122,
"learning_rate": 0.00012618013595344516,
"loss": 0.1815,
"step": 19650
},
{
"epoch": 0.8903179059867765,
"grad_norm": 0.43666747212409973,
"learning_rate": 0.00012610786024992197,
"loss": 0.1538,
"step": 19660
},
{
"epoch": 0.8907707635177973,
"grad_norm": 0.4219381511211395,
"learning_rate": 0.00012603556990766366,
"loss": 0.1796,
"step": 19670
},
{
"epoch": 0.891223621048818,
"grad_norm": 0.38019394874572754,
"learning_rate": 0.00012596326496720355,
"loss": 0.1694,
"step": 19680
},
{
"epoch": 0.8916764785798388,
"grad_norm": 0.31997179985046387,
"learning_rate": 0.00012589094546908315,
"loss": 0.1744,
"step": 19690
},
{
"epoch": 0.8921293361108595,
"grad_norm": 0.365190714597702,
"learning_rate": 0.0001258186114538522,
"loss": 0.2,
"step": 19700
},
{
"epoch": 0.8925821936418803,
"grad_norm": 0.4236038029193878,
"learning_rate": 0.00012574626296206855,
"loss": 0.159,
"step": 19710
},
{
"epoch": 0.893035051172901,
"grad_norm": 0.5261380672454834,
"learning_rate": 0.00012567390003429812,
"loss": 0.178,
"step": 19720
},
{
"epoch": 0.8934879087039217,
"grad_norm": 0.5068031549453735,
"learning_rate": 0.00012560152271111504,
"loss": 0.1615,
"step": 19730
},
{
"epoch": 0.8939407662349425,
"grad_norm": 0.3815973401069641,
"learning_rate": 0.00012552913103310134,
"loss": 0.1653,
"step": 19740
},
{
"epoch": 0.8943936237659632,
"grad_norm": 0.309400349855423,
"learning_rate": 0.0001254567250408473,
"loss": 0.1759,
"step": 19750
},
{
"epoch": 0.894846481296984,
"grad_norm": 0.3322960138320923,
"learning_rate": 0.000125384304774951,
"loss": 0.1803,
"step": 19760
},
{
"epoch": 0.8952993388280047,
"grad_norm": 0.40339186787605286,
"learning_rate": 0.00012531187027601872,
"loss": 0.1832,
"step": 19770
},
{
"epoch": 0.8957521963590255,
"grad_norm": 0.47105276584625244,
"learning_rate": 0.0001252394215846646,
"loss": 0.1772,
"step": 19780
},
{
"epoch": 0.8962050538900462,
"grad_norm": 0.42406246066093445,
"learning_rate": 0.00012516695874151086,
"loss": 0.1874,
"step": 19790
},
{
"epoch": 0.8966579114210669,
"grad_norm": 0.4142908453941345,
"learning_rate": 0.00012509448178718753,
"loss": 0.1702,
"step": 19800
},
{
"epoch": 0.8971107689520876,
"grad_norm": 0.3621874451637268,
"learning_rate": 0.00012502199076233254,
"loss": 0.149,
"step": 19810
},
{
"epoch": 0.8975636264831084,
"grad_norm": 0.29331645369529724,
"learning_rate": 0.00012494948570759186,
"loss": 0.1819,
"step": 19820
},
{
"epoch": 0.8980164840141291,
"grad_norm": 0.32960814237594604,
"learning_rate": 0.00012487696666361924,
"loss": 0.1852,
"step": 19830
},
{
"epoch": 0.8984693415451499,
"grad_norm": 0.4001091420650482,
"learning_rate": 0.00012480443367107616,
"loss": 0.1491,
"step": 19840
},
{
"epoch": 0.8989221990761707,
"grad_norm": 0.31662559509277344,
"learning_rate": 0.00012473188677063215,
"loss": 0.1681,
"step": 19850
},
{
"epoch": 0.8993750566071914,
"grad_norm": 0.37801915407180786,
"learning_rate": 0.00012465932600296432,
"loss": 0.1531,
"step": 19860
},
{
"epoch": 0.8998279141382122,
"grad_norm": 0.43345585465431213,
"learning_rate": 0.00012458675140875774,
"loss": 0.1482,
"step": 19870
},
{
"epoch": 0.9002807716692328,
"grad_norm": 0.4435916543006897,
"learning_rate": 0.0001245141630287051,
"loss": 0.1631,
"step": 19880
},
{
"epoch": 0.9007336292002536,
"grad_norm": 0.35654473304748535,
"learning_rate": 0.00012444156090350687,
"loss": 0.1535,
"step": 19890
},
{
"epoch": 0.9011864867312743,
"grad_norm": 0.40578678250312805,
"learning_rate": 0.0001243689450738712,
"loss": 0.1862,
"step": 19900
},
{
"epoch": 0.9016393442622951,
"grad_norm": 0.386254221200943,
"learning_rate": 0.000124296315580514,
"loss": 0.164,
"step": 19910
},
{
"epoch": 0.9020922017933158,
"grad_norm": 0.4299822747707367,
"learning_rate": 0.00012422367246415873,
"loss": 0.1852,
"step": 19920
},
{
"epoch": 0.9025450593243366,
"grad_norm": 0.4686278700828552,
"learning_rate": 0.0001241510157655366,
"loss": 0.1773,
"step": 19930
},
{
"epoch": 0.9029979168553574,
"grad_norm": 0.35422420501708984,
"learning_rate": 0.00012407834552538637,
"loss": 0.1786,
"step": 19940
},
{
"epoch": 0.903450774386378,
"grad_norm": 0.3344101905822754,
"learning_rate": 0.00012400566178445436,
"loss": 0.1601,
"step": 19950
},
{
"epoch": 0.9039036319173988,
"grad_norm": 0.4716935157775879,
"learning_rate": 0.00012393296458349456,
"loss": 0.1395,
"step": 19960
},
{
"epoch": 0.9043564894484195,
"grad_norm": 0.3622031509876251,
"learning_rate": 0.0001238602539632684,
"loss": 0.1645,
"step": 19970
},
{
"epoch": 0.9048093469794403,
"grad_norm": 0.4005542993545532,
"learning_rate": 0.00012378752996454494,
"loss": 0.1799,
"step": 19980
},
{
"epoch": 0.905262204510461,
"grad_norm": 0.4990575909614563,
"learning_rate": 0.0001237147926281007,
"loss": 0.1653,
"step": 19990
},
{
"epoch": 0.9057150620414818,
"grad_norm": 0.47105804085731506,
"learning_rate": 0.00012364204199471956,
"loss": 0.1906,
"step": 20000
},
{
"epoch": 0.9057150620414818,
"eval_chrf": 82.21479011673927,
"eval_loss": 0.16131897270679474,
"eval_runtime": 10.4072,
"eval_samples_per_second": 0.961,
"eval_steps_per_second": 0.096,
"step": 20000
},
{
"epoch": 0.9061679195725025,
"grad_norm": 0.3149387240409851,
"learning_rate": 0.00012356927810519306,
"loss": 0.1668,
"step": 20010
},
{
"epoch": 0.9066207771035232,
"grad_norm": 0.3994668126106262,
"learning_rate": 0.00012349650100032004,
"loss": 0.1893,
"step": 20020
},
{
"epoch": 0.9070736346345439,
"grad_norm": 0.3106105327606201,
"learning_rate": 0.0001234237107209068,
"loss": 0.1739,
"step": 20030
},
{
"epoch": 0.9075264921655647,
"grad_norm": 0.3529313802719116,
"learning_rate": 0.000123350907307767,
"loss": 0.1554,
"step": 20040
},
{
"epoch": 0.9079793496965854,
"grad_norm": 0.4536036550998688,
"learning_rate": 0.00012327809080172163,
"loss": 0.1773,
"step": 20050
},
{
"epoch": 0.9084322072276062,
"grad_norm": 0.48275458812713623,
"learning_rate": 0.00012320526124359917,
"loss": 0.1731,
"step": 20060
},
{
"epoch": 0.908885064758627,
"grad_norm": 0.3880382776260376,
"learning_rate": 0.00012313241867423523,
"loss": 0.1476,
"step": 20070
},
{
"epoch": 0.9093379222896477,
"grad_norm": 0.3142046332359314,
"learning_rate": 0.00012305956313447285,
"loss": 0.1439,
"step": 20080
},
{
"epoch": 0.9097907798206685,
"grad_norm": 0.37252524495124817,
"learning_rate": 0.00012298669466516223,
"loss": 0.1763,
"step": 20090
},
{
"epoch": 0.9102436373516891,
"grad_norm": 0.3946685492992401,
"learning_rate": 0.00012291381330716095,
"loss": 0.1838,
"step": 20100
},
{
"epoch": 0.9106964948827099,
"grad_norm": 0.5468018054962158,
"learning_rate": 0.00012284091910133368,
"loss": 0.1552,
"step": 20110
},
{
"epoch": 0.9111493524137306,
"grad_norm": 0.38861414790153503,
"learning_rate": 0.00012276801208855245,
"loss": 0.1724,
"step": 20120
},
{
"epoch": 0.9116022099447514,
"grad_norm": 0.4579319655895233,
"learning_rate": 0.00012269509230969632,
"loss": 0.1714,
"step": 20130
},
{
"epoch": 0.9120550674757721,
"grad_norm": 0.3694315552711487,
"learning_rate": 0.0001226221598056516,
"loss": 0.1831,
"step": 20140
},
{
"epoch": 0.9125079250067929,
"grad_norm": 0.391891747713089,
"learning_rate": 0.00012254921461731167,
"loss": 0.152,
"step": 20150
},
{
"epoch": 0.9129607825378137,
"grad_norm": 0.4429548978805542,
"learning_rate": 0.0001224762567855771,
"loss": 0.1794,
"step": 20160
},
{
"epoch": 0.9134136400688343,
"grad_norm": 0.6164150834083557,
"learning_rate": 0.00012240328635135548,
"loss": 0.18,
"step": 20170
},
{
"epoch": 0.913866497599855,
"grad_norm": 0.3344804644584656,
"learning_rate": 0.00012233030335556157,
"loss": 0.1473,
"step": 20180
},
{
"epoch": 0.9143193551308758,
"grad_norm": 0.34692907333374023,
"learning_rate": 0.00012225730783911698,
"loss": 0.175,
"step": 20190
},
{
"epoch": 0.9147722126618966,
"grad_norm": 0.39275220036506653,
"learning_rate": 0.00012218429984295047,
"loss": 0.1679,
"step": 20200
},
{
"epoch": 0.9152250701929173,
"grad_norm": 0.3784923851490021,
"learning_rate": 0.0001221112794079979,
"loss": 0.1685,
"step": 20210
},
{
"epoch": 0.9156779277239381,
"grad_norm": 0.31384286284446716,
"learning_rate": 0.0001220382465752019,
"loss": 0.1795,
"step": 20220
},
{
"epoch": 0.9161307852549588,
"grad_norm": 0.4809572100639343,
"learning_rate": 0.00012196520138551215,
"loss": 0.165,
"step": 20230
},
{
"epoch": 0.9165836427859795,
"grad_norm": 0.3832622468471527,
"learning_rate": 0.00012189214387988525,
"loss": 0.1566,
"step": 20240
},
{
"epoch": 0.9170365003170002,
"grad_norm": 0.3654673993587494,
"learning_rate": 0.00012181907409928475,
"loss": 0.1637,
"step": 20250
},
{
"epoch": 0.917489357848021,
"grad_norm": 0.3105533719062805,
"learning_rate": 0.00012174599208468099,
"loss": 0.1506,
"step": 20260
},
{
"epoch": 0.9179422153790417,
"grad_norm": 0.3788946866989136,
"learning_rate": 0.0001216728978770512,
"loss": 0.1676,
"step": 20270
},
{
"epoch": 0.9183950729100625,
"grad_norm": 0.45621371269226074,
"learning_rate": 0.00012159979151737952,
"loss": 0.1642,
"step": 20280
},
{
"epoch": 0.9188479304410833,
"grad_norm": 0.39650899171829224,
"learning_rate": 0.00012152667304665683,
"loss": 0.1547,
"step": 20290
},
{
"epoch": 0.919300787972104,
"grad_norm": 0.41694116592407227,
"learning_rate": 0.00012145354250588079,
"loss": 0.1577,
"step": 20300
},
{
"epoch": 0.9197536455031247,
"grad_norm": 0.4147912859916687,
"learning_rate": 0.00012138039993605588,
"loss": 0.1747,
"step": 20310
},
{
"epoch": 0.9202065030341454,
"grad_norm": 0.6738628149032593,
"learning_rate": 0.00012130724537819332,
"loss": 0.1763,
"step": 20320
},
{
"epoch": 0.9206593605651662,
"grad_norm": 0.4486740529537201,
"learning_rate": 0.00012123407887331102,
"loss": 0.1649,
"step": 20330
},
{
"epoch": 0.9211122180961869,
"grad_norm": 0.35589438676834106,
"learning_rate": 0.00012116090046243355,
"loss": 0.1676,
"step": 20340
},
{
"epoch": 0.9215650756272077,
"grad_norm": 0.3210257291793823,
"learning_rate": 0.00012108771018659229,
"loss": 0.1736,
"step": 20350
},
{
"epoch": 0.9220179331582284,
"grad_norm": 0.39245256781578064,
"learning_rate": 0.00012101450808682513,
"loss": 0.1705,
"step": 20360
},
{
"epoch": 0.9224707906892492,
"grad_norm": 0.4380585849285126,
"learning_rate": 0.00012094129420417668,
"loss": 0.1705,
"step": 20370
},
{
"epoch": 0.92292364822027,
"grad_norm": 0.2744285762310028,
"learning_rate": 0.00012086806857969812,
"loss": 0.1471,
"step": 20380
},
{
"epoch": 0.9233765057512906,
"grad_norm": 0.41960641741752625,
"learning_rate": 0.00012079483125444719,
"loss": 0.1819,
"step": 20390
},
{
"epoch": 0.9238293632823114,
"grad_norm": 0.3407396674156189,
"learning_rate": 0.00012072158226948829,
"loss": 0.1854,
"step": 20400
},
{
"epoch": 0.9242822208133321,
"grad_norm": 0.4826781451702118,
"learning_rate": 0.00012064832166589222,
"loss": 0.1484,
"step": 20410
},
{
"epoch": 0.9247350783443529,
"grad_norm": 0.3638167083263397,
"learning_rate": 0.0001205750494847364,
"loss": 0.1497,
"step": 20420
},
{
"epoch": 0.9251879358753736,
"grad_norm": 0.4030158221721649,
"learning_rate": 0.00012050176576710468,
"loss": 0.1573,
"step": 20430
},
{
"epoch": 0.9256407934063944,
"grad_norm": 0.3100288510322571,
"learning_rate": 0.00012042847055408741,
"loss": 0.1558,
"step": 20440
},
{
"epoch": 0.9260936509374151,
"grad_norm": 0.32881560921669006,
"learning_rate": 0.00012035516388678143,
"loss": 0.1667,
"step": 20450
},
{
"epoch": 0.9265465084684358,
"grad_norm": 0.32451504468917847,
"learning_rate": 0.00012028184580628987,
"loss": 0.1761,
"step": 20460
},
{
"epoch": 0.9269993659994565,
"grad_norm": 0.4388659596443176,
"learning_rate": 0.00012020851635372236,
"loss": 0.1648,
"step": 20470
},
{
"epoch": 0.9274522235304773,
"grad_norm": 0.360840767621994,
"learning_rate": 0.00012013517557019494,
"loss": 0.1834,
"step": 20480
},
{
"epoch": 0.927905081061498,
"grad_norm": 0.3814234137535095,
"learning_rate": 0.0001200618234968299,
"loss": 0.1587,
"step": 20490
},
{
"epoch": 0.9283579385925188,
"grad_norm": 0.34749165177345276,
"learning_rate": 0.00011998846017475592,
"loss": 0.1834,
"step": 20500
},
{
"epoch": 0.9288107961235396,
"grad_norm": 0.340725839138031,
"learning_rate": 0.00011991508564510798,
"loss": 0.1534,
"step": 20510
},
{
"epoch": 0.9292636536545603,
"grad_norm": 1.043115496635437,
"learning_rate": 0.00011984169994902736,
"loss": 0.1837,
"step": 20520
},
{
"epoch": 0.929716511185581,
"grad_norm": 0.46120333671569824,
"learning_rate": 0.00011976830312766156,
"loss": 0.1585,
"step": 20530
},
{
"epoch": 0.9301693687166017,
"grad_norm": 0.39356350898742676,
"learning_rate": 0.00011969489522216433,
"loss": 0.1699,
"step": 20540
},
{
"epoch": 0.9306222262476225,
"grad_norm": 0.27802759408950806,
"learning_rate": 0.00011962147627369567,
"loss": 0.1558,
"step": 20550
},
{
"epoch": 0.9310750837786432,
"grad_norm": 0.48346295952796936,
"learning_rate": 0.00011954804632342176,
"loss": 0.1897,
"step": 20560
},
{
"epoch": 0.931527941309664,
"grad_norm": 0.4480477571487427,
"learning_rate": 0.00011947460541251487,
"loss": 0.1599,
"step": 20570
},
{
"epoch": 0.9319807988406847,
"grad_norm": 0.30732083320617676,
"learning_rate": 0.00011940115358215354,
"loss": 0.1808,
"step": 20580
},
{
"epoch": 0.9324336563717055,
"grad_norm": 0.3854066729545593,
"learning_rate": 0.00011932769087352236,
"loss": 0.1765,
"step": 20590
},
{
"epoch": 0.9328865139027263,
"grad_norm": 0.4150194525718689,
"learning_rate": 0.00011925421732781206,
"loss": 0.1785,
"step": 20600
},
{
"epoch": 0.9333393714337469,
"grad_norm": 0.2741736173629761,
"learning_rate": 0.00011918073298621936,
"loss": 0.1724,
"step": 20610
},
{
"epoch": 0.9337922289647677,
"grad_norm": 0.38691046833992004,
"learning_rate": 0.00011910723788994716,
"loss": 0.1639,
"step": 20620
},
{
"epoch": 0.9342450864957884,
"grad_norm": 0.4636295437812805,
"learning_rate": 0.00011903373208020425,
"loss": 0.152,
"step": 20630
},
{
"epoch": 0.9346979440268092,
"grad_norm": 0.43888625502586365,
"learning_rate": 0.0001189602155982056,
"loss": 0.168,
"step": 20640
},
{
"epoch": 0.9351508015578299,
"grad_norm": 0.3848056197166443,
"learning_rate": 0.00011888668848517198,
"loss": 0.1534,
"step": 20650
},
{
"epoch": 0.9356036590888507,
"grad_norm": 0.5369208455085754,
"learning_rate": 0.0001188131507823302,
"loss": 0.15,
"step": 20660
},
{
"epoch": 0.9360565166198714,
"grad_norm": 0.3236347734928131,
"learning_rate": 0.00011873960253091308,
"loss": 0.1584,
"step": 20670
},
{
"epoch": 0.9365093741508921,
"grad_norm": 0.43311768770217896,
"learning_rate": 0.00011866604377215922,
"loss": 0.1673,
"step": 20680
},
{
"epoch": 0.9369622316819128,
"grad_norm": 0.28458085656166077,
"learning_rate": 0.00011859247454731323,
"loss": 0.1377,
"step": 20690
},
{
"epoch": 0.9374150892129336,
"grad_norm": 0.3330056369304657,
"learning_rate": 0.0001185188948976255,
"loss": 0.1327,
"step": 20700
},
{
"epoch": 0.9378679467439544,
"grad_norm": 0.43871769309043884,
"learning_rate": 0.00011844530486435239,
"loss": 0.1771,
"step": 20710
},
{
"epoch": 0.9383208042749751,
"grad_norm": 0.34857964515686035,
"learning_rate": 0.00011837170448875587,
"loss": 0.1577,
"step": 20720
},
{
"epoch": 0.9387736618059959,
"grad_norm": 0.3620039224624634,
"learning_rate": 0.00011829809381210389,
"loss": 0.1583,
"step": 20730
},
{
"epoch": 0.9392265193370166,
"grad_norm": 0.3414750099182129,
"learning_rate": 0.00011822447287567014,
"loss": 0.1608,
"step": 20740
},
{
"epoch": 0.9396793768680373,
"grad_norm": 0.357601523399353,
"learning_rate": 0.000118150841720734,
"loss": 0.1558,
"step": 20750
},
{
"epoch": 0.940132234399058,
"grad_norm": 0.4363787770271301,
"learning_rate": 0.00011807720038858068,
"loss": 0.1757,
"step": 20760
},
{
"epoch": 0.9405850919300788,
"grad_norm": 0.3434285819530487,
"learning_rate": 0.00011800354892050093,
"loss": 0.1641,
"step": 20770
},
{
"epoch": 0.9410379494610995,
"grad_norm": 0.3395892083644867,
"learning_rate": 0.00011792988735779143,
"loss": 0.1491,
"step": 20780
},
{
"epoch": 0.9414908069921203,
"grad_norm": 0.41426268219947815,
"learning_rate": 0.00011785621574175431,
"loss": 0.1848,
"step": 20790
},
{
"epoch": 0.941943664523141,
"grad_norm": 0.39705103635787964,
"learning_rate": 0.0001177825341136974,
"loss": 0.1712,
"step": 20800
},
{
"epoch": 0.9423965220541618,
"grad_norm": 0.5175170302391052,
"learning_rate": 0.00011770884251493417,
"loss": 0.1831,
"step": 20810
},
{
"epoch": 0.9428493795851826,
"grad_norm": 0.3863191306591034,
"learning_rate": 0.00011763514098678366,
"loss": 0.1581,
"step": 20820
},
{
"epoch": 0.9433022371162032,
"grad_norm": 0.4071378707885742,
"learning_rate": 0.0001175614295705705,
"loss": 0.1622,
"step": 20830
},
{
"epoch": 0.943755094647224,
"grad_norm": 0.4960409998893738,
"learning_rate": 0.00011748770830762481,
"loss": 0.1541,
"step": 20840
},
{
"epoch": 0.9442079521782447,
"grad_norm": 0.38787540793418884,
"learning_rate": 0.00011741397723928228,
"loss": 0.1571,
"step": 20850
},
{
"epoch": 0.9446608097092655,
"grad_norm": 0.3468814194202423,
"learning_rate": 0.00011734023640688406,
"loss": 0.1446,
"step": 20860
},
{
"epoch": 0.9451136672402862,
"grad_norm": 0.5620803833007812,
"learning_rate": 0.00011726648585177685,
"loss": 0.1719,
"step": 20870
},
{
"epoch": 0.945566524771307,
"grad_norm": 0.4213656187057495,
"learning_rate": 0.0001171927256153127,
"loss": 0.1607,
"step": 20880
},
{
"epoch": 0.9460193823023277,
"grad_norm": 0.5288670659065247,
"learning_rate": 0.00011711895573884917,
"loss": 0.1607,
"step": 20890
},
{
"epoch": 0.9464722398333484,
"grad_norm": 0.4692736268043518,
"learning_rate": 0.00011704517626374918,
"loss": 0.1741,
"step": 20900
},
{
"epoch": 0.9469250973643691,
"grad_norm": 0.39539778232574463,
"learning_rate": 0.00011697138723138108,
"loss": 0.1605,
"step": 20910
},
{
"epoch": 0.9473779548953899,
"grad_norm": 0.4380393326282501,
"learning_rate": 0.00011689758868311846,
"loss": 0.2162,
"step": 20920
},
{
"epoch": 0.9478308124264107,
"grad_norm": 0.4683678448200226,
"learning_rate": 0.00011682378066034041,
"loss": 0.1818,
"step": 20930
},
{
"epoch": 0.9482836699574314,
"grad_norm": 0.4942455291748047,
"learning_rate": 0.00011674996320443123,
"loss": 0.1823,
"step": 20940
},
{
"epoch": 0.9487365274884522,
"grad_norm": 0.2584671378135681,
"learning_rate": 0.00011667613635678054,
"loss": 0.1601,
"step": 20950
},
{
"epoch": 0.9491893850194729,
"grad_norm": 0.30206599831581116,
"learning_rate": 0.00011660230015878316,
"loss": 0.1674,
"step": 20960
},
{
"epoch": 0.9496422425504936,
"grad_norm": 0.35249218344688416,
"learning_rate": 0.00011652845465183928,
"loss": 0.231,
"step": 20970
},
{
"epoch": 0.9500951000815143,
"grad_norm": 0.40178045630455017,
"learning_rate": 0.00011645459987735423,
"loss": 0.1838,
"step": 20980
},
{
"epoch": 0.9505479576125351,
"grad_norm": 0.2893231511116028,
"learning_rate": 0.0001163807358767385,
"loss": 0.151,
"step": 20990
},
{
"epoch": 0.9510008151435558,
"grad_norm": 0.3375166356563568,
"learning_rate": 0.00011630686269140782,
"loss": 0.168,
"step": 21000
},
{
"epoch": 0.9510008151435558,
"eval_chrf": 82.61639619570752,
"eval_loss": 0.13870181143283844,
"eval_runtime": 7.946,
"eval_samples_per_second": 1.258,
"eval_steps_per_second": 0.126,
"step": 21000
},
{
"epoch": 0.9514536726745766,
"grad_norm": 0.3449011445045471,
"learning_rate": 0.00011623298036278307,
"loss": 0.1653,
"step": 21010
},
{
"epoch": 0.9519065302055973,
"grad_norm": 0.4119865894317627,
"learning_rate": 0.00011615908893229027,
"loss": 0.1471,
"step": 21020
},
{
"epoch": 0.9523593877366181,
"grad_norm": 0.3247711956501007,
"learning_rate": 0.0001160851884413604,
"loss": 0.1378,
"step": 21030
},
{
"epoch": 0.9528122452676387,
"grad_norm": 0.4262673556804657,
"learning_rate": 0.00011601127893142971,
"loss": 0.1623,
"step": 21040
},
{
"epoch": 0.9532651027986595,
"grad_norm": 0.3229232132434845,
"learning_rate": 0.0001159373604439394,
"loss": 0.1606,
"step": 21050
},
{
"epoch": 0.9537179603296803,
"grad_norm": 0.45158863067626953,
"learning_rate": 0.00011586343302033573,
"loss": 0.1705,
"step": 21060
},
{
"epoch": 0.954170817860701,
"grad_norm": 0.4874420762062073,
"learning_rate": 0.00011578949670206998,
"loss": 0.1654,
"step": 21070
},
{
"epoch": 0.9546236753917218,
"grad_norm": 0.4906403720378876,
"learning_rate": 0.00011571555153059841,
"loss": 0.1764,
"step": 21080
},
{
"epoch": 0.9550765329227425,
"grad_norm": 0.44753482937812805,
"learning_rate": 0.00011564159754738223,
"loss": 0.1529,
"step": 21090
},
{
"epoch": 0.9555293904537633,
"grad_norm": 0.5234647989273071,
"learning_rate": 0.00011556763479388763,
"loss": 0.1671,
"step": 21100
},
{
"epoch": 0.955982247984784,
"grad_norm": 0.25665393471717834,
"learning_rate": 0.00011549366331158567,
"loss": 0.1399,
"step": 21110
},
{
"epoch": 0.9564351055158047,
"grad_norm": 0.4347527325153351,
"learning_rate": 0.00011541968314195228,
"loss": 0.1867,
"step": 21120
},
{
"epoch": 0.9568879630468254,
"grad_norm": 0.4241902232170105,
"learning_rate": 0.00011534569432646838,
"loss": 0.1716,
"step": 21130
},
{
"epoch": 0.9573408205778462,
"grad_norm": 0.3855469524860382,
"learning_rate": 0.00011527169690661965,
"loss": 0.1605,
"step": 21140
},
{
"epoch": 0.957793678108867,
"grad_norm": 0.4303516745567322,
"learning_rate": 0.00011519769092389653,
"loss": 0.1919,
"step": 21150
},
{
"epoch": 0.9582465356398877,
"grad_norm": 0.44530144333839417,
"learning_rate": 0.00011512367641979445,
"loss": 0.1452,
"step": 21160
},
{
"epoch": 0.9586993931709085,
"grad_norm": 0.4251921772956848,
"learning_rate": 0.00011504965343581345,
"loss": 0.1952,
"step": 21170
},
{
"epoch": 0.9591522507019292,
"grad_norm": 0.3609083294868469,
"learning_rate": 0.00011497562201345835,
"loss": 0.1586,
"step": 21180
},
{
"epoch": 0.9596051082329499,
"grad_norm": 0.30689379572868347,
"learning_rate": 0.00011490158219423879,
"loss": 0.1575,
"step": 21190
},
{
"epoch": 0.9600579657639706,
"grad_norm": 0.46872153878211975,
"learning_rate": 0.00011482753401966904,
"loss": 0.1583,
"step": 21200
},
{
"epoch": 0.9605108232949914,
"grad_norm": 0.5662645697593689,
"learning_rate": 0.00011475347753126809,
"loss": 0.1746,
"step": 21210
},
{
"epoch": 0.9609636808260121,
"grad_norm": 0.3720380365848541,
"learning_rate": 0.00011467941277055953,
"loss": 0.1445,
"step": 21220
},
{
"epoch": 0.9614165383570329,
"grad_norm": 0.49396568536758423,
"learning_rate": 0.00011460533977907166,
"loss": 0.159,
"step": 21230
},
{
"epoch": 0.9618693958880536,
"grad_norm": 0.3395419120788574,
"learning_rate": 0.0001145312585983374,
"loss": 0.1708,
"step": 21240
},
{
"epoch": 0.9623222534190744,
"grad_norm": 0.42840003967285156,
"learning_rate": 0.00011445716926989421,
"loss": 0.159,
"step": 21250
},
{
"epoch": 0.962775110950095,
"grad_norm": 0.4080982804298401,
"learning_rate": 0.00011438307183528413,
"loss": 0.1438,
"step": 21260
},
{
"epoch": 0.9632279684811158,
"grad_norm": 0.3299719989299774,
"learning_rate": 0.0001143089663360538,
"loss": 0.1622,
"step": 21270
},
{
"epoch": 0.9636808260121366,
"grad_norm": 0.4868001341819763,
"learning_rate": 0.00011423485281375426,
"loss": 0.173,
"step": 21280
},
{
"epoch": 0.9641336835431573,
"grad_norm": 0.8481622934341431,
"learning_rate": 0.0001141607313099412,
"loss": 0.1577,
"step": 21290
},
{
"epoch": 0.9645865410741781,
"grad_norm": 0.4979325234889984,
"learning_rate": 0.00011408660186617467,
"loss": 0.1587,
"step": 21300
},
{
"epoch": 0.9650393986051988,
"grad_norm": 0.4334494173526764,
"learning_rate": 0.00011401246452401923,
"loss": 0.172,
"step": 21310
},
{
"epoch": 0.9654922561362196,
"grad_norm": 0.3916884660720825,
"learning_rate": 0.00011393831932504384,
"loss": 0.178,
"step": 21320
},
{
"epoch": 0.9659451136672403,
"grad_norm": 0.2896512448787689,
"learning_rate": 0.00011386416631082193,
"loss": 0.1622,
"step": 21330
},
{
"epoch": 0.966397971198261,
"grad_norm": 0.48449796438217163,
"learning_rate": 0.00011379000552293117,
"loss": 0.1698,
"step": 21340
},
{
"epoch": 0.9668508287292817,
"grad_norm": 0.43414372205734253,
"learning_rate": 0.00011371583700295371,
"loss": 0.1722,
"step": 21350
},
{
"epoch": 0.9673036862603025,
"grad_norm": 0.39366286993026733,
"learning_rate": 0.00011364166079247604,
"loss": 0.1701,
"step": 21360
},
{
"epoch": 0.9677565437913233,
"grad_norm": 0.3768509328365326,
"learning_rate": 0.00011356747693308891,
"loss": 0.1692,
"step": 21370
},
{
"epoch": 0.968209401322344,
"grad_norm": 0.38022980093955994,
"learning_rate": 0.00011349328546638738,
"loss": 0.1917,
"step": 21380
},
{
"epoch": 0.9686622588533648,
"grad_norm": 0.3759568929672241,
"learning_rate": 0.00011341908643397072,
"loss": 0.1916,
"step": 21390
},
{
"epoch": 0.9691151163843855,
"grad_norm": 0.3977113366127014,
"learning_rate": 0.00011334487987744256,
"loss": 0.175,
"step": 21400
},
{
"epoch": 0.9695679739154062,
"grad_norm": 0.4212728440761566,
"learning_rate": 0.00011327066583841066,
"loss": 0.1672,
"step": 21410
},
{
"epoch": 0.9700208314464269,
"grad_norm": 0.36486679315567017,
"learning_rate": 0.00011319644435848697,
"loss": 0.176,
"step": 21420
},
{
"epoch": 0.9704736889774477,
"grad_norm": 0.46536704897880554,
"learning_rate": 0.00011312221547928766,
"loss": 0.1629,
"step": 21430
},
{
"epoch": 0.9709265465084684,
"grad_norm": 0.35601305961608887,
"learning_rate": 0.000113047979242433,
"loss": 0.1671,
"step": 21440
},
{
"epoch": 0.9713794040394892,
"grad_norm": 0.32921913266181946,
"learning_rate": 0.00011297373568954745,
"loss": 0.1455,
"step": 21450
},
{
"epoch": 0.97183226157051,
"grad_norm": 0.32573631405830383,
"learning_rate": 0.00011289948486225952,
"loss": 0.1607,
"step": 21460
},
{
"epoch": 0.9722851191015307,
"grad_norm": 0.47739171981811523,
"learning_rate": 0.0001128252268022018,
"loss": 0.1556,
"step": 21470
},
{
"epoch": 0.9727379766325513,
"grad_norm": 0.40861859917640686,
"learning_rate": 0.00011275096155101102,
"loss": 0.1824,
"step": 21480
},
{
"epoch": 0.9731908341635721,
"grad_norm": 0.41582047939300537,
"learning_rate": 0.00011267668915032776,
"loss": 0.1548,
"step": 21490
},
{
"epoch": 0.9736436916945929,
"grad_norm": 0.45832404494285583,
"learning_rate": 0.00011260240964179678,
"loss": 0.1585,
"step": 21500
},
{
"epoch": 0.9740965492256136,
"grad_norm": 0.3012300729751587,
"learning_rate": 0.00011252812306706677,
"loss": 0.1486,
"step": 21510
},
{
"epoch": 0.9745494067566344,
"grad_norm": 0.3662928640842438,
"learning_rate": 0.00011245382946779037,
"loss": 0.1588,
"step": 21520
},
{
"epoch": 0.9750022642876551,
"grad_norm": 0.39952388405799866,
"learning_rate": 0.00011237952888562415,
"loss": 0.1885,
"step": 21530
},
{
"epoch": 0.9754551218186759,
"grad_norm": 0.3074231743812561,
"learning_rate": 0.00011230522136222861,
"loss": 0.1519,
"step": 21540
},
{
"epoch": 0.9759079793496965,
"grad_norm": 0.44197362661361694,
"learning_rate": 0.00011223090693926817,
"loss": 0.1674,
"step": 21550
},
{
"epoch": 0.9763608368807173,
"grad_norm": 0.4374144971370697,
"learning_rate": 0.00011215658565841109,
"loss": 0.1624,
"step": 21560
},
{
"epoch": 0.976813694411738,
"grad_norm": 0.4729935824871063,
"learning_rate": 0.00011208225756132944,
"loss": 0.1807,
"step": 21570
},
{
"epoch": 0.9772665519427588,
"grad_norm": 0.2917758822441101,
"learning_rate": 0.0001120079226896992,
"loss": 0.1616,
"step": 21580
},
{
"epoch": 0.9777194094737796,
"grad_norm": 0.3587353527545929,
"learning_rate": 0.00011193358108520005,
"loss": 0.1721,
"step": 21590
},
{
"epoch": 0.9781722670048003,
"grad_norm": 0.3926864266395569,
"learning_rate": 0.00011185923278951556,
"loss": 0.1572,
"step": 21600
},
{
"epoch": 0.9786251245358211,
"grad_norm": 0.36469602584838867,
"learning_rate": 0.0001117848778443329,
"loss": 0.1521,
"step": 21610
},
{
"epoch": 0.9790779820668418,
"grad_norm": 0.39263635873794556,
"learning_rate": 0.00011171051629134308,
"loss": 0.1759,
"step": 21620
},
{
"epoch": 0.9795308395978625,
"grad_norm": 0.33528193831443787,
"learning_rate": 0.00011163614817224082,
"loss": 0.1388,
"step": 21630
},
{
"epoch": 0.9799836971288832,
"grad_norm": 0.48358967900276184,
"learning_rate": 0.00011156177352872446,
"loss": 0.1658,
"step": 21640
},
{
"epoch": 0.980436554659904,
"grad_norm": 0.47192758321762085,
"learning_rate": 0.00011148739240249603,
"loss": 0.1658,
"step": 21650
},
{
"epoch": 0.9808894121909247,
"grad_norm": 0.3485560119152069,
"learning_rate": 0.0001114130048352612,
"loss": 0.1754,
"step": 21660
},
{
"epoch": 0.9813422697219455,
"grad_norm": 0.42553985118865967,
"learning_rate": 0.00011133861086872928,
"loss": 0.1793,
"step": 21670
},
{
"epoch": 0.9817951272529662,
"grad_norm": 0.38573968410491943,
"learning_rate": 0.00011126421054461305,
"loss": 0.1605,
"step": 21680
},
{
"epoch": 0.982247984783987,
"grad_norm": 0.47721385955810547,
"learning_rate": 0.00011118980390462899,
"loss": 0.1615,
"step": 21690
},
{
"epoch": 0.9827008423150076,
"grad_norm": 0.47984451055526733,
"learning_rate": 0.00011111539099049705,
"loss": 0.1595,
"step": 21700
},
{
"epoch": 0.9831536998460284,
"grad_norm": 0.29439032077789307,
"learning_rate": 0.00011104097184394074,
"loss": 0.1643,
"step": 21710
},
{
"epoch": 0.9836065573770492,
"grad_norm": 0.29327526688575745,
"learning_rate": 0.00011096654650668705,
"loss": 0.149,
"step": 21720
},
{
"epoch": 0.9840594149080699,
"grad_norm": 0.4929738938808441,
"learning_rate": 0.00011089211502046634,
"loss": 0.1629,
"step": 21730
},
{
"epoch": 0.9845122724390907,
"grad_norm": 0.3322129547595978,
"learning_rate": 0.00011081767742701263,
"loss": 0.1437,
"step": 21740
},
{
"epoch": 0.9849651299701114,
"grad_norm": 0.4976238012313843,
"learning_rate": 0.0001107432337680632,
"loss": 0.1635,
"step": 21750
},
{
"epoch": 0.9854179875011322,
"grad_norm": 0.3639504611492157,
"learning_rate": 0.00011066878408535872,
"loss": 0.1469,
"step": 21760
},
{
"epoch": 0.9858708450321528,
"grad_norm": 0.45734038949012756,
"learning_rate": 0.00011059432842064336,
"loss": 0.1692,
"step": 21770
},
{
"epoch": 0.9863237025631736,
"grad_norm": 0.44936123490333557,
"learning_rate": 0.00011051986681566458,
"loss": 0.1744,
"step": 21780
},
{
"epoch": 0.9867765600941943,
"grad_norm": 0.40992605686187744,
"learning_rate": 0.00011044539931217312,
"loss": 0.1719,
"step": 21790
},
{
"epoch": 0.9872294176252151,
"grad_norm": 0.36595234274864197,
"learning_rate": 0.00011037092595192309,
"loss": 0.1749,
"step": 21800
},
{
"epoch": 0.9876822751562359,
"grad_norm": 0.36314117908477783,
"learning_rate": 0.00011029644677667188,
"loss": 0.1774,
"step": 21810
},
{
"epoch": 0.9881351326872566,
"grad_norm": 0.47040295600891113,
"learning_rate": 0.0001102219618281801,
"loss": 0.1817,
"step": 21820
},
{
"epoch": 0.9885879902182774,
"grad_norm": 0.3664730489253998,
"learning_rate": 0.00011014747114821167,
"loss": 0.1443,
"step": 21830
},
{
"epoch": 0.9890408477492981,
"grad_norm": 0.3812478184700012,
"learning_rate": 0.00011007297477853365,
"loss": 0.1578,
"step": 21840
},
{
"epoch": 0.9894937052803188,
"grad_norm": 0.4033992290496826,
"learning_rate": 0.00010999847276091632,
"loss": 0.1463,
"step": 21850
},
{
"epoch": 0.9899465628113395,
"grad_norm": 0.37883979082107544,
"learning_rate": 0.00010992396513713315,
"loss": 0.1476,
"step": 21860
},
{
"epoch": 0.9903994203423603,
"grad_norm": 0.39016664028167725,
"learning_rate": 0.00010984945194896073,
"loss": 0.1744,
"step": 21870
},
{
"epoch": 0.990852277873381,
"grad_norm": 0.3429701626300812,
"learning_rate": 0.00010977493323817873,
"loss": 0.1599,
"step": 21880
},
{
"epoch": 0.9913051354044018,
"grad_norm": 0.43505728244781494,
"learning_rate": 0.00010970040904656997,
"loss": 0.1693,
"step": 21890
},
{
"epoch": 0.9917579929354225,
"grad_norm": 0.3216778039932251,
"learning_rate": 0.00010962587941592036,
"loss": 0.1513,
"step": 21900
},
{
"epoch": 0.9922108504664433,
"grad_norm": 0.4504454433917999,
"learning_rate": 0.00010955134438801882,
"loss": 0.1396,
"step": 21910
},
{
"epoch": 0.992663707997464,
"grad_norm": 0.3114122450351715,
"learning_rate": 0.00010947680400465725,
"loss": 0.1453,
"step": 21920
},
{
"epoch": 0.9931165655284847,
"grad_norm": 0.4424838125705719,
"learning_rate": 0.00010940225830763066,
"loss": 0.1713,
"step": 21930
},
{
"epoch": 0.9935694230595055,
"grad_norm": 0.3902185261249542,
"learning_rate": 0.00010932770733873703,
"loss": 0.1813,
"step": 21940
},
{
"epoch": 0.9940222805905262,
"grad_norm": 0.34867456555366516,
"learning_rate": 0.00010925315113977719,
"loss": 0.173,
"step": 21950
},
{
"epoch": 0.994475138121547,
"grad_norm": 0.46827998757362366,
"learning_rate": 0.00010917858975255496,
"loss": 0.1607,
"step": 21960
},
{
"epoch": 0.9949279956525677,
"grad_norm": 0.37731778621673584,
"learning_rate": 0.00010910402321887709,
"loss": 0.1736,
"step": 21970
},
{
"epoch": 0.9953808531835885,
"grad_norm": 0.45258596539497375,
"learning_rate": 0.00010902945158055324,
"loss": 0.1583,
"step": 21980
},
{
"epoch": 0.9958337107146091,
"grad_norm": 0.32490190863609314,
"learning_rate": 0.00010895487487939582,
"loss": 0.1762,
"step": 21990
},
{
"epoch": 0.9962865682456299,
"grad_norm": 0.39621788263320923,
"learning_rate": 0.00010888029315722022,
"loss": 0.1548,
"step": 22000
},
{
"epoch": 0.9962865682456299,
"eval_chrf": 76.49056376513751,
"eval_loss": 0.1375478208065033,
"eval_runtime": 26.6076,
"eval_samples_per_second": 0.376,
"eval_steps_per_second": 0.038,
"step": 22000
},
{
"epoch": 0.9967394257766506,
"grad_norm": 0.35230210423469543,
"learning_rate": 0.00010880570645584452,
"loss": 0.1785,
"step": 22010
},
{
"epoch": 0.9971922833076714,
"grad_norm": 0.37812137603759766,
"learning_rate": 0.00010873111481708969,
"loss": 0.1567,
"step": 22020
},
{
"epoch": 0.9976451408386922,
"grad_norm": 0.4391620457172394,
"learning_rate": 0.0001086565182827794,
"loss": 0.1745,
"step": 22030
},
{
"epoch": 0.9980979983697129,
"grad_norm": 0.32292863726615906,
"learning_rate": 0.00010858191689474013,
"loss": 0.1548,
"step": 22040
},
{
"epoch": 0.9985508559007337,
"grad_norm": 0.3486003279685974,
"learning_rate": 0.00010850731069480102,
"loss": 0.1632,
"step": 22050
},
{
"epoch": 0.9990037134317544,
"grad_norm": 0.4655235707759857,
"learning_rate": 0.00010843269972479396,
"loss": 0.1687,
"step": 22060
},
{
"epoch": 0.9994565709627751,
"grad_norm": 0.36596444249153137,
"learning_rate": 0.00010835808402655341,
"loss": 0.1667,
"step": 22070
},
{
"epoch": 0.9999094284937958,
"grad_norm": 0.3869829475879669,
"learning_rate": 0.00010828346364191661,
"loss": 0.1676,
"step": 22080
},
{
"epoch": 1.0003622860248167,
"grad_norm": 0.28441983461380005,
"learning_rate": 0.00010820883861272339,
"loss": 0.1416,
"step": 22090
},
{
"epoch": 1.0008151435558372,
"grad_norm": 0.3512919545173645,
"learning_rate": 0.00010813420898081616,
"loss": 0.1395,
"step": 22100
},
{
"epoch": 1.001268001086858,
"grad_norm": 0.3900757431983948,
"learning_rate": 0.00010805957478803988,
"loss": 0.1396,
"step": 22110
},
{
"epoch": 1.0017208586178787,
"grad_norm": 0.32627588510513306,
"learning_rate": 0.00010798493607624214,
"loss": 0.1382,
"step": 22120
},
{
"epoch": 1.0021737161488995,
"grad_norm": 0.4319378733634949,
"learning_rate": 0.00010791029288727306,
"loss": 0.1418,
"step": 22130
},
{
"epoch": 1.0026265736799203,
"grad_norm": 0.32595789432525635,
"learning_rate": 0.0001078356452629852,
"loss": 0.1414,
"step": 22140
},
{
"epoch": 1.003079431210941,
"grad_norm": 0.28670698404312134,
"learning_rate": 0.00010776099324523363,
"loss": 0.1513,
"step": 22150
},
{
"epoch": 1.0035322887419618,
"grad_norm": 0.4010210633277893,
"learning_rate": 0.00010768633687587598,
"loss": 0.1329,
"step": 22160
},
{
"epoch": 1.0039851462729825,
"grad_norm": 0.38099223375320435,
"learning_rate": 0.00010761167619677214,
"loss": 0.1422,
"step": 22170
},
{
"epoch": 1.0044380038040033,
"grad_norm": 0.40705400705337524,
"learning_rate": 0.00010753701124978463,
"loss": 0.1401,
"step": 22180
},
{
"epoch": 1.004890861335024,
"grad_norm": 0.932197093963623,
"learning_rate": 0.00010746234207677817,
"loss": 0.1512,
"step": 22190
},
{
"epoch": 1.0053437188660448,
"grad_norm": 0.5024510025978088,
"learning_rate": 0.00010738766871961994,
"loss": 0.1488,
"step": 22200
},
{
"epoch": 1.0057965763970655,
"grad_norm": 0.3365090787410736,
"learning_rate": 0.00010731299122017948,
"loss": 0.1638,
"step": 22210
},
{
"epoch": 1.0062494339280863,
"grad_norm": 0.2693597078323364,
"learning_rate": 0.00010723830962032861,
"loss": 0.1397,
"step": 22220
},
{
"epoch": 1.006702291459107,
"grad_norm": 0.3750072419643402,
"learning_rate": 0.00010716362396194149,
"loss": 0.1546,
"step": 22230
},
{
"epoch": 1.0071551489901278,
"grad_norm": 0.29761919379234314,
"learning_rate": 0.00010708893428689453,
"loss": 0.1396,
"step": 22240
},
{
"epoch": 1.0076080065211483,
"grad_norm": 0.37203189730644226,
"learning_rate": 0.0001070142406370664,
"loss": 0.1436,
"step": 22250
},
{
"epoch": 1.008060864052169,
"grad_norm": 0.35829347372055054,
"learning_rate": 0.00010693954305433795,
"loss": 0.1564,
"step": 22260
},
{
"epoch": 1.0085137215831899,
"grad_norm": 0.3740488588809967,
"learning_rate": 0.00010686484158059234,
"loss": 0.1455,
"step": 22270
},
{
"epoch": 1.0089665791142106,
"grad_norm": 0.3748299777507782,
"learning_rate": 0.00010679013625771484,
"loss": 0.1524,
"step": 22280
},
{
"epoch": 1.0094194366452314,
"grad_norm": 0.5208830237388611,
"learning_rate": 0.00010671542712759285,
"loss": 0.1742,
"step": 22290
},
{
"epoch": 1.0098722941762521,
"grad_norm": 0.4118936359882355,
"learning_rate": 0.00010664071423211599,
"loss": 0.1486,
"step": 22300
},
{
"epoch": 1.0103251517072729,
"grad_norm": 0.3148258924484253,
"learning_rate": 0.00010656599761317591,
"loss": 0.1282,
"step": 22310
},
{
"epoch": 1.0107780092382936,
"grad_norm": 0.3419557213783264,
"learning_rate": 0.00010649127731266641,
"loss": 0.1379,
"step": 22320
},
{
"epoch": 1.0112308667693144,
"grad_norm": 0.40964436531066895,
"learning_rate": 0.00010641655337248335,
"loss": 0.1484,
"step": 22330
},
{
"epoch": 1.0116837243003352,
"grad_norm": 0.32588711380958557,
"learning_rate": 0.00010634182583452456,
"loss": 0.1606,
"step": 22340
},
{
"epoch": 1.012136581831356,
"grad_norm": 0.4060667157173157,
"learning_rate": 0.00010626709474068996,
"loss": 0.1553,
"step": 22350
},
{
"epoch": 1.0125894393623767,
"grad_norm": 0.3781129717826843,
"learning_rate": 0.00010619236013288143,
"loss": 0.1389,
"step": 22360
},
{
"epoch": 1.0130422968933974,
"grad_norm": 0.4548001289367676,
"learning_rate": 0.00010611762205300286,
"loss": 0.1704,
"step": 22370
},
{
"epoch": 1.0134951544244182,
"grad_norm": 0.3514746427536011,
"learning_rate": 0.00010604288054296001,
"loss": 0.1213,
"step": 22380
},
{
"epoch": 1.0139480119554387,
"grad_norm": 0.46039777994155884,
"learning_rate": 0.00010596813564466064,
"loss": 0.1597,
"step": 22390
},
{
"epoch": 1.0144008694864595,
"grad_norm": 0.7013415098190308,
"learning_rate": 0.00010589338740001438,
"loss": 0.1791,
"step": 22400
},
{
"epoch": 1.0148537270174802,
"grad_norm": 0.3029846251010895,
"learning_rate": 0.00010581863585093272,
"loss": 0.1366,
"step": 22410
},
{
"epoch": 1.015306584548501,
"grad_norm": 0.31886354088783264,
"learning_rate": 0.00010574388103932904,
"loss": 0.1605,
"step": 22420
},
{
"epoch": 1.0157594420795217,
"grad_norm": 0.3466900587081909,
"learning_rate": 0.00010566912300711854,
"loss": 0.1679,
"step": 22430
},
{
"epoch": 1.0162122996105425,
"grad_norm": 0.4567810297012329,
"learning_rate": 0.00010559436179621818,
"loss": 0.1647,
"step": 22440
},
{
"epoch": 1.0166651571415632,
"grad_norm": 0.4758337140083313,
"learning_rate": 0.00010551959744854673,
"loss": 0.1779,
"step": 22450
},
{
"epoch": 1.017118014672584,
"grad_norm": 0.28552138805389404,
"learning_rate": 0.00010544483000602477,
"loss": 0.1367,
"step": 22460
},
{
"epoch": 1.0175708722036048,
"grad_norm": 0.5133971571922302,
"learning_rate": 0.00010537005951057454,
"loss": 0.1613,
"step": 22470
},
{
"epoch": 1.0180237297346255,
"grad_norm": 0.4176846444606781,
"learning_rate": 0.00010529528600412005,
"loss": 0.1583,
"step": 22480
},
{
"epoch": 1.0184765872656463,
"grad_norm": 0.39284244179725647,
"learning_rate": 0.00010522050952858692,
"loss": 0.1496,
"step": 22490
},
{
"epoch": 1.018929444796667,
"grad_norm": 0.38390839099884033,
"learning_rate": 0.00010514573012590252,
"loss": 0.1524,
"step": 22500
},
{
"epoch": 1.0193823023276878,
"grad_norm": 0.5051958560943604,
"learning_rate": 0.00010507094783799583,
"loss": 0.1695,
"step": 22510
},
{
"epoch": 1.0198351598587085,
"grad_norm": 0.4255245327949524,
"learning_rate": 0.00010499616270679747,
"loss": 0.1525,
"step": 22520
},
{
"epoch": 1.0202880173897293,
"grad_norm": 0.5033703446388245,
"learning_rate": 0.00010492137477423955,
"loss": 0.1549,
"step": 22530
},
{
"epoch": 1.0207408749207498,
"grad_norm": 0.39385154843330383,
"learning_rate": 0.00010484658408225589,
"loss": 0.1412,
"step": 22540
},
{
"epoch": 1.0211937324517706,
"grad_norm": 0.34952855110168457,
"learning_rate": 0.0001047717906727818,
"loss": 0.158,
"step": 22550
},
{
"epoch": 1.0216465899827913,
"grad_norm": 0.35554689168930054,
"learning_rate": 0.0001046969945877541,
"loss": 0.1451,
"step": 22560
},
{
"epoch": 1.022099447513812,
"grad_norm": 0.313905268907547,
"learning_rate": 0.00010462219586911111,
"loss": 0.1746,
"step": 22570
},
{
"epoch": 1.0225523050448329,
"grad_norm": 0.3594076633453369,
"learning_rate": 0.00010454739455879263,
"loss": 0.1796,
"step": 22580
},
{
"epoch": 1.0230051625758536,
"grad_norm": 0.312707781791687,
"learning_rate": 0.00010447259069873993,
"loss": 0.149,
"step": 22590
},
{
"epoch": 1.0234580201068744,
"grad_norm": 0.3713543713092804,
"learning_rate": 0.0001043977843308957,
"loss": 0.1529,
"step": 22600
},
{
"epoch": 1.0239108776378951,
"grad_norm": 0.8227018713951111,
"learning_rate": 0.00010432297549720407,
"loss": 0.1308,
"step": 22610
},
{
"epoch": 1.0243637351689159,
"grad_norm": 0.38186246156692505,
"learning_rate": 0.00010424816423961046,
"loss": 0.1658,
"step": 22620
},
{
"epoch": 1.0248165926999366,
"grad_norm": 0.4258041977882385,
"learning_rate": 0.00010417335060006177,
"loss": 0.1493,
"step": 22630
},
{
"epoch": 1.0252694502309574,
"grad_norm": 0.3841048777103424,
"learning_rate": 0.00010409853462050611,
"loss": 0.1593,
"step": 22640
},
{
"epoch": 1.0257223077619781,
"grad_norm": 0.2536158561706543,
"learning_rate": 0.000104023716342893,
"loss": 0.1557,
"step": 22650
},
{
"epoch": 1.026175165292999,
"grad_norm": 0.28186845779418945,
"learning_rate": 0.00010394889580917325,
"loss": 0.151,
"step": 22660
},
{
"epoch": 1.0266280228240197,
"grad_norm": 0.40862837433815,
"learning_rate": 0.00010387407306129882,
"loss": 0.1464,
"step": 22670
},
{
"epoch": 1.0270808803550402,
"grad_norm": 0.4918057322502136,
"learning_rate": 0.00010379924814122304,
"loss": 0.1499,
"step": 22680
},
{
"epoch": 1.027533737886061,
"grad_norm": 0.39476755261421204,
"learning_rate": 0.00010372442109090039,
"loss": 0.1311,
"step": 22690
},
{
"epoch": 1.0279865954170817,
"grad_norm": 0.38193678855895996,
"learning_rate": 0.00010364959195228656,
"loss": 0.1567,
"step": 22700
},
{
"epoch": 1.0284394529481025,
"grad_norm": 0.40314528346061707,
"learning_rate": 0.00010357476076733847,
"loss": 0.1545,
"step": 22710
},
{
"epoch": 1.0288923104791232,
"grad_norm": 0.3467330038547516,
"learning_rate": 0.00010349992757801408,
"loss": 0.1469,
"step": 22720
},
{
"epoch": 1.029345168010144,
"grad_norm": 0.2559974789619446,
"learning_rate": 0.00010342509242627252,
"loss": 0.1517,
"step": 22730
},
{
"epoch": 1.0297980255411647,
"grad_norm": 0.5004274845123291,
"learning_rate": 0.00010335025535407403,
"loss": 0.1448,
"step": 22740
},
{
"epoch": 1.0302508830721855,
"grad_norm": 0.30660852789878845,
"learning_rate": 0.00010327541640337996,
"loss": 0.1423,
"step": 22750
},
{
"epoch": 1.0307037406032062,
"grad_norm": 0.22897501289844513,
"learning_rate": 0.00010320057561615262,
"loss": 0.162,
"step": 22760
},
{
"epoch": 1.031156598134227,
"grad_norm": 0.3936751186847687,
"learning_rate": 0.00010312573303435544,
"loss": 0.1315,
"step": 22770
},
{
"epoch": 1.0316094556652478,
"grad_norm": 0.3321261703968048,
"learning_rate": 0.0001030508886999528,
"loss": 0.1582,
"step": 22780
},
{
"epoch": 1.0320623131962685,
"grad_norm": 0.30580833554267883,
"learning_rate": 0.00010297604265491012,
"loss": 0.1694,
"step": 22790
},
{
"epoch": 1.0325151707272893,
"grad_norm": 0.3953670859336853,
"learning_rate": 0.00010290119494119372,
"loss": 0.1545,
"step": 22800
},
{
"epoch": 1.03296802825831,
"grad_norm": 0.41720178723335266,
"learning_rate": 0.00010282634560077087,
"loss": 0.1492,
"step": 22810
},
{
"epoch": 1.0334208857893308,
"grad_norm": 0.3415760397911072,
"learning_rate": 0.00010275149467560978,
"loss": 0.1699,
"step": 22820
},
{
"epoch": 1.0338737433203513,
"grad_norm": 0.5049695372581482,
"learning_rate": 0.00010267664220767954,
"loss": 0.1823,
"step": 22830
},
{
"epoch": 1.034326600851372,
"grad_norm": 0.25039663910865784,
"learning_rate": 0.00010260178823895005,
"loss": 0.1235,
"step": 22840
},
{
"epoch": 1.0347794583823928,
"grad_norm": 0.44545069336891174,
"learning_rate": 0.00010252693281139212,
"loss": 0.1686,
"step": 22850
},
{
"epoch": 1.0352323159134136,
"grad_norm": 0.4185168445110321,
"learning_rate": 0.00010245207596697737,
"loss": 0.1647,
"step": 22860
},
{
"epoch": 1.0356851734444343,
"grad_norm": 0.3525523841381073,
"learning_rate": 0.0001023772177476782,
"loss": 0.1423,
"step": 22870
},
{
"epoch": 1.036138030975455,
"grad_norm": 0.40866556763648987,
"learning_rate": 0.00010230235819546772,
"loss": 0.1598,
"step": 22880
},
{
"epoch": 1.0365908885064758,
"grad_norm": 0.244676873087883,
"learning_rate": 0.00010222749735231993,
"loss": 0.1442,
"step": 22890
},
{
"epoch": 1.0370437460374966,
"grad_norm": 0.47297218441963196,
"learning_rate": 0.00010215263526020943,
"loss": 0.1649,
"step": 22900
},
{
"epoch": 1.0374966035685174,
"grad_norm": 0.44725802540779114,
"learning_rate": 0.00010207777196111156,
"loss": 0.1343,
"step": 22910
},
{
"epoch": 1.0379494610995381,
"grad_norm": 0.4038509726524353,
"learning_rate": 0.00010200290749700234,
"loss": 0.1538,
"step": 22920
},
{
"epoch": 1.0384023186305589,
"grad_norm": 0.34586551785469055,
"learning_rate": 0.00010192804190985845,
"loss": 0.1473,
"step": 22930
},
{
"epoch": 1.0388551761615796,
"grad_norm": 0.3961934447288513,
"learning_rate": 0.0001018531752416572,
"loss": 0.1465,
"step": 22940
},
{
"epoch": 1.0393080336926004,
"grad_norm": 0.46139267086982727,
"learning_rate": 0.00010177830753437645,
"loss": 0.1622,
"step": 22950
},
{
"epoch": 1.0397608912236211,
"grad_norm": 0.4097284972667694,
"learning_rate": 0.00010170343882999473,
"loss": 0.1822,
"step": 22960
},
{
"epoch": 1.040213748754642,
"grad_norm": 0.34919238090515137,
"learning_rate": 0.00010162856917049109,
"loss": 0.1658,
"step": 22970
},
{
"epoch": 1.0406666062856624,
"grad_norm": 0.4870430827140808,
"learning_rate": 0.0001015536985978451,
"loss": 0.1514,
"step": 22980
},
{
"epoch": 1.0411194638166832,
"grad_norm": 0.38768109679222107,
"learning_rate": 0.00010147882715403683,
"loss": 0.1511,
"step": 22990
},
{
"epoch": 1.041572321347704,
"grad_norm": 0.35548967123031616,
"learning_rate": 0.00010140395488104693,
"loss": 0.1608,
"step": 23000
},
{
"epoch": 1.041572321347704,
"eval_chrf": 84.92145032155595,
"eval_loss": 0.14364776015281677,
"eval_runtime": 12.9171,
"eval_samples_per_second": 0.774,
"eval_steps_per_second": 0.077,
"step": 23000
}
],
"logging_steps": 10,
"max_steps": 44164,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.023525553167401e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}