{ "best_global_step": 24000, "best_metric": 85.56038719629122, "best_model_checkpoint": "checkpoints_7B_lora_translated/en-chv-final/checkpoint-24000", "epoch": 0.38206826286296486, "eval_steps": 1000, "global_step": 33000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00011577826147362571, "grad_norm": 2.2461981773376465, "learning_rate": 1.389317690645261e-07, "loss": 3.2022, "step": 10 }, { "epoch": 0.00023155652294725142, "grad_norm": 1.8676271438598633, "learning_rate": 2.9330040135844395e-07, "loss": 3.0958, "step": 20 }, { "epoch": 0.00034733478442087714, "grad_norm": 2.413166046142578, "learning_rate": 4.476690336523618e-07, "loss": 3.2048, "step": 30 }, { "epoch": 0.00046311304589450285, "grad_norm": 2.3978662490844727, "learning_rate": 6.020376659462798e-07, "loss": 3.1917, "step": 40 }, { "epoch": 0.0005788913073681286, "grad_norm": 2.6323177814483643, "learning_rate": 7.564062982401976e-07, "loss": 3.1719, "step": 50 }, { "epoch": 0.0006946695688417543, "grad_norm": 1.8137117624282837, "learning_rate": 9.107749305341156e-07, "loss": 3.2232, "step": 60 }, { "epoch": 0.00081044783031538, "grad_norm": 2.2883598804473877, "learning_rate": 1.0651435628280334e-06, "loss": 3.1677, "step": 70 }, { "epoch": 0.0009262260917890057, "grad_norm": 2.310643434524536, "learning_rate": 1.2195121951219514e-06, "loss": 3.1682, "step": 80 }, { "epoch": 0.0010420043532626314, "grad_norm": 2.4111878871917725, "learning_rate": 1.3738808274158691e-06, "loss": 3.1628, "step": 90 }, { "epoch": 0.0011577826147362571, "grad_norm": 2.883052349090576, "learning_rate": 1.5282494597097871e-06, "loss": 3.1689, "step": 100 }, { "epoch": 0.0012735608762098828, "grad_norm": 2.283472776412964, "learning_rate": 1.6826180920037051e-06, "loss": 3.1567, "step": 110 }, { "epoch": 0.0013893391376835085, "grad_norm": 2.956798791885376, "learning_rate": 1.8369867242976229e-06, "loss": 3.0266, "step": 120 }, { "epoch": 0.0015051173991571343, "grad_norm": 2.6263251304626465, "learning_rate": 1.9913553565915407e-06, "loss": 2.913, "step": 130 }, { "epoch": 0.00162089566063076, "grad_norm": 2.811680316925049, "learning_rate": 2.1457239888854584e-06, "loss": 2.859, "step": 140 }, { "epoch": 0.0017366739221043857, "grad_norm": 2.3774526119232178, "learning_rate": 2.3000926211793766e-06, "loss": 2.8647, "step": 150 }, { "epoch": 0.0018524521835780114, "grad_norm": 2.3617496490478516, "learning_rate": 2.4544612534732944e-06, "loss": 2.7865, "step": 160 }, { "epoch": 0.001968230445051637, "grad_norm": 2.1448798179626465, "learning_rate": 2.608829885767212e-06, "loss": 2.6098, "step": 170 }, { "epoch": 0.002084008706525263, "grad_norm": 1.6232613325119019, "learning_rate": 2.7631985180611304e-06, "loss": 2.5649, "step": 180 }, { "epoch": 0.0021997869679988885, "grad_norm": 1.9341398477554321, "learning_rate": 2.917567150355048e-06, "loss": 2.4696, "step": 190 }, { "epoch": 0.0023155652294725142, "grad_norm": 1.791678547859192, "learning_rate": 3.071935782648966e-06, "loss": 2.4328, "step": 200 }, { "epoch": 0.00243134349094614, "grad_norm": 1.7509756088256836, "learning_rate": 3.2263044149428837e-06, "loss": 2.2921, "step": 210 }, { "epoch": 0.0025471217524197657, "grad_norm": 1.1987025737762451, "learning_rate": 3.3806730472368014e-06, "loss": 2.2021, "step": 220 }, { "epoch": 0.0026629000138933914, "grad_norm": 1.2671940326690674, "learning_rate": 3.5350416795307192e-06, "loss": 2.0922, "step": 230 }, { "epoch": 0.002778678275367017, "grad_norm": 1.0479227304458618, "learning_rate": 3.689410311824638e-06, "loss": 2.1015, "step": 240 }, { "epoch": 0.002894456536840643, "grad_norm": 0.9470033645629883, "learning_rate": 3.843778944118555e-06, "loss": 2.0249, "step": 250 }, { "epoch": 0.0030102347983142685, "grad_norm": 0.8377466797828674, "learning_rate": 3.998147576412473e-06, "loss": 1.9202, "step": 260 }, { "epoch": 0.0031260130597878942, "grad_norm": 0.7731795310974121, "learning_rate": 4.152516208706391e-06, "loss": 1.8559, "step": 270 }, { "epoch": 0.00324179132126152, "grad_norm": 0.776683509349823, "learning_rate": 4.306884841000309e-06, "loss": 1.8088, "step": 280 }, { "epoch": 0.0033575695827351456, "grad_norm": 0.6955391764640808, "learning_rate": 4.461253473294226e-06, "loss": 1.7343, "step": 290 }, { "epoch": 0.0034733478442087714, "grad_norm": 0.6504161953926086, "learning_rate": 4.6156221055881445e-06, "loss": 1.6248, "step": 300 }, { "epoch": 0.003589126105682397, "grad_norm": 0.7929391860961914, "learning_rate": 4.769990737882063e-06, "loss": 1.711, "step": 310 }, { "epoch": 0.0037049043671560228, "grad_norm": 0.8294330835342407, "learning_rate": 4.924359370175981e-06, "loss": 1.6191, "step": 320 }, { "epoch": 0.0038206826286296485, "grad_norm": 0.6876463890075684, "learning_rate": 5.078728002469898e-06, "loss": 1.6912, "step": 330 }, { "epoch": 0.003936460890103274, "grad_norm": 0.6395148038864136, "learning_rate": 5.233096634763816e-06, "loss": 1.5623, "step": 340 }, { "epoch": 0.0040522391515769, "grad_norm": 0.6318981647491455, "learning_rate": 5.387465267057734e-06, "loss": 1.5632, "step": 350 }, { "epoch": 0.004168017413050526, "grad_norm": 0.659462571144104, "learning_rate": 5.541833899351652e-06, "loss": 1.6424, "step": 360 }, { "epoch": 0.004283795674524152, "grad_norm": 0.6242923736572266, "learning_rate": 5.69620253164557e-06, "loss": 1.5602, "step": 370 }, { "epoch": 0.004399573935997777, "grad_norm": 0.7650619745254517, "learning_rate": 5.8505711639394875e-06, "loss": 1.612, "step": 380 }, { "epoch": 0.004515352197471403, "grad_norm": 0.634069561958313, "learning_rate": 6.004939796233406e-06, "loss": 1.4928, "step": 390 }, { "epoch": 0.0046311304589450285, "grad_norm": 0.7291055917739868, "learning_rate": 6.159308428527324e-06, "loss": 1.5118, "step": 400 }, { "epoch": 0.004746908720418655, "grad_norm": 0.7340362071990967, "learning_rate": 6.313677060821241e-06, "loss": 1.5039, "step": 410 }, { "epoch": 0.00486268698189228, "grad_norm": 0.7519592642784119, "learning_rate": 6.4680456931151586e-06, "loss": 1.5023, "step": 420 }, { "epoch": 0.004978465243365906, "grad_norm": 0.875359296798706, "learning_rate": 6.622414325409078e-06, "loss": 1.5312, "step": 430 }, { "epoch": 0.005094243504839531, "grad_norm": 0.7627996206283569, "learning_rate": 6.776782957702996e-06, "loss": 1.4769, "step": 440 }, { "epoch": 0.0052100217663131575, "grad_norm": 0.9831496477127075, "learning_rate": 6.931151589996913e-06, "loss": 1.5039, "step": 450 }, { "epoch": 0.005325800027786783, "grad_norm": 0.6336537003517151, "learning_rate": 7.085520222290831e-06, "loss": 1.4107, "step": 460 }, { "epoch": 0.005441578289260409, "grad_norm": 0.9347900152206421, "learning_rate": 7.239888854584749e-06, "loss": 1.4793, "step": 470 }, { "epoch": 0.005557356550734034, "grad_norm": 0.9351417422294617, "learning_rate": 7.394257486878667e-06, "loss": 1.4556, "step": 480 }, { "epoch": 0.00567313481220766, "grad_norm": 1.0249700546264648, "learning_rate": 7.548626119172584e-06, "loss": 1.5199, "step": 490 }, { "epoch": 0.005788913073681286, "grad_norm": 1.002763032913208, "learning_rate": 7.702994751466502e-06, "loss": 1.4544, "step": 500 }, { "epoch": 0.005904691335154912, "grad_norm": 0.8069408535957336, "learning_rate": 7.85736338376042e-06, "loss": 1.4169, "step": 510 }, { "epoch": 0.006020469596628537, "grad_norm": 0.9878456592559814, "learning_rate": 8.011732016054338e-06, "loss": 1.4122, "step": 520 }, { "epoch": 0.006136247858102163, "grad_norm": 1.0431718826293945, "learning_rate": 8.166100648348255e-06, "loss": 1.3979, "step": 530 }, { "epoch": 0.0062520261195757884, "grad_norm": 0.853024423122406, "learning_rate": 8.320469280642174e-06, "loss": 1.429, "step": 540 }, { "epoch": 0.006367804381049415, "grad_norm": 1.0999716520309448, "learning_rate": 8.474837912936092e-06, "loss": 1.3591, "step": 550 }, { "epoch": 0.00648358264252304, "grad_norm": 1.1147035360336304, "learning_rate": 8.629206545230009e-06, "loss": 1.3853, "step": 560 }, { "epoch": 0.006599360903996666, "grad_norm": 1.2002438306808472, "learning_rate": 8.783575177523926e-06, "loss": 1.3646, "step": 570 }, { "epoch": 0.006715139165470291, "grad_norm": 0.8041489720344543, "learning_rate": 8.937943809817845e-06, "loss": 1.3767, "step": 580 }, { "epoch": 0.0068309174269439174, "grad_norm": 1.0359758138656616, "learning_rate": 9.092312442111765e-06, "loss": 1.3744, "step": 590 }, { "epoch": 0.006946695688417543, "grad_norm": 1.066063642501831, "learning_rate": 9.246681074405682e-06, "loss": 1.3475, "step": 600 }, { "epoch": 0.007062473949891169, "grad_norm": 1.219103217124939, "learning_rate": 9.4010497066996e-06, "loss": 1.4439, "step": 610 }, { "epoch": 0.007178252211364794, "grad_norm": 1.5211087465286255, "learning_rate": 9.555418338993517e-06, "loss": 1.3859, "step": 620 }, { "epoch": 0.00729403047283842, "grad_norm": 1.2016996145248413, "learning_rate": 9.709786971287436e-06, "loss": 1.349, "step": 630 }, { "epoch": 0.0074098087343120456, "grad_norm": 1.355387806892395, "learning_rate": 9.864155603581353e-06, "loss": 1.3666, "step": 640 }, { "epoch": 0.007525586995785672, "grad_norm": 1.2157585620880127, "learning_rate": 1.001852423587527e-05, "loss": 1.3589, "step": 650 }, { "epoch": 0.007641365257259297, "grad_norm": 1.3706347942352295, "learning_rate": 1.0172892868169188e-05, "loss": 1.3604, "step": 660 }, { "epoch": 0.007757143518732923, "grad_norm": 0.9463343024253845, "learning_rate": 1.0327261500463107e-05, "loss": 1.32, "step": 670 }, { "epoch": 0.007872921780206548, "grad_norm": 1.4948500394821167, "learning_rate": 1.0481630132757024e-05, "loss": 1.307, "step": 680 }, { "epoch": 0.007988700041680175, "grad_norm": 1.2556153535842896, "learning_rate": 1.0635998765050941e-05, "loss": 1.3867, "step": 690 }, { "epoch": 0.0081044783031538, "grad_norm": 1.2945194244384766, "learning_rate": 1.079036739734486e-05, "loss": 1.311, "step": 700 }, { "epoch": 0.008220256564627425, "grad_norm": 1.147867202758789, "learning_rate": 1.0944736029638778e-05, "loss": 1.2464, "step": 710 }, { "epoch": 0.008336034826101051, "grad_norm": 1.147763967514038, "learning_rate": 1.1099104661932695e-05, "loss": 1.2917, "step": 720 }, { "epoch": 0.008451813087574677, "grad_norm": 1.3554691076278687, "learning_rate": 1.1253473294226612e-05, "loss": 1.333, "step": 730 }, { "epoch": 0.008567591349048304, "grad_norm": 1.6088576316833496, "learning_rate": 1.1407841926520532e-05, "loss": 1.3111, "step": 740 }, { "epoch": 0.008683369610521928, "grad_norm": 1.7919034957885742, "learning_rate": 1.156221055881445e-05, "loss": 1.2456, "step": 750 }, { "epoch": 0.008799147871995554, "grad_norm": 1.6351501941680908, "learning_rate": 1.1716579191108368e-05, "loss": 1.2741, "step": 760 }, { "epoch": 0.00891492613346918, "grad_norm": 1.5512449741363525, "learning_rate": 1.1870947823402285e-05, "loss": 1.2488, "step": 770 }, { "epoch": 0.009030704394942806, "grad_norm": 1.9897394180297852, "learning_rate": 1.2025316455696203e-05, "loss": 1.2266, "step": 780 }, { "epoch": 0.00914648265641643, "grad_norm": 1.7279316186904907, "learning_rate": 1.2179685087990122e-05, "loss": 1.2552, "step": 790 }, { "epoch": 0.009262260917890057, "grad_norm": 1.7041466236114502, "learning_rate": 1.2334053720284039e-05, "loss": 1.288, "step": 800 }, { "epoch": 0.009378039179363683, "grad_norm": 1.6459987163543701, "learning_rate": 1.2488422352577956e-05, "loss": 1.3163, "step": 810 }, { "epoch": 0.00949381744083731, "grad_norm": 1.9403908252716064, "learning_rate": 1.2642790984871874e-05, "loss": 1.2484, "step": 820 }, { "epoch": 0.009609595702310934, "grad_norm": 2.0014758110046387, "learning_rate": 1.2797159617165794e-05, "loss": 1.2432, "step": 830 }, { "epoch": 0.00972537396378456, "grad_norm": 1.9093619585037231, "learning_rate": 1.295152824945971e-05, "loss": 1.256, "step": 840 }, { "epoch": 0.009841152225258186, "grad_norm": 1.548734188079834, "learning_rate": 1.3105896881753629e-05, "loss": 1.2634, "step": 850 }, { "epoch": 0.009956930486731812, "grad_norm": 2.2801010608673096, "learning_rate": 1.3260265514047546e-05, "loss": 1.2874, "step": 860 }, { "epoch": 0.010072708748205437, "grad_norm": 2.1263022422790527, "learning_rate": 1.3414634146341466e-05, "loss": 1.2556, "step": 870 }, { "epoch": 0.010188487009679063, "grad_norm": 1.7874159812927246, "learning_rate": 1.3569002778635381e-05, "loss": 1.2273, "step": 880 }, { "epoch": 0.010304265271152689, "grad_norm": 1.8987889289855957, "learning_rate": 1.37233714109293e-05, "loss": 1.241, "step": 890 }, { "epoch": 0.010420043532626315, "grad_norm": 1.6198410987854004, "learning_rate": 1.3877740043223218e-05, "loss": 1.2792, "step": 900 }, { "epoch": 0.01053582179409994, "grad_norm": 1.868829369544983, "learning_rate": 1.4032108675517137e-05, "loss": 1.258, "step": 910 }, { "epoch": 0.010651600055573565, "grad_norm": 1.9847484827041626, "learning_rate": 1.4186477307811052e-05, "loss": 1.2298, "step": 920 }, { "epoch": 0.010767378317047192, "grad_norm": 2.0772342681884766, "learning_rate": 1.4340845940104971e-05, "loss": 1.215, "step": 930 }, { "epoch": 0.010883156578520818, "grad_norm": 2.3518121242523193, "learning_rate": 1.4495214572398889e-05, "loss": 1.217, "step": 940 }, { "epoch": 0.010998934839994442, "grad_norm": 2.6899712085723877, "learning_rate": 1.4649583204692808e-05, "loss": 1.2741, "step": 950 }, { "epoch": 0.011114713101468068, "grad_norm": 2.2902116775512695, "learning_rate": 1.4803951836986727e-05, "loss": 1.273, "step": 960 }, { "epoch": 0.011230491362941694, "grad_norm": 1.927801489830017, "learning_rate": 1.4958320469280642e-05, "loss": 1.2444, "step": 970 }, { "epoch": 0.01134626962441532, "grad_norm": 2.3730885982513428, "learning_rate": 1.5112689101574561e-05, "loss": 1.1811, "step": 980 }, { "epoch": 0.011462047885888945, "grad_norm": 2.474961280822754, "learning_rate": 1.526705773386848e-05, "loss": 1.1951, "step": 990 }, { "epoch": 0.011577826147362571, "grad_norm": 2.8506340980529785, "learning_rate": 1.5421426366162398e-05, "loss": 1.1713, "step": 1000 }, { "epoch": 0.011577826147362571, "eval_chrf": 70.673277017744, "eval_loss": 1.8353379964828491, "eval_runtime": 373.8216, "eval_samples_per_second": 0.268, "eval_steps_per_second": 0.011, "step": 1000 }, { "epoch": 0.011693604408836197, "grad_norm": 2.152484893798828, "learning_rate": 1.5575794998456313e-05, "loss": 1.1834, "step": 1010 }, { "epoch": 0.011809382670309823, "grad_norm": 1.8658829927444458, "learning_rate": 1.5730163630750233e-05, "loss": 1.1938, "step": 1020 }, { "epoch": 0.011925160931783448, "grad_norm": 2.368344783782959, "learning_rate": 1.5884532263044148e-05, "loss": 1.1408, "step": 1030 }, { "epoch": 0.012040939193257074, "grad_norm": 2.0136966705322266, "learning_rate": 1.6038900895338067e-05, "loss": 1.187, "step": 1040 }, { "epoch": 0.0121567174547307, "grad_norm": 2.2445461750030518, "learning_rate": 1.6193269527631986e-05, "loss": 1.2375, "step": 1050 }, { "epoch": 0.012272495716204326, "grad_norm": 2.9442145824432373, "learning_rate": 1.6347638159925905e-05, "loss": 1.207, "step": 1060 }, { "epoch": 0.01238827397767795, "grad_norm": 2.0814785957336426, "learning_rate": 1.650200679221982e-05, "loss": 1.1357, "step": 1070 }, { "epoch": 0.012504052239151577, "grad_norm": 1.9440767765045166, "learning_rate": 1.665637542451374e-05, "loss": 1.2145, "step": 1080 }, { "epoch": 0.012619830500625203, "grad_norm": 2.155322551727295, "learning_rate": 1.6810744056807656e-05, "loss": 1.234, "step": 1090 }, { "epoch": 0.01273560876209883, "grad_norm": 2.5245254039764404, "learning_rate": 1.6965112689101575e-05, "loss": 1.1983, "step": 1100 }, { "epoch": 0.012851387023572454, "grad_norm": 2.82124662399292, "learning_rate": 1.711948132139549e-05, "loss": 1.1659, "step": 1110 }, { "epoch": 0.01296716528504608, "grad_norm": 2.253861665725708, "learning_rate": 1.727384995368941e-05, "loss": 1.1145, "step": 1120 }, { "epoch": 0.013082943546519706, "grad_norm": 2.3893697261810303, "learning_rate": 1.7428218585983332e-05, "loss": 1.2086, "step": 1130 }, { "epoch": 0.013198721807993332, "grad_norm": 2.7133824825286865, "learning_rate": 1.7582587218277247e-05, "loss": 1.1192, "step": 1140 }, { "epoch": 0.013314500069466956, "grad_norm": 2.9232800006866455, "learning_rate": 1.7736955850571167e-05, "loss": 1.1828, "step": 1150 }, { "epoch": 0.013430278330940583, "grad_norm": 2.678086280822754, "learning_rate": 1.7891324482865082e-05, "loss": 1.1349, "step": 1160 }, { "epoch": 0.013546056592414209, "grad_norm": 2.729440689086914, "learning_rate": 1.8045693115159e-05, "loss": 1.1445, "step": 1170 }, { "epoch": 0.013661834853887835, "grad_norm": 2.464496374130249, "learning_rate": 1.8200061747452917e-05, "loss": 1.1899, "step": 1180 }, { "epoch": 0.01377761311536146, "grad_norm": 2.538877487182617, "learning_rate": 1.8354430379746836e-05, "loss": 1.1528, "step": 1190 }, { "epoch": 0.013893391376835085, "grad_norm": 1.8316330909729004, "learning_rate": 1.8508799012040755e-05, "loss": 1.1342, "step": 1200 }, { "epoch": 0.014009169638308712, "grad_norm": 2.8184266090393066, "learning_rate": 1.8663167644334674e-05, "loss": 1.1521, "step": 1210 }, { "epoch": 0.014124947899782338, "grad_norm": 2.5736935138702393, "learning_rate": 1.881753627662859e-05, "loss": 1.0831, "step": 1220 }, { "epoch": 0.014240726161255962, "grad_norm": 2.4134764671325684, "learning_rate": 1.897190490892251e-05, "loss": 1.1569, "step": 1230 }, { "epoch": 0.014356504422729588, "grad_norm": 2.6627583503723145, "learning_rate": 1.9126273541216424e-05, "loss": 1.168, "step": 1240 }, { "epoch": 0.014472282684203214, "grad_norm": 2.3337690830230713, "learning_rate": 1.9280642173510343e-05, "loss": 1.0915, "step": 1250 }, { "epoch": 0.01458806094567684, "grad_norm": 2.935228109359741, "learning_rate": 1.943501080580426e-05, "loss": 1.154, "step": 1260 }, { "epoch": 0.014703839207150465, "grad_norm": 2.7111053466796875, "learning_rate": 1.9589379438098178e-05, "loss": 1.0839, "step": 1270 }, { "epoch": 0.014819617468624091, "grad_norm": 1.9878060817718506, "learning_rate": 1.9743748070392097e-05, "loss": 1.1184, "step": 1280 }, { "epoch": 0.014935395730097717, "grad_norm": 2.0981931686401367, "learning_rate": 1.9898116702686016e-05, "loss": 1.1044, "step": 1290 }, { "epoch": 0.015051173991571343, "grad_norm": 2.7092490196228027, "learning_rate": 2.0052485334979935e-05, "loss": 1.0753, "step": 1300 }, { "epoch": 0.015166952253044968, "grad_norm": 2.7931082248687744, "learning_rate": 2.020685396727385e-05, "loss": 1.1314, "step": 1310 }, { "epoch": 0.015282730514518594, "grad_norm": 2.5221099853515625, "learning_rate": 2.036122259956777e-05, "loss": 1.0728, "step": 1320 }, { "epoch": 0.01539850877599222, "grad_norm": 2.549616813659668, "learning_rate": 2.0515591231861686e-05, "loss": 1.1195, "step": 1330 }, { "epoch": 0.015514287037465846, "grad_norm": 3.0201480388641357, "learning_rate": 2.0669959864155605e-05, "loss": 1.0502, "step": 1340 }, { "epoch": 0.015630065298939472, "grad_norm": 3.0371510982513428, "learning_rate": 2.082432849644952e-05, "loss": 1.0779, "step": 1350 }, { "epoch": 0.015745843560413097, "grad_norm": 3.3051536083221436, "learning_rate": 2.097869712874344e-05, "loss": 1.0712, "step": 1360 }, { "epoch": 0.01586162182188672, "grad_norm": 3.4807722568511963, "learning_rate": 2.113306576103736e-05, "loss": 1.169, "step": 1370 }, { "epoch": 0.01597740008336035, "grad_norm": 2.8707168102264404, "learning_rate": 2.1287434393331277e-05, "loss": 1.01, "step": 1380 }, { "epoch": 0.016093178344833974, "grad_norm": 3.265197515487671, "learning_rate": 2.1441803025625193e-05, "loss": 1.1368, "step": 1390 }, { "epoch": 0.0162089566063076, "grad_norm": 2.2836358547210693, "learning_rate": 2.1596171657919112e-05, "loss": 1.0972, "step": 1400 }, { "epoch": 0.016324734867781226, "grad_norm": 3.0558969974517822, "learning_rate": 2.1750540290213028e-05, "loss": 1.0455, "step": 1410 }, { "epoch": 0.01644051312925485, "grad_norm": 2.7946484088897705, "learning_rate": 2.1904908922506947e-05, "loss": 1.1051, "step": 1420 }, { "epoch": 0.016556291390728478, "grad_norm": 2.8424155712127686, "learning_rate": 2.2059277554800866e-05, "loss": 1.0662, "step": 1430 }, { "epoch": 0.016672069652202103, "grad_norm": 2.8506076335906982, "learning_rate": 2.2213646187094785e-05, "loss": 1.0649, "step": 1440 }, { "epoch": 0.016787847913675727, "grad_norm": 3.030529260635376, "learning_rate": 2.2368014819388704e-05, "loss": 1.0523, "step": 1450 }, { "epoch": 0.016903626175149355, "grad_norm": 2.6668131351470947, "learning_rate": 2.252238345168262e-05, "loss": 1.0682, "step": 1460 }, { "epoch": 0.01701940443662298, "grad_norm": 2.3751957416534424, "learning_rate": 2.267675208397654e-05, "loss": 1.0783, "step": 1470 }, { "epoch": 0.017135182698096607, "grad_norm": 2.4701077938079834, "learning_rate": 2.2831120716270454e-05, "loss": 1.1051, "step": 1480 }, { "epoch": 0.01725096095957023, "grad_norm": 3.0305330753326416, "learning_rate": 2.2985489348564373e-05, "loss": 1.1174, "step": 1490 }, { "epoch": 0.017366739221043856, "grad_norm": 2.4100780487060547, "learning_rate": 2.313985798085829e-05, "loss": 1.0384, "step": 1500 }, { "epoch": 0.017482517482517484, "grad_norm": 3.0715460777282715, "learning_rate": 2.3294226613152208e-05, "loss": 1.0816, "step": 1510 }, { "epoch": 0.017598295743991108, "grad_norm": 3.546391248703003, "learning_rate": 2.3448595245446127e-05, "loss": 1.0105, "step": 1520 }, { "epoch": 0.017714074005464733, "grad_norm": 3.3128504753112793, "learning_rate": 2.3602963877740046e-05, "loss": 1.0383, "step": 1530 }, { "epoch": 0.01782985226693836, "grad_norm": 2.636296510696411, "learning_rate": 2.3757332510033962e-05, "loss": 1.0495, "step": 1540 }, { "epoch": 0.017945630528411985, "grad_norm": 3.293440580368042, "learning_rate": 2.391170114232788e-05, "loss": 1.0215, "step": 1550 }, { "epoch": 0.018061408789885613, "grad_norm": 3.3361904621124268, "learning_rate": 2.4066069774621796e-05, "loss": 1.0356, "step": 1560 }, { "epoch": 0.018177187051359237, "grad_norm": 3.039552688598633, "learning_rate": 2.4220438406915715e-05, "loss": 1.0869, "step": 1570 }, { "epoch": 0.01829296531283286, "grad_norm": 2.745058298110962, "learning_rate": 2.4374807039209634e-05, "loss": 1.0952, "step": 1580 }, { "epoch": 0.01840874357430649, "grad_norm": 2.920832395553589, "learning_rate": 2.452917567150355e-05, "loss": 1.0461, "step": 1590 }, { "epoch": 0.018524521835780114, "grad_norm": 3.1426374912261963, "learning_rate": 2.468354430379747e-05, "loss": 1.0186, "step": 1600 }, { "epoch": 0.01864030009725374, "grad_norm": 3.40806245803833, "learning_rate": 2.4837912936091388e-05, "loss": 1.0087, "step": 1610 }, { "epoch": 0.018756078358727366, "grad_norm": 3.004892349243164, "learning_rate": 2.4992281568385307e-05, "loss": 1.0264, "step": 1620 }, { "epoch": 0.01887185662020099, "grad_norm": 3.021807909011841, "learning_rate": 2.5146650200679223e-05, "loss": 1.0895, "step": 1630 }, { "epoch": 0.01898763488167462, "grad_norm": 3.2554380893707275, "learning_rate": 2.530101883297314e-05, "loss": 1.0906, "step": 1640 }, { "epoch": 0.019103413143148243, "grad_norm": 3.3182787895202637, "learning_rate": 2.545538746526706e-05, "loss": 1.0478, "step": 1650 }, { "epoch": 0.019219191404621867, "grad_norm": 3.3247146606445312, "learning_rate": 2.5609756097560977e-05, "loss": 1.0827, "step": 1660 }, { "epoch": 0.019334969666095495, "grad_norm": 2.731297731399536, "learning_rate": 2.5764124729854892e-05, "loss": 1.0529, "step": 1670 }, { "epoch": 0.01945074792756912, "grad_norm": 3.013205051422119, "learning_rate": 2.591849336214881e-05, "loss": 1.0359, "step": 1680 }, { "epoch": 0.019566526189042744, "grad_norm": 3.552138328552246, "learning_rate": 2.6072861994442734e-05, "loss": 1.0454, "step": 1690 }, { "epoch": 0.019682304450516372, "grad_norm": 3.9224207401275635, "learning_rate": 2.622723062673665e-05, "loss": 1.0597, "step": 1700 }, { "epoch": 0.019798082711989996, "grad_norm": 3.3173727989196777, "learning_rate": 2.6381599259030565e-05, "loss": 1.049, "step": 1710 }, { "epoch": 0.019913860973463624, "grad_norm": 3.405595541000366, "learning_rate": 2.6535967891324488e-05, "loss": 1.0833, "step": 1720 }, { "epoch": 0.02002963923493725, "grad_norm": 2.7375831604003906, "learning_rate": 2.6690336523618403e-05, "loss": 1.003, "step": 1730 }, { "epoch": 0.020145417496410873, "grad_norm": 3.2522525787353516, "learning_rate": 2.684470515591232e-05, "loss": 1.0083, "step": 1740 }, { "epoch": 0.0202611957578845, "grad_norm": 4.265419960021973, "learning_rate": 2.6999073788206234e-05, "loss": 1.0428, "step": 1750 }, { "epoch": 0.020376974019358125, "grad_norm": 3.507509708404541, "learning_rate": 2.7153442420500157e-05, "loss": 1.0111, "step": 1760 }, { "epoch": 0.02049275228083175, "grad_norm": 3.173712968826294, "learning_rate": 2.7307811052794076e-05, "loss": 1.0135, "step": 1770 }, { "epoch": 0.020608530542305378, "grad_norm": 2.615588903427124, "learning_rate": 2.746217968508799e-05, "loss": 1.0133, "step": 1780 }, { "epoch": 0.020724308803779002, "grad_norm": 3.384016990661621, "learning_rate": 2.7616548317381907e-05, "loss": 0.9682, "step": 1790 }, { "epoch": 0.02084008706525263, "grad_norm": 3.2637808322906494, "learning_rate": 2.777091694967583e-05, "loss": 1.0344, "step": 1800 }, { "epoch": 0.020955865326726254, "grad_norm": 3.193427562713623, "learning_rate": 2.7925285581969745e-05, "loss": 0.9591, "step": 1810 }, { "epoch": 0.02107164358819988, "grad_norm": 3.3400566577911377, "learning_rate": 2.807965421426366e-05, "loss": 0.988, "step": 1820 }, { "epoch": 0.021187421849673507, "grad_norm": 4.1463189125061035, "learning_rate": 2.823402284655758e-05, "loss": 1.0367, "step": 1830 }, { "epoch": 0.02130320011114713, "grad_norm": 3.716966152191162, "learning_rate": 2.83883914788515e-05, "loss": 1.0261, "step": 1840 }, { "epoch": 0.021418978372620755, "grad_norm": 3.9861702919006348, "learning_rate": 2.8542760111145418e-05, "loss": 0.9644, "step": 1850 }, { "epoch": 0.021534756634094383, "grad_norm": 3.443084716796875, "learning_rate": 2.8697128743439334e-05, "loss": 0.9853, "step": 1860 }, { "epoch": 0.021650534895568008, "grad_norm": 3.5585291385650635, "learning_rate": 2.885149737573325e-05, "loss": 1.003, "step": 1870 }, { "epoch": 0.021766313157041636, "grad_norm": 3.945314645767212, "learning_rate": 2.9005866008027172e-05, "loss": 0.9488, "step": 1880 }, { "epoch": 0.02188209141851526, "grad_norm": 2.5524964332580566, "learning_rate": 2.9160234640321088e-05, "loss": 0.9994, "step": 1890 }, { "epoch": 0.021997869679988884, "grad_norm": 3.669727325439453, "learning_rate": 2.9314603272615003e-05, "loss": 1.0113, "step": 1900 }, { "epoch": 0.022113647941462512, "grad_norm": 3.417371988296509, "learning_rate": 2.9468971904908926e-05, "loss": 1.0186, "step": 1910 }, { "epoch": 0.022229426202936137, "grad_norm": 3.546546459197998, "learning_rate": 2.962334053720284e-05, "loss": 1.0105, "step": 1920 }, { "epoch": 0.02234520446440976, "grad_norm": 3.285242795944214, "learning_rate": 2.977770916949676e-05, "loss": 0.9804, "step": 1930 }, { "epoch": 0.02246098272588339, "grad_norm": 3.5312914848327637, "learning_rate": 2.9932077801790676e-05, "loss": 1.0216, "step": 1940 }, { "epoch": 0.022576760987357013, "grad_norm": 4.406423568725586, "learning_rate": 3.00864464340846e-05, "loss": 1.0774, "step": 1950 }, { "epoch": 0.02269253924883064, "grad_norm": 3.41166353225708, "learning_rate": 3.0240815066378514e-05, "loss": 1.0042, "step": 1960 }, { "epoch": 0.022808317510304266, "grad_norm": 3.5296311378479004, "learning_rate": 3.039518369867243e-05, "loss": 0.9685, "step": 1970 }, { "epoch": 0.02292409577177789, "grad_norm": 3.5020294189453125, "learning_rate": 3.0549552330966345e-05, "loss": 1.0113, "step": 1980 }, { "epoch": 0.023039874033251518, "grad_norm": 3.420305013656616, "learning_rate": 3.070392096326027e-05, "loss": 1.0196, "step": 1990 }, { "epoch": 0.023155652294725142, "grad_norm": 3.3157858848571777, "learning_rate": 3.0858289595554183e-05, "loss": 0.9866, "step": 2000 }, { "epoch": 0.023155652294725142, "eval_chrf": 66.63596109779625, "eval_loss": 1.5073097944259644, "eval_runtime": 360.1486, "eval_samples_per_second": 0.278, "eval_steps_per_second": 0.011, "step": 2000 }, { "epoch": 0.023271430556198767, "grad_norm": 3.2328410148620605, "learning_rate": 3.10126582278481e-05, "loss": 1.0005, "step": 2010 }, { "epoch": 0.023387208817672395, "grad_norm": 2.708080768585205, "learning_rate": 3.1167026860142015e-05, "loss": 1.019, "step": 2020 }, { "epoch": 0.02350298707914602, "grad_norm": 3.409646511077881, "learning_rate": 3.132139549243594e-05, "loss": 1.0132, "step": 2030 }, { "epoch": 0.023618765340619647, "grad_norm": 3.2843925952911377, "learning_rate": 3.147576412472986e-05, "loss": 0.9537, "step": 2040 }, { "epoch": 0.02373454360209327, "grad_norm": 3.7965049743652344, "learning_rate": 3.163013275702377e-05, "loss": 0.9789, "step": 2050 }, { "epoch": 0.023850321863566896, "grad_norm": 3.3306093215942383, "learning_rate": 3.17845013893177e-05, "loss": 1.0042, "step": 2060 }, { "epoch": 0.023966100125040524, "grad_norm": 2.9587419033050537, "learning_rate": 3.193887002161161e-05, "loss": 0.9922, "step": 2070 }, { "epoch": 0.024081878386514148, "grad_norm": 3.425163984298706, "learning_rate": 3.209323865390553e-05, "loss": 0.9196, "step": 2080 }, { "epoch": 0.024197656647987772, "grad_norm": 2.6284561157226562, "learning_rate": 3.224760728619944e-05, "loss": 0.9698, "step": 2090 }, { "epoch": 0.0243134349094614, "grad_norm": 3.568027973175049, "learning_rate": 3.240197591849337e-05, "loss": 0.9104, "step": 2100 }, { "epoch": 0.024429213170935025, "grad_norm": 3.238100528717041, "learning_rate": 3.255634455078728e-05, "loss": 1.0187, "step": 2110 }, { "epoch": 0.024544991432408653, "grad_norm": 3.090458631515503, "learning_rate": 3.27107131830812e-05, "loss": 0.961, "step": 2120 }, { "epoch": 0.024660769693882277, "grad_norm": 3.2551889419555664, "learning_rate": 3.286508181537512e-05, "loss": 0.9695, "step": 2130 }, { "epoch": 0.0247765479553559, "grad_norm": 3.4830145835876465, "learning_rate": 3.3019450447669036e-05, "loss": 0.9376, "step": 2140 }, { "epoch": 0.02489232621682953, "grad_norm": 3.2776224613189697, "learning_rate": 3.3173819079962956e-05, "loss": 0.9539, "step": 2150 }, { "epoch": 0.025008104478303154, "grad_norm": 3.374253273010254, "learning_rate": 3.332818771225687e-05, "loss": 0.9837, "step": 2160 }, { "epoch": 0.025123882739776778, "grad_norm": 3.049576997756958, "learning_rate": 3.348255634455079e-05, "loss": 0.8857, "step": 2170 }, { "epoch": 0.025239661001250406, "grad_norm": 3.490880250930786, "learning_rate": 3.3636924976844706e-05, "loss": 1.0045, "step": 2180 }, { "epoch": 0.02535543926272403, "grad_norm": 3.2462692260742188, "learning_rate": 3.3791293609138625e-05, "loss": 0.9645, "step": 2190 }, { "epoch": 0.02547121752419766, "grad_norm": 3.759615659713745, "learning_rate": 3.3945662241432544e-05, "loss": 0.9464, "step": 2200 }, { "epoch": 0.025586995785671283, "grad_norm": 3.589405059814453, "learning_rate": 3.410003087372646e-05, "loss": 0.9054, "step": 2210 }, { "epoch": 0.025702774047144907, "grad_norm": 4.128746032714844, "learning_rate": 3.425439950602038e-05, "loss": 0.9756, "step": 2220 }, { "epoch": 0.025818552308618535, "grad_norm": 3.282271385192871, "learning_rate": 3.4408768138314294e-05, "loss": 1.0265, "step": 2230 }, { "epoch": 0.02593433057009216, "grad_norm": 3.7959372997283936, "learning_rate": 3.456313677060821e-05, "loss": 0.9413, "step": 2240 }, { "epoch": 0.026050108831565784, "grad_norm": 4.119252681732178, "learning_rate": 3.471750540290213e-05, "loss": 1.0083, "step": 2250 }, { "epoch": 0.026165887093039412, "grad_norm": 3.5423946380615234, "learning_rate": 3.487187403519605e-05, "loss": 0.9939, "step": 2260 }, { "epoch": 0.026281665354513036, "grad_norm": 4.1071882247924805, "learning_rate": 3.5026242667489964e-05, "loss": 0.9486, "step": 2270 }, { "epoch": 0.026397443615986664, "grad_norm": 2.39046573638916, "learning_rate": 3.518061129978388e-05, "loss": 0.9348, "step": 2280 }, { "epoch": 0.02651322187746029, "grad_norm": 3.1564602851867676, "learning_rate": 3.533497993207781e-05, "loss": 0.9627, "step": 2290 }, { "epoch": 0.026629000138933913, "grad_norm": 3.6290247440338135, "learning_rate": 3.548934856437172e-05, "loss": 0.9517, "step": 2300 }, { "epoch": 0.02674477840040754, "grad_norm": 3.5867044925689697, "learning_rate": 3.564371719666564e-05, "loss": 0.9746, "step": 2310 }, { "epoch": 0.026860556661881165, "grad_norm": 4.0372114181518555, "learning_rate": 3.579808582895955e-05, "loss": 0.9921, "step": 2320 }, { "epoch": 0.02697633492335479, "grad_norm": 3.113901138305664, "learning_rate": 3.595245446125348e-05, "loss": 0.9084, "step": 2330 }, { "epoch": 0.027092113184828417, "grad_norm": 3.802921772003174, "learning_rate": 3.610682309354739e-05, "loss": 0.9022, "step": 2340 }, { "epoch": 0.027207891446302042, "grad_norm": 3.4507830142974854, "learning_rate": 3.626119172584131e-05, "loss": 0.9464, "step": 2350 }, { "epoch": 0.02732366970777567, "grad_norm": 4.381093502044678, "learning_rate": 3.641556035813523e-05, "loss": 0.9977, "step": 2360 }, { "epoch": 0.027439447969249294, "grad_norm": 2.51751446723938, "learning_rate": 3.656992899042915e-05, "loss": 0.9143, "step": 2370 }, { "epoch": 0.02755522623072292, "grad_norm": 3.534101724624634, "learning_rate": 3.6724297622723066e-05, "loss": 0.9669, "step": 2380 }, { "epoch": 0.027671004492196546, "grad_norm": 4.157338619232178, "learning_rate": 3.687866625501698e-05, "loss": 0.9401, "step": 2390 }, { "epoch": 0.02778678275367017, "grad_norm": 2.6612493991851807, "learning_rate": 3.7033034887310904e-05, "loss": 0.8944, "step": 2400 }, { "epoch": 0.027902561015143795, "grad_norm": 3.8302664756774902, "learning_rate": 3.718740351960482e-05, "loss": 0.9386, "step": 2410 }, { "epoch": 0.028018339276617423, "grad_norm": 3.27974009513855, "learning_rate": 3.7341772151898736e-05, "loss": 0.9775, "step": 2420 }, { "epoch": 0.028134117538091048, "grad_norm": 3.8576836585998535, "learning_rate": 3.7496140784192655e-05, "loss": 0.931, "step": 2430 }, { "epoch": 0.028249895799564675, "grad_norm": 3.122047185897827, "learning_rate": 3.7650509416486574e-05, "loss": 0.9224, "step": 2440 }, { "epoch": 0.0283656740610383, "grad_norm": 3.2555081844329834, "learning_rate": 3.780487804878049e-05, "loss": 0.9484, "step": 2450 }, { "epoch": 0.028481452322511924, "grad_norm": 4.440473556518555, "learning_rate": 3.7959246681074405e-05, "loss": 0.9022, "step": 2460 }, { "epoch": 0.028597230583985552, "grad_norm": 4.415891647338867, "learning_rate": 3.8113615313368324e-05, "loss": 0.9589, "step": 2470 }, { "epoch": 0.028713008845459177, "grad_norm": 3.2047066688537598, "learning_rate": 3.826798394566224e-05, "loss": 0.9532, "step": 2480 }, { "epoch": 0.0288287871069328, "grad_norm": 3.310581684112549, "learning_rate": 3.842235257795616e-05, "loss": 0.9224, "step": 2490 }, { "epoch": 0.02894456536840643, "grad_norm": 3.6949031352996826, "learning_rate": 3.8576721210250075e-05, "loss": 0.9472, "step": 2500 }, { "epoch": 0.029060343629880053, "grad_norm": 3.2445971965789795, "learning_rate": 3.8731089842544e-05, "loss": 0.9207, "step": 2510 }, { "epoch": 0.02917612189135368, "grad_norm": 3.180643320083618, "learning_rate": 3.888545847483791e-05, "loss": 0.9137, "step": 2520 }, { "epoch": 0.029291900152827306, "grad_norm": 3.478769063949585, "learning_rate": 3.903982710713183e-05, "loss": 0.9437, "step": 2530 }, { "epoch": 0.02940767841430093, "grad_norm": 3.4851291179656982, "learning_rate": 3.919419573942575e-05, "loss": 0.9451, "step": 2540 }, { "epoch": 0.029523456675774558, "grad_norm": 3.5303795337677, "learning_rate": 3.934856437171967e-05, "loss": 0.9534, "step": 2550 }, { "epoch": 0.029639234937248182, "grad_norm": 3.5847115516662598, "learning_rate": 3.950293300401359e-05, "loss": 0.8699, "step": 2560 }, { "epoch": 0.029755013198721807, "grad_norm": 4.173857688903809, "learning_rate": 3.96573016363075e-05, "loss": 0.9464, "step": 2570 }, { "epoch": 0.029870791460195435, "grad_norm": 3.6040806770324707, "learning_rate": 3.981167026860142e-05, "loss": 0.9125, "step": 2580 }, { "epoch": 0.02998656972166906, "grad_norm": 3.881631374359131, "learning_rate": 3.996603890089534e-05, "loss": 0.9155, "step": 2590 }, { "epoch": 0.030102347983142687, "grad_norm": 3.304704189300537, "learning_rate": 4.012040753318926e-05, "loss": 0.9379, "step": 2600 }, { "epoch": 0.03021812624461631, "grad_norm": 3.695401430130005, "learning_rate": 4.027477616548318e-05, "loss": 0.906, "step": 2610 }, { "epoch": 0.030333904506089936, "grad_norm": 2.877660036087036, "learning_rate": 4.042914479777709e-05, "loss": 0.8771, "step": 2620 }, { "epoch": 0.030449682767563564, "grad_norm": 3.3087306022644043, "learning_rate": 4.0583513430071015e-05, "loss": 0.9575, "step": 2630 }, { "epoch": 0.030565461029037188, "grad_norm": 2.551759719848633, "learning_rate": 4.073788206236493e-05, "loss": 0.8743, "step": 2640 }, { "epoch": 0.030681239290510812, "grad_norm": 3.864423990249634, "learning_rate": 4.089225069465885e-05, "loss": 0.908, "step": 2650 }, { "epoch": 0.03079701755198444, "grad_norm": 3.080941915512085, "learning_rate": 4.1046619326952766e-05, "loss": 0.8992, "step": 2660 }, { "epoch": 0.030912795813458065, "grad_norm": 3.5077943801879883, "learning_rate": 4.1200987959246685e-05, "loss": 0.9176, "step": 2670 }, { "epoch": 0.031028574074931693, "grad_norm": 3.443204641342163, "learning_rate": 4.1355356591540604e-05, "loss": 0.8974, "step": 2680 }, { "epoch": 0.031144352336405317, "grad_norm": 3.79953932762146, "learning_rate": 4.1509725223834516e-05, "loss": 0.8789, "step": 2690 }, { "epoch": 0.031260130597878945, "grad_norm": 2.7392163276672363, "learning_rate": 4.166409385612844e-05, "loss": 0.9331, "step": 2700 }, { "epoch": 0.03137590885935257, "grad_norm": 4.023155212402344, "learning_rate": 4.1818462488422354e-05, "loss": 0.9074, "step": 2710 }, { "epoch": 0.031491687120826194, "grad_norm": 3.5471322536468506, "learning_rate": 4.197283112071627e-05, "loss": 0.92, "step": 2720 }, { "epoch": 0.03160746538229982, "grad_norm": 3.721235513687134, "learning_rate": 4.2127199753010185e-05, "loss": 0.9536, "step": 2730 }, { "epoch": 0.03172324364377344, "grad_norm": 3.322194814682007, "learning_rate": 4.228156838530411e-05, "loss": 0.8973, "step": 2740 }, { "epoch": 0.031839021905247074, "grad_norm": 3.457873821258545, "learning_rate": 4.2435937017598024e-05, "loss": 0.9056, "step": 2750 }, { "epoch": 0.0319548001667207, "grad_norm": 3.555284023284912, "learning_rate": 4.259030564989194e-05, "loss": 0.8985, "step": 2760 }, { "epoch": 0.03207057842819432, "grad_norm": 3.6158485412597656, "learning_rate": 4.274467428218586e-05, "loss": 0.8745, "step": 2770 }, { "epoch": 0.03218635668966795, "grad_norm": 3.0284039974212646, "learning_rate": 4.289904291447978e-05, "loss": 0.9364, "step": 2780 }, { "epoch": 0.03230213495114157, "grad_norm": 3.9668619632720947, "learning_rate": 4.30534115467737e-05, "loss": 0.9246, "step": 2790 }, { "epoch": 0.0324179132126152, "grad_norm": 3.699307680130005, "learning_rate": 4.320778017906761e-05, "loss": 0.9134, "step": 2800 }, { "epoch": 0.03253369147408883, "grad_norm": 2.898625612258911, "learning_rate": 4.336214881136154e-05, "loss": 0.9559, "step": 2810 }, { "epoch": 0.03264946973556245, "grad_norm": 3.613478660583496, "learning_rate": 4.351651744365545e-05, "loss": 0.929, "step": 2820 }, { "epoch": 0.032765247997036076, "grad_norm": 3.6123273372650146, "learning_rate": 4.367088607594937e-05, "loss": 0.8947, "step": 2830 }, { "epoch": 0.0328810262585097, "grad_norm": 3.690481662750244, "learning_rate": 4.382525470824329e-05, "loss": 0.9236, "step": 2840 }, { "epoch": 0.032996804519983325, "grad_norm": 3.019315481185913, "learning_rate": 4.397962334053721e-05, "loss": 0.928, "step": 2850 }, { "epoch": 0.033112582781456956, "grad_norm": 3.4528145790100098, "learning_rate": 4.4133991972831126e-05, "loss": 0.8853, "step": 2860 }, { "epoch": 0.03322836104293058, "grad_norm": 3.455610752105713, "learning_rate": 4.428836060512504e-05, "loss": 0.8617, "step": 2870 }, { "epoch": 0.033344139304404205, "grad_norm": 3.1545498371124268, "learning_rate": 4.444272923741896e-05, "loss": 0.8815, "step": 2880 }, { "epoch": 0.03345991756587783, "grad_norm": 3.3913047313690186, "learning_rate": 4.4597097869712877e-05, "loss": 0.9454, "step": 2890 }, { "epoch": 0.033575695827351454, "grad_norm": 3.386596441268921, "learning_rate": 4.4751466502006796e-05, "loss": 0.9323, "step": 2900 }, { "epoch": 0.033691474088825085, "grad_norm": 3.378140687942505, "learning_rate": 4.490583513430071e-05, "loss": 0.9317, "step": 2910 }, { "epoch": 0.03380725235029871, "grad_norm": 3.180781841278076, "learning_rate": 4.506020376659463e-05, "loss": 0.8937, "step": 2920 }, { "epoch": 0.033923030611772334, "grad_norm": 3.7732934951782227, "learning_rate": 4.521457239888855e-05, "loss": 0.9222, "step": 2930 }, { "epoch": 0.03403880887324596, "grad_norm": 3.6625473499298096, "learning_rate": 4.5368941031182465e-05, "loss": 0.9137, "step": 2940 }, { "epoch": 0.03415458713471958, "grad_norm": 3.2060325145721436, "learning_rate": 4.5523309663476384e-05, "loss": 0.9044, "step": 2950 }, { "epoch": 0.034270365396193214, "grad_norm": 3.0926353931427, "learning_rate": 4.56776782957703e-05, "loss": 0.9117, "step": 2960 }, { "epoch": 0.03438614365766684, "grad_norm": 3.5341603755950928, "learning_rate": 4.583204692806422e-05, "loss": 0.8675, "step": 2970 }, { "epoch": 0.03450192191914046, "grad_norm": 2.6664083003997803, "learning_rate": 4.5986415560358134e-05, "loss": 0.8186, "step": 2980 }, { "epoch": 0.03461770018061409, "grad_norm": 3.1512250900268555, "learning_rate": 4.6140784192652053e-05, "loss": 0.8495, "step": 2990 }, { "epoch": 0.03473347844208771, "grad_norm": 3.44207763671875, "learning_rate": 4.629515282494597e-05, "loss": 0.8995, "step": 3000 }, { "epoch": 0.03473347844208771, "eval_chrf": 70.13015374581238, "eval_loss": 1.3304837942123413, "eval_runtime": 345.8057, "eval_samples_per_second": 0.289, "eval_steps_per_second": 0.012, "step": 3000 }, { "epoch": 0.034849256703561336, "grad_norm": 3.2957146167755127, "learning_rate": 4.644952145723989e-05, "loss": 0.9227, "step": 3010 }, { "epoch": 0.03496503496503497, "grad_norm": 3.4464991092681885, "learning_rate": 4.660389008953381e-05, "loss": 0.8515, "step": 3020 }, { "epoch": 0.03508081322650859, "grad_norm": 3.763947010040283, "learning_rate": 4.675825872182772e-05, "loss": 0.8865, "step": 3030 }, { "epoch": 0.035196591487982216, "grad_norm": 2.4498541355133057, "learning_rate": 4.691262735412165e-05, "loss": 0.8898, "step": 3040 }, { "epoch": 0.03531236974945584, "grad_norm": 3.7224974632263184, "learning_rate": 4.706699598641556e-05, "loss": 0.922, "step": 3050 }, { "epoch": 0.035428148010929465, "grad_norm": 2.902953863143921, "learning_rate": 4.722136461870948e-05, "loss": 0.8597, "step": 3060 }, { "epoch": 0.0355439262724031, "grad_norm": 2.5387275218963623, "learning_rate": 4.73757332510034e-05, "loss": 0.8588, "step": 3070 }, { "epoch": 0.03565970453387672, "grad_norm": 2.914139986038208, "learning_rate": 4.753010188329732e-05, "loss": 0.8211, "step": 3080 }, { "epoch": 0.035775482795350345, "grad_norm": 3.456786632537842, "learning_rate": 4.768447051559124e-05, "loss": 0.8712, "step": 3090 }, { "epoch": 0.03589126105682397, "grad_norm": 3.0146403312683105, "learning_rate": 4.783883914788515e-05, "loss": 0.8565, "step": 3100 }, { "epoch": 0.036007039318297594, "grad_norm": 2.8797733783721924, "learning_rate": 4.7993207780179075e-05, "loss": 0.8537, "step": 3110 }, { "epoch": 0.036122817579771226, "grad_norm": 3.228199005126953, "learning_rate": 4.814757641247299e-05, "loss": 0.8665, "step": 3120 }, { "epoch": 0.03623859584124485, "grad_norm": 3.2365682125091553, "learning_rate": 4.8301945044766906e-05, "loss": 0.8557, "step": 3130 }, { "epoch": 0.036354374102718474, "grad_norm": 2.7745420932769775, "learning_rate": 4.845631367706082e-05, "loss": 0.8259, "step": 3140 }, { "epoch": 0.0364701523641921, "grad_norm": 2.7285120487213135, "learning_rate": 4.8610682309354745e-05, "loss": 0.8716, "step": 3150 }, { "epoch": 0.03658593062566572, "grad_norm": 3.227248430252075, "learning_rate": 4.876505094164866e-05, "loss": 0.8937, "step": 3160 }, { "epoch": 0.03670170888713935, "grad_norm": 3.636167287826538, "learning_rate": 4.8919419573942576e-05, "loss": 0.8709, "step": 3170 }, { "epoch": 0.03681748714861298, "grad_norm": 2.8551406860351562, "learning_rate": 4.9073788206236495e-05, "loss": 0.9412, "step": 3180 }, { "epoch": 0.0369332654100866, "grad_norm": 3.8716530799865723, "learning_rate": 4.9228156838530414e-05, "loss": 0.8917, "step": 3190 }, { "epoch": 0.03704904367156023, "grad_norm": 2.37703275680542, "learning_rate": 4.938252547082433e-05, "loss": 0.8364, "step": 3200 }, { "epoch": 0.03716482193303385, "grad_norm": 2.83221435546875, "learning_rate": 4.9536894103118245e-05, "loss": 0.8734, "step": 3210 }, { "epoch": 0.03728060019450748, "grad_norm": 2.612856388092041, "learning_rate": 4.9691262735412164e-05, "loss": 0.7954, "step": 3220 }, { "epoch": 0.03739637845598111, "grad_norm": 2.5937700271606445, "learning_rate": 4.984563136770608e-05, "loss": 0.802, "step": 3230 }, { "epoch": 0.03751215671745473, "grad_norm": 3.6801981925964355, "learning_rate": 5e-05, "loss": 0.8902, "step": 3240 }, { "epoch": 0.03762793497892836, "grad_norm": 2.671705484390259, "learning_rate": 5.015436863229392e-05, "loss": 0.894, "step": 3250 }, { "epoch": 0.03774371324040198, "grad_norm": 3.6469271183013916, "learning_rate": 5.0308737264587834e-05, "loss": 0.9043, "step": 3260 }, { "epoch": 0.037859491501875606, "grad_norm": 2.4723293781280518, "learning_rate": 5.046310589688176e-05, "loss": 0.8541, "step": 3270 }, { "epoch": 0.03797526976334924, "grad_norm": 3.370495080947876, "learning_rate": 5.061747452917568e-05, "loss": 0.88, "step": 3280 }, { "epoch": 0.03809104802482286, "grad_norm": 3.356306314468384, "learning_rate": 5.077184316146959e-05, "loss": 0.8632, "step": 3290 }, { "epoch": 0.038206826286296486, "grad_norm": 3.166931390762329, "learning_rate": 5.092621179376351e-05, "loss": 0.873, "step": 3300 }, { "epoch": 0.03832260454777011, "grad_norm": 3.311189651489258, "learning_rate": 5.108058042605742e-05, "loss": 0.8115, "step": 3310 }, { "epoch": 0.038438382809243735, "grad_norm": 3.2933318614959717, "learning_rate": 5.123494905835135e-05, "loss": 0.8813, "step": 3320 }, { "epoch": 0.03855416107071736, "grad_norm": 3.240525245666504, "learning_rate": 5.138931769064527e-05, "loss": 0.8657, "step": 3330 }, { "epoch": 0.03866993933219099, "grad_norm": 3.965378999710083, "learning_rate": 5.154368632293918e-05, "loss": 0.8354, "step": 3340 }, { "epoch": 0.038785717593664615, "grad_norm": 3.094787359237671, "learning_rate": 5.16980549552331e-05, "loss": 0.8763, "step": 3350 }, { "epoch": 0.03890149585513824, "grad_norm": 2.820939779281616, "learning_rate": 5.1852423587527024e-05, "loss": 0.8765, "step": 3360 }, { "epoch": 0.039017274116611864, "grad_norm": 3.478208303451538, "learning_rate": 5.200679221982093e-05, "loss": 0.866, "step": 3370 }, { "epoch": 0.03913305237808549, "grad_norm": 2.8255369663238525, "learning_rate": 5.2161160852114855e-05, "loss": 0.8165, "step": 3380 }, { "epoch": 0.03924883063955912, "grad_norm": 3.1648597717285156, "learning_rate": 5.231552948440877e-05, "loss": 0.8875, "step": 3390 }, { "epoch": 0.039364608901032744, "grad_norm": 3.260051727294922, "learning_rate": 5.246989811670269e-05, "loss": 0.8923, "step": 3400 }, { "epoch": 0.03948038716250637, "grad_norm": 3.078834295272827, "learning_rate": 5.2624266748996606e-05, "loss": 0.8659, "step": 3410 }, { "epoch": 0.03959616542397999, "grad_norm": 3.1070268154144287, "learning_rate": 5.277863538129052e-05, "loss": 0.8446, "step": 3420 }, { "epoch": 0.03971194368545362, "grad_norm": 3.4536848068237305, "learning_rate": 5.2933004013584444e-05, "loss": 0.8689, "step": 3430 }, { "epoch": 0.03982772194692725, "grad_norm": 2.8955140113830566, "learning_rate": 5.308737264587836e-05, "loss": 0.8374, "step": 3440 }, { "epoch": 0.03994350020840087, "grad_norm": 4.241433143615723, "learning_rate": 5.3241741278172275e-05, "loss": 0.8498, "step": 3450 }, { "epoch": 0.0400592784698745, "grad_norm": 3.111825942993164, "learning_rate": 5.3396109910466194e-05, "loss": 0.8377, "step": 3460 }, { "epoch": 0.04017505673134812, "grad_norm": 3.1120669841766357, "learning_rate": 5.355047854276012e-05, "loss": 0.8353, "step": 3470 }, { "epoch": 0.040290834992821746, "grad_norm": 3.282771110534668, "learning_rate": 5.370484717505403e-05, "loss": 0.8499, "step": 3480 }, { "epoch": 0.04040661325429537, "grad_norm": 3.2864034175872803, "learning_rate": 5.385921580734795e-05, "loss": 0.8721, "step": 3490 }, { "epoch": 0.040522391515769, "grad_norm": 3.350191831588745, "learning_rate": 5.4013584439641864e-05, "loss": 0.8412, "step": 3500 }, { "epoch": 0.040638169777242626, "grad_norm": 2.9095778465270996, "learning_rate": 5.416795307193578e-05, "loss": 0.8362, "step": 3510 }, { "epoch": 0.04075394803871625, "grad_norm": 2.626868963241577, "learning_rate": 5.432232170422971e-05, "loss": 0.8676, "step": 3520 }, { "epoch": 0.040869726300189875, "grad_norm": 2.8825254440307617, "learning_rate": 5.4476690336523614e-05, "loss": 0.8388, "step": 3530 }, { "epoch": 0.0409855045616635, "grad_norm": 2.8298516273498535, "learning_rate": 5.463105896881754e-05, "loss": 0.7971, "step": 3540 }, { "epoch": 0.04110128282313713, "grad_norm": 3.4504618644714355, "learning_rate": 5.478542760111146e-05, "loss": 0.8803, "step": 3550 }, { "epoch": 0.041217061084610755, "grad_norm": 2.8215651512145996, "learning_rate": 5.493979623340537e-05, "loss": 0.8311, "step": 3560 }, { "epoch": 0.04133283934608438, "grad_norm": 2.7110390663146973, "learning_rate": 5.50941648656993e-05, "loss": 0.7798, "step": 3570 }, { "epoch": 0.041448617607558004, "grad_norm": 3.101635694503784, "learning_rate": 5.5248533497993216e-05, "loss": 0.8916, "step": 3580 }, { "epoch": 0.04156439586903163, "grad_norm": 2.4397974014282227, "learning_rate": 5.540290213028713e-05, "loss": 0.82, "step": 3590 }, { "epoch": 0.04168017413050526, "grad_norm": 3.3154051303863525, "learning_rate": 5.555727076258105e-05, "loss": 0.8032, "step": 3600 }, { "epoch": 0.041795952391978884, "grad_norm": 3.1992838382720947, "learning_rate": 5.571163939487496e-05, "loss": 0.8279, "step": 3610 }, { "epoch": 0.04191173065345251, "grad_norm": 3.188650369644165, "learning_rate": 5.586600802716888e-05, "loss": 0.8039, "step": 3620 }, { "epoch": 0.04202750891492613, "grad_norm": 2.711498737335205, "learning_rate": 5.6020376659462804e-05, "loss": 0.8561, "step": 3630 }, { "epoch": 0.04214328717639976, "grad_norm": 2.9499616622924805, "learning_rate": 5.617474529175672e-05, "loss": 0.8305, "step": 3640 }, { "epoch": 0.04225906543787338, "grad_norm": 3.1685194969177246, "learning_rate": 5.6329113924050636e-05, "loss": 0.8178, "step": 3650 }, { "epoch": 0.04237484369934701, "grad_norm": 2.8285470008850098, "learning_rate": 5.6483482556344555e-05, "loss": 0.825, "step": 3660 }, { "epoch": 0.04249062196082064, "grad_norm": 3.0122334957122803, "learning_rate": 5.663785118863847e-05, "loss": 0.8565, "step": 3670 }, { "epoch": 0.04260640022229426, "grad_norm": 2.9502828121185303, "learning_rate": 5.679221982093239e-05, "loss": 0.8358, "step": 3680 }, { "epoch": 0.042722178483767886, "grad_norm": 3.1885080337524414, "learning_rate": 5.69465884532263e-05, "loss": 0.8375, "step": 3690 }, { "epoch": 0.04283795674524151, "grad_norm": 2.9346580505371094, "learning_rate": 5.7100957085520224e-05, "loss": 0.8415, "step": 3700 }, { "epoch": 0.04295373500671514, "grad_norm": 2.2674832344055176, "learning_rate": 5.725532571781414e-05, "loss": 0.8082, "step": 3710 }, { "epoch": 0.04306951326818877, "grad_norm": 2.7285845279693604, "learning_rate": 5.7409694350108055e-05, "loss": 0.852, "step": 3720 }, { "epoch": 0.04318529152966239, "grad_norm": 3.1136832237243652, "learning_rate": 5.756406298240198e-05, "loss": 0.8124, "step": 3730 }, { "epoch": 0.043301069791136015, "grad_norm": 3.061062812805176, "learning_rate": 5.77184316146959e-05, "loss": 0.7996, "step": 3740 }, { "epoch": 0.04341684805260964, "grad_norm": 3.20890212059021, "learning_rate": 5.787280024698981e-05, "loss": 0.8482, "step": 3750 }, { "epoch": 0.04353262631408327, "grad_norm": 3.1474831104278564, "learning_rate": 5.802716887928373e-05, "loss": 0.7762, "step": 3760 }, { "epoch": 0.043648404575556896, "grad_norm": 2.5395827293395996, "learning_rate": 5.818153751157766e-05, "loss": 0.8322, "step": 3770 }, { "epoch": 0.04376418283703052, "grad_norm": 3.292640447616577, "learning_rate": 5.833590614387156e-05, "loss": 0.8601, "step": 3780 }, { "epoch": 0.043879961098504144, "grad_norm": 3.3988218307495117, "learning_rate": 5.849027477616549e-05, "loss": 0.7854, "step": 3790 }, { "epoch": 0.04399573935997777, "grad_norm": 2.664951801300049, "learning_rate": 5.86446434084594e-05, "loss": 0.8195, "step": 3800 }, { "epoch": 0.04411151762145139, "grad_norm": 3.0127804279327393, "learning_rate": 5.879901204075332e-05, "loss": 0.8289, "step": 3810 }, { "epoch": 0.044227295882925025, "grad_norm": 3.0910518169403076, "learning_rate": 5.8953380673047246e-05, "loss": 0.8144, "step": 3820 }, { "epoch": 0.04434307414439865, "grad_norm": 2.8214893341064453, "learning_rate": 5.910774930534115e-05, "loss": 0.8251, "step": 3830 }, { "epoch": 0.04445885240587227, "grad_norm": 2.9957244396209717, "learning_rate": 5.926211793763508e-05, "loss": 0.7836, "step": 3840 }, { "epoch": 0.0445746306673459, "grad_norm": 2.6521999835968018, "learning_rate": 5.9416486569928996e-05, "loss": 0.8301, "step": 3850 }, { "epoch": 0.04469040892881952, "grad_norm": 2.4596571922302246, "learning_rate": 5.957085520222291e-05, "loss": 0.7771, "step": 3860 }, { "epoch": 0.044806187190293154, "grad_norm": 2.8867480754852295, "learning_rate": 5.972522383451683e-05, "loss": 0.8202, "step": 3870 }, { "epoch": 0.04492196545176678, "grad_norm": 2.1872568130493164, "learning_rate": 5.987959246681075e-05, "loss": 0.8217, "step": 3880 }, { "epoch": 0.0450377437132404, "grad_norm": 3.0329911708831787, "learning_rate": 6.0033961099104666e-05, "loss": 0.7875, "step": 3890 }, { "epoch": 0.04515352197471403, "grad_norm": 2.1675074100494385, "learning_rate": 6.0188329731398585e-05, "loss": 0.8276, "step": 3900 }, { "epoch": 0.04526930023618765, "grad_norm": 3.205519437789917, "learning_rate": 6.03426983636925e-05, "loss": 0.7714, "step": 3910 }, { "epoch": 0.04538507849766128, "grad_norm": 3.124452829360962, "learning_rate": 6.0497066995986416e-05, "loss": 0.8003, "step": 3920 }, { "epoch": 0.04550085675913491, "grad_norm": 3.3183441162109375, "learning_rate": 6.065143562828034e-05, "loss": 0.8199, "step": 3930 }, { "epoch": 0.04561663502060853, "grad_norm": 2.4837539196014404, "learning_rate": 6.080580426057425e-05, "loss": 0.8187, "step": 3940 }, { "epoch": 0.045732413282082156, "grad_norm": 2.7787535190582275, "learning_rate": 6.096017289286817e-05, "loss": 0.823, "step": 3950 }, { "epoch": 0.04584819154355578, "grad_norm": 2.201709508895874, "learning_rate": 6.11145415251621e-05, "loss": 0.8299, "step": 3960 }, { "epoch": 0.045963969805029405, "grad_norm": 2.976959228515625, "learning_rate": 6.1268910157456e-05, "loss": 0.7301, "step": 3970 }, { "epoch": 0.046079748066503036, "grad_norm": 2.4852588176727295, "learning_rate": 6.142327878974992e-05, "loss": 0.7821, "step": 3980 }, { "epoch": 0.04619552632797666, "grad_norm": 2.3276824951171875, "learning_rate": 6.157764742204384e-05, "loss": 0.8311, "step": 3990 }, { "epoch": 0.046311304589450285, "grad_norm": 2.7353389263153076, "learning_rate": 6.173201605433776e-05, "loss": 0.7818, "step": 4000 }, { "epoch": 0.046311304589450285, "eval_chrf": 78.10267281278205, "eval_loss": 1.2255442142486572, "eval_runtime": 335.4954, "eval_samples_per_second": 0.298, "eval_steps_per_second": 0.012, "step": 4000 }, { "epoch": 0.04642708285092391, "grad_norm": 2.5188751220703125, "learning_rate": 6.188638468663168e-05, "loss": 0.8627, "step": 4010 }, { "epoch": 0.046542861112397534, "grad_norm": 2.4813973903656006, "learning_rate": 6.20407533189256e-05, "loss": 0.8123, "step": 4020 }, { "epoch": 0.046658639373871165, "grad_norm": 2.739382028579712, "learning_rate": 6.219512195121952e-05, "loss": 0.8058, "step": 4030 }, { "epoch": 0.04677441763534479, "grad_norm": 2.2167294025421143, "learning_rate": 6.234949058351344e-05, "loss": 0.7937, "step": 4040 }, { "epoch": 0.046890195896818414, "grad_norm": 2.842698097229004, "learning_rate": 6.250385921580734e-05, "loss": 0.7537, "step": 4050 }, { "epoch": 0.04700597415829204, "grad_norm": 1.9638904333114624, "learning_rate": 6.265822784810128e-05, "loss": 0.7799, "step": 4060 }, { "epoch": 0.04712175241976566, "grad_norm": 2.0738861560821533, "learning_rate": 6.28125964803952e-05, "loss": 0.8036, "step": 4070 }, { "epoch": 0.047237530681239294, "grad_norm": 2.02272367477417, "learning_rate": 6.29669651126891e-05, "loss": 0.816, "step": 4080 }, { "epoch": 0.04735330894271292, "grad_norm": 3.2900118827819824, "learning_rate": 6.312133374498302e-05, "loss": 0.8152, "step": 4090 }, { "epoch": 0.04746908720418654, "grad_norm": 2.37040114402771, "learning_rate": 6.327570237727694e-05, "loss": 0.7987, "step": 4100 }, { "epoch": 0.04758486546566017, "grad_norm": 2.4891092777252197, "learning_rate": 6.343007100957086e-05, "loss": 0.8258, "step": 4110 }, { "epoch": 0.04770064372713379, "grad_norm": 2.619110345840454, "learning_rate": 6.358443964186478e-05, "loss": 0.8003, "step": 4120 }, { "epoch": 0.047816421988607416, "grad_norm": 2.712148666381836, "learning_rate": 6.37388082741587e-05, "loss": 0.7675, "step": 4130 }, { "epoch": 0.04793220025008105, "grad_norm": 2.545774221420288, "learning_rate": 6.389317690645261e-05, "loss": 0.8105, "step": 4140 }, { "epoch": 0.04804797851155467, "grad_norm": 2.7815654277801514, "learning_rate": 6.404754553874653e-05, "loss": 0.7548, "step": 4150 }, { "epoch": 0.048163756773028296, "grad_norm": 2.2365033626556396, "learning_rate": 6.420191417104044e-05, "loss": 0.817, "step": 4160 }, { "epoch": 0.04827953503450192, "grad_norm": 2.854419469833374, "learning_rate": 6.435628280333437e-05, "loss": 0.7837, "step": 4170 }, { "epoch": 0.048395313295975545, "grad_norm": 2.573758125305176, "learning_rate": 6.451065143562829e-05, "loss": 0.7692, "step": 4180 }, { "epoch": 0.048511091557449176, "grad_norm": 1.9099613428115845, "learning_rate": 6.46650200679222e-05, "loss": 0.7568, "step": 4190 }, { "epoch": 0.0486268698189228, "grad_norm": 2.696302890777588, "learning_rate": 6.481938870021612e-05, "loss": 0.7838, "step": 4200 }, { "epoch": 0.048742648080396425, "grad_norm": 2.6622250080108643, "learning_rate": 6.497375733251003e-05, "loss": 0.806, "step": 4210 }, { "epoch": 0.04885842634187005, "grad_norm": 2.6504921913146973, "learning_rate": 6.512812596480395e-05, "loss": 0.6977, "step": 4220 }, { "epoch": 0.048974204603343674, "grad_norm": 2.4777019023895264, "learning_rate": 6.528249459709787e-05, "loss": 0.7442, "step": 4230 }, { "epoch": 0.049089982864817305, "grad_norm": 1.877619743347168, "learning_rate": 6.543686322939179e-05, "loss": 0.7939, "step": 4240 }, { "epoch": 0.04920576112629093, "grad_norm": 2.833174467086792, "learning_rate": 6.559123186168571e-05, "loss": 0.7944, "step": 4250 }, { "epoch": 0.049321539387764554, "grad_norm": 2.5579330921173096, "learning_rate": 6.574560049397963e-05, "loss": 0.7618, "step": 4260 }, { "epoch": 0.04943731764923818, "grad_norm": 2.9838063716888428, "learning_rate": 6.589996912627355e-05, "loss": 0.8335, "step": 4270 }, { "epoch": 0.0495530959107118, "grad_norm": 2.114177942276001, "learning_rate": 6.605433775856747e-05, "loss": 0.708, "step": 4280 }, { "epoch": 0.04966887417218543, "grad_norm": 2.3989551067352295, "learning_rate": 6.620870639086137e-05, "loss": 0.7868, "step": 4290 }, { "epoch": 0.04978465243365906, "grad_norm": 2.2982163429260254, "learning_rate": 6.636307502315529e-05, "loss": 0.8518, "step": 4300 }, { "epoch": 0.04990043069513268, "grad_norm": 2.146652936935425, "learning_rate": 6.651744365544922e-05, "loss": 0.7471, "step": 4310 }, { "epoch": 0.05001620895660631, "grad_norm": 2.5075058937072754, "learning_rate": 6.667181228774313e-05, "loss": 0.7963, "step": 4320 }, { "epoch": 0.05013198721807993, "grad_norm": 2.463543176651001, "learning_rate": 6.682618092003705e-05, "loss": 0.768, "step": 4330 }, { "epoch": 0.050247765479553556, "grad_norm": 2.5465927124023438, "learning_rate": 6.698054955233097e-05, "loss": 0.7699, "step": 4340 }, { "epoch": 0.05036354374102719, "grad_norm": 2.5057928562164307, "learning_rate": 6.713491818462489e-05, "loss": 0.7773, "step": 4350 }, { "epoch": 0.05047932200250081, "grad_norm": 2.135197639465332, "learning_rate": 6.72892868169188e-05, "loss": 0.7582, "step": 4360 }, { "epoch": 0.05059510026397444, "grad_norm": 2.8874289989471436, "learning_rate": 6.744365544921273e-05, "loss": 0.7849, "step": 4370 }, { "epoch": 0.05071087852544806, "grad_norm": 2.699106216430664, "learning_rate": 6.759802408150664e-05, "loss": 0.7414, "step": 4380 }, { "epoch": 0.050826656786921685, "grad_norm": 2.337489366531372, "learning_rate": 6.775239271380056e-05, "loss": 0.8291, "step": 4390 }, { "epoch": 0.05094243504839532, "grad_norm": 2.667902946472168, "learning_rate": 6.790676134609447e-05, "loss": 0.797, "step": 4400 }, { "epoch": 0.05105821330986894, "grad_norm": 2.971599578857422, "learning_rate": 6.806112997838839e-05, "loss": 0.7692, "step": 4410 }, { "epoch": 0.051173991571342566, "grad_norm": 2.433220624923706, "learning_rate": 6.821549861068232e-05, "loss": 0.7582, "step": 4420 }, { "epoch": 0.05128976983281619, "grad_norm": 2.4022865295410156, "learning_rate": 6.836986724297623e-05, "loss": 0.7812, "step": 4430 }, { "epoch": 0.051405548094289814, "grad_norm": 2.868147850036621, "learning_rate": 6.852423587527015e-05, "loss": 0.7301, "step": 4440 }, { "epoch": 0.05152132635576344, "grad_norm": 2.374920129776001, "learning_rate": 6.867860450756406e-05, "loss": 0.7682, "step": 4450 }, { "epoch": 0.05163710461723707, "grad_norm": 1.8938639163970947, "learning_rate": 6.883297313985798e-05, "loss": 0.7755, "step": 4460 }, { "epoch": 0.051752882878710695, "grad_norm": 2.5213005542755127, "learning_rate": 6.89873417721519e-05, "loss": 0.7409, "step": 4470 }, { "epoch": 0.05186866114018432, "grad_norm": 2.381471633911133, "learning_rate": 6.914171040444582e-05, "loss": 0.7829, "step": 4480 }, { "epoch": 0.05198443940165794, "grad_norm": 2.480947256088257, "learning_rate": 6.929607903673974e-05, "loss": 0.7664, "step": 4490 }, { "epoch": 0.05210021766313157, "grad_norm": 2.5081028938293457, "learning_rate": 6.945044766903366e-05, "loss": 0.7392, "step": 4500 }, { "epoch": 0.0522159959246052, "grad_norm": 2.8040149211883545, "learning_rate": 6.960481630132756e-05, "loss": 0.7464, "step": 4510 }, { "epoch": 0.052331774186078824, "grad_norm": 2.6462340354919434, "learning_rate": 6.97591849336215e-05, "loss": 0.7852, "step": 4520 }, { "epoch": 0.05244755244755245, "grad_norm": 1.8709461688995361, "learning_rate": 6.991355356591542e-05, "loss": 0.7589, "step": 4530 }, { "epoch": 0.05256333070902607, "grad_norm": 2.481294631958008, "learning_rate": 7.006792219820932e-05, "loss": 0.7656, "step": 4540 }, { "epoch": 0.0526791089704997, "grad_norm": 2.5509347915649414, "learning_rate": 7.022229083050324e-05, "loss": 0.7926, "step": 4550 }, { "epoch": 0.05279488723197333, "grad_norm": 2.2504377365112305, "learning_rate": 7.037665946279717e-05, "loss": 0.7344, "step": 4560 }, { "epoch": 0.05291066549344695, "grad_norm": 2.534723997116089, "learning_rate": 7.053102809509108e-05, "loss": 0.7686, "step": 4570 }, { "epoch": 0.05302644375492058, "grad_norm": 2.498837947845459, "learning_rate": 7.0685396727385e-05, "loss": 0.7859, "step": 4580 }, { "epoch": 0.0531422220163942, "grad_norm": 2.3672940731048584, "learning_rate": 7.083976535967892e-05, "loss": 0.7904, "step": 4590 }, { "epoch": 0.053258000277867826, "grad_norm": 2.424630641937256, "learning_rate": 7.099413399197284e-05, "loss": 0.7482, "step": 4600 }, { "epoch": 0.05337377853934145, "grad_norm": 2.677380084991455, "learning_rate": 7.114850262426676e-05, "loss": 0.7088, "step": 4610 }, { "epoch": 0.05348955680081508, "grad_norm": 2.422776699066162, "learning_rate": 7.130287125656066e-05, "loss": 0.7646, "step": 4620 }, { "epoch": 0.053605335062288706, "grad_norm": 2.2435402870178223, "learning_rate": 7.14572398888546e-05, "loss": 0.7661, "step": 4630 }, { "epoch": 0.05372111332376233, "grad_norm": 2.1754071712493896, "learning_rate": 7.161160852114851e-05, "loss": 0.7185, "step": 4640 }, { "epoch": 0.053836891585235955, "grad_norm": 2.1318485736846924, "learning_rate": 7.176597715344242e-05, "loss": 0.7419, "step": 4650 }, { "epoch": 0.05395266984670958, "grad_norm": 2.4326610565185547, "learning_rate": 7.192034578573634e-05, "loss": 0.7811, "step": 4660 }, { "epoch": 0.05406844810818321, "grad_norm": 2.451073408126831, "learning_rate": 7.207471441803027e-05, "loss": 0.7287, "step": 4670 }, { "epoch": 0.054184226369656835, "grad_norm": 2.6873838901519775, "learning_rate": 7.222908305032418e-05, "loss": 0.7497, "step": 4680 }, { "epoch": 0.05430000463113046, "grad_norm": 2.3071513175964355, "learning_rate": 7.23834516826181e-05, "loss": 0.7473, "step": 4690 }, { "epoch": 0.054415782892604084, "grad_norm": 2.7855887413024902, "learning_rate": 7.253782031491201e-05, "loss": 0.7484, "step": 4700 }, { "epoch": 0.05453156115407771, "grad_norm": 2.7396068572998047, "learning_rate": 7.269218894720593e-05, "loss": 0.7428, "step": 4710 }, { "epoch": 0.05464733941555134, "grad_norm": 2.5614960193634033, "learning_rate": 7.284655757949985e-05, "loss": 0.7378, "step": 4720 }, { "epoch": 0.054763117677024964, "grad_norm": 2.317500352859497, "learning_rate": 7.300092621179376e-05, "loss": 0.7336, "step": 4730 }, { "epoch": 0.05487889593849859, "grad_norm": 2.811732769012451, "learning_rate": 7.315529484408769e-05, "loss": 0.7815, "step": 4740 }, { "epoch": 0.05499467419997221, "grad_norm": 2.2732126712799072, "learning_rate": 7.330966347638161e-05, "loss": 0.7828, "step": 4750 }, { "epoch": 0.05511045246144584, "grad_norm": 2.6949050426483154, "learning_rate": 7.346403210867551e-05, "loss": 0.7451, "step": 4760 }, { "epoch": 0.05522623072291946, "grad_norm": 2.434868335723877, "learning_rate": 7.361840074096945e-05, "loss": 0.7165, "step": 4770 }, { "epoch": 0.05534200898439309, "grad_norm": 2.574965715408325, "learning_rate": 7.377276937326337e-05, "loss": 0.7833, "step": 4780 }, { "epoch": 0.05545778724586672, "grad_norm": 2.523674726486206, "learning_rate": 7.392713800555727e-05, "loss": 0.7812, "step": 4790 }, { "epoch": 0.05557356550734034, "grad_norm": 2.5483083724975586, "learning_rate": 7.408150663785119e-05, "loss": 0.6918, "step": 4800 }, { "epoch": 0.055689343768813966, "grad_norm": 1.6692194938659668, "learning_rate": 7.423587527014511e-05, "loss": 0.681, "step": 4810 }, { "epoch": 0.05580512203028759, "grad_norm": 2.2478420734405518, "learning_rate": 7.439024390243903e-05, "loss": 0.7449, "step": 4820 }, { "epoch": 0.05592090029176122, "grad_norm": 2.313654899597168, "learning_rate": 7.454461253473295e-05, "loss": 0.7744, "step": 4830 }, { "epoch": 0.056036678553234846, "grad_norm": 2.223144769668579, "learning_rate": 7.469898116702687e-05, "loss": 0.7917, "step": 4840 }, { "epoch": 0.05615245681470847, "grad_norm": 2.536388874053955, "learning_rate": 7.485334979932079e-05, "loss": 0.7377, "step": 4850 }, { "epoch": 0.056268235076182095, "grad_norm": 2.5985777378082275, "learning_rate": 7.50077184316147e-05, "loss": 0.741, "step": 4860 }, { "epoch": 0.05638401333765572, "grad_norm": 2.3443901538848877, "learning_rate": 7.516208706390861e-05, "loss": 0.7134, "step": 4870 }, { "epoch": 0.05649979159912935, "grad_norm": 1.9422653913497925, "learning_rate": 7.531645569620254e-05, "loss": 0.719, "step": 4880 }, { "epoch": 0.056615569860602975, "grad_norm": 2.352421998977661, "learning_rate": 7.547082432849645e-05, "loss": 0.8023, "step": 4890 }, { "epoch": 0.0567313481220766, "grad_norm": 2.353652000427246, "learning_rate": 7.562519296079037e-05, "loss": 0.7451, "step": 4900 }, { "epoch": 0.056847126383550224, "grad_norm": 2.4540939331054688, "learning_rate": 7.577956159308429e-05, "loss": 0.7751, "step": 4910 }, { "epoch": 0.05696290464502385, "grad_norm": 2.339121103286743, "learning_rate": 7.59339302253782e-05, "loss": 0.6886, "step": 4920 }, { "epoch": 0.05707868290649747, "grad_norm": 2.1383137702941895, "learning_rate": 7.608829885767212e-05, "loss": 0.7626, "step": 4930 }, { "epoch": 0.057194461167971104, "grad_norm": 2.392554521560669, "learning_rate": 7.624266748996604e-05, "loss": 0.768, "step": 4940 }, { "epoch": 0.05731023942944473, "grad_norm": 1.97488534450531, "learning_rate": 7.639703612225996e-05, "loss": 0.7584, "step": 4950 }, { "epoch": 0.05742601769091835, "grad_norm": 2.549555540084839, "learning_rate": 7.655140475455388e-05, "loss": 0.7074, "step": 4960 }, { "epoch": 0.05754179595239198, "grad_norm": 2.3444621562957764, "learning_rate": 7.67057733868478e-05, "loss": 0.7439, "step": 4970 }, { "epoch": 0.0576575742138656, "grad_norm": 2.538506269454956, "learning_rate": 7.68601420191417e-05, "loss": 0.7035, "step": 4980 }, { "epoch": 0.05777335247533923, "grad_norm": 2.229445219039917, "learning_rate": 7.701451065143564e-05, "loss": 0.7294, "step": 4990 }, { "epoch": 0.05788913073681286, "grad_norm": 2.288196086883545, "learning_rate": 7.716887928372954e-05, "loss": 0.7059, "step": 5000 }, { "epoch": 0.05788913073681286, "eval_chrf": 68.1041571970253, "eval_loss": 1.119584083557129, "eval_runtime": 372.9758, "eval_samples_per_second": 0.268, "eval_steps_per_second": 0.011, "step": 5000 }, { "epoch": 0.05800490899828648, "grad_norm": 2.2075209617614746, "learning_rate": 7.732324791602346e-05, "loss": 0.6979, "step": 5010 }, { "epoch": 0.058120687259760107, "grad_norm": 2.3697617053985596, "learning_rate": 7.74776165483174e-05, "loss": 0.6848, "step": 5020 }, { "epoch": 0.05823646552123373, "grad_norm": 2.216676950454712, "learning_rate": 7.76319851806113e-05, "loss": 0.7588, "step": 5030 }, { "epoch": 0.05835224378270736, "grad_norm": 2.1751492023468018, "learning_rate": 7.778635381290522e-05, "loss": 0.7202, "step": 5040 }, { "epoch": 0.05846802204418099, "grad_norm": 2.577639102935791, "learning_rate": 7.794072244519914e-05, "loss": 0.7168, "step": 5050 }, { "epoch": 0.05858380030565461, "grad_norm": 2.2894887924194336, "learning_rate": 7.809509107749306e-05, "loss": 0.7501, "step": 5060 }, { "epoch": 0.058699578567128236, "grad_norm": 2.4248814582824707, "learning_rate": 7.824945970978698e-05, "loss": 0.7171, "step": 5070 }, { "epoch": 0.05881535682860186, "grad_norm": 2.2205724716186523, "learning_rate": 7.84038283420809e-05, "loss": 0.7034, "step": 5080 }, { "epoch": 0.058931135090075484, "grad_norm": 2.135868549346924, "learning_rate": 7.855819697437482e-05, "loss": 0.7344, "step": 5090 }, { "epoch": 0.059046913351549116, "grad_norm": 2.3834145069122314, "learning_rate": 7.871256560666873e-05, "loss": 0.7241, "step": 5100 }, { "epoch": 0.05916269161302274, "grad_norm": 2.800309896469116, "learning_rate": 7.886693423896264e-05, "loss": 0.6932, "step": 5110 }, { "epoch": 0.059278469874496365, "grad_norm": 2.100050926208496, "learning_rate": 7.902130287125656e-05, "loss": 0.7334, "step": 5120 }, { "epoch": 0.05939424813596999, "grad_norm": 2.2075440883636475, "learning_rate": 7.917567150355049e-05, "loss": 0.718, "step": 5130 }, { "epoch": 0.05951002639744361, "grad_norm": 2.4797189235687256, "learning_rate": 7.93300401358444e-05, "loss": 0.7518, "step": 5140 }, { "epoch": 0.059625804658917245, "grad_norm": 2.4825644493103027, "learning_rate": 7.948440876813832e-05, "loss": 0.7088, "step": 5150 }, { "epoch": 0.05974158292039087, "grad_norm": 1.8724825382232666, "learning_rate": 7.963877740043223e-05, "loss": 0.7398, "step": 5160 }, { "epoch": 0.059857361181864494, "grad_norm": 2.2066917419433594, "learning_rate": 7.979314603272615e-05, "loss": 0.7018, "step": 5170 }, { "epoch": 0.05997313944333812, "grad_norm": 1.7651664018630981, "learning_rate": 7.994751466502007e-05, "loss": 0.7379, "step": 5180 }, { "epoch": 0.06008891770481174, "grad_norm": 2.3731231689453125, "learning_rate": 8.010188329731398e-05, "loss": 0.7332, "step": 5190 }, { "epoch": 0.060204695966285374, "grad_norm": 2.377887725830078, "learning_rate": 8.025625192960791e-05, "loss": 0.7211, "step": 5200 }, { "epoch": 0.060320474227759, "grad_norm": 2.151663303375244, "learning_rate": 8.041062056190183e-05, "loss": 0.6708, "step": 5210 }, { "epoch": 0.06043625248923262, "grad_norm": 2.24853253364563, "learning_rate": 8.056498919419574e-05, "loss": 0.6876, "step": 5220 }, { "epoch": 0.06055203075070625, "grad_norm": 2.00726580619812, "learning_rate": 8.071935782648965e-05, "loss": 0.6922, "step": 5230 }, { "epoch": 0.06066780901217987, "grad_norm": 2.350165367126465, "learning_rate": 8.087372645878359e-05, "loss": 0.7169, "step": 5240 }, { "epoch": 0.060783587273653496, "grad_norm": 1.947046160697937, "learning_rate": 8.102809509107749e-05, "loss": 0.7085, "step": 5250 }, { "epoch": 0.06089936553512713, "grad_norm": 2.224485397338867, "learning_rate": 8.118246372337141e-05, "loss": 0.6767, "step": 5260 }, { "epoch": 0.06101514379660075, "grad_norm": 2.2891488075256348, "learning_rate": 8.133683235566534e-05, "loss": 0.7551, "step": 5270 }, { "epoch": 0.061130922058074376, "grad_norm": 2.366861343383789, "learning_rate": 8.149120098795925e-05, "loss": 0.7257, "step": 5280 }, { "epoch": 0.061246700319548, "grad_norm": 2.1227612495422363, "learning_rate": 8.164556962025317e-05, "loss": 0.715, "step": 5290 }, { "epoch": 0.061362478581021625, "grad_norm": 2.105232000350952, "learning_rate": 8.179993825254709e-05, "loss": 0.7493, "step": 5300 }, { "epoch": 0.061478256842495256, "grad_norm": 1.9392329454421997, "learning_rate": 8.195430688484101e-05, "loss": 0.6857, "step": 5310 }, { "epoch": 0.06159403510396888, "grad_norm": 2.0439629554748535, "learning_rate": 8.210867551713493e-05, "loss": 0.7476, "step": 5320 }, { "epoch": 0.061709813365442505, "grad_norm": 2.4530372619628906, "learning_rate": 8.226304414942883e-05, "loss": 0.7237, "step": 5330 }, { "epoch": 0.06182559162691613, "grad_norm": 2.3262078762054443, "learning_rate": 8.241741278172276e-05, "loss": 0.6685, "step": 5340 }, { "epoch": 0.061941369888389754, "grad_norm": 2.394943952560425, "learning_rate": 8.257178141401668e-05, "loss": 0.693, "step": 5350 }, { "epoch": 0.062057148149863385, "grad_norm": 2.129511594772339, "learning_rate": 8.272615004631059e-05, "loss": 0.7286, "step": 5360 }, { "epoch": 0.06217292641133701, "grad_norm": 2.0786781311035156, "learning_rate": 8.288051867860451e-05, "loss": 0.7012, "step": 5370 }, { "epoch": 0.062288704672810634, "grad_norm": 2.4915850162506104, "learning_rate": 8.303488731089844e-05, "loss": 0.6995, "step": 5380 }, { "epoch": 0.06240448293428426, "grad_norm": 1.8084322214126587, "learning_rate": 8.318925594319235e-05, "loss": 0.7064, "step": 5390 }, { "epoch": 0.06252026119575789, "grad_norm": 2.1116020679473877, "learning_rate": 8.334362457548626e-05, "loss": 0.6557, "step": 5400 }, { "epoch": 0.06263603945723151, "grad_norm": 2.2924602031707764, "learning_rate": 8.349799320778018e-05, "loss": 0.751, "step": 5410 }, { "epoch": 0.06275181771870514, "grad_norm": 2.040170907974243, "learning_rate": 8.36523618400741e-05, "loss": 0.7363, "step": 5420 }, { "epoch": 0.06286759598017876, "grad_norm": 2.3817520141601562, "learning_rate": 8.380673047236802e-05, "loss": 0.7296, "step": 5430 }, { "epoch": 0.06298337424165239, "grad_norm": 2.015960931777954, "learning_rate": 8.396109910466193e-05, "loss": 0.6141, "step": 5440 }, { "epoch": 0.06309915250312602, "grad_norm": 1.9143738746643066, "learning_rate": 8.411546773695586e-05, "loss": 0.7281, "step": 5450 }, { "epoch": 0.06321493076459964, "grad_norm": 2.176298141479492, "learning_rate": 8.426983636924978e-05, "loss": 0.7207, "step": 5460 }, { "epoch": 0.06333070902607327, "grad_norm": 1.8675295114517212, "learning_rate": 8.442420500154368e-05, "loss": 0.6465, "step": 5470 }, { "epoch": 0.06344648728754688, "grad_norm": 1.937580943107605, "learning_rate": 8.45785736338376e-05, "loss": 0.6902, "step": 5480 }, { "epoch": 0.06356226554902052, "grad_norm": 2.13000750541687, "learning_rate": 8.473294226613152e-05, "loss": 0.7225, "step": 5490 }, { "epoch": 0.06367804381049415, "grad_norm": 2.099123477935791, "learning_rate": 8.488731089842544e-05, "loss": 0.6857, "step": 5500 }, { "epoch": 0.06379382207196777, "grad_norm": 1.4179965257644653, "learning_rate": 8.504167953071936e-05, "loss": 0.6895, "step": 5510 }, { "epoch": 0.0639096003334414, "grad_norm": 1.969341516494751, "learning_rate": 8.519604816301328e-05, "loss": 0.7304, "step": 5520 }, { "epoch": 0.06402537859491501, "grad_norm": 1.8461629152297974, "learning_rate": 8.53504167953072e-05, "loss": 0.7309, "step": 5530 }, { "epoch": 0.06414115685638865, "grad_norm": 1.9074101448059082, "learning_rate": 8.550478542760112e-05, "loss": 0.6792, "step": 5540 }, { "epoch": 0.06425693511786228, "grad_norm": 1.8870418071746826, "learning_rate": 8.565915405989504e-05, "loss": 0.7239, "step": 5550 }, { "epoch": 0.0643727133793359, "grad_norm": 2.2057502269744873, "learning_rate": 8.581352269218896e-05, "loss": 0.6806, "step": 5560 }, { "epoch": 0.06448849164080953, "grad_norm": 1.8167786598205566, "learning_rate": 8.596789132448287e-05, "loss": 0.7036, "step": 5570 }, { "epoch": 0.06460426990228314, "grad_norm": 2.0966055393218994, "learning_rate": 8.612225995677678e-05, "loss": 0.7306, "step": 5580 }, { "epoch": 0.06472004816375677, "grad_norm": 2.016758680343628, "learning_rate": 8.627662858907071e-05, "loss": 0.7188, "step": 5590 }, { "epoch": 0.0648358264252304, "grad_norm": 1.7833069562911987, "learning_rate": 8.643099722136462e-05, "loss": 0.6754, "step": 5600 }, { "epoch": 0.06495160468670402, "grad_norm": 1.8291857242584229, "learning_rate": 8.658536585365854e-05, "loss": 0.7223, "step": 5610 }, { "epoch": 0.06506738294817765, "grad_norm": 2.064657688140869, "learning_rate": 8.673973448595246e-05, "loss": 0.6898, "step": 5620 }, { "epoch": 0.06518316120965127, "grad_norm": 2.343154191970825, "learning_rate": 8.689410311824638e-05, "loss": 0.7121, "step": 5630 }, { "epoch": 0.0652989394711249, "grad_norm": 2.1844403743743896, "learning_rate": 8.70484717505403e-05, "loss": 0.7176, "step": 5640 }, { "epoch": 0.06541471773259852, "grad_norm": 2.175757646560669, "learning_rate": 8.720284038283421e-05, "loss": 0.6757, "step": 5650 }, { "epoch": 0.06553049599407215, "grad_norm": 2.3068065643310547, "learning_rate": 8.735720901512813e-05, "loss": 0.6628, "step": 5660 }, { "epoch": 0.06564627425554578, "grad_norm": 1.8900717496871948, "learning_rate": 8.751157764742205e-05, "loss": 0.7111, "step": 5670 }, { "epoch": 0.0657620525170194, "grad_norm": 1.975999355316162, "learning_rate": 8.766594627971597e-05, "loss": 0.6405, "step": 5680 }, { "epoch": 0.06587783077849303, "grad_norm": 2.3730521202087402, "learning_rate": 8.782031491200988e-05, "loss": 0.6701, "step": 5690 }, { "epoch": 0.06599360903996665, "grad_norm": 1.9095561504364014, "learning_rate": 8.797468354430381e-05, "loss": 0.6625, "step": 5700 }, { "epoch": 0.06610938730144028, "grad_norm": 1.8720043897628784, "learning_rate": 8.812905217659771e-05, "loss": 0.681, "step": 5710 }, { "epoch": 0.06622516556291391, "grad_norm": 2.0537919998168945, "learning_rate": 8.828342080889163e-05, "loss": 0.6937, "step": 5720 }, { "epoch": 0.06634094382438753, "grad_norm": 1.5734232664108276, "learning_rate": 8.843778944118555e-05, "loss": 0.7001, "step": 5730 }, { "epoch": 0.06645672208586116, "grad_norm": 2.167491912841797, "learning_rate": 8.859215807347947e-05, "loss": 0.6348, "step": 5740 }, { "epoch": 0.06657250034733478, "grad_norm": 1.6173847913742065, "learning_rate": 8.874652670577339e-05, "loss": 0.7171, "step": 5750 }, { "epoch": 0.06668827860880841, "grad_norm": 2.2063612937927246, "learning_rate": 8.890089533806731e-05, "loss": 0.6922, "step": 5760 }, { "epoch": 0.06680405687028204, "grad_norm": 2.069704294204712, "learning_rate": 8.905526397036123e-05, "loss": 0.6679, "step": 5770 }, { "epoch": 0.06691983513175566, "grad_norm": 1.8291423320770264, "learning_rate": 8.920963260265515e-05, "loss": 0.6771, "step": 5780 }, { "epoch": 0.06703561339322929, "grad_norm": 1.96626877784729, "learning_rate": 8.936400123494905e-05, "loss": 0.6323, "step": 5790 }, { "epoch": 0.06715139165470291, "grad_norm": 1.854010820388794, "learning_rate": 8.951836986724299e-05, "loss": 0.6628, "step": 5800 }, { "epoch": 0.06726716991617654, "grad_norm": 2.073439121246338, "learning_rate": 8.96727384995369e-05, "loss": 0.7042, "step": 5810 }, { "epoch": 0.06738294817765017, "grad_norm": 1.8596131801605225, "learning_rate": 8.982710713183081e-05, "loss": 0.656, "step": 5820 }, { "epoch": 0.06749872643912379, "grad_norm": 1.978877067565918, "learning_rate": 8.998147576412473e-05, "loss": 0.6626, "step": 5830 }, { "epoch": 0.06761450470059742, "grad_norm": 1.5365252494812012, "learning_rate": 9.013584439641866e-05, "loss": 0.7086, "step": 5840 }, { "epoch": 0.06773028296207104, "grad_norm": 1.7259769439697266, "learning_rate": 9.029021302871257e-05, "loss": 0.6991, "step": 5850 }, { "epoch": 0.06784606122354467, "grad_norm": 1.5910617113113403, "learning_rate": 9.044458166100649e-05, "loss": 0.6643, "step": 5860 }, { "epoch": 0.0679618394850183, "grad_norm": 2.133162498474121, "learning_rate": 9.05989502933004e-05, "loss": 0.6835, "step": 5870 }, { "epoch": 0.06807761774649192, "grad_norm": 1.6416977643966675, "learning_rate": 9.075331892559432e-05, "loss": 0.6663, "step": 5880 }, { "epoch": 0.06819339600796555, "grad_norm": 1.7024892568588257, "learning_rate": 9.090768755788824e-05, "loss": 0.7164, "step": 5890 }, { "epoch": 0.06830917426943917, "grad_norm": 2.0171029567718506, "learning_rate": 9.106205619018215e-05, "loss": 0.683, "step": 5900 }, { "epoch": 0.0684249525309128, "grad_norm": 1.9070229530334473, "learning_rate": 9.121642482247608e-05, "loss": 0.6946, "step": 5910 }, { "epoch": 0.06854073079238643, "grad_norm": 1.8838920593261719, "learning_rate": 9.137079345477e-05, "loss": 0.6907, "step": 5920 }, { "epoch": 0.06865650905386005, "grad_norm": 1.930190086364746, "learning_rate": 9.15251620870639e-05, "loss": 0.6846, "step": 5930 }, { "epoch": 0.06877228731533368, "grad_norm": 2.060683012008667, "learning_rate": 9.167953071935783e-05, "loss": 0.6526, "step": 5940 }, { "epoch": 0.0688880655768073, "grad_norm": 2.043300151824951, "learning_rate": 9.183389935165176e-05, "loss": 0.6677, "step": 5950 }, { "epoch": 0.06900384383828093, "grad_norm": 1.9208158254623413, "learning_rate": 9.198826798394566e-05, "loss": 0.6815, "step": 5960 }, { "epoch": 0.06911962209975454, "grad_norm": 1.7970118522644043, "learning_rate": 9.214263661623958e-05, "loss": 0.665, "step": 5970 }, { "epoch": 0.06923540036122817, "grad_norm": 1.8869653940200806, "learning_rate": 9.22970052485335e-05, "loss": 0.6529, "step": 5980 }, { "epoch": 0.0693511786227018, "grad_norm": 1.8884750604629517, "learning_rate": 9.245137388082742e-05, "loss": 0.6602, "step": 5990 }, { "epoch": 0.06946695688417542, "grad_norm": 1.7585941553115845, "learning_rate": 9.260574251312134e-05, "loss": 0.7157, "step": 6000 }, { "epoch": 0.06946695688417542, "eval_chrf": 77.35801631389266, "eval_loss": 1.0322370529174805, "eval_runtime": 346.6588, "eval_samples_per_second": 0.288, "eval_steps_per_second": 0.012, "step": 6000 }, { "epoch": 0.06958273514564906, "grad_norm": 1.3285300731658936, "learning_rate": 9.276011114541525e-05, "loss": 0.6524, "step": 6010 }, { "epoch": 0.06969851340712267, "grad_norm": 1.652048945426941, "learning_rate": 9.291447977770918e-05, "loss": 0.6673, "step": 6020 }, { "epoch": 0.0698142916685963, "grad_norm": 2.03122615814209, "learning_rate": 9.30688484100031e-05, "loss": 0.7098, "step": 6030 }, { "epoch": 0.06993006993006994, "grad_norm": 2.149397373199463, "learning_rate": 9.3223217042297e-05, "loss": 0.6784, "step": 6040 }, { "epoch": 0.07004584819154355, "grad_norm": 1.895490050315857, "learning_rate": 9.337758567459093e-05, "loss": 0.6849, "step": 6050 }, { "epoch": 0.07016162645301718, "grad_norm": 1.8744556903839111, "learning_rate": 9.353195430688485e-05, "loss": 0.6621, "step": 6060 }, { "epoch": 0.0702774047144908, "grad_norm": 1.6119526624679565, "learning_rate": 9.368632293917876e-05, "loss": 0.6741, "step": 6070 }, { "epoch": 0.07039318297596443, "grad_norm": 1.7818323373794556, "learning_rate": 9.384069157147268e-05, "loss": 0.676, "step": 6080 }, { "epoch": 0.07050896123743806, "grad_norm": 2.0417943000793457, "learning_rate": 9.39950602037666e-05, "loss": 0.6644, "step": 6090 }, { "epoch": 0.07062473949891168, "grad_norm": 1.7990567684173584, "learning_rate": 9.414942883606052e-05, "loss": 0.6457, "step": 6100 }, { "epoch": 0.07074051776038531, "grad_norm": 2.0464746952056885, "learning_rate": 9.430379746835444e-05, "loss": 0.7082, "step": 6110 }, { "epoch": 0.07085629602185893, "grad_norm": 1.8701130151748657, "learning_rate": 9.445816610064835e-05, "loss": 0.6289, "step": 6120 }, { "epoch": 0.07097207428333256, "grad_norm": 1.685515284538269, "learning_rate": 9.461253473294227e-05, "loss": 0.658, "step": 6130 }, { "epoch": 0.0710878525448062, "grad_norm": 1.3846675157546997, "learning_rate": 9.476690336523619e-05, "loss": 0.6543, "step": 6140 }, { "epoch": 0.07120363080627981, "grad_norm": 1.9423093795776367, "learning_rate": 9.49212719975301e-05, "loss": 0.6854, "step": 6150 }, { "epoch": 0.07131940906775344, "grad_norm": 2.0019657611846924, "learning_rate": 9.507564062982403e-05, "loss": 0.633, "step": 6160 }, { "epoch": 0.07143518732922706, "grad_norm": 1.8133070468902588, "learning_rate": 9.523000926211795e-05, "loss": 0.6293, "step": 6170 }, { "epoch": 0.07155096559070069, "grad_norm": 1.6339232921600342, "learning_rate": 9.538437789441186e-05, "loss": 0.6923, "step": 6180 }, { "epoch": 0.07166674385217432, "grad_norm": 1.6258397102355957, "learning_rate": 9.553874652670577e-05, "loss": 0.6674, "step": 6190 }, { "epoch": 0.07178252211364794, "grad_norm": 1.7933048009872437, "learning_rate": 9.56931151589997e-05, "loss": 0.676, "step": 6200 }, { "epoch": 0.07189830037512157, "grad_norm": 1.7312748432159424, "learning_rate": 9.584748379129361e-05, "loss": 0.6616, "step": 6210 }, { "epoch": 0.07201407863659519, "grad_norm": 1.8328841924667358, "learning_rate": 9.600185242358753e-05, "loss": 0.6387, "step": 6220 }, { "epoch": 0.07212985689806882, "grad_norm": 1.982265591621399, "learning_rate": 9.615622105588145e-05, "loss": 0.6548, "step": 6230 }, { "epoch": 0.07224563515954245, "grad_norm": 1.8598612546920776, "learning_rate": 9.631058968817537e-05, "loss": 0.6652, "step": 6240 }, { "epoch": 0.07236141342101607, "grad_norm": 1.7716618776321411, "learning_rate": 9.646495832046929e-05, "loss": 0.6585, "step": 6250 }, { "epoch": 0.0724771916824897, "grad_norm": 1.708778738975525, "learning_rate": 9.66193269527632e-05, "loss": 0.6569, "step": 6260 }, { "epoch": 0.07259296994396332, "grad_norm": 1.8234045505523682, "learning_rate": 9.677369558505713e-05, "loss": 0.6378, "step": 6270 }, { "epoch": 0.07270874820543695, "grad_norm": 1.9949054718017578, "learning_rate": 9.692806421735103e-05, "loss": 0.6505, "step": 6280 }, { "epoch": 0.07282452646691057, "grad_norm": 2.0804336071014404, "learning_rate": 9.708243284964495e-05, "loss": 0.6193, "step": 6290 }, { "epoch": 0.0729403047283842, "grad_norm": 1.501078486442566, "learning_rate": 9.723680148193888e-05, "loss": 0.669, "step": 6300 }, { "epoch": 0.07305608298985783, "grad_norm": 1.410736083984375, "learning_rate": 9.739117011423279e-05, "loss": 0.7089, "step": 6310 }, { "epoch": 0.07317186125133145, "grad_norm": 1.65840744972229, "learning_rate": 9.754553874652671e-05, "loss": 0.686, "step": 6320 }, { "epoch": 0.07328763951280508, "grad_norm": 1.8569484949111938, "learning_rate": 9.769990737882063e-05, "loss": 0.6426, "step": 6330 }, { "epoch": 0.0734034177742787, "grad_norm": 1.770147442817688, "learning_rate": 9.785427601111455e-05, "loss": 0.6409, "step": 6340 }, { "epoch": 0.07351919603575233, "grad_norm": 1.8238513469696045, "learning_rate": 9.800864464340847e-05, "loss": 0.6453, "step": 6350 }, { "epoch": 0.07363497429722596, "grad_norm": 1.641202688217163, "learning_rate": 9.816301327570238e-05, "loss": 0.6341, "step": 6360 }, { "epoch": 0.07375075255869958, "grad_norm": 1.791733741760254, "learning_rate": 9.83173819079963e-05, "loss": 0.6288, "step": 6370 }, { "epoch": 0.0738665308201732, "grad_norm": 1.7087746858596802, "learning_rate": 9.847175054029022e-05, "loss": 0.6619, "step": 6380 }, { "epoch": 0.07398230908164682, "grad_norm": 1.6210119724273682, "learning_rate": 9.862611917258413e-05, "loss": 0.6911, "step": 6390 }, { "epoch": 0.07409808734312046, "grad_norm": 1.6889116764068604, "learning_rate": 9.878048780487805e-05, "loss": 0.6798, "step": 6400 }, { "epoch": 0.07421386560459409, "grad_norm": 1.9681158065795898, "learning_rate": 9.893485643717198e-05, "loss": 0.6133, "step": 6410 }, { "epoch": 0.0743296438660677, "grad_norm": 1.7416598796844482, "learning_rate": 9.908922506946589e-05, "loss": 0.6307, "step": 6420 }, { "epoch": 0.07444542212754134, "grad_norm": 1.6089060306549072, "learning_rate": 9.92435937017598e-05, "loss": 0.6464, "step": 6430 }, { "epoch": 0.07456120038901495, "grad_norm": 1.7866533994674683, "learning_rate": 9.939796233405372e-05, "loss": 0.6392, "step": 6440 }, { "epoch": 0.07467697865048858, "grad_norm": 1.6842327117919922, "learning_rate": 9.955233096634764e-05, "loss": 0.6453, "step": 6450 }, { "epoch": 0.07479275691196222, "grad_norm": 1.7159595489501953, "learning_rate": 9.970669959864156e-05, "loss": 0.6356, "step": 6460 }, { "epoch": 0.07490853517343583, "grad_norm": 1.9100960493087769, "learning_rate": 9.986106823093548e-05, "loss": 0.6543, "step": 6470 }, { "epoch": 0.07502431343490946, "grad_norm": 1.9316266775131226, "learning_rate": 0.0001000154368632294, "loss": 0.6438, "step": 6480 }, { "epoch": 0.07514009169638308, "grad_norm": 1.6655845642089844, "learning_rate": 0.0001001698054955233, "loss": 0.6288, "step": 6490 }, { "epoch": 0.07525586995785671, "grad_norm": 1.2088539600372314, "learning_rate": 0.00010032417412781722, "loss": 0.6163, "step": 6500 }, { "epoch": 0.07537164821933034, "grad_norm": 1.6582393646240234, "learning_rate": 0.00010047854276011114, "loss": 0.6158, "step": 6510 }, { "epoch": 0.07548742648080396, "grad_norm": 1.978683352470398, "learning_rate": 0.00010063291139240508, "loss": 0.6355, "step": 6520 }, { "epoch": 0.0756032047422776, "grad_norm": 1.8670979738235474, "learning_rate": 0.000100787280024699, "loss": 0.6742, "step": 6530 }, { "epoch": 0.07571898300375121, "grad_norm": 1.7858290672302246, "learning_rate": 0.00010094164865699289, "loss": 0.6186, "step": 6540 }, { "epoch": 0.07583476126522484, "grad_norm": 1.8994193077087402, "learning_rate": 0.00010109601728928682, "loss": 0.6253, "step": 6550 }, { "epoch": 0.07595053952669847, "grad_norm": 1.5134297609329224, "learning_rate": 0.00010125038592158074, "loss": 0.6484, "step": 6560 }, { "epoch": 0.07606631778817209, "grad_norm": 1.7415775060653687, "learning_rate": 0.00010140475455387466, "loss": 0.6594, "step": 6570 }, { "epoch": 0.07618209604964572, "grad_norm": 1.5696642398834229, "learning_rate": 0.00010155912318616858, "loss": 0.671, "step": 6580 }, { "epoch": 0.07629787431111934, "grad_norm": 1.5940064191818237, "learning_rate": 0.00010171349181846251, "loss": 0.6236, "step": 6590 }, { "epoch": 0.07641365257259297, "grad_norm": 1.937171220779419, "learning_rate": 0.0001018678604507564, "loss": 0.6295, "step": 6600 }, { "epoch": 0.07652943083406659, "grad_norm": 1.5327917337417603, "learning_rate": 0.00010202222908305032, "loss": 0.6579, "step": 6610 }, { "epoch": 0.07664520909554022, "grad_norm": 1.7471036911010742, "learning_rate": 0.00010217659771534425, "loss": 0.6712, "step": 6620 }, { "epoch": 0.07676098735701385, "grad_norm": 1.5663530826568604, "learning_rate": 0.00010233096634763817, "loss": 0.6341, "step": 6630 }, { "epoch": 0.07687676561848747, "grad_norm": 1.6636271476745605, "learning_rate": 0.00010248533497993209, "loss": 0.6562, "step": 6640 }, { "epoch": 0.0769925438799611, "grad_norm": 1.8766775131225586, "learning_rate": 0.000102639703612226, "loss": 0.6055, "step": 6650 }, { "epoch": 0.07710832214143472, "grad_norm": 1.5774224996566772, "learning_rate": 0.00010279407224451992, "loss": 0.6323, "step": 6660 }, { "epoch": 0.07722410040290835, "grad_norm": 1.676321268081665, "learning_rate": 0.00010294844087681383, "loss": 0.6477, "step": 6670 }, { "epoch": 0.07733987866438198, "grad_norm": 1.6336504220962524, "learning_rate": 0.00010310280950910775, "loss": 0.636, "step": 6680 }, { "epoch": 0.0774556569258556, "grad_norm": 1.5671923160552979, "learning_rate": 0.00010325717814140167, "loss": 0.6283, "step": 6690 }, { "epoch": 0.07757143518732923, "grad_norm": 2.129922866821289, "learning_rate": 0.0001034115467736956, "loss": 0.6285, "step": 6700 }, { "epoch": 0.07768721344880285, "grad_norm": 1.6730690002441406, "learning_rate": 0.0001035659154059895, "loss": 0.6697, "step": 6710 }, { "epoch": 0.07780299171027648, "grad_norm": 1.8635300397872925, "learning_rate": 0.00010372028403828342, "loss": 0.6522, "step": 6720 }, { "epoch": 0.07791876997175011, "grad_norm": 1.6877497434616089, "learning_rate": 0.00010387465267057735, "loss": 0.6962, "step": 6730 }, { "epoch": 0.07803454823322373, "grad_norm": 1.301317572593689, "learning_rate": 0.00010402902130287127, "loss": 0.6564, "step": 6740 }, { "epoch": 0.07815032649469736, "grad_norm": 1.2891700267791748, "learning_rate": 0.00010418338993516519, "loss": 0.6255, "step": 6750 }, { "epoch": 0.07826610475617098, "grad_norm": 1.6739258766174316, "learning_rate": 0.00010433775856745909, "loss": 0.6477, "step": 6760 }, { "epoch": 0.07838188301764461, "grad_norm": 1.4691791534423828, "learning_rate": 0.00010449212719975301, "loss": 0.626, "step": 6770 }, { "epoch": 0.07849766127911824, "grad_norm": 1.7983348369598389, "learning_rate": 0.00010464649583204693, "loss": 0.6411, "step": 6780 }, { "epoch": 0.07861343954059186, "grad_norm": 1.7828774452209473, "learning_rate": 0.00010480086446434085, "loss": 0.6596, "step": 6790 }, { "epoch": 0.07872921780206549, "grad_norm": 1.6177442073822021, "learning_rate": 0.00010495523309663478, "loss": 0.6568, "step": 6800 }, { "epoch": 0.0788449960635391, "grad_norm": 1.5620723962783813, "learning_rate": 0.0001051096017289287, "loss": 0.6078, "step": 6810 }, { "epoch": 0.07896077432501274, "grad_norm": 1.6690117120742798, "learning_rate": 0.00010526397036122259, "loss": 0.6515, "step": 6820 }, { "epoch": 0.07907655258648637, "grad_norm": 1.6047637462615967, "learning_rate": 0.00010541833899351653, "loss": 0.624, "step": 6830 }, { "epoch": 0.07919233084795999, "grad_norm": 1.6738725900650024, "learning_rate": 0.00010557270762581044, "loss": 0.6863, "step": 6840 }, { "epoch": 0.07930810910943362, "grad_norm": 1.6993305683135986, "learning_rate": 0.00010572707625810436, "loss": 0.6439, "step": 6850 }, { "epoch": 0.07942388737090723, "grad_norm": 1.7397960424423218, "learning_rate": 0.00010588144489039828, "loss": 0.6463, "step": 6860 }, { "epoch": 0.07953966563238087, "grad_norm": 1.7887377738952637, "learning_rate": 0.00010603581352269219, "loss": 0.634, "step": 6870 }, { "epoch": 0.0796554438938545, "grad_norm": 1.6251155138015747, "learning_rate": 0.00010619018215498611, "loss": 0.646, "step": 6880 }, { "epoch": 0.07977122215532811, "grad_norm": 1.9751814603805542, "learning_rate": 0.00010634455078728003, "loss": 0.592, "step": 6890 }, { "epoch": 0.07988700041680175, "grad_norm": 1.8172136545181274, "learning_rate": 0.00010649891941957394, "loss": 0.6708, "step": 6900 }, { "epoch": 0.08000277867827536, "grad_norm": 1.7657650709152222, "learning_rate": 0.00010665328805186788, "loss": 0.6044, "step": 6910 }, { "epoch": 0.080118556939749, "grad_norm": 1.883344292640686, "learning_rate": 0.0001068076566841618, "loss": 0.6487, "step": 6920 }, { "epoch": 0.08023433520122261, "grad_norm": 1.8151952028274536, "learning_rate": 0.00010696202531645569, "loss": 0.5774, "step": 6930 }, { "epoch": 0.08035011346269624, "grad_norm": 1.6558256149291992, "learning_rate": 0.00010711639394874962, "loss": 0.605, "step": 6940 }, { "epoch": 0.08046589172416987, "grad_norm": 1.4244712591171265, "learning_rate": 0.00010727076258104354, "loss": 0.6425, "step": 6950 }, { "epoch": 0.08058166998564349, "grad_norm": 1.6205344200134277, "learning_rate": 0.00010742513121333746, "loss": 0.6271, "step": 6960 }, { "epoch": 0.08069744824711712, "grad_norm": 1.7787622213363647, "learning_rate": 0.00010757949984563138, "loss": 0.6597, "step": 6970 }, { "epoch": 0.08081322650859074, "grad_norm": 1.4365336894989014, "learning_rate": 0.00010773386847792528, "loss": 0.6342, "step": 6980 }, { "epoch": 0.08092900477006437, "grad_norm": 1.8610031604766846, "learning_rate": 0.0001078882371102192, "loss": 0.6075, "step": 6990 }, { "epoch": 0.081044783031538, "grad_norm": 1.6070164442062378, "learning_rate": 0.00010804260574251312, "loss": 0.6092, "step": 7000 }, { "epoch": 0.081044783031538, "eval_chrf": 77.5078644779532, "eval_loss": 0.9781879186630249, "eval_runtime": 288.9861, "eval_samples_per_second": 0.346, "eval_steps_per_second": 0.014, "step": 7000 }, { "epoch": 0.08116056129301162, "grad_norm": 1.9760247468948364, "learning_rate": 0.00010819697437480704, "loss": 0.6607, "step": 7010 }, { "epoch": 0.08127633955448525, "grad_norm": 1.7072561979293823, "learning_rate": 0.00010835134300710097, "loss": 0.6457, "step": 7020 }, { "epoch": 0.08139211781595887, "grad_norm": 1.1423382759094238, "learning_rate": 0.00010850571163939487, "loss": 0.6077, "step": 7030 }, { "epoch": 0.0815078960774325, "grad_norm": 1.8201282024383545, "learning_rate": 0.00010866008027168878, "loss": 0.6134, "step": 7040 }, { "epoch": 0.08162367433890613, "grad_norm": 1.670220136642456, "learning_rate": 0.00010881444890398272, "loss": 0.618, "step": 7050 }, { "epoch": 0.08173945260037975, "grad_norm": 1.679893970489502, "learning_rate": 0.00010896881753627664, "loss": 0.6427, "step": 7060 }, { "epoch": 0.08185523086185338, "grad_norm": 1.770871639251709, "learning_rate": 0.00010912318616857056, "loss": 0.6009, "step": 7070 }, { "epoch": 0.081971009123327, "grad_norm": 1.300878643989563, "learning_rate": 0.00010927755480086447, "loss": 0.6365, "step": 7080 }, { "epoch": 0.08208678738480063, "grad_norm": 1.7360892295837402, "learning_rate": 0.00010943192343315838, "loss": 0.5799, "step": 7090 }, { "epoch": 0.08220256564627426, "grad_norm": 1.4603334665298462, "learning_rate": 0.0001095862920654523, "loss": 0.6345, "step": 7100 }, { "epoch": 0.08231834390774788, "grad_norm": 1.6318318843841553, "learning_rate": 0.00010974066069774622, "loss": 0.637, "step": 7110 }, { "epoch": 0.08243412216922151, "grad_norm": 1.1125714778900146, "learning_rate": 0.00010989502933004015, "loss": 0.5513, "step": 7120 }, { "epoch": 0.08254990043069513, "grad_norm": 1.4028571844100952, "learning_rate": 0.00011004939796233407, "loss": 0.6456, "step": 7130 }, { "epoch": 0.08266567869216876, "grad_norm": 1.685958981513977, "learning_rate": 0.00011020376659462796, "loss": 0.6268, "step": 7140 }, { "epoch": 0.08278145695364239, "grad_norm": 1.6421723365783691, "learning_rate": 0.0001103581352269219, "loss": 0.6307, "step": 7150 }, { "epoch": 0.08289723521511601, "grad_norm": 1.5831868648529053, "learning_rate": 0.00011051250385921581, "loss": 0.6581, "step": 7160 }, { "epoch": 0.08301301347658964, "grad_norm": 1.552307367324829, "learning_rate": 0.00011066687249150973, "loss": 0.6187, "step": 7170 }, { "epoch": 0.08312879173806326, "grad_norm": 1.8470345735549927, "learning_rate": 0.00011082124112380365, "loss": 0.6344, "step": 7180 }, { "epoch": 0.08324456999953689, "grad_norm": 1.4212722778320312, "learning_rate": 0.00011097560975609757, "loss": 0.6249, "step": 7190 }, { "epoch": 0.08336034826101052, "grad_norm": 1.5102179050445557, "learning_rate": 0.00011112997838839148, "loss": 0.6786, "step": 7200 }, { "epoch": 0.08347612652248414, "grad_norm": 1.4406299591064453, "learning_rate": 0.0001112843470206854, "loss": 0.634, "step": 7210 }, { "epoch": 0.08359190478395777, "grad_norm": 1.5112054347991943, "learning_rate": 0.00011143871565297931, "loss": 0.6184, "step": 7220 }, { "epoch": 0.08370768304543139, "grad_norm": 1.8691558837890625, "learning_rate": 0.00011159308428527325, "loss": 0.609, "step": 7230 }, { "epoch": 0.08382346130690502, "grad_norm": 1.6059640645980835, "learning_rate": 0.00011174745291756717, "loss": 0.6323, "step": 7240 }, { "epoch": 0.08393923956837863, "grad_norm": 1.6076180934906006, "learning_rate": 0.00011190182154986106, "loss": 0.6072, "step": 7250 }, { "epoch": 0.08405501782985227, "grad_norm": 1.495972752571106, "learning_rate": 0.00011205619018215499, "loss": 0.5986, "step": 7260 }, { "epoch": 0.0841707960913259, "grad_norm": 1.6854097843170166, "learning_rate": 0.00011221055881444891, "loss": 0.5849, "step": 7270 }, { "epoch": 0.08428657435279951, "grad_norm": 1.2864974737167358, "learning_rate": 0.00011236492744674283, "loss": 0.6189, "step": 7280 }, { "epoch": 0.08440235261427315, "grad_norm": 1.587349772453308, "learning_rate": 0.00011251929607903675, "loss": 0.6366, "step": 7290 }, { "epoch": 0.08451813087574676, "grad_norm": 1.5801544189453125, "learning_rate": 0.00011267366471133068, "loss": 0.5971, "step": 7300 }, { "epoch": 0.0846339091372204, "grad_norm": 1.655573844909668, "learning_rate": 0.00011282803334362457, "loss": 0.6089, "step": 7310 }, { "epoch": 0.08474968739869403, "grad_norm": 1.6956554651260376, "learning_rate": 0.00011298240197591849, "loss": 0.6097, "step": 7320 }, { "epoch": 0.08486546566016764, "grad_norm": 1.423643946647644, "learning_rate": 0.00011313677060821242, "loss": 0.65, "step": 7330 }, { "epoch": 0.08498124392164128, "grad_norm": 1.6977276802062988, "learning_rate": 0.00011329113924050634, "loss": 0.6087, "step": 7340 }, { "epoch": 0.08509702218311489, "grad_norm": 1.547904372215271, "learning_rate": 0.00011344550787280026, "loss": 0.6229, "step": 7350 }, { "epoch": 0.08521280044458852, "grad_norm": 1.526370644569397, "learning_rate": 0.00011359987650509417, "loss": 0.5783, "step": 7360 }, { "epoch": 0.08532857870606216, "grad_norm": 1.516898512840271, "learning_rate": 0.00011375424513738809, "loss": 0.6316, "step": 7370 }, { "epoch": 0.08544435696753577, "grad_norm": 1.36735999584198, "learning_rate": 0.000113908613769682, "loss": 0.5945, "step": 7380 }, { "epoch": 0.0855601352290094, "grad_norm": 1.5668267011642456, "learning_rate": 0.00011406298240197592, "loss": 0.6167, "step": 7390 }, { "epoch": 0.08567591349048302, "grad_norm": 1.4169343709945679, "learning_rate": 0.00011421735103426984, "loss": 0.65, "step": 7400 }, { "epoch": 0.08579169175195665, "grad_norm": 1.229204773902893, "learning_rate": 0.00011437171966656378, "loss": 0.6223, "step": 7410 }, { "epoch": 0.08590747001343028, "grad_norm": 1.7227059602737427, "learning_rate": 0.00011452608829885767, "loss": 0.6522, "step": 7420 }, { "epoch": 0.0860232482749039, "grad_norm": 1.2250372171401978, "learning_rate": 0.00011468045693115159, "loss": 0.5795, "step": 7430 }, { "epoch": 0.08613902653637753, "grad_norm": 1.1256134510040283, "learning_rate": 0.00011483482556344552, "loss": 0.577, "step": 7440 }, { "epoch": 0.08625480479785115, "grad_norm": 1.6174768209457397, "learning_rate": 0.00011498919419573944, "loss": 0.6206, "step": 7450 }, { "epoch": 0.08637058305932478, "grad_norm": 1.5744134187698364, "learning_rate": 0.00011514356282803336, "loss": 0.6299, "step": 7460 }, { "epoch": 0.08648636132079841, "grad_norm": 1.511730670928955, "learning_rate": 0.00011529793146032726, "loss": 0.6113, "step": 7470 }, { "epoch": 0.08660213958227203, "grad_norm": 1.0650959014892578, "learning_rate": 0.00011545230009262118, "loss": 0.6214, "step": 7480 }, { "epoch": 0.08671791784374566, "grad_norm": 1.4732906818389893, "learning_rate": 0.0001156066687249151, "loss": 0.6, "step": 7490 }, { "epoch": 0.08683369610521928, "grad_norm": 1.5763410329818726, "learning_rate": 0.00011576103735720902, "loss": 0.6316, "step": 7500 }, { "epoch": 0.08694947436669291, "grad_norm": 1.3677054643630981, "learning_rate": 0.00011591540598950294, "loss": 0.6364, "step": 7510 }, { "epoch": 0.08706525262816654, "grad_norm": 1.6371885538101196, "learning_rate": 0.00011606977462179687, "loss": 0.6186, "step": 7520 }, { "epoch": 0.08718103088964016, "grad_norm": 1.5173264741897583, "learning_rate": 0.00011622414325409076, "loss": 0.5895, "step": 7530 }, { "epoch": 0.08729680915111379, "grad_norm": 1.356893539428711, "learning_rate": 0.00011637851188638468, "loss": 0.5909, "step": 7540 }, { "epoch": 0.08741258741258741, "grad_norm": 1.4249447584152222, "learning_rate": 0.00011653288051867861, "loss": 0.6154, "step": 7550 }, { "epoch": 0.08752836567406104, "grad_norm": 1.709651231765747, "learning_rate": 0.00011668724915097253, "loss": 0.645, "step": 7560 }, { "epoch": 0.08764414393553466, "grad_norm": 1.1198627948760986, "learning_rate": 0.00011684161778326645, "loss": 0.6013, "step": 7570 }, { "epoch": 0.08775992219700829, "grad_norm": 1.6907554864883423, "learning_rate": 0.00011699598641556036, "loss": 0.5891, "step": 7580 }, { "epoch": 0.08787570045848192, "grad_norm": 1.4322824478149414, "learning_rate": 0.00011715035504785428, "loss": 0.6245, "step": 7590 }, { "epoch": 0.08799147871995554, "grad_norm": 1.5642427206039429, "learning_rate": 0.0001173047236801482, "loss": 0.5773, "step": 7600 }, { "epoch": 0.08810725698142917, "grad_norm": 1.4554600715637207, "learning_rate": 0.00011745909231244212, "loss": 0.5773, "step": 7610 }, { "epoch": 0.08822303524290279, "grad_norm": 1.5032012462615967, "learning_rate": 0.00011761346094473605, "loss": 0.6052, "step": 7620 }, { "epoch": 0.08833881350437642, "grad_norm": 1.4786330461502075, "learning_rate": 0.00011776782957702994, "loss": 0.6391, "step": 7630 }, { "epoch": 0.08845459176585005, "grad_norm": 1.6468702554702759, "learning_rate": 0.00011792219820932386, "loss": 0.5931, "step": 7640 }, { "epoch": 0.08857037002732367, "grad_norm": 1.1737236976623535, "learning_rate": 0.00011807656684161779, "loss": 0.5508, "step": 7650 }, { "epoch": 0.0886861482887973, "grad_norm": 1.3755773305892944, "learning_rate": 0.00011823093547391171, "loss": 0.617, "step": 7660 }, { "epoch": 0.08880192655027092, "grad_norm": 1.4953662157058716, "learning_rate": 0.00011838530410620563, "loss": 0.5913, "step": 7670 }, { "epoch": 0.08891770481174455, "grad_norm": 1.3691350221633911, "learning_rate": 0.00011853967273849955, "loss": 0.6014, "step": 7680 }, { "epoch": 0.08903348307321818, "grad_norm": 1.5081861019134521, "learning_rate": 0.00011869404137079345, "loss": 0.6098, "step": 7690 }, { "epoch": 0.0891492613346918, "grad_norm": 1.0612900257110596, "learning_rate": 0.00011884841000308737, "loss": 0.5549, "step": 7700 }, { "epoch": 0.08926503959616543, "grad_norm": 1.838680624961853, "learning_rate": 0.00011900277863538129, "loss": 0.5754, "step": 7710 }, { "epoch": 0.08938081785763904, "grad_norm": 1.3452513217926025, "learning_rate": 0.00011915714726767521, "loss": 0.5955, "step": 7720 }, { "epoch": 0.08949659611911268, "grad_norm": 1.6873469352722168, "learning_rate": 0.00011931151589996914, "loss": 0.643, "step": 7730 }, { "epoch": 0.08961237438058631, "grad_norm": 1.3446232080459595, "learning_rate": 0.00011946588453226304, "loss": 0.6297, "step": 7740 }, { "epoch": 0.08972815264205992, "grad_norm": 1.4106768369674683, "learning_rate": 0.00011962025316455696, "loss": 0.6031, "step": 7750 }, { "epoch": 0.08984393090353356, "grad_norm": 1.3622088432312012, "learning_rate": 0.00011977462179685089, "loss": 0.5842, "step": 7760 }, { "epoch": 0.08995970916500717, "grad_norm": 1.4048413038253784, "learning_rate": 0.00011992899042914481, "loss": 0.627, "step": 7770 }, { "epoch": 0.0900754874264808, "grad_norm": 1.5896952152252197, "learning_rate": 0.00012008335906143873, "loss": 0.5832, "step": 7780 }, { "epoch": 0.09019126568795444, "grad_norm": 1.556296706199646, "learning_rate": 0.00012023772769373264, "loss": 0.6067, "step": 7790 }, { "epoch": 0.09030704394942805, "grad_norm": 1.18021821975708, "learning_rate": 0.00012039209632602655, "loss": 0.5565, "step": 7800 }, { "epoch": 0.09042282221090168, "grad_norm": 1.2262533903121948, "learning_rate": 0.00012054646495832047, "loss": 0.5884, "step": 7810 }, { "epoch": 0.0905386004723753, "grad_norm": 1.611538052558899, "learning_rate": 0.00012070083359061439, "loss": 0.6222, "step": 7820 }, { "epoch": 0.09065437873384893, "grad_norm": 1.4492065906524658, "learning_rate": 0.00012085520222290832, "loss": 0.6462, "step": 7830 }, { "epoch": 0.09077015699532257, "grad_norm": 1.315789818763733, "learning_rate": 0.00012100957085520224, "loss": 0.6197, "step": 7840 }, { "epoch": 0.09088593525679618, "grad_norm": 1.4071003198623657, "learning_rate": 0.00012116393948749613, "loss": 0.6291, "step": 7850 }, { "epoch": 0.09100171351826981, "grad_norm": 1.6243146657943726, "learning_rate": 0.00012131830811979006, "loss": 0.6315, "step": 7860 }, { "epoch": 0.09111749177974343, "grad_norm": 1.271146535873413, "learning_rate": 0.00012147267675208398, "loss": 0.5774, "step": 7870 }, { "epoch": 0.09123327004121706, "grad_norm": 1.40945565700531, "learning_rate": 0.0001216270453843779, "loss": 0.5967, "step": 7880 }, { "epoch": 0.09134904830269068, "grad_norm": 1.4639146327972412, "learning_rate": 0.00012178141401667182, "loss": 0.6151, "step": 7890 }, { "epoch": 0.09146482656416431, "grad_norm": 1.2797602415084839, "learning_rate": 0.00012193578264896574, "loss": 0.6145, "step": 7900 }, { "epoch": 0.09158060482563794, "grad_norm": 1.627112865447998, "learning_rate": 0.00012209015128125965, "loss": 0.577, "step": 7910 }, { "epoch": 0.09169638308711156, "grad_norm": 1.6429121494293213, "learning_rate": 0.00012224451991355357, "loss": 0.6166, "step": 7920 }, { "epoch": 0.09181216134858519, "grad_norm": 1.3286457061767578, "learning_rate": 0.00012239888854584748, "loss": 0.6161, "step": 7930 }, { "epoch": 0.09192793961005881, "grad_norm": 1.244727373123169, "learning_rate": 0.0001225532571781414, "loss": 0.6118, "step": 7940 }, { "epoch": 0.09204371787153244, "grad_norm": 1.2672816514968872, "learning_rate": 0.00012270762581043532, "loss": 0.6022, "step": 7950 }, { "epoch": 0.09215949613300607, "grad_norm": 1.8414747714996338, "learning_rate": 0.00012286199444272924, "loss": 0.5904, "step": 7960 }, { "epoch": 0.09227527439447969, "grad_norm": 1.3384901285171509, "learning_rate": 0.00012301636307502316, "loss": 0.6023, "step": 7970 }, { "epoch": 0.09239105265595332, "grad_norm": 1.671437382698059, "learning_rate": 0.00012317073170731708, "loss": 0.6095, "step": 7980 }, { "epoch": 0.09250683091742694, "grad_norm": 1.3710498809814453, "learning_rate": 0.000123325100339611, "loss": 0.5638, "step": 7990 }, { "epoch": 0.09262260917890057, "grad_norm": 1.2797064781188965, "learning_rate": 0.00012347946897190492, "loss": 0.6445, "step": 8000 }, { "epoch": 0.09262260917890057, "eval_chrf": 81.30991015560114, "eval_loss": 0.9110472202301025, "eval_runtime": 196.7086, "eval_samples_per_second": 0.508, "eval_steps_per_second": 0.02, "step": 8000 }, { "epoch": 0.0927383874403742, "grad_norm": 1.5262354612350464, "learning_rate": 0.00012363383760419884, "loss": 0.5924, "step": 8010 }, { "epoch": 0.09285416570184782, "grad_norm": 1.577624797821045, "learning_rate": 0.00012378820623649276, "loss": 0.5941, "step": 8020 }, { "epoch": 0.09296994396332145, "grad_norm": 1.403977632522583, "learning_rate": 0.00012394257486878667, "loss": 0.5668, "step": 8030 }, { "epoch": 0.09308572222479507, "grad_norm": 1.437752366065979, "learning_rate": 0.0001240969435010806, "loss": 0.5817, "step": 8040 }, { "epoch": 0.0932015004862687, "grad_norm": 1.4250694513320923, "learning_rate": 0.0001242513121333745, "loss": 0.5788, "step": 8050 }, { "epoch": 0.09331727874774233, "grad_norm": 1.5299127101898193, "learning_rate": 0.00012440568076566843, "loss": 0.5923, "step": 8060 }, { "epoch": 0.09343305700921595, "grad_norm": 1.3042469024658203, "learning_rate": 0.00012456004939796232, "loss": 0.588, "step": 8070 }, { "epoch": 0.09354883527068958, "grad_norm": 1.6172361373901367, "learning_rate": 0.00012471441803025624, "loss": 0.6053, "step": 8080 }, { "epoch": 0.0936646135321632, "grad_norm": 1.2567100524902344, "learning_rate": 0.00012486878666255016, "loss": 0.606, "step": 8090 }, { "epoch": 0.09378039179363683, "grad_norm": 1.529083251953125, "learning_rate": 0.0001250231552948441, "loss": 0.5402, "step": 8100 }, { "epoch": 0.09389617005511046, "grad_norm": 1.569347858428955, "learning_rate": 0.00012517752392713803, "loss": 0.6213, "step": 8110 }, { "epoch": 0.09401194831658408, "grad_norm": 1.103323221206665, "learning_rate": 0.00012533189255943195, "loss": 0.5959, "step": 8120 }, { "epoch": 0.09412772657805771, "grad_norm": 1.4739731550216675, "learning_rate": 0.00012548626119172584, "loss": 0.6149, "step": 8130 }, { "epoch": 0.09424350483953133, "grad_norm": 1.3428549766540527, "learning_rate": 0.00012564062982401976, "loss": 0.5954, "step": 8140 }, { "epoch": 0.09435928310100496, "grad_norm": 1.268978238105774, "learning_rate": 0.00012579499845631368, "loss": 0.6771, "step": 8150 }, { "epoch": 0.09447506136247859, "grad_norm": 1.729616641998291, "learning_rate": 0.0001259493670886076, "loss": 0.6056, "step": 8160 }, { "epoch": 0.0945908396239522, "grad_norm": 1.1826121807098389, "learning_rate": 0.00012610373572090154, "loss": 0.5792, "step": 8170 }, { "epoch": 0.09470661788542584, "grad_norm": 1.5031476020812988, "learning_rate": 0.00012625810435319543, "loss": 0.6175, "step": 8180 }, { "epoch": 0.09482239614689945, "grad_norm": 1.440767526626587, "learning_rate": 0.00012641247298548935, "loss": 0.5999, "step": 8190 }, { "epoch": 0.09493817440837309, "grad_norm": 1.4809963703155518, "learning_rate": 0.00012656684161778327, "loss": 0.6238, "step": 8200 }, { "epoch": 0.0950539526698467, "grad_norm": 1.3183481693267822, "learning_rate": 0.0001267212102500772, "loss": 0.6007, "step": 8210 }, { "epoch": 0.09516973093132033, "grad_norm": 1.6722873449325562, "learning_rate": 0.0001268755788823711, "loss": 0.6092, "step": 8220 }, { "epoch": 0.09528550919279397, "grad_norm": 1.3660516738891602, "learning_rate": 0.00012702994751466503, "loss": 0.6316, "step": 8230 }, { "epoch": 0.09540128745426758, "grad_norm": 1.3543256521224976, "learning_rate": 0.00012718431614695895, "loss": 0.6008, "step": 8240 }, { "epoch": 0.09551706571574121, "grad_norm": 1.416183352470398, "learning_rate": 0.00012733868477925287, "loss": 0.5911, "step": 8250 }, { "epoch": 0.09563284397721483, "grad_norm": 1.6643449068069458, "learning_rate": 0.00012749305341154679, "loss": 0.5987, "step": 8260 }, { "epoch": 0.09574862223868846, "grad_norm": 1.0764458179473877, "learning_rate": 0.0001276474220438407, "loss": 0.5964, "step": 8270 }, { "epoch": 0.0958644005001621, "grad_norm": 0.9228554964065552, "learning_rate": 0.00012780179067613462, "loss": 0.5634, "step": 8280 }, { "epoch": 0.09598017876163571, "grad_norm": 1.8675537109375, "learning_rate": 0.00012795615930842852, "loss": 0.5647, "step": 8290 }, { "epoch": 0.09609595702310934, "grad_norm": 1.2835443019866943, "learning_rate": 0.00012811052794072243, "loss": 0.6156, "step": 8300 }, { "epoch": 0.09621173528458296, "grad_norm": 1.5996533632278442, "learning_rate": 0.00012826489657301638, "loss": 0.5868, "step": 8310 }, { "epoch": 0.09632751354605659, "grad_norm": 1.4104992151260376, "learning_rate": 0.0001284192652053103, "loss": 0.5801, "step": 8320 }, { "epoch": 0.09644329180753022, "grad_norm": 1.3446601629257202, "learning_rate": 0.00012857363383760422, "loss": 0.5937, "step": 8330 }, { "epoch": 0.09655907006900384, "grad_norm": 1.3941295146942139, "learning_rate": 0.0001287280024698981, "loss": 0.63, "step": 8340 }, { "epoch": 0.09667484833047747, "grad_norm": 1.4139586687088013, "learning_rate": 0.00012888237110219203, "loss": 0.5843, "step": 8350 }, { "epoch": 0.09679062659195109, "grad_norm": 1.4007705450057983, "learning_rate": 0.00012903673973448595, "loss": 0.6374, "step": 8360 }, { "epoch": 0.09690640485342472, "grad_norm": 1.3027223348617554, "learning_rate": 0.00012919110836677987, "loss": 0.5998, "step": 8370 }, { "epoch": 0.09702218311489835, "grad_norm": 1.2569539546966553, "learning_rate": 0.0001293454769990738, "loss": 0.5713, "step": 8380 }, { "epoch": 0.09713796137637197, "grad_norm": 1.8045423030853271, "learning_rate": 0.00012949984563136773, "loss": 0.5923, "step": 8390 }, { "epoch": 0.0972537396378456, "grad_norm": 1.31775963306427, "learning_rate": 0.00012965421426366163, "loss": 0.5698, "step": 8400 }, { "epoch": 0.09736951789931922, "grad_norm": 1.3272954225540161, "learning_rate": 0.00012980858289595554, "loss": 0.6066, "step": 8410 }, { "epoch": 0.09748529616079285, "grad_norm": 1.4235128164291382, "learning_rate": 0.00012996295152824946, "loss": 0.6103, "step": 8420 }, { "epoch": 0.09760107442226648, "grad_norm": 1.5209492444992065, "learning_rate": 0.00013011732016054338, "loss": 0.5898, "step": 8430 }, { "epoch": 0.0977168526837401, "grad_norm": 1.377868890762329, "learning_rate": 0.0001302716887928373, "loss": 0.5395, "step": 8440 }, { "epoch": 0.09783263094521373, "grad_norm": 1.3593305349349976, "learning_rate": 0.00013042605742513122, "loss": 0.5797, "step": 8450 }, { "epoch": 0.09794840920668735, "grad_norm": 1.4212123155593872, "learning_rate": 0.00013058042605742514, "loss": 0.5823, "step": 8460 }, { "epoch": 0.09806418746816098, "grad_norm": 1.3460135459899902, "learning_rate": 0.00013073479468971906, "loss": 0.5999, "step": 8470 }, { "epoch": 0.09817996572963461, "grad_norm": 1.0044142007827759, "learning_rate": 0.00013088916332201298, "loss": 0.5579, "step": 8480 }, { "epoch": 0.09829574399110823, "grad_norm": 1.573012113571167, "learning_rate": 0.0001310435319543069, "loss": 0.5778, "step": 8490 }, { "epoch": 0.09841152225258186, "grad_norm": 1.374923586845398, "learning_rate": 0.00013119790058660082, "loss": 0.5413, "step": 8500 }, { "epoch": 0.09852730051405548, "grad_norm": 1.5594185590744019, "learning_rate": 0.0001313522692188947, "loss": 0.602, "step": 8510 }, { "epoch": 0.09864307877552911, "grad_norm": 1.262014627456665, "learning_rate": 0.00013150663785118865, "loss": 0.5776, "step": 8520 }, { "epoch": 0.09875885703700273, "grad_norm": 1.3214367628097534, "learning_rate": 0.00013166100648348257, "loss": 0.5749, "step": 8530 }, { "epoch": 0.09887463529847636, "grad_norm": 1.3003894090652466, "learning_rate": 0.0001318153751157765, "loss": 0.5972, "step": 8540 }, { "epoch": 0.09899041355994999, "grad_norm": 1.3032628297805786, "learning_rate": 0.0001319697437480704, "loss": 0.6025, "step": 8550 }, { "epoch": 0.0991061918214236, "grad_norm": 1.1279217004776, "learning_rate": 0.0001321241123803643, "loss": 0.6171, "step": 8560 }, { "epoch": 0.09922197008289724, "grad_norm": 1.5024949312210083, "learning_rate": 0.00013227848101265822, "loss": 0.5989, "step": 8570 }, { "epoch": 0.09933774834437085, "grad_norm": 1.2569438219070435, "learning_rate": 0.00013243284964495214, "loss": 0.5829, "step": 8580 }, { "epoch": 0.09945352660584449, "grad_norm": 2.529432535171509, "learning_rate": 0.00013258721827724606, "loss": 0.5594, "step": 8590 }, { "epoch": 0.09956930486731812, "grad_norm": 1.4286943674087524, "learning_rate": 0.00013274158690954, "loss": 0.5716, "step": 8600 }, { "epoch": 0.09968508312879173, "grad_norm": 0.9087528586387634, "learning_rate": 0.00013289595554183393, "loss": 0.5778, "step": 8610 }, { "epoch": 0.09980086139026537, "grad_norm": 1.2014377117156982, "learning_rate": 0.00013305032417412782, "loss": 0.5037, "step": 8620 }, { "epoch": 0.09991663965173898, "grad_norm": 0.9955915808677673, "learning_rate": 0.00013320469280642174, "loss": 0.5882, "step": 8630 }, { "epoch": 0.10003241791321262, "grad_norm": 1.3380610942840576, "learning_rate": 0.00013335906143871566, "loss": 0.5788, "step": 8640 }, { "epoch": 0.10014819617468625, "grad_norm": 1.39886474609375, "learning_rate": 0.00013351343007100957, "loss": 0.5603, "step": 8650 }, { "epoch": 0.10026397443615986, "grad_norm": 1.0779165029525757, "learning_rate": 0.0001336677987033035, "loss": 0.5589, "step": 8660 }, { "epoch": 0.1003797526976335, "grad_norm": 1.258846640586853, "learning_rate": 0.0001338221673355974, "loss": 0.5421, "step": 8670 }, { "epoch": 0.10049553095910711, "grad_norm": 1.3092535734176636, "learning_rate": 0.00013397653596789133, "loss": 0.6237, "step": 8680 }, { "epoch": 0.10061130922058074, "grad_norm": 1.2816115617752075, "learning_rate": 0.00013413090460018525, "loss": 0.6091, "step": 8690 }, { "epoch": 0.10072708748205438, "grad_norm": 1.2709742784500122, "learning_rate": 0.00013428527323247917, "loss": 0.5725, "step": 8700 }, { "epoch": 0.10084286574352799, "grad_norm": 0.8437705636024475, "learning_rate": 0.0001344396418647731, "loss": 0.5561, "step": 8710 }, { "epoch": 0.10095864400500162, "grad_norm": 1.5167697668075562, "learning_rate": 0.000134594010497067, "loss": 0.5879, "step": 8720 }, { "epoch": 0.10107442226647524, "grad_norm": 1.2363792657852173, "learning_rate": 0.00013474837912936093, "loss": 0.5552, "step": 8730 }, { "epoch": 0.10119020052794887, "grad_norm": 1.4121090173721313, "learning_rate": 0.00013490274776165485, "loss": 0.5799, "step": 8740 }, { "epoch": 0.1013059787894225, "grad_norm": 1.3154006004333496, "learning_rate": 0.00013505711639394876, "loss": 0.5816, "step": 8750 }, { "epoch": 0.10142175705089612, "grad_norm": 1.5440751314163208, "learning_rate": 0.00013521148502624268, "loss": 0.5653, "step": 8760 }, { "epoch": 0.10153753531236975, "grad_norm": 1.2971959114074707, "learning_rate": 0.0001353658536585366, "loss": 0.5651, "step": 8770 }, { "epoch": 0.10165331357384337, "grad_norm": 1.615846037864685, "learning_rate": 0.0001355202222908305, "loss": 0.6039, "step": 8780 }, { "epoch": 0.101769091835317, "grad_norm": 1.3674498796463013, "learning_rate": 0.00013567459092312441, "loss": 0.629, "step": 8790 }, { "epoch": 0.10188487009679063, "grad_norm": 1.279589056968689, "learning_rate": 0.00013582895955541833, "loss": 0.5647, "step": 8800 }, { "epoch": 0.10200064835826425, "grad_norm": 1.3363662958145142, "learning_rate": 0.00013598332818771228, "loss": 0.5757, "step": 8810 }, { "epoch": 0.10211642661973788, "grad_norm": 1.4299280643463135, "learning_rate": 0.0001361376968200062, "loss": 0.5597, "step": 8820 }, { "epoch": 0.1022322048812115, "grad_norm": 1.4442036151885986, "learning_rate": 0.0001362920654523001, "loss": 0.587, "step": 8830 }, { "epoch": 0.10234798314268513, "grad_norm": 1.5106290578842163, "learning_rate": 0.000136446434084594, "loss": 0.6177, "step": 8840 }, { "epoch": 0.10246376140415875, "grad_norm": 1.4563581943511963, "learning_rate": 0.00013660080271688793, "loss": 0.5966, "step": 8850 }, { "epoch": 0.10257953966563238, "grad_norm": 1.373263955116272, "learning_rate": 0.00013675517134918185, "loss": 0.594, "step": 8860 }, { "epoch": 0.10269531792710601, "grad_norm": 1.3267521858215332, "learning_rate": 0.00013690953998147577, "loss": 0.5384, "step": 8870 }, { "epoch": 0.10281109618857963, "grad_norm": 1.08661949634552, "learning_rate": 0.00013706390861376968, "loss": 0.59, "step": 8880 }, { "epoch": 0.10292687445005326, "grad_norm": 1.2311760187149048, "learning_rate": 0.0001372182772460636, "loss": 0.5724, "step": 8890 }, { "epoch": 0.10304265271152688, "grad_norm": 1.1044796705245972, "learning_rate": 0.00013737264587835752, "loss": 0.5803, "step": 8900 }, { "epoch": 0.10315843097300051, "grad_norm": 1.4755815267562866, "learning_rate": 0.00013752701451065144, "loss": 0.5938, "step": 8910 }, { "epoch": 0.10327420923447414, "grad_norm": 1.1818537712097168, "learning_rate": 0.00013768138314294536, "loss": 0.6255, "step": 8920 }, { "epoch": 0.10338998749594776, "grad_norm": 1.1751924753189087, "learning_rate": 0.00013783575177523928, "loss": 0.5437, "step": 8930 }, { "epoch": 0.10350576575742139, "grad_norm": 1.4293769598007202, "learning_rate": 0.00013799012040753317, "loss": 0.5531, "step": 8940 }, { "epoch": 0.103621544018895, "grad_norm": 1.4072513580322266, "learning_rate": 0.00013814448903982712, "loss": 0.5822, "step": 8950 }, { "epoch": 0.10373732228036864, "grad_norm": 1.3803318738937378, "learning_rate": 0.00013829885767212104, "loss": 0.5506, "step": 8960 }, { "epoch": 0.10385310054184227, "grad_norm": 1.2164582014083862, "learning_rate": 0.00013845322630441496, "loss": 0.6014, "step": 8970 }, { "epoch": 0.10396887880331589, "grad_norm": 1.150188684463501, "learning_rate": 0.00013860759493670888, "loss": 0.5452, "step": 8980 }, { "epoch": 0.10408465706478952, "grad_norm": 1.3069425821304321, "learning_rate": 0.0001387619635690028, "loss": 0.5472, "step": 8990 }, { "epoch": 0.10420043532626314, "grad_norm": 0.8681260347366333, "learning_rate": 0.00013891633220129669, "loss": 0.514, "step": 9000 }, { "epoch": 0.10420043532626314, "eval_chrf": 80.49578165416115, "eval_loss": 0.8683168888092041, "eval_runtime": 349.3705, "eval_samples_per_second": 0.286, "eval_steps_per_second": 0.011, "step": 9000 }, { "epoch": 0.10431621358773677, "grad_norm": 1.2877256870269775, "learning_rate": 0.0001390707008335906, "loss": 0.6079, "step": 9010 }, { "epoch": 0.1044319918492104, "grad_norm": 1.2810430526733398, "learning_rate": 0.00013922506946588455, "loss": 0.5683, "step": 9020 }, { "epoch": 0.10454777011068402, "grad_norm": 1.2778046131134033, "learning_rate": 0.00013937943809817847, "loss": 0.5905, "step": 9030 }, { "epoch": 0.10466354837215765, "grad_norm": 1.2640587091445923, "learning_rate": 0.0001395338067304724, "loss": 0.5385, "step": 9040 }, { "epoch": 0.10477932663363126, "grad_norm": 1.288314938545227, "learning_rate": 0.00013968817536276628, "loss": 0.5956, "step": 9050 }, { "epoch": 0.1048951048951049, "grad_norm": 1.3123701810836792, "learning_rate": 0.0001398425439950602, "loss": 0.5521, "step": 9060 }, { "epoch": 0.10501088315657853, "grad_norm": 1.1706702709197998, "learning_rate": 0.00013999691262735412, "loss": 0.5635, "step": 9070 }, { "epoch": 0.10512666141805214, "grad_norm": 1.2759759426116943, "learning_rate": 0.00014015128125964804, "loss": 0.5647, "step": 9080 }, { "epoch": 0.10524243967952578, "grad_norm": 1.2104543447494507, "learning_rate": 0.00014030564989194196, "loss": 0.5641, "step": 9090 }, { "epoch": 0.1053582179409994, "grad_norm": 1.4197536706924438, "learning_rate": 0.0001404600185242359, "loss": 0.608, "step": 9100 }, { "epoch": 0.10547399620247302, "grad_norm": 1.3618944883346558, "learning_rate": 0.0001406143871565298, "loss": 0.5961, "step": 9110 }, { "epoch": 0.10558977446394666, "grad_norm": 1.3979140520095825, "learning_rate": 0.00014076875578882371, "loss": 0.5534, "step": 9120 }, { "epoch": 0.10570555272542027, "grad_norm": 1.2438030242919922, "learning_rate": 0.00014092312442111763, "loss": 0.5645, "step": 9130 }, { "epoch": 0.1058213309868939, "grad_norm": 1.3422703742980957, "learning_rate": 0.00014107749305341155, "loss": 0.6022, "step": 9140 }, { "epoch": 0.10593710924836752, "grad_norm": 1.2845385074615479, "learning_rate": 0.00014123186168570547, "loss": 0.5796, "step": 9150 }, { "epoch": 0.10605288750984115, "grad_norm": 1.369936466217041, "learning_rate": 0.0001413862303179994, "loss": 0.6256, "step": 9160 }, { "epoch": 0.10616866577131477, "grad_norm": 1.415257215499878, "learning_rate": 0.0001415405989502933, "loss": 0.5833, "step": 9170 }, { "epoch": 0.1062844440327884, "grad_norm": 1.2637728452682495, "learning_rate": 0.00014169496758258723, "loss": 0.5591, "step": 9180 }, { "epoch": 0.10640022229426203, "grad_norm": 1.3961775302886963, "learning_rate": 0.00014184933621488115, "loss": 0.5871, "step": 9190 }, { "epoch": 0.10651600055573565, "grad_norm": 1.3229669332504272, "learning_rate": 0.00014200370484717507, "loss": 0.5524, "step": 9200 }, { "epoch": 0.10663177881720928, "grad_norm": 1.2066811323165894, "learning_rate": 0.00014215807347946899, "loss": 0.5421, "step": 9210 }, { "epoch": 0.1067475570786829, "grad_norm": 1.528618335723877, "learning_rate": 0.00014231244211176288, "loss": 0.5878, "step": 9220 }, { "epoch": 0.10686333534015653, "grad_norm": 1.3731118440628052, "learning_rate": 0.00014246681074405682, "loss": 0.5883, "step": 9230 }, { "epoch": 0.10697911360163016, "grad_norm": 1.430810570716858, "learning_rate": 0.00014262117937635074, "loss": 0.571, "step": 9240 }, { "epoch": 0.10709489186310378, "grad_norm": 1.2471226453781128, "learning_rate": 0.00014277554800864466, "loss": 0.5479, "step": 9250 }, { "epoch": 0.10721067012457741, "grad_norm": 1.3020845651626587, "learning_rate": 0.00014292991664093858, "loss": 0.5917, "step": 9260 }, { "epoch": 0.10732644838605103, "grad_norm": 1.3037395477294922, "learning_rate": 0.00014308428527323247, "loss": 0.5787, "step": 9270 }, { "epoch": 0.10744222664752466, "grad_norm": 1.2151628732681274, "learning_rate": 0.0001432386539055264, "loss": 0.5855, "step": 9280 }, { "epoch": 0.10755800490899829, "grad_norm": 1.2218369245529175, "learning_rate": 0.0001433930225378203, "loss": 0.5505, "step": 9290 }, { "epoch": 0.10767378317047191, "grad_norm": 1.1271525621414185, "learning_rate": 0.00014354739117011423, "loss": 0.5586, "step": 9300 }, { "epoch": 0.10778956143194554, "grad_norm": 1.2802969217300415, "learning_rate": 0.00014370175980240818, "loss": 0.5864, "step": 9310 }, { "epoch": 0.10790533969341916, "grad_norm": 1.2270424365997314, "learning_rate": 0.00014385612843470207, "loss": 0.5869, "step": 9320 }, { "epoch": 0.10802111795489279, "grad_norm": 1.2666281461715698, "learning_rate": 0.000144010497066996, "loss": 0.5723, "step": 9330 }, { "epoch": 0.10813689621636642, "grad_norm": 1.2750580310821533, "learning_rate": 0.0001441648656992899, "loss": 0.5624, "step": 9340 }, { "epoch": 0.10825267447784004, "grad_norm": 1.2398009300231934, "learning_rate": 0.00014431923433158383, "loss": 0.5702, "step": 9350 }, { "epoch": 0.10836845273931367, "grad_norm": 1.1327173709869385, "learning_rate": 0.00014447360296387774, "loss": 0.5369, "step": 9360 }, { "epoch": 0.10848423100078729, "grad_norm": 1.3858134746551514, "learning_rate": 0.00014462797159617166, "loss": 0.6502, "step": 9370 }, { "epoch": 0.10860000926226092, "grad_norm": 1.3318617343902588, "learning_rate": 0.00014478234022846558, "loss": 0.5877, "step": 9380 }, { "epoch": 0.10871578752373455, "grad_norm": 1.3926358222961426, "learning_rate": 0.0001449367088607595, "loss": 0.5974, "step": 9390 }, { "epoch": 0.10883156578520817, "grad_norm": 1.5860700607299805, "learning_rate": 0.00014509107749305342, "loss": 0.6003, "step": 9400 }, { "epoch": 0.1089473440466818, "grad_norm": 1.2389644384384155, "learning_rate": 0.00014524544612534734, "loss": 0.602, "step": 9410 }, { "epoch": 0.10906312230815542, "grad_norm": 1.2141090631484985, "learning_rate": 0.00014539981475764126, "loss": 0.537, "step": 9420 }, { "epoch": 0.10917890056962905, "grad_norm": 1.2273863554000854, "learning_rate": 0.00014555418338993515, "loss": 0.5828, "step": 9430 }, { "epoch": 0.10929467883110268, "grad_norm": 1.179559588432312, "learning_rate": 0.00014570855202222907, "loss": 0.6051, "step": 9440 }, { "epoch": 0.1094104570925763, "grad_norm": 1.3141292333602905, "learning_rate": 0.00014586292065452302, "loss": 0.5817, "step": 9450 }, { "epoch": 0.10952623535404993, "grad_norm": 1.328954815864563, "learning_rate": 0.00014601728928681694, "loss": 0.5679, "step": 9460 }, { "epoch": 0.10964201361552355, "grad_norm": 1.5461632013320923, "learning_rate": 0.00014617165791911085, "loss": 0.5483, "step": 9470 }, { "epoch": 0.10975779187699718, "grad_norm": 1.4829483032226562, "learning_rate": 0.00014632602655140477, "loss": 0.5539, "step": 9480 }, { "epoch": 0.1098735701384708, "grad_norm": 1.3567897081375122, "learning_rate": 0.00014648039518369867, "loss": 0.5556, "step": 9490 }, { "epoch": 0.10998934839994443, "grad_norm": 1.104142427444458, "learning_rate": 0.00014663476381599258, "loss": 0.6215, "step": 9500 }, { "epoch": 0.11010512666141806, "grad_norm": 1.3122401237487793, "learning_rate": 0.0001467891324482865, "loss": 0.5685, "step": 9510 }, { "epoch": 0.11022090492289167, "grad_norm": 1.4448779821395874, "learning_rate": 0.00014694350108058045, "loss": 0.5635, "step": 9520 }, { "epoch": 0.1103366831843653, "grad_norm": 1.151503324508667, "learning_rate": 0.00014709786971287437, "loss": 0.5701, "step": 9530 }, { "epoch": 0.11045246144583892, "grad_norm": 1.3240145444869995, "learning_rate": 0.00014725223834516826, "loss": 0.5519, "step": 9540 }, { "epoch": 0.11056823970731255, "grad_norm": 1.1416387557983398, "learning_rate": 0.00014740660697746218, "loss": 0.5548, "step": 9550 }, { "epoch": 0.11068401796878619, "grad_norm": 1.630345106124878, "learning_rate": 0.0001475609756097561, "loss": 0.589, "step": 9560 }, { "epoch": 0.1107997962302598, "grad_norm": 1.276701807975769, "learning_rate": 0.00014771534424205002, "loss": 0.5667, "step": 9570 }, { "epoch": 0.11091557449173343, "grad_norm": 0.7283602952957153, "learning_rate": 0.00014786971287434394, "loss": 0.5594, "step": 9580 }, { "epoch": 0.11103135275320705, "grad_norm": 1.2396055459976196, "learning_rate": 0.00014802408150663786, "loss": 0.5745, "step": 9590 }, { "epoch": 0.11114713101468068, "grad_norm": 1.2529735565185547, "learning_rate": 0.00014817845013893177, "loss": 0.5734, "step": 9600 }, { "epoch": 0.11126290927615431, "grad_norm": 1.3989439010620117, "learning_rate": 0.0001483328187712257, "loss": 0.6088, "step": 9610 }, { "epoch": 0.11137868753762793, "grad_norm": 1.3028852939605713, "learning_rate": 0.0001484871874035196, "loss": 0.5693, "step": 9620 }, { "epoch": 0.11149446579910156, "grad_norm": 1.315389633178711, "learning_rate": 0.00014864155603581353, "loss": 0.5542, "step": 9630 }, { "epoch": 0.11161024406057518, "grad_norm": 1.3869110345840454, "learning_rate": 0.00014879592466810745, "loss": 0.5712, "step": 9640 }, { "epoch": 0.11172602232204881, "grad_norm": 1.3050357103347778, "learning_rate": 0.00014895029330040134, "loss": 0.5527, "step": 9650 }, { "epoch": 0.11184180058352244, "grad_norm": 1.2478455305099487, "learning_rate": 0.0001491046619326953, "loss": 0.5343, "step": 9660 }, { "epoch": 0.11195757884499606, "grad_norm": 1.173696756362915, "learning_rate": 0.0001492590305649892, "loss": 0.5837, "step": 9670 }, { "epoch": 0.11207335710646969, "grad_norm": 1.0571203231811523, "learning_rate": 0.00014941339919728313, "loss": 0.5847, "step": 9680 }, { "epoch": 0.11218913536794331, "grad_norm": 1.2881155014038086, "learning_rate": 0.00014956776782957705, "loss": 0.5816, "step": 9690 }, { "epoch": 0.11230491362941694, "grad_norm": 1.19301438331604, "learning_rate": 0.00014972213646187097, "loss": 0.5027, "step": 9700 }, { "epoch": 0.11242069189089057, "grad_norm": 0.8694561123847961, "learning_rate": 0.00014987650509416486, "loss": 0.5323, "step": 9710 }, { "epoch": 0.11253647015236419, "grad_norm": 1.2321244478225708, "learning_rate": 0.00015003087372645878, "loss": 0.5573, "step": 9720 }, { "epoch": 0.11265224841383782, "grad_norm": 1.078788161277771, "learning_rate": 0.00015018524235875272, "loss": 0.5648, "step": 9730 }, { "epoch": 0.11276802667531144, "grad_norm": 0.8772777915000916, "learning_rate": 0.00015033961099104664, "loss": 0.4924, "step": 9740 }, { "epoch": 0.11288380493678507, "grad_norm": 1.4024667739868164, "learning_rate": 0.00015049397962334056, "loss": 0.5817, "step": 9750 }, { "epoch": 0.1129995831982587, "grad_norm": 1.1767064332962036, "learning_rate": 0.00015064834825563445, "loss": 0.5338, "step": 9760 }, { "epoch": 0.11311536145973232, "grad_norm": 1.348203420639038, "learning_rate": 0.00015080271688792837, "loss": 0.5618, "step": 9770 }, { "epoch": 0.11323113972120595, "grad_norm": 1.195645809173584, "learning_rate": 0.0001509570855202223, "loss": 0.5786, "step": 9780 }, { "epoch": 0.11334691798267957, "grad_norm": 1.1947705745697021, "learning_rate": 0.0001511114541525162, "loss": 0.5669, "step": 9790 }, { "epoch": 0.1134626962441532, "grad_norm": 1.0327008962631226, "learning_rate": 0.00015126582278481013, "loss": 0.5822, "step": 9800 }, { "epoch": 0.11357847450562682, "grad_norm": 1.0953099727630615, "learning_rate": 0.00015142019141710407, "loss": 0.5642, "step": 9810 }, { "epoch": 0.11369425276710045, "grad_norm": 1.1357226371765137, "learning_rate": 0.00015157456004939797, "loss": 0.5204, "step": 9820 }, { "epoch": 0.11381003102857408, "grad_norm": 1.1709736585617065, "learning_rate": 0.00015172892868169189, "loss": 0.5532, "step": 9830 }, { "epoch": 0.1139258092900477, "grad_norm": 1.457765817642212, "learning_rate": 0.0001518832973139858, "loss": 0.5645, "step": 9840 }, { "epoch": 0.11404158755152133, "grad_norm": 1.2417305707931519, "learning_rate": 0.00015203766594627972, "loss": 0.5925, "step": 9850 }, { "epoch": 0.11415736581299495, "grad_norm": 1.1331074237823486, "learning_rate": 0.00015219203457857364, "loss": 0.5902, "step": 9860 }, { "epoch": 0.11427314407446858, "grad_norm": 1.1287226676940918, "learning_rate": 0.00015234640321086756, "loss": 0.5959, "step": 9870 }, { "epoch": 0.11438892233594221, "grad_norm": 1.2500649690628052, "learning_rate": 0.00015250077184316148, "loss": 0.6099, "step": 9880 }, { "epoch": 0.11450470059741583, "grad_norm": 1.2514734268188477, "learning_rate": 0.0001526551404754554, "loss": 0.541, "step": 9890 }, { "epoch": 0.11462047885888946, "grad_norm": 1.2987282276153564, "learning_rate": 0.00015280950910774932, "loss": 0.5874, "step": 9900 }, { "epoch": 0.11473625712036307, "grad_norm": 1.2376899719238281, "learning_rate": 0.00015296387774004324, "loss": 0.5443, "step": 9910 }, { "epoch": 0.1148520353818367, "grad_norm": 1.0586546659469604, "learning_rate": 0.00015311824637233713, "loss": 0.5576, "step": 9920 }, { "epoch": 0.11496781364331034, "grad_norm": 1.2568001747131348, "learning_rate": 0.00015327261500463105, "loss": 0.56, "step": 9930 }, { "epoch": 0.11508359190478396, "grad_norm": 1.0285547971725464, "learning_rate": 0.00015342698363692497, "loss": 0.563, "step": 9940 }, { "epoch": 0.11519937016625759, "grad_norm": 1.4542441368103027, "learning_rate": 0.00015358135226921891, "loss": 0.5849, "step": 9950 }, { "epoch": 0.1153151484277312, "grad_norm": 1.1740046739578247, "learning_rate": 0.00015373572090151283, "loss": 0.5787, "step": 9960 }, { "epoch": 0.11543092668920484, "grad_norm": 1.2496739625930786, "learning_rate": 0.00015389008953380675, "loss": 0.5311, "step": 9970 }, { "epoch": 0.11554670495067847, "grad_norm": 1.2663365602493286, "learning_rate": 0.00015404445816610064, "loss": 0.5589, "step": 9980 }, { "epoch": 0.11566248321215208, "grad_norm": 1.1600028276443481, "learning_rate": 0.00015419882679839456, "loss": 0.5149, "step": 9990 }, { "epoch": 0.11577826147362572, "grad_norm": 1.1629509925842285, "learning_rate": 0.00015435319543068848, "loss": 0.5537, "step": 10000 }, { "epoch": 0.11577826147362572, "eval_chrf": 84.638197697528, "eval_loss": 0.8278533816337585, "eval_runtime": 130.0577, "eval_samples_per_second": 0.769, "eval_steps_per_second": 0.031, "step": 10000 }, { "epoch": 0.11589403973509933, "grad_norm": 1.1861213445663452, "learning_rate": 0.0001545075640629824, "loss": 0.5576, "step": 10010 }, { "epoch": 0.11600981799657296, "grad_norm": 1.050795316696167, "learning_rate": 0.00015466193269527635, "loss": 0.5668, "step": 10020 }, { "epoch": 0.1161255962580466, "grad_norm": 1.1185582876205444, "learning_rate": 0.00015481630132757024, "loss": 0.5434, "step": 10030 }, { "epoch": 0.11624137451952021, "grad_norm": 1.116737961769104, "learning_rate": 0.00015497066995986416, "loss": 0.5336, "step": 10040 }, { "epoch": 0.11635715278099384, "grad_norm": 0.8800601959228516, "learning_rate": 0.00015512503859215808, "loss": 0.565, "step": 10050 }, { "epoch": 0.11647293104246746, "grad_norm": 1.2967404127120972, "learning_rate": 0.000155279407224452, "loss": 0.5465, "step": 10060 }, { "epoch": 0.1165887093039411, "grad_norm": 1.1198583841323853, "learning_rate": 0.00015543377585674592, "loss": 0.5601, "step": 10070 }, { "epoch": 0.11670448756541472, "grad_norm": 1.3123164176940918, "learning_rate": 0.00015558814448903983, "loss": 0.5623, "step": 10080 }, { "epoch": 0.11682026582688834, "grad_norm": 1.3212881088256836, "learning_rate": 0.00015574251312133375, "loss": 0.5587, "step": 10090 }, { "epoch": 0.11693604408836197, "grad_norm": 1.1772260665893555, "learning_rate": 0.00015589688175362767, "loss": 0.5482, "step": 10100 }, { "epoch": 0.11705182234983559, "grad_norm": 1.3611674308776855, "learning_rate": 0.0001560512503859216, "loss": 0.5797, "step": 10110 }, { "epoch": 0.11716760061130922, "grad_norm": 1.2402210235595703, "learning_rate": 0.0001562056190182155, "loss": 0.5826, "step": 10120 }, { "epoch": 0.11728337887278284, "grad_norm": 1.1671757698059082, "learning_rate": 0.00015635998765050943, "loss": 0.5527, "step": 10130 }, { "epoch": 0.11739915713425647, "grad_norm": 1.2186027765274048, "learning_rate": 0.00015651435628280332, "loss": 0.5107, "step": 10140 }, { "epoch": 0.1175149353957301, "grad_norm": 0.8878567814826965, "learning_rate": 0.00015666872491509724, "loss": 0.5451, "step": 10150 }, { "epoch": 0.11763071365720372, "grad_norm": 1.2072186470031738, "learning_rate": 0.0001568230935473912, "loss": 0.575, "step": 10160 }, { "epoch": 0.11774649191867735, "grad_norm": 1.0611921548843384, "learning_rate": 0.0001569774621796851, "loss": 0.5637, "step": 10170 }, { "epoch": 0.11786227018015097, "grad_norm": 1.129486083984375, "learning_rate": 0.00015713183081197902, "loss": 0.56, "step": 10180 }, { "epoch": 0.1179780484416246, "grad_norm": 1.0836197137832642, "learning_rate": 0.00015728619944427294, "loss": 0.5332, "step": 10190 }, { "epoch": 0.11809382670309823, "grad_norm": 1.149590015411377, "learning_rate": 0.00015744056807656684, "loss": 0.5665, "step": 10200 }, { "epoch": 0.11820960496457185, "grad_norm": 1.039841651916504, "learning_rate": 0.00015759493670886075, "loss": 0.5666, "step": 10210 }, { "epoch": 0.11832538322604548, "grad_norm": 1.2359282970428467, "learning_rate": 0.00015774930534115467, "loss": 0.5774, "step": 10220 }, { "epoch": 0.1184411614875191, "grad_norm": 1.1911756992340088, "learning_rate": 0.00015790367397344862, "loss": 0.6242, "step": 10230 }, { "epoch": 0.11855693974899273, "grad_norm": 1.0651416778564453, "learning_rate": 0.00015805804260574254, "loss": 0.5214, "step": 10240 }, { "epoch": 0.11867271801046636, "grad_norm": 1.1892896890640259, "learning_rate": 0.00015821241123803643, "loss": 0.5544, "step": 10250 }, { "epoch": 0.11878849627193998, "grad_norm": 1.1992381811141968, "learning_rate": 0.00015836677987033035, "loss": 0.5596, "step": 10260 }, { "epoch": 0.11890427453341361, "grad_norm": 1.320581316947937, "learning_rate": 0.00015852114850262427, "loss": 0.5642, "step": 10270 }, { "epoch": 0.11902005279488723, "grad_norm": 1.2349812984466553, "learning_rate": 0.0001586755171349182, "loss": 0.5277, "step": 10280 }, { "epoch": 0.11913583105636086, "grad_norm": 1.3728326559066772, "learning_rate": 0.0001588298857672121, "loss": 0.5688, "step": 10290 }, { "epoch": 0.11925160931783449, "grad_norm": 1.085245966911316, "learning_rate": 0.00015898425439950603, "loss": 0.5629, "step": 10300 }, { "epoch": 0.1193673875793081, "grad_norm": 1.0424472093582153, "learning_rate": 0.00015913862303179995, "loss": 0.5813, "step": 10310 }, { "epoch": 0.11948316584078174, "grad_norm": 1.1915884017944336, "learning_rate": 0.00015929299166409386, "loss": 0.5753, "step": 10320 }, { "epoch": 0.11959894410225536, "grad_norm": 1.059067964553833, "learning_rate": 0.00015944736029638778, "loss": 0.5296, "step": 10330 }, { "epoch": 0.11971472236372899, "grad_norm": 0.9762371778488159, "learning_rate": 0.0001596017289286817, "loss": 0.5748, "step": 10340 }, { "epoch": 0.11983050062520262, "grad_norm": 1.168055772781372, "learning_rate": 0.00015975609756097562, "loss": 0.5447, "step": 10350 }, { "epoch": 0.11994627888667624, "grad_norm": 1.1109033823013306, "learning_rate": 0.0001599104661932695, "loss": 0.5771, "step": 10360 }, { "epoch": 0.12006205714814987, "grad_norm": 1.4880404472351074, "learning_rate": 0.00016006483482556346, "loss": 0.5418, "step": 10370 }, { "epoch": 0.12017783540962348, "grad_norm": 1.2616000175476074, "learning_rate": 0.00016021920345785738, "loss": 0.5306, "step": 10380 }, { "epoch": 0.12029361367109712, "grad_norm": 1.141715407371521, "learning_rate": 0.0001603735720901513, "loss": 0.5536, "step": 10390 }, { "epoch": 0.12040939193257075, "grad_norm": 1.0811337232589722, "learning_rate": 0.00016052794072244522, "loss": 0.5698, "step": 10400 }, { "epoch": 0.12052517019404436, "grad_norm": 1.1806341409683228, "learning_rate": 0.00016068230935473914, "loss": 0.5538, "step": 10410 }, { "epoch": 0.120640948455518, "grad_norm": 1.2402247190475464, "learning_rate": 0.00016083667798703303, "loss": 0.5628, "step": 10420 }, { "epoch": 0.12075672671699161, "grad_norm": 1.0643714666366577, "learning_rate": 0.00016099104661932695, "loss": 0.5335, "step": 10430 }, { "epoch": 0.12087250497846524, "grad_norm": 1.2642136812210083, "learning_rate": 0.00016114541525162087, "loss": 0.5176, "step": 10440 }, { "epoch": 0.12098828323993886, "grad_norm": 1.1404954195022583, "learning_rate": 0.0001612997838839148, "loss": 0.5596, "step": 10450 }, { "epoch": 0.1211040615014125, "grad_norm": 1.1933493614196777, "learning_rate": 0.00016145415251620873, "loss": 0.5763, "step": 10460 }, { "epoch": 0.12121983976288613, "grad_norm": 1.1639156341552734, "learning_rate": 0.00016160852114850262, "loss": 0.5465, "step": 10470 }, { "epoch": 0.12133561802435974, "grad_norm": 1.1753767728805542, "learning_rate": 0.00016176288978079654, "loss": 0.5268, "step": 10480 }, { "epoch": 0.12145139628583337, "grad_norm": 1.1199126243591309, "learning_rate": 0.00016191725841309046, "loss": 0.5464, "step": 10490 }, { "epoch": 0.12156717454730699, "grad_norm": 1.1204979419708252, "learning_rate": 0.00016207162704538438, "loss": 0.5632, "step": 10500 }, { "epoch": 0.12168295280878062, "grad_norm": 1.3289235830307007, "learning_rate": 0.0001622259956776783, "loss": 0.5738, "step": 10510 }, { "epoch": 0.12179873107025425, "grad_norm": 1.3842880725860596, "learning_rate": 0.00016238036430997222, "loss": 0.5596, "step": 10520 }, { "epoch": 0.12191450933172787, "grad_norm": 1.0078904628753662, "learning_rate": 0.00016253473294226614, "loss": 0.5113, "step": 10530 }, { "epoch": 0.1220302875932015, "grad_norm": 1.1998320817947388, "learning_rate": 0.00016268910157456006, "loss": 0.524, "step": 10540 }, { "epoch": 0.12214606585467512, "grad_norm": 1.0711064338684082, "learning_rate": 0.00016284347020685398, "loss": 0.5834, "step": 10550 }, { "epoch": 0.12226184411614875, "grad_norm": 1.2888844013214111, "learning_rate": 0.0001629978388391479, "loss": 0.5644, "step": 10560 }, { "epoch": 0.12237762237762238, "grad_norm": 1.1922492980957031, "learning_rate": 0.0001631522074714418, "loss": 0.5798, "step": 10570 }, { "epoch": 0.122493400639096, "grad_norm": 1.1588951349258423, "learning_rate": 0.00016330657610373573, "loss": 0.5432, "step": 10580 }, { "epoch": 0.12260917890056963, "grad_norm": 1.0158624649047852, "learning_rate": 0.00016346094473602965, "loss": 0.5374, "step": 10590 }, { "epoch": 0.12272495716204325, "grad_norm": 1.221661925315857, "learning_rate": 0.00016361531336832357, "loss": 0.6158, "step": 10600 }, { "epoch": 0.12284073542351688, "grad_norm": 1.276640772819519, "learning_rate": 0.0001637696820006175, "loss": 0.5408, "step": 10610 }, { "epoch": 0.12295651368499051, "grad_norm": 0.9623101949691772, "learning_rate": 0.0001639240506329114, "loss": 0.562, "step": 10620 }, { "epoch": 0.12307229194646413, "grad_norm": 1.1899003982543945, "learning_rate": 0.0001640784192652053, "loss": 0.577, "step": 10630 }, { "epoch": 0.12318807020793776, "grad_norm": 1.1083171367645264, "learning_rate": 0.00016423278789749922, "loss": 0.5248, "step": 10640 }, { "epoch": 0.12330384846941138, "grad_norm": 0.945651650428772, "learning_rate": 0.00016438715652979314, "loss": 0.5576, "step": 10650 }, { "epoch": 0.12341962673088501, "grad_norm": 1.1229653358459473, "learning_rate": 0.00016454152516208708, "loss": 0.5506, "step": 10660 }, { "epoch": 0.12353540499235864, "grad_norm": 1.126285433769226, "learning_rate": 0.000164695893794381, "loss": 0.5586, "step": 10670 }, { "epoch": 0.12365118325383226, "grad_norm": 1.1712100505828857, "learning_rate": 0.00016485026242667492, "loss": 0.5512, "step": 10680 }, { "epoch": 0.12376696151530589, "grad_norm": 1.2397513389587402, "learning_rate": 0.00016500463105896881, "loss": 0.5823, "step": 10690 }, { "epoch": 0.12388273977677951, "grad_norm": 1.1539273262023926, "learning_rate": 0.00016515899969126273, "loss": 0.5426, "step": 10700 }, { "epoch": 0.12399851803825314, "grad_norm": 1.3479893207550049, "learning_rate": 0.00016531336832355665, "loss": 0.5407, "step": 10710 }, { "epoch": 0.12411429629972677, "grad_norm": 1.0497949123382568, "learning_rate": 0.00016546773695585057, "loss": 0.5763, "step": 10720 }, { "epoch": 0.12423007456120039, "grad_norm": 1.5127085447311401, "learning_rate": 0.00016562210558814452, "loss": 0.5126, "step": 10730 }, { "epoch": 0.12434585282267402, "grad_norm": 1.3668838739395142, "learning_rate": 0.0001657764742204384, "loss": 0.5827, "step": 10740 }, { "epoch": 0.12446163108414764, "grad_norm": 1.3154889345169067, "learning_rate": 0.00016593084285273233, "loss": 0.5621, "step": 10750 }, { "epoch": 0.12457740934562127, "grad_norm": 1.0343090295791626, "learning_rate": 0.00016608521148502625, "loss": 0.5287, "step": 10760 }, { "epoch": 0.12469318760709489, "grad_norm": 1.1013846397399902, "learning_rate": 0.00016623958011732017, "loss": 0.5613, "step": 10770 }, { "epoch": 0.12480896586856852, "grad_norm": 1.0979907512664795, "learning_rate": 0.00016639394874961409, "loss": 0.569, "step": 10780 }, { "epoch": 0.12492474413004215, "grad_norm": 1.4586269855499268, "learning_rate": 0.000166548317381908, "loss": 0.5544, "step": 10790 }, { "epoch": 0.12504052239151578, "grad_norm": 0.8446677327156067, "learning_rate": 0.00016670268601420192, "loss": 0.5613, "step": 10800 }, { "epoch": 0.12515630065298938, "grad_norm": 1.0182301998138428, "learning_rate": 0.00016685705464649584, "loss": 0.597, "step": 10810 }, { "epoch": 0.12527207891446301, "grad_norm": 1.5233309268951416, "learning_rate": 0.00016701142327878976, "loss": 0.597, "step": 10820 }, { "epoch": 0.12538785717593665, "grad_norm": 1.3310496807098389, "learning_rate": 0.00016716579191108368, "loss": 0.5362, "step": 10830 }, { "epoch": 0.12550363543741028, "grad_norm": 1.1259944438934326, "learning_rate": 0.0001673201605433776, "loss": 0.5224, "step": 10840 }, { "epoch": 0.1256194136988839, "grad_norm": 0.9475477337837219, "learning_rate": 0.0001674745291756715, "loss": 0.5637, "step": 10850 }, { "epoch": 0.1257351919603575, "grad_norm": 0.7506548762321472, "learning_rate": 0.0001676288978079654, "loss": 0.5621, "step": 10860 }, { "epoch": 0.12585097022183114, "grad_norm": 1.293733835220337, "learning_rate": 0.00016778326644025936, "loss": 0.6133, "step": 10870 }, { "epoch": 0.12596674848330477, "grad_norm": 1.132787823677063, "learning_rate": 0.00016793763507255328, "loss": 0.5535, "step": 10880 }, { "epoch": 0.1260825267447784, "grad_norm": 0.9605393409729004, "learning_rate": 0.0001680920037048472, "loss": 0.5293, "step": 10890 }, { "epoch": 0.12619830500625204, "grad_norm": 1.193603754043579, "learning_rate": 0.00016824637233714111, "loss": 0.5348, "step": 10900 }, { "epoch": 0.12631408326772564, "grad_norm": 0.9524046778678894, "learning_rate": 0.000168400740969435, "loss": 0.5466, "step": 10910 }, { "epoch": 0.12642986152919927, "grad_norm": 0.9687754511833191, "learning_rate": 0.00016855510960172893, "loss": 0.5615, "step": 10920 }, { "epoch": 0.1265456397906729, "grad_norm": 1.1673355102539062, "learning_rate": 0.00016870947823402284, "loss": 0.5191, "step": 10930 }, { "epoch": 0.12666141805214653, "grad_norm": 1.1563589572906494, "learning_rate": 0.00016886384686631676, "loss": 0.5525, "step": 10940 }, { "epoch": 0.12677719631362017, "grad_norm": 1.19508695602417, "learning_rate": 0.0001690182154986107, "loss": 0.5689, "step": 10950 }, { "epoch": 0.12689297457509377, "grad_norm": 1.2190064191818237, "learning_rate": 0.0001691725841309046, "loss": 0.5316, "step": 10960 }, { "epoch": 0.1270087528365674, "grad_norm": 1.0793712139129639, "learning_rate": 0.00016932695276319852, "loss": 0.5668, "step": 10970 }, { "epoch": 0.12712453109804103, "grad_norm": 1.0726120471954346, "learning_rate": 0.00016948132139549244, "loss": 0.5692, "step": 10980 }, { "epoch": 0.12724030935951466, "grad_norm": 1.1050915718078613, "learning_rate": 0.00016963569002778636, "loss": 0.542, "step": 10990 }, { "epoch": 0.1273560876209883, "grad_norm": 1.142502784729004, "learning_rate": 0.00016979005866008028, "loss": 0.5347, "step": 11000 }, { "epoch": 0.1273560876209883, "eval_chrf": 83.23927244399603, "eval_loss": 0.8144679665565491, "eval_runtime": 266.851, "eval_samples_per_second": 0.375, "eval_steps_per_second": 0.015, "step": 11000 }, { "epoch": 0.1274718658824619, "grad_norm": 1.0333619117736816, "learning_rate": 0.0001699444272923742, "loss": 0.5571, "step": 11010 }, { "epoch": 0.12758764414393553, "grad_norm": 1.0597230195999146, "learning_rate": 0.00017009879592466812, "loss": 0.508, "step": 11020 }, { "epoch": 0.12770342240540916, "grad_norm": 1.0315881967544556, "learning_rate": 0.00017025316455696204, "loss": 0.5524, "step": 11030 }, { "epoch": 0.1278192006668828, "grad_norm": 1.2254493236541748, "learning_rate": 0.00017040753318925595, "loss": 0.5297, "step": 11040 }, { "epoch": 0.12793497892835642, "grad_norm": 0.9213074445724487, "learning_rate": 0.00017056190182154987, "loss": 0.5336, "step": 11050 }, { "epoch": 0.12805075718983003, "grad_norm": 1.309411644935608, "learning_rate": 0.0001707162704538438, "loss": 0.5068, "step": 11060 }, { "epoch": 0.12816653545130366, "grad_norm": 1.152342677116394, "learning_rate": 0.00017087063908613768, "loss": 0.5822, "step": 11070 }, { "epoch": 0.1282823137127773, "grad_norm": 1.1705478429794312, "learning_rate": 0.00017102500771843163, "loss": 0.532, "step": 11080 }, { "epoch": 0.12839809197425092, "grad_norm": 1.048901915550232, "learning_rate": 0.00017117937635072555, "loss": 0.4915, "step": 11090 }, { "epoch": 0.12851387023572455, "grad_norm": 1.1591132879257202, "learning_rate": 0.00017133374498301947, "loss": 0.577, "step": 11100 }, { "epoch": 0.12862964849719816, "grad_norm": 0.8415403962135315, "learning_rate": 0.0001714881136153134, "loss": 0.5413, "step": 11110 }, { "epoch": 0.1287454267586718, "grad_norm": 1.2463173866271973, "learning_rate": 0.00017164248224760728, "loss": 0.5185, "step": 11120 }, { "epoch": 0.12886120502014542, "grad_norm": 1.1211379766464233, "learning_rate": 0.0001717968508799012, "loss": 0.5642, "step": 11130 }, { "epoch": 0.12897698328161905, "grad_norm": 0.8294934034347534, "learning_rate": 0.00017195121951219512, "loss": 0.5478, "step": 11140 }, { "epoch": 0.12909276154309268, "grad_norm": 0.9989081025123596, "learning_rate": 0.00017210558814448904, "loss": 0.5407, "step": 11150 }, { "epoch": 0.12920853980456629, "grad_norm": 1.027762532234192, "learning_rate": 0.00017225995677678298, "loss": 0.5974, "step": 11160 }, { "epoch": 0.12932431806603992, "grad_norm": 1.0650486946105957, "learning_rate": 0.0001724143254090769, "loss": 0.5158, "step": 11170 }, { "epoch": 0.12944009632751355, "grad_norm": 1.0328350067138672, "learning_rate": 0.0001725686940413708, "loss": 0.5319, "step": 11180 }, { "epoch": 0.12955587458898718, "grad_norm": 1.0063869953155518, "learning_rate": 0.0001727230626736647, "loss": 0.5116, "step": 11190 }, { "epoch": 0.1296716528504608, "grad_norm": 1.168185830116272, "learning_rate": 0.00017287743130595863, "loss": 0.5596, "step": 11200 }, { "epoch": 0.12978743111193441, "grad_norm": 1.115496277809143, "learning_rate": 0.00017303179993825255, "loss": 0.5578, "step": 11210 }, { "epoch": 0.12990320937340805, "grad_norm": 1.2830357551574707, "learning_rate": 0.00017318616857054647, "loss": 0.5645, "step": 11220 }, { "epoch": 0.13001898763488168, "grad_norm": 1.221888780593872, "learning_rate": 0.0001733405372028404, "loss": 0.5344, "step": 11230 }, { "epoch": 0.1301347658963553, "grad_norm": 1.1060632467269897, "learning_rate": 0.0001734949058351343, "loss": 0.5318, "step": 11240 }, { "epoch": 0.1302505441578289, "grad_norm": 0.777014970779419, "learning_rate": 0.00017364927446742823, "loss": 0.5489, "step": 11250 }, { "epoch": 0.13036632241930254, "grad_norm": 1.0885053873062134, "learning_rate": 0.00017380364309972215, "loss": 0.5687, "step": 11260 }, { "epoch": 0.13048210068077618, "grad_norm": 1.1854456663131714, "learning_rate": 0.00017395801173201607, "loss": 0.5516, "step": 11270 }, { "epoch": 0.1305978789422498, "grad_norm": 1.1444556713104248, "learning_rate": 0.00017411238036430998, "loss": 0.5766, "step": 11280 }, { "epoch": 0.13071365720372344, "grad_norm": 1.273538589477539, "learning_rate": 0.0001742667489966039, "loss": 0.5706, "step": 11290 }, { "epoch": 0.13082943546519704, "grad_norm": 0.9996747970581055, "learning_rate": 0.00017442111762889782, "loss": 0.525, "step": 11300 }, { "epoch": 0.13094521372667067, "grad_norm": 1.1024609804153442, "learning_rate": 0.00017457548626119174, "loss": 0.5166, "step": 11310 }, { "epoch": 0.1310609919881443, "grad_norm": 1.111045002937317, "learning_rate": 0.00017472985489348566, "loss": 0.5581, "step": 11320 }, { "epoch": 0.13117677024961794, "grad_norm": 1.2413759231567383, "learning_rate": 0.00017488422352577958, "loss": 0.5832, "step": 11330 }, { "epoch": 0.13129254851109157, "grad_norm": 1.2619898319244385, "learning_rate": 0.00017503859215807347, "loss": 0.5329, "step": 11340 }, { "epoch": 0.13140832677256517, "grad_norm": 1.0059072971343994, "learning_rate": 0.0001751929607903674, "loss": 0.5782, "step": 11350 }, { "epoch": 0.1315241050340388, "grad_norm": 1.025973916053772, "learning_rate": 0.0001753473294226613, "loss": 0.5213, "step": 11360 }, { "epoch": 0.13163988329551243, "grad_norm": 1.1603498458862305, "learning_rate": 0.00017550169805495526, "loss": 0.5222, "step": 11370 }, { "epoch": 0.13175566155698606, "grad_norm": 1.2103770971298218, "learning_rate": 0.00017565606668724917, "loss": 0.5256, "step": 11380 }, { "epoch": 0.1318714398184597, "grad_norm": 1.1276682615280151, "learning_rate": 0.0001758104353195431, "loss": 0.5853, "step": 11390 }, { "epoch": 0.1319872180799333, "grad_norm": 1.136440634727478, "learning_rate": 0.00017596480395183699, "loss": 0.5244, "step": 11400 }, { "epoch": 0.13210299634140693, "grad_norm": 1.136427402496338, "learning_rate": 0.0001761191725841309, "loss": 0.5478, "step": 11410 }, { "epoch": 0.13221877460288056, "grad_norm": 1.0788636207580566, "learning_rate": 0.00017627354121642482, "loss": 0.5525, "step": 11420 }, { "epoch": 0.1323345528643542, "grad_norm": 1.2557777166366577, "learning_rate": 0.00017642790984871874, "loss": 0.5372, "step": 11430 }, { "epoch": 0.13245033112582782, "grad_norm": 1.040418267250061, "learning_rate": 0.00017658227848101266, "loss": 0.5575, "step": 11440 }, { "epoch": 0.13256610938730143, "grad_norm": 1.2515392303466797, "learning_rate": 0.00017673664711330658, "loss": 0.5553, "step": 11450 }, { "epoch": 0.13268188764877506, "grad_norm": 0.915041983127594, "learning_rate": 0.0001768910157456005, "loss": 0.5841, "step": 11460 }, { "epoch": 0.1327976659102487, "grad_norm": 1.2221473455429077, "learning_rate": 0.00017704538437789442, "loss": 0.5139, "step": 11470 }, { "epoch": 0.13291344417172232, "grad_norm": 0.9782427549362183, "learning_rate": 0.00017719975301018834, "loss": 0.504, "step": 11480 }, { "epoch": 0.13302922243319595, "grad_norm": 1.1323063373565674, "learning_rate": 0.00017735412164248226, "loss": 0.5084, "step": 11490 }, { "epoch": 0.13314500069466956, "grad_norm": 1.3384109735488892, "learning_rate": 0.00017750849027477618, "loss": 0.53, "step": 11500 }, { "epoch": 0.1332607789561432, "grad_norm": 0.8835853338241577, "learning_rate": 0.0001776628589070701, "loss": 0.5243, "step": 11510 }, { "epoch": 0.13337655721761682, "grad_norm": 1.2791578769683838, "learning_rate": 0.00017781722753936401, "loss": 0.5791, "step": 11520 }, { "epoch": 0.13349233547909045, "grad_norm": 0.806826651096344, "learning_rate": 0.00017797159617165793, "loss": 0.5318, "step": 11530 }, { "epoch": 0.13360811374056408, "grad_norm": 1.084456443786621, "learning_rate": 0.00017812596480395185, "loss": 0.5219, "step": 11540 }, { "epoch": 0.1337238920020377, "grad_norm": 1.0938810110092163, "learning_rate": 0.00017828033343624577, "loss": 0.528, "step": 11550 }, { "epoch": 0.13383967026351132, "grad_norm": 0.9293761849403381, "learning_rate": 0.00017843470206853966, "loss": 0.4871, "step": 11560 }, { "epoch": 0.13395544852498495, "grad_norm": 1.3259512186050415, "learning_rate": 0.00017858907070083358, "loss": 0.5218, "step": 11570 }, { "epoch": 0.13407122678645858, "grad_norm": 1.0393692255020142, "learning_rate": 0.00017874343933312753, "loss": 0.5824, "step": 11580 }, { "epoch": 0.1341870050479322, "grad_norm": 1.091562271118164, "learning_rate": 0.00017889780796542145, "loss": 0.5312, "step": 11590 }, { "epoch": 0.13430278330940582, "grad_norm": 0.9571833610534668, "learning_rate": 0.00017905217659771537, "loss": 0.4809, "step": 11600 }, { "epoch": 0.13441856157087945, "grad_norm": 0.9122828841209412, "learning_rate": 0.00017920654523000929, "loss": 0.5268, "step": 11610 }, { "epoch": 0.13453433983235308, "grad_norm": 1.106863021850586, "learning_rate": 0.00017936091386230318, "loss": 0.4979, "step": 11620 }, { "epoch": 0.1346501180938267, "grad_norm": 0.9412614107131958, "learning_rate": 0.0001795152824945971, "loss": 0.5449, "step": 11630 }, { "epoch": 0.13476589635530034, "grad_norm": 1.068676233291626, "learning_rate": 0.00017966965112689102, "loss": 0.5392, "step": 11640 }, { "epoch": 0.13488167461677394, "grad_norm": 0.9199798703193665, "learning_rate": 0.00017982401975918493, "loss": 0.553, "step": 11650 }, { "epoch": 0.13499745287824758, "grad_norm": 1.0406941175460815, "learning_rate": 0.00017997838839147888, "loss": 0.5119, "step": 11660 }, { "epoch": 0.1351132311397212, "grad_norm": 1.0165780782699585, "learning_rate": 0.00018013275702377277, "loss": 0.5964, "step": 11670 }, { "epoch": 0.13522900940119484, "grad_norm": 1.1605157852172852, "learning_rate": 0.0001802871256560667, "loss": 0.5357, "step": 11680 }, { "epoch": 0.13534478766266847, "grad_norm": 1.2324901819229126, "learning_rate": 0.0001804414942883606, "loss": 0.5654, "step": 11690 }, { "epoch": 0.13546056592414207, "grad_norm": 1.203242301940918, "learning_rate": 0.00018059586292065453, "loss": 0.546, "step": 11700 }, { "epoch": 0.1355763441856157, "grad_norm": 0.955553412437439, "learning_rate": 0.00018075023155294845, "loss": 0.5792, "step": 11710 }, { "epoch": 0.13569212244708934, "grad_norm": 1.0696978569030762, "learning_rate": 0.00018090460018524237, "loss": 0.497, "step": 11720 }, { "epoch": 0.13580790070856297, "grad_norm": 0.9858132004737854, "learning_rate": 0.0001810589688175363, "loss": 0.5201, "step": 11730 }, { "epoch": 0.1359236789700366, "grad_norm": 1.1289206743240356, "learning_rate": 0.0001812133374498302, "loss": 0.5504, "step": 11740 }, { "epoch": 0.1360394572315102, "grad_norm": 0.9448925852775574, "learning_rate": 0.00018136770608212412, "loss": 0.5174, "step": 11750 }, { "epoch": 0.13615523549298383, "grad_norm": 0.9734740853309631, "learning_rate": 0.00018152207471441804, "loss": 0.4766, "step": 11760 }, { "epoch": 0.13627101375445747, "grad_norm": 1.3037446737289429, "learning_rate": 0.00018167644334671196, "loss": 0.5263, "step": 11770 }, { "epoch": 0.1363867920159311, "grad_norm": 1.0228137969970703, "learning_rate": 0.00018183081197900585, "loss": 0.5461, "step": 11780 }, { "epoch": 0.13650257027740473, "grad_norm": 1.0178076028823853, "learning_rate": 0.0001819851806112998, "loss": 0.5633, "step": 11790 }, { "epoch": 0.13661834853887833, "grad_norm": 1.0332564115524292, "learning_rate": 0.00018213954924359372, "loss": 0.5081, "step": 11800 }, { "epoch": 0.13673412680035196, "grad_norm": 1.2683144807815552, "learning_rate": 0.00018229391787588764, "loss": 0.5323, "step": 11810 }, { "epoch": 0.1368499050618256, "grad_norm": 1.0793180465698242, "learning_rate": 0.00018244828650818156, "loss": 0.5571, "step": 11820 }, { "epoch": 0.13696568332329923, "grad_norm": 1.2837165594100952, "learning_rate": 0.00018260265514047545, "loss": 0.5191, "step": 11830 }, { "epoch": 0.13708146158477286, "grad_norm": 1.0644558668136597, "learning_rate": 0.00018275702377276937, "loss": 0.5665, "step": 11840 }, { "epoch": 0.13719723984624646, "grad_norm": 0.9577881097793579, "learning_rate": 0.0001829113924050633, "loss": 0.5169, "step": 11850 }, { "epoch": 0.1373130181077201, "grad_norm": 1.0442769527435303, "learning_rate": 0.0001830657610373572, "loss": 0.5538, "step": 11860 }, { "epoch": 0.13742879636919372, "grad_norm": 1.006024718284607, "learning_rate": 0.00018322012966965115, "loss": 0.5206, "step": 11870 }, { "epoch": 0.13754457463066735, "grad_norm": 1.124611735343933, "learning_rate": 0.00018337449830194507, "loss": 0.5528, "step": 11880 }, { "epoch": 0.13766035289214096, "grad_norm": 1.016611933708191, "learning_rate": 0.00018352886693423896, "loss": 0.514, "step": 11890 }, { "epoch": 0.1377761311536146, "grad_norm": 1.1806875467300415, "learning_rate": 0.00018368323556653288, "loss": 0.5298, "step": 11900 }, { "epoch": 0.13789190941508822, "grad_norm": 1.2107545137405396, "learning_rate": 0.0001838376041988268, "loss": 0.5286, "step": 11910 }, { "epoch": 0.13800768767656185, "grad_norm": 1.073745608329773, "learning_rate": 0.00018399197283112072, "loss": 0.5396, "step": 11920 }, { "epoch": 0.13812346593803548, "grad_norm": 0.9077839851379395, "learning_rate": 0.00018414634146341464, "loss": 0.5423, "step": 11930 }, { "epoch": 0.1382392441995091, "grad_norm": 0.799449622631073, "learning_rate": 0.00018430071009570856, "loss": 0.5247, "step": 11940 }, { "epoch": 0.13835502246098272, "grad_norm": 0.9549907445907593, "learning_rate": 0.00018445507872800248, "loss": 0.521, "step": 11950 }, { "epoch": 0.13847080072245635, "grad_norm": 1.1224545240402222, "learning_rate": 0.0001846094473602964, "loss": 0.5819, "step": 11960 }, { "epoch": 0.13858657898392998, "grad_norm": 1.1244319677352905, "learning_rate": 0.00018476381599259032, "loss": 0.5349, "step": 11970 }, { "epoch": 0.1387023572454036, "grad_norm": 0.9261943101882935, "learning_rate": 0.00018491818462488424, "loss": 0.5422, "step": 11980 }, { "epoch": 0.13881813550687722, "grad_norm": 0.9949973225593567, "learning_rate": 0.00018507255325717815, "loss": 0.5169, "step": 11990 }, { "epoch": 0.13893391376835085, "grad_norm": 1.7659807205200195, "learning_rate": 0.00018522692188947207, "loss": 0.536, "step": 12000 }, { "epoch": 0.13893391376835085, "eval_chrf": 82.54549731700665, "eval_loss": 0.8094607591629028, "eval_runtime": 202.044, "eval_samples_per_second": 0.495, "eval_steps_per_second": 0.02, "step": 12000 }, { "epoch": 0.13904969202982448, "grad_norm": 1.013721227645874, "learning_rate": 0.000185381290521766, "loss": 0.534, "step": 12010 }, { "epoch": 0.1391654702912981, "grad_norm": 0.9654140472412109, "learning_rate": 0.0001855356591540599, "loss": 0.5169, "step": 12020 }, { "epoch": 0.13928124855277174, "grad_norm": 1.178539514541626, "learning_rate": 0.00018569002778635383, "loss": 0.5652, "step": 12030 }, { "epoch": 0.13939702681424535, "grad_norm": 1.261973261833191, "learning_rate": 0.00018584439641864775, "loss": 0.4845, "step": 12040 }, { "epoch": 0.13951280507571898, "grad_norm": 0.8742697834968567, "learning_rate": 0.00018599876505094164, "loss": 0.527, "step": 12050 }, { "epoch": 0.1396285833371926, "grad_norm": 1.3083447217941284, "learning_rate": 0.00018615313368323556, "loss": 0.5299, "step": 12060 }, { "epoch": 0.13974436159866624, "grad_norm": 1.1984888315200806, "learning_rate": 0.00018630750231552948, "loss": 0.5648, "step": 12070 }, { "epoch": 0.13986013986013987, "grad_norm": 0.8685519099235535, "learning_rate": 0.00018646187094782343, "loss": 0.539, "step": 12080 }, { "epoch": 0.13997591812161347, "grad_norm": 0.8454098701477051, "learning_rate": 0.00018661623958011735, "loss": 0.5362, "step": 12090 }, { "epoch": 0.1400916963830871, "grad_norm": 1.0055502653121948, "learning_rate": 0.00018677060821241126, "loss": 0.5422, "step": 12100 }, { "epoch": 0.14020747464456074, "grad_norm": 1.48152494430542, "learning_rate": 0.00018692497684470516, "loss": 0.5189, "step": 12110 }, { "epoch": 0.14032325290603437, "grad_norm": 0.9812230467796326, "learning_rate": 0.00018707934547699908, "loss": 0.5244, "step": 12120 }, { "epoch": 0.140439031167508, "grad_norm": 1.140608310699463, "learning_rate": 0.000187233714109293, "loss": 0.5373, "step": 12130 }, { "epoch": 0.1405548094289816, "grad_norm": 0.9877954721450806, "learning_rate": 0.0001873880827415869, "loss": 0.5195, "step": 12140 }, { "epoch": 0.14067058769045523, "grad_norm": 1.1157594919204712, "learning_rate": 0.00018754245137388083, "loss": 0.5606, "step": 12150 }, { "epoch": 0.14078636595192887, "grad_norm": 0.8598747849464417, "learning_rate": 0.00018769682000617475, "loss": 0.5366, "step": 12160 }, { "epoch": 0.1409021442134025, "grad_norm": 0.6583921909332275, "learning_rate": 0.00018785118863846867, "loss": 0.5138, "step": 12170 }, { "epoch": 0.14101792247487613, "grad_norm": 1.0767642259597778, "learning_rate": 0.0001880055572707626, "loss": 0.538, "step": 12180 }, { "epoch": 0.14113370073634973, "grad_norm": 1.048838496208191, "learning_rate": 0.0001881599259030565, "loss": 0.5153, "step": 12190 }, { "epoch": 0.14124947899782336, "grad_norm": 1.035570502281189, "learning_rate": 0.00018831429453535043, "loss": 0.5404, "step": 12200 }, { "epoch": 0.141365257259297, "grad_norm": 1.0647659301757812, "learning_rate": 0.00018846866316764432, "loss": 0.5562, "step": 12210 }, { "epoch": 0.14148103552077063, "grad_norm": 0.991861879825592, "learning_rate": 0.00018862303179993827, "loss": 0.5005, "step": 12220 }, { "epoch": 0.14159681378224426, "grad_norm": 0.977424681186676, "learning_rate": 0.00018877740043223218, "loss": 0.5177, "step": 12230 }, { "epoch": 0.14171259204371786, "grad_norm": 1.1738947629928589, "learning_rate": 0.0001889317690645261, "loss": 0.5283, "step": 12240 }, { "epoch": 0.1418283703051915, "grad_norm": 1.055560827255249, "learning_rate": 0.00018908613769682002, "loss": 0.5506, "step": 12250 }, { "epoch": 0.14194414856666512, "grad_norm": 1.1133482456207275, "learning_rate": 0.00018924050632911394, "loss": 0.5264, "step": 12260 }, { "epoch": 0.14205992682813876, "grad_norm": 1.02242112159729, "learning_rate": 0.00018939487496140783, "loss": 0.4913, "step": 12270 }, { "epoch": 0.1421757050896124, "grad_norm": 0.967154860496521, "learning_rate": 0.00018954924359370175, "loss": 0.5375, "step": 12280 }, { "epoch": 0.142291483351086, "grad_norm": 1.0422741174697876, "learning_rate": 0.0001897036122259957, "loss": 0.5827, "step": 12290 }, { "epoch": 0.14240726161255962, "grad_norm": 1.1207256317138672, "learning_rate": 0.00018985798085828962, "loss": 0.5528, "step": 12300 }, { "epoch": 0.14252303987403325, "grad_norm": 1.1637494564056396, "learning_rate": 0.00019001234949058354, "loss": 0.5411, "step": 12310 }, { "epoch": 0.14263881813550688, "grad_norm": 0.9992698431015015, "learning_rate": 0.00019016671812287743, "loss": 0.575, "step": 12320 }, { "epoch": 0.14275459639698052, "grad_norm": 1.29230535030365, "learning_rate": 0.00019032108675517135, "loss": 0.5392, "step": 12330 }, { "epoch": 0.14287037465845412, "grad_norm": 0.9479355812072754, "learning_rate": 0.00019047545538746527, "loss": 0.5542, "step": 12340 }, { "epoch": 0.14298615291992775, "grad_norm": 1.1067039966583252, "learning_rate": 0.00019062982401975919, "loss": 0.531, "step": 12350 }, { "epoch": 0.14310193118140138, "grad_norm": 1.1938244104385376, "learning_rate": 0.0001907841926520531, "loss": 0.5431, "step": 12360 }, { "epoch": 0.143217709442875, "grad_norm": 1.029172420501709, "learning_rate": 0.00019093856128434705, "loss": 0.52, "step": 12370 }, { "epoch": 0.14333348770434864, "grad_norm": 1.0918617248535156, "learning_rate": 0.00019109292991664094, "loss": 0.5157, "step": 12380 }, { "epoch": 0.14344926596582225, "grad_norm": 1.1285583972930908, "learning_rate": 0.00019124729854893486, "loss": 0.5182, "step": 12390 }, { "epoch": 0.14356504422729588, "grad_norm": 0.9844357371330261, "learning_rate": 0.00019140166718122878, "loss": 0.5348, "step": 12400 }, { "epoch": 0.1436808224887695, "grad_norm": 0.8017510175704956, "learning_rate": 0.0001915560358135227, "loss": 0.535, "step": 12410 }, { "epoch": 0.14379660075024314, "grad_norm": 1.1872317790985107, "learning_rate": 0.00019171040444581662, "loss": 0.5435, "step": 12420 }, { "epoch": 0.14391237901171677, "grad_norm": 1.1137135028839111, "learning_rate": 0.00019186477307811054, "loss": 0.5706, "step": 12430 }, { "epoch": 0.14402815727319038, "grad_norm": 0.9512019157409668, "learning_rate": 0.00019201914171040446, "loss": 0.5228, "step": 12440 }, { "epoch": 0.144143935534664, "grad_norm": 1.0688989162445068, "learning_rate": 0.00019217351034269838, "loss": 0.5394, "step": 12450 }, { "epoch": 0.14425971379613764, "grad_norm": 1.0268332958221436, "learning_rate": 0.0001923278789749923, "loss": 0.5156, "step": 12460 }, { "epoch": 0.14437549205761127, "grad_norm": 1.5388939380645752, "learning_rate": 0.00019248224760728621, "loss": 0.5755, "step": 12470 }, { "epoch": 0.1444912703190849, "grad_norm": 0.9914690852165222, "learning_rate": 0.00019263661623958013, "loss": 0.548, "step": 12480 }, { "epoch": 0.1446070485805585, "grad_norm": 0.959027886390686, "learning_rate": 0.00019279098487187403, "loss": 0.5403, "step": 12490 }, { "epoch": 0.14472282684203214, "grad_norm": 0.9708199501037598, "learning_rate": 0.00019294535350416797, "loss": 0.5218, "step": 12500 }, { "epoch": 0.14483860510350577, "grad_norm": 1.0253245830535889, "learning_rate": 0.0001930997221364619, "loss": 0.549, "step": 12510 }, { "epoch": 0.1449543833649794, "grad_norm": 1.1119471788406372, "learning_rate": 0.0001932540907687558, "loss": 0.5643, "step": 12520 }, { "epoch": 0.145070161626453, "grad_norm": 0.9286425709724426, "learning_rate": 0.00019340845940104973, "loss": 0.515, "step": 12530 }, { "epoch": 0.14518593988792663, "grad_norm": 0.9852153062820435, "learning_rate": 0.00019356282803334362, "loss": 0.5124, "step": 12540 }, { "epoch": 0.14530171814940027, "grad_norm": 1.0258785486221313, "learning_rate": 0.00019371719666563754, "loss": 0.498, "step": 12550 }, { "epoch": 0.1454174964108739, "grad_norm": 1.0606341361999512, "learning_rate": 0.00019387156529793146, "loss": 0.5348, "step": 12560 }, { "epoch": 0.14553327467234753, "grad_norm": 0.9994125366210938, "learning_rate": 0.00019402593393022538, "loss": 0.5179, "step": 12570 }, { "epoch": 0.14564905293382113, "grad_norm": 0.9378136396408081, "learning_rate": 0.00019418030256251932, "loss": 0.5007, "step": 12580 }, { "epoch": 0.14576483119529476, "grad_norm": 0.981959342956543, "learning_rate": 0.00019433467119481324, "loss": 0.541, "step": 12590 }, { "epoch": 0.1458806094567684, "grad_norm": 1.0490880012512207, "learning_rate": 0.00019448903982710714, "loss": 0.5489, "step": 12600 }, { "epoch": 0.14599638771824203, "grad_norm": 1.0558048486709595, "learning_rate": 0.00019464340845940105, "loss": 0.5072, "step": 12610 }, { "epoch": 0.14611216597971566, "grad_norm": 1.2084742784500122, "learning_rate": 0.00019479777709169497, "loss": 0.5364, "step": 12620 }, { "epoch": 0.14622794424118926, "grad_norm": 1.0054224729537964, "learning_rate": 0.0001949521457239889, "loss": 0.5353, "step": 12630 }, { "epoch": 0.1463437225026629, "grad_norm": 1.053541898727417, "learning_rate": 0.0001951065143562828, "loss": 0.5115, "step": 12640 }, { "epoch": 0.14645950076413652, "grad_norm": 1.075671911239624, "learning_rate": 0.00019526088298857673, "loss": 0.548, "step": 12650 }, { "epoch": 0.14657527902561016, "grad_norm": 1.0095752477645874, "learning_rate": 0.00019541525162087065, "loss": 0.5233, "step": 12660 }, { "epoch": 0.1466910572870838, "grad_norm": 1.2185146808624268, "learning_rate": 0.00019556962025316457, "loss": 0.5293, "step": 12670 }, { "epoch": 0.1468068355485574, "grad_norm": 0.9857822060585022, "learning_rate": 0.0001957239888854585, "loss": 0.5244, "step": 12680 }, { "epoch": 0.14692261381003102, "grad_norm": 1.0182740688323975, "learning_rate": 0.0001958783575177524, "loss": 0.5435, "step": 12690 }, { "epoch": 0.14703839207150465, "grad_norm": 0.9528821706771851, "learning_rate": 0.00019603272615004633, "loss": 0.5242, "step": 12700 }, { "epoch": 0.14715417033297828, "grad_norm": 0.8967875838279724, "learning_rate": 0.00019618709478234022, "loss": 0.5303, "step": 12710 }, { "epoch": 0.14726994859445192, "grad_norm": 0.9793334603309631, "learning_rate": 0.00019634146341463416, "loss": 0.529, "step": 12720 }, { "epoch": 0.14738572685592552, "grad_norm": 1.0513063669204712, "learning_rate": 0.00019649583204692808, "loss": 0.5534, "step": 12730 }, { "epoch": 0.14750150511739915, "grad_norm": 1.0110771656036377, "learning_rate": 0.000196650200679222, "loss": 0.5449, "step": 12740 }, { "epoch": 0.14761728337887278, "grad_norm": 1.05746328830719, "learning_rate": 0.00019680456931151592, "loss": 0.5395, "step": 12750 }, { "epoch": 0.1477330616403464, "grad_norm": 0.9189900755882263, "learning_rate": 0.0001969589379438098, "loss": 0.5224, "step": 12760 }, { "epoch": 0.14784883990182004, "grad_norm": 1.0486233234405518, "learning_rate": 0.00019711330657610373, "loss": 0.5275, "step": 12770 }, { "epoch": 0.14796461816329365, "grad_norm": 0.9569796323776245, "learning_rate": 0.00019726767520839765, "loss": 0.4871, "step": 12780 }, { "epoch": 0.14808039642476728, "grad_norm": 0.894376277923584, "learning_rate": 0.0001974220438406916, "loss": 0.5224, "step": 12790 }, { "epoch": 0.1481961746862409, "grad_norm": 0.9812209010124207, "learning_rate": 0.00019757641247298552, "loss": 0.5305, "step": 12800 }, { "epoch": 0.14831195294771454, "grad_norm": 1.0676387548446655, "learning_rate": 0.0001977307811052794, "loss": 0.5152, "step": 12810 }, { "epoch": 0.14842773120918817, "grad_norm": 0.9315291047096252, "learning_rate": 0.00019788514973757333, "loss": 0.5546, "step": 12820 }, { "epoch": 0.14854350947066178, "grad_norm": 0.7768070697784424, "learning_rate": 0.00019803951836986725, "loss": 0.5038, "step": 12830 }, { "epoch": 0.1486592877321354, "grad_norm": 0.975273609161377, "learning_rate": 0.00019819388700216116, "loss": 0.5401, "step": 12840 }, { "epoch": 0.14877506599360904, "grad_norm": 1.1329939365386963, "learning_rate": 0.00019834825563445508, "loss": 0.5497, "step": 12850 }, { "epoch": 0.14889084425508267, "grad_norm": 1.0359381437301636, "learning_rate": 0.000198502624266749, "loss": 0.4876, "step": 12860 }, { "epoch": 0.1490066225165563, "grad_norm": 0.9096277952194214, "learning_rate": 0.00019865699289904292, "loss": 0.4975, "step": 12870 }, { "epoch": 0.1491224007780299, "grad_norm": 0.8534004092216492, "learning_rate": 0.00019881136153133684, "loss": 0.5196, "step": 12880 }, { "epoch": 0.14923817903950354, "grad_norm": 1.071838140487671, "learning_rate": 0.00019896573016363076, "loss": 0.5364, "step": 12890 }, { "epoch": 0.14935395730097717, "grad_norm": 0.9981087446212769, "learning_rate": 0.00019912009879592468, "loss": 0.5281, "step": 12900 }, { "epoch": 0.1494697355624508, "grad_norm": 0.8559280633926392, "learning_rate": 0.0001992744674282186, "loss": 0.4899, "step": 12910 }, { "epoch": 0.14958551382392443, "grad_norm": 1.156166434288025, "learning_rate": 0.0001994288360605125, "loss": 0.5433, "step": 12920 }, { "epoch": 0.14970129208539804, "grad_norm": 0.9986305832862854, "learning_rate": 0.00019958320469280644, "loss": 0.5355, "step": 12930 }, { "epoch": 0.14981707034687167, "grad_norm": 1.0870987176895142, "learning_rate": 0.00019973757332510036, "loss": 0.5211, "step": 12940 }, { "epoch": 0.1499328486083453, "grad_norm": 0.9895318746566772, "learning_rate": 0.00019989194195739427, "loss": 0.5423, "step": 12950 }, { "epoch": 0.15004862686981893, "grad_norm": 1.019115924835205, "learning_rate": 0.0001999999999267045, "loss": 0.4864, "step": 12960 }, { "epoch": 0.15016440513129256, "grad_norm": 0.9224790930747986, "learning_rate": 0.00019999999862367344, "loss": 0.5464, "step": 12970 }, { "epoch": 0.15028018339276616, "grad_norm": 1.016212821006775, "learning_rate": 0.0001999999956918536, "loss": 0.5633, "step": 12980 }, { "epoch": 0.1503959616542398, "grad_norm": 0.9151487946510315, "learning_rate": 0.00019999999113124502, "loss": 0.53, "step": 12990 }, { "epoch": 0.15051173991571343, "grad_norm": 1.353114128112793, "learning_rate": 0.00019999998494184777, "loss": 0.5318, "step": 13000 }, { "epoch": 0.15051173991571343, "eval_chrf": 83.9289537905008, "eval_loss": 0.7918225526809692, "eval_runtime": 211.6443, "eval_samples_per_second": 0.472, "eval_steps_per_second": 0.019, "step": 13000 }, { "epoch": 0.15062751817718706, "grad_norm": 1.2324007749557495, "learning_rate": 0.00019999997712366195, "loss": 0.5601, "step": 13010 }, { "epoch": 0.1507432964386607, "grad_norm": 1.0476680994033813, "learning_rate": 0.00019999996767668766, "loss": 0.5061, "step": 13020 }, { "epoch": 0.1508590747001343, "grad_norm": 0.9604538083076477, "learning_rate": 0.00019999995660092512, "loss": 0.4906, "step": 13030 }, { "epoch": 0.15097485296160792, "grad_norm": 0.952919602394104, "learning_rate": 0.00019999994389637444, "loss": 0.522, "step": 13040 }, { "epoch": 0.15109063122308156, "grad_norm": 0.9822264909744263, "learning_rate": 0.0001999999295630359, "loss": 0.5136, "step": 13050 }, { "epoch": 0.1512064094845552, "grad_norm": 1.010438323020935, "learning_rate": 0.00019999991360090963, "loss": 0.5169, "step": 13060 }, { "epoch": 0.15132218774602882, "grad_norm": 1.0127623081207275, "learning_rate": 0.000199999896009996, "loss": 0.5399, "step": 13070 }, { "epoch": 0.15143796600750242, "grad_norm": 0.9422155022621155, "learning_rate": 0.00019999987679029523, "loss": 0.5439, "step": 13080 }, { "epoch": 0.15155374426897605, "grad_norm": 0.8040795922279358, "learning_rate": 0.00019999985594180766, "loss": 0.5234, "step": 13090 }, { "epoch": 0.15166952253044969, "grad_norm": 1.2307194471359253, "learning_rate": 0.0001999998334645336, "loss": 0.4947, "step": 13100 }, { "epoch": 0.15178530079192332, "grad_norm": 0.9566722512245178, "learning_rate": 0.0001999998093584735, "loss": 0.5329, "step": 13110 }, { "epoch": 0.15190107905339695, "grad_norm": 0.9066735506057739, "learning_rate": 0.00019999978362362762, "loss": 0.5071, "step": 13120 }, { "epoch": 0.15201685731487055, "grad_norm": 1.259695291519165, "learning_rate": 0.0001999997562599965, "loss": 0.547, "step": 13130 }, { "epoch": 0.15213263557634418, "grad_norm": 0.8511571884155273, "learning_rate": 0.0001999997272675805, "loss": 0.4821, "step": 13140 }, { "epoch": 0.15224841383781781, "grad_norm": 0.9403756260871887, "learning_rate": 0.00019999969664638014, "loss": 0.5393, "step": 13150 }, { "epoch": 0.15236419209929145, "grad_norm": 1.1131285429000854, "learning_rate": 0.0001999996643963959, "loss": 0.5155, "step": 13160 }, { "epoch": 0.15247997036076505, "grad_norm": 0.9601148962974548, "learning_rate": 0.0001999996305176283, "loss": 0.5146, "step": 13170 }, { "epoch": 0.15259574862223868, "grad_norm": 0.798585832118988, "learning_rate": 0.00019999959501007793, "loss": 0.5366, "step": 13180 }, { "epoch": 0.1527115268837123, "grad_norm": 1.0919116735458374, "learning_rate": 0.00019999955787374535, "loss": 0.5241, "step": 13190 }, { "epoch": 0.15282730514518594, "grad_norm": 0.9964574575424194, "learning_rate": 0.0001999995191086311, "loss": 0.5363, "step": 13200 }, { "epoch": 0.15294308340665957, "grad_norm": 1.0141183137893677, "learning_rate": 0.00019999947871473592, "loss": 0.5157, "step": 13210 }, { "epoch": 0.15305886166813318, "grad_norm": 1.152384877204895, "learning_rate": 0.0001999994366920604, "loss": 0.5328, "step": 13220 }, { "epoch": 0.1531746399296068, "grad_norm": 1.1605803966522217, "learning_rate": 0.00019999939304060524, "loss": 0.5297, "step": 13230 }, { "epoch": 0.15329041819108044, "grad_norm": 0.8750791549682617, "learning_rate": 0.00019999934776037117, "loss": 0.5387, "step": 13240 }, { "epoch": 0.15340619645255407, "grad_norm": 0.986000120639801, "learning_rate": 0.00019999930085135887, "loss": 0.5055, "step": 13250 }, { "epoch": 0.1535219747140277, "grad_norm": 0.9871428608894348, "learning_rate": 0.00019999925231356917, "loss": 0.525, "step": 13260 }, { "epoch": 0.1536377529755013, "grad_norm": 1.1668473482131958, "learning_rate": 0.00019999920214700283, "loss": 0.5097, "step": 13270 }, { "epoch": 0.15375353123697494, "grad_norm": 0.807448148727417, "learning_rate": 0.00019999915035166067, "loss": 0.5416, "step": 13280 }, { "epoch": 0.15386930949844857, "grad_norm": 0.9240652918815613, "learning_rate": 0.00019999909692754354, "loss": 0.5622, "step": 13290 }, { "epoch": 0.1539850877599222, "grad_norm": 0.8307350277900696, "learning_rate": 0.00019999904187465229, "loss": 0.4935, "step": 13300 }, { "epoch": 0.15410086602139583, "grad_norm": 0.808761477470398, "learning_rate": 0.00019999898519298782, "loss": 0.5065, "step": 13310 }, { "epoch": 0.15421664428286944, "grad_norm": 0.878026008605957, "learning_rate": 0.00019999892688255108, "loss": 0.4912, "step": 13320 }, { "epoch": 0.15433242254434307, "grad_norm": 0.9887682199478149, "learning_rate": 0.00019999886694334302, "loss": 0.5182, "step": 13330 }, { "epoch": 0.1544482008058167, "grad_norm": 1.0253299474716187, "learning_rate": 0.00019999880537536458, "loss": 0.528, "step": 13340 }, { "epoch": 0.15456397906729033, "grad_norm": 0.8339182734489441, "learning_rate": 0.00019999874217861677, "loss": 0.4959, "step": 13350 }, { "epoch": 0.15467975732876396, "grad_norm": 1.1303633451461792, "learning_rate": 0.00019999867735310067, "loss": 0.5416, "step": 13360 }, { "epoch": 0.15479553559023757, "grad_norm": 1.045961856842041, "learning_rate": 0.00019999861089881727, "loss": 0.5228, "step": 13370 }, { "epoch": 0.1549113138517112, "grad_norm": 1.3213156461715698, "learning_rate": 0.00019999854281576768, "loss": 0.5223, "step": 13380 }, { "epoch": 0.15502709211318483, "grad_norm": 0.9789355993270874, "learning_rate": 0.00019999847310395305, "loss": 0.5075, "step": 13390 }, { "epoch": 0.15514287037465846, "grad_norm": 1.0346044301986694, "learning_rate": 0.00019999840176337442, "loss": 0.5329, "step": 13400 }, { "epoch": 0.1552586486361321, "grad_norm": 1.0019253492355347, "learning_rate": 0.00019999832879403302, "loss": 0.5126, "step": 13410 }, { "epoch": 0.1553744268976057, "grad_norm": 1.1664997339248657, "learning_rate": 0.00019999825419593004, "loss": 0.5561, "step": 13420 }, { "epoch": 0.15549020515907933, "grad_norm": 1.0309034585952759, "learning_rate": 0.00019999817796906668, "loss": 0.495, "step": 13430 }, { "epoch": 0.15560598342055296, "grad_norm": 1.0485267639160156, "learning_rate": 0.00019999810011344418, "loss": 0.5099, "step": 13440 }, { "epoch": 0.1557217616820266, "grad_norm": 0.6741553544998169, "learning_rate": 0.0001999980206290638, "loss": 0.5028, "step": 13450 }, { "epoch": 0.15583753994350022, "grad_norm": 0.9793708324432373, "learning_rate": 0.00019999793951592688, "loss": 0.5219, "step": 13460 }, { "epoch": 0.15595331820497382, "grad_norm": 0.8301864266395569, "learning_rate": 0.00019999785677403463, "loss": 0.572, "step": 13470 }, { "epoch": 0.15606909646644745, "grad_norm": 0.9063313603401184, "learning_rate": 0.00019999777240338855, "loss": 0.5587, "step": 13480 }, { "epoch": 0.15618487472792109, "grad_norm": 0.9104887843132019, "learning_rate": 0.0001999976864039899, "loss": 0.5409, "step": 13490 }, { "epoch": 0.15630065298939472, "grad_norm": 0.9351223707199097, "learning_rate": 0.00019999759877584015, "loss": 0.4699, "step": 13500 }, { "epoch": 0.15641643125086835, "grad_norm": 1.0543067455291748, "learning_rate": 0.00019999750951894067, "loss": 0.5453, "step": 13510 }, { "epoch": 0.15653220951234195, "grad_norm": 0.950364351272583, "learning_rate": 0.00019999741863329295, "loss": 0.5182, "step": 13520 }, { "epoch": 0.15664798777381558, "grad_norm": 0.9859875440597534, "learning_rate": 0.00019999732611889845, "loss": 0.522, "step": 13530 }, { "epoch": 0.15676376603528921, "grad_norm": 1.1516504287719727, "learning_rate": 0.0001999972319757587, "loss": 0.5281, "step": 13540 }, { "epoch": 0.15687954429676285, "grad_norm": 0.9546409249305725, "learning_rate": 0.00019999713620387518, "loss": 0.542, "step": 13550 }, { "epoch": 0.15699532255823648, "grad_norm": 0.8956830501556396, "learning_rate": 0.00019999703880324953, "loss": 0.5171, "step": 13560 }, { "epoch": 0.15711110081971008, "grad_norm": 0.9716196060180664, "learning_rate": 0.0001999969397738833, "loss": 0.5131, "step": 13570 }, { "epoch": 0.1572268790811837, "grad_norm": 1.1810029745101929, "learning_rate": 0.00019999683911577808, "loss": 0.5289, "step": 13580 }, { "epoch": 0.15734265734265734, "grad_norm": 1.1359663009643555, "learning_rate": 0.00019999673682893554, "loss": 0.4823, "step": 13590 }, { "epoch": 0.15745843560413098, "grad_norm": 1.0935301780700684, "learning_rate": 0.00019999663291335736, "loss": 0.4927, "step": 13600 }, { "epoch": 0.1575742138656046, "grad_norm": 0.9837374091148376, "learning_rate": 0.00019999652736904516, "loss": 0.4688, "step": 13610 }, { "epoch": 0.1576899921270782, "grad_norm": 1.077224612236023, "learning_rate": 0.00019999642019600074, "loss": 0.5423, "step": 13620 }, { "epoch": 0.15780577038855184, "grad_norm": 0.9799237251281738, "learning_rate": 0.00019999631139422578, "loss": 0.5028, "step": 13630 }, { "epoch": 0.15792154865002547, "grad_norm": 1.0222835540771484, "learning_rate": 0.00019999620096372212, "loss": 0.5321, "step": 13640 }, { "epoch": 0.1580373269114991, "grad_norm": 0.7173193097114563, "learning_rate": 0.00019999608890449152, "loss": 0.5225, "step": 13650 }, { "epoch": 0.15815310517297274, "grad_norm": 0.8962286710739136, "learning_rate": 0.00019999597521653577, "loss": 0.4806, "step": 13660 }, { "epoch": 0.15826888343444634, "grad_norm": 1.0847452878952026, "learning_rate": 0.00019999585989985682, "loss": 0.5528, "step": 13670 }, { "epoch": 0.15838466169591997, "grad_norm": 0.8121541142463684, "learning_rate": 0.00019999574295445643, "loss": 0.552, "step": 13680 }, { "epoch": 0.1585004399573936, "grad_norm": 0.8168137073516846, "learning_rate": 0.0001999956243803366, "loss": 0.5343, "step": 13690 }, { "epoch": 0.15861621821886723, "grad_norm": 0.984264612197876, "learning_rate": 0.0001999955041774992, "loss": 0.5343, "step": 13700 }, { "epoch": 0.15873199648034086, "grad_norm": 0.9627836346626282, "learning_rate": 0.00019999538234594623, "loss": 0.499, "step": 13710 }, { "epoch": 0.15884777474181447, "grad_norm": 1.0787767171859741, "learning_rate": 0.00019999525888567967, "loss": 0.5169, "step": 13720 }, { "epoch": 0.1589635530032881, "grad_norm": 1.0938972234725952, "learning_rate": 0.00019999513379670146, "loss": 0.5098, "step": 13730 }, { "epoch": 0.15907933126476173, "grad_norm": 0.7843903303146362, "learning_rate": 0.00019999500707901377, "loss": 0.5162, "step": 13740 }, { "epoch": 0.15919510952623536, "grad_norm": 0.99320387840271, "learning_rate": 0.00019999487873261854, "loss": 0.5313, "step": 13750 }, { "epoch": 0.159310887787709, "grad_norm": 1.0005385875701904, "learning_rate": 0.00019999474875751793, "loss": 0.5345, "step": 13760 }, { "epoch": 0.1594266660491826, "grad_norm": 0.8248289227485657, "learning_rate": 0.00019999461715371403, "loss": 0.5342, "step": 13770 }, { "epoch": 0.15954244431065623, "grad_norm": 1.2158162593841553, "learning_rate": 0.00019999448392120898, "loss": 0.4959, "step": 13780 }, { "epoch": 0.15965822257212986, "grad_norm": 0.9897249937057495, "learning_rate": 0.000199994349060005, "loss": 0.5572, "step": 13790 }, { "epoch": 0.1597740008336035, "grad_norm": 0.9399228692054749, "learning_rate": 0.0001999942125701042, "loss": 0.5656, "step": 13800 }, { "epoch": 0.1598897790950771, "grad_norm": 1.1157760620117188, "learning_rate": 0.0001999940744515089, "loss": 0.5138, "step": 13810 }, { "epoch": 0.16000555735655073, "grad_norm": 1.0835034847259521, "learning_rate": 0.00019999393470422126, "loss": 0.484, "step": 13820 }, { "epoch": 0.16012133561802436, "grad_norm": 0.9482656717300415, "learning_rate": 0.0001999937933282436, "loss": 0.5382, "step": 13830 }, { "epoch": 0.160237113879498, "grad_norm": 0.8925505876541138, "learning_rate": 0.00019999365032357823, "loss": 0.5222, "step": 13840 }, { "epoch": 0.16035289214097162, "grad_norm": 0.9050923585891724, "learning_rate": 0.00019999350569022747, "loss": 0.5213, "step": 13850 }, { "epoch": 0.16046867040244522, "grad_norm": 1.175371766090393, "learning_rate": 0.00019999335942819365, "loss": 0.5126, "step": 13860 }, { "epoch": 0.16058444866391886, "grad_norm": 0.9089219570159912, "learning_rate": 0.0001999932115374792, "loss": 0.5079, "step": 13870 }, { "epoch": 0.1607002269253925, "grad_norm": 0.9556267261505127, "learning_rate": 0.0001999930620180865, "loss": 0.5313, "step": 13880 }, { "epoch": 0.16081600518686612, "grad_norm": 1.0205243825912476, "learning_rate": 0.00019999291087001797, "loss": 0.4936, "step": 13890 }, { "epoch": 0.16093178344833975, "grad_norm": 1.4007447957992554, "learning_rate": 0.0001999927580932761, "loss": 0.5548, "step": 13900 }, { "epoch": 0.16104756170981335, "grad_norm": 1.0816422700881958, "learning_rate": 0.0001999926036878634, "loss": 0.4974, "step": 13910 }, { "epoch": 0.16116333997128698, "grad_norm": 1.1491740942001343, "learning_rate": 0.0001999924476537823, "loss": 0.5298, "step": 13920 }, { "epoch": 0.16127911823276062, "grad_norm": 0.7216561436653137, "learning_rate": 0.00019999228999103546, "loss": 0.4947, "step": 13930 }, { "epoch": 0.16139489649423425, "grad_norm": 0.9310114979743958, "learning_rate": 0.00019999213069962535, "loss": 0.5117, "step": 13940 }, { "epoch": 0.16151067475570788, "grad_norm": 0.8562695980072021, "learning_rate": 0.00019999196977955462, "loss": 0.5051, "step": 13950 }, { "epoch": 0.16162645301718148, "grad_norm": 1.0261586904525757, "learning_rate": 0.00019999180723082583, "loss": 0.506, "step": 13960 }, { "epoch": 0.1617422312786551, "grad_norm": 0.9514285326004028, "learning_rate": 0.0001999916430534417, "loss": 0.5328, "step": 13970 }, { "epoch": 0.16185800954012874, "grad_norm": 0.9260467886924744, "learning_rate": 0.00019999147724740487, "loss": 0.5432, "step": 13980 }, { "epoch": 0.16197378780160238, "grad_norm": 0.9063887000083923, "learning_rate": 0.00019999130981271805, "loss": 0.491, "step": 13990 }, { "epoch": 0.162089566063076, "grad_norm": 1.0284674167633057, "learning_rate": 0.0001999911407493839, "loss": 0.5596, "step": 14000 }, { "epoch": 0.162089566063076, "eval_chrf": 81.43673640677459, "eval_loss": 0.7931199073791504, "eval_runtime": 349.9972, "eval_samples_per_second": 0.286, "eval_steps_per_second": 0.011, "step": 14000 }, { "epoch": 0.1622053443245496, "grad_norm": 0.8409836888313293, "learning_rate": 0.00019999097005740532, "loss": 0.5129, "step": 14010 }, { "epoch": 0.16232112258602324, "grad_norm": 0.8699890971183777, "learning_rate": 0.00019999079773678498, "loss": 0.4965, "step": 14020 }, { "epoch": 0.16243690084749687, "grad_norm": 0.8601651787757874, "learning_rate": 0.00019999062378752566, "loss": 0.5138, "step": 14030 }, { "epoch": 0.1625526791089705, "grad_norm": 0.9185842275619507, "learning_rate": 0.0001999904482096303, "loss": 0.4865, "step": 14040 }, { "epoch": 0.16266845737044414, "grad_norm": 0.781516432762146, "learning_rate": 0.00019999027100310167, "loss": 0.5206, "step": 14050 }, { "epoch": 0.16278423563191774, "grad_norm": 0.9632774591445923, "learning_rate": 0.0001999900921679427, "loss": 0.5376, "step": 14060 }, { "epoch": 0.16290001389339137, "grad_norm": 0.9048920273780823, "learning_rate": 0.00019998991170415632, "loss": 0.507, "step": 14070 }, { "epoch": 0.163015792154865, "grad_norm": 1.1058522462844849, "learning_rate": 0.0001999897296117454, "loss": 0.4937, "step": 14080 }, { "epoch": 0.16313157041633863, "grad_norm": 0.8302661180496216, "learning_rate": 0.00019998954589071298, "loss": 0.5234, "step": 14090 }, { "epoch": 0.16324734867781227, "grad_norm": 0.8306782245635986, "learning_rate": 0.000199989360541062, "loss": 0.5215, "step": 14100 }, { "epoch": 0.16336312693928587, "grad_norm": 1.0566506385803223, "learning_rate": 0.00019998917356279548, "loss": 0.5306, "step": 14110 }, { "epoch": 0.1634789052007595, "grad_norm": 1.0093319416046143, "learning_rate": 0.00019998898495591653, "loss": 0.5165, "step": 14120 }, { "epoch": 0.16359468346223313, "grad_norm": 0.8950533866882324, "learning_rate": 0.00019998879472042815, "loss": 0.5321, "step": 14130 }, { "epoch": 0.16371046172370676, "grad_norm": 0.926193118095398, "learning_rate": 0.0001999886028563335, "loss": 0.4834, "step": 14140 }, { "epoch": 0.1638262399851804, "grad_norm": 0.7696795463562012, "learning_rate": 0.00019998840936363564, "loss": 0.5467, "step": 14150 }, { "epoch": 0.163942018246654, "grad_norm": 0.9309307932853699, "learning_rate": 0.00019998821424233775, "loss": 0.5062, "step": 14160 }, { "epoch": 0.16405779650812763, "grad_norm": 0.8849621415138245, "learning_rate": 0.00019998801749244302, "loss": 0.5309, "step": 14170 }, { "epoch": 0.16417357476960126, "grad_norm": 1.0118907690048218, "learning_rate": 0.00019998781911395464, "loss": 0.5307, "step": 14180 }, { "epoch": 0.1642893530310749, "grad_norm": 0.9496971368789673, "learning_rate": 0.00019998761910687584, "loss": 0.5341, "step": 14190 }, { "epoch": 0.16440513129254852, "grad_norm": 0.8178584575653076, "learning_rate": 0.00019998741747120989, "loss": 0.4948, "step": 14200 }, { "epoch": 0.16452090955402213, "grad_norm": 0.8048959374427795, "learning_rate": 0.00019998721420696008, "loss": 0.5327, "step": 14210 }, { "epoch": 0.16463668781549576, "grad_norm": 0.9143779277801514, "learning_rate": 0.0001999870093141297, "loss": 0.5188, "step": 14220 }, { "epoch": 0.1647524660769694, "grad_norm": 0.8038369417190552, "learning_rate": 0.0001999868027927221, "loss": 0.5485, "step": 14230 }, { "epoch": 0.16486824433844302, "grad_norm": 0.859785795211792, "learning_rate": 0.0001999865946427406, "loss": 0.4995, "step": 14240 }, { "epoch": 0.16498402259991665, "grad_norm": 0.8410471081733704, "learning_rate": 0.00019998638486418867, "loss": 0.5308, "step": 14250 }, { "epoch": 0.16509980086139026, "grad_norm": 1.0899213552474976, "learning_rate": 0.0001999861734570697, "loss": 0.5215, "step": 14260 }, { "epoch": 0.1652155791228639, "grad_norm": 0.8579217791557312, "learning_rate": 0.00019998596042138708, "loss": 0.5162, "step": 14270 }, { "epoch": 0.16533135738433752, "grad_norm": 0.8680914044380188, "learning_rate": 0.00019998574575714434, "loss": 0.4844, "step": 14280 }, { "epoch": 0.16544713564581115, "grad_norm": 0.9365717172622681, "learning_rate": 0.00019998552946434494, "loss": 0.5281, "step": 14290 }, { "epoch": 0.16556291390728478, "grad_norm": 0.9904703497886658, "learning_rate": 0.00019998531154299246, "loss": 0.4714, "step": 14300 }, { "epoch": 0.16567869216875838, "grad_norm": 0.9480510950088501, "learning_rate": 0.00019998509199309038, "loss": 0.5137, "step": 14310 }, { "epoch": 0.16579447043023202, "grad_norm": 0.9532126188278198, "learning_rate": 0.0001999848708146423, "loss": 0.5092, "step": 14320 }, { "epoch": 0.16591024869170565, "grad_norm": 1.0289478302001953, "learning_rate": 0.00019998464800765185, "loss": 0.4897, "step": 14330 }, { "epoch": 0.16602602695317928, "grad_norm": 0.9527698159217834, "learning_rate": 0.0001999844235721226, "loss": 0.5203, "step": 14340 }, { "epoch": 0.1661418052146529, "grad_norm": 0.8923049569129944, "learning_rate": 0.0001999841975080583, "loss": 0.4317, "step": 14350 }, { "epoch": 0.1662575834761265, "grad_norm": 0.9820677042007446, "learning_rate": 0.00019998396981546251, "loss": 0.514, "step": 14360 }, { "epoch": 0.16637336173760015, "grad_norm": 0.8312664031982422, "learning_rate": 0.00019998374049433906, "loss": 0.5393, "step": 14370 }, { "epoch": 0.16648913999907378, "grad_norm": 0.8548251390457153, "learning_rate": 0.00019998350954469158, "loss": 0.507, "step": 14380 }, { "epoch": 0.1666049182605474, "grad_norm": 0.835356593132019, "learning_rate": 0.00019998327696652392, "loss": 0.5075, "step": 14390 }, { "epoch": 0.16672069652202104, "grad_norm": 0.7591782212257385, "learning_rate": 0.0001999830427598398, "loss": 0.4992, "step": 14400 }, { "epoch": 0.16683647478349464, "grad_norm": 0.845977783203125, "learning_rate": 0.0001999828069246431, "loss": 0.5341, "step": 14410 }, { "epoch": 0.16695225304496827, "grad_norm": 0.9690206050872803, "learning_rate": 0.00019998256946093758, "loss": 0.5106, "step": 14420 }, { "epoch": 0.1670680313064419, "grad_norm": 1.0200231075286865, "learning_rate": 0.00019998233036872716, "loss": 0.5564, "step": 14430 }, { "epoch": 0.16718380956791554, "grad_norm": 1.0304076671600342, "learning_rate": 0.00019998208964801573, "loss": 0.5094, "step": 14440 }, { "epoch": 0.16729958782938914, "grad_norm": 0.9124101996421814, "learning_rate": 0.00019998184729880722, "loss": 0.4673, "step": 14450 }, { "epoch": 0.16741536609086277, "grad_norm": 1.0167433023452759, "learning_rate": 0.00019998160332110554, "loss": 0.5141, "step": 14460 }, { "epoch": 0.1675311443523364, "grad_norm": 1.0613348484039307, "learning_rate": 0.00019998135771491472, "loss": 0.4761, "step": 14470 }, { "epoch": 0.16764692261381003, "grad_norm": 1.0821583271026611, "learning_rate": 0.0001999811104802387, "loss": 0.5348, "step": 14480 }, { "epoch": 0.16776270087528367, "grad_norm": 0.9306504726409912, "learning_rate": 0.00019998086161708155, "loss": 0.5123, "step": 14490 }, { "epoch": 0.16787847913675727, "grad_norm": 0.7311174273490906, "learning_rate": 0.0001999806111254473, "loss": 0.4772, "step": 14500 }, { "epoch": 0.1679942573982309, "grad_norm": 1.2023673057556152, "learning_rate": 0.00019998035900534004, "loss": 0.4854, "step": 14510 }, { "epoch": 0.16811003565970453, "grad_norm": 1.234139323234558, "learning_rate": 0.00019998010525676388, "loss": 0.4833, "step": 14520 }, { "epoch": 0.16822581392117816, "grad_norm": 0.8681570291519165, "learning_rate": 0.00019997984987972297, "loss": 0.5141, "step": 14530 }, { "epoch": 0.1683415921826518, "grad_norm": 0.805265486240387, "learning_rate": 0.0001999795928742214, "loss": 0.5431, "step": 14540 }, { "epoch": 0.1684573704441254, "grad_norm": 0.8005607724189758, "learning_rate": 0.00019997933424026345, "loss": 0.4831, "step": 14550 }, { "epoch": 0.16857314870559903, "grad_norm": 1.0039647817611694, "learning_rate": 0.00019997907397785328, "loss": 0.5067, "step": 14560 }, { "epoch": 0.16868892696707266, "grad_norm": 0.8453294038772583, "learning_rate": 0.0001999788120869951, "loss": 0.4764, "step": 14570 }, { "epoch": 0.1688047052285463, "grad_norm": 0.9476231336593628, "learning_rate": 0.0001999785485676932, "loss": 0.5278, "step": 14580 }, { "epoch": 0.16892048349001992, "grad_norm": 1.1090285778045654, "learning_rate": 0.00019997828341995194, "loss": 0.4858, "step": 14590 }, { "epoch": 0.16903626175149353, "grad_norm": 0.9511401057243347, "learning_rate": 0.00019997801664377557, "loss": 0.5136, "step": 14600 }, { "epoch": 0.16915204001296716, "grad_norm": 0.8264870643615723, "learning_rate": 0.00019997774823916843, "loss": 0.5244, "step": 14610 }, { "epoch": 0.1692678182744408, "grad_norm": 0.8192489743232727, "learning_rate": 0.00019997747820613496, "loss": 0.4999, "step": 14620 }, { "epoch": 0.16938359653591442, "grad_norm": 0.8035051822662354, "learning_rate": 0.00019997720654467944, "loss": 0.5281, "step": 14630 }, { "epoch": 0.16949937479738805, "grad_norm": 0.9535481333732605, "learning_rate": 0.0001999769332548064, "loss": 0.5127, "step": 14640 }, { "epoch": 0.16961515305886166, "grad_norm": 0.9488846659660339, "learning_rate": 0.0001999766583365202, "loss": 0.5075, "step": 14650 }, { "epoch": 0.1697309313203353, "grad_norm": 0.7904040217399597, "learning_rate": 0.00019997638178982543, "loss": 0.5244, "step": 14660 }, { "epoch": 0.16984670958180892, "grad_norm": 0.9358911514282227, "learning_rate": 0.0001999761036147265, "loss": 0.5164, "step": 14670 }, { "epoch": 0.16996248784328255, "grad_norm": 0.9743123054504395, "learning_rate": 0.00019997582381122802, "loss": 0.5334, "step": 14680 }, { "epoch": 0.17007826610475618, "grad_norm": 1.0591320991516113, "learning_rate": 0.0001999755423793345, "loss": 0.5063, "step": 14690 }, { "epoch": 0.17019404436622979, "grad_norm": 0.8685837984085083, "learning_rate": 0.00019997525931905047, "loss": 0.4992, "step": 14700 }, { "epoch": 0.17030982262770342, "grad_norm": 0.9020100831985474, "learning_rate": 0.00019997497463038064, "loss": 0.4914, "step": 14710 }, { "epoch": 0.17042560088917705, "grad_norm": 1.0350594520568848, "learning_rate": 0.0001999746883133296, "loss": 0.4918, "step": 14720 }, { "epoch": 0.17054137915065068, "grad_norm": 0.8129405379295349, "learning_rate": 0.00019997440036790203, "loss": 0.5009, "step": 14730 }, { "epoch": 0.1706571574121243, "grad_norm": 0.8105813264846802, "learning_rate": 0.0001999741107941026, "loss": 0.5411, "step": 14740 }, { "epoch": 0.17077293567359791, "grad_norm": 0.9189984202384949, "learning_rate": 0.00019997381959193603, "loss": 0.5139, "step": 14750 }, { "epoch": 0.17088871393507155, "grad_norm": 0.8906582593917847, "learning_rate": 0.00019997352676140708, "loss": 0.4861, "step": 14760 }, { "epoch": 0.17100449219654518, "grad_norm": 0.7896034121513367, "learning_rate": 0.0001999732323025205, "loss": 0.5163, "step": 14770 }, { "epoch": 0.1711202704580188, "grad_norm": 0.9161441326141357, "learning_rate": 0.00019997293621528108, "loss": 0.4858, "step": 14780 }, { "epoch": 0.17123604871949244, "grad_norm": 0.8519560694694519, "learning_rate": 0.0001999726384996937, "loss": 0.5316, "step": 14790 }, { "epoch": 0.17135182698096604, "grad_norm": 0.759761393070221, "learning_rate": 0.00019997233915576313, "loss": 0.5132, "step": 14800 }, { "epoch": 0.17146760524243967, "grad_norm": 0.6669837236404419, "learning_rate": 0.0001999720381834943, "loss": 0.4624, "step": 14810 }, { "epoch": 0.1715833835039133, "grad_norm": 0.8967482447624207, "learning_rate": 0.00019997173558289206, "loss": 0.4829, "step": 14820 }, { "epoch": 0.17169916176538694, "grad_norm": 0.7932374477386475, "learning_rate": 0.00019997143135396138, "loss": 0.4793, "step": 14830 }, { "epoch": 0.17181494002686057, "grad_norm": 0.9862871766090393, "learning_rate": 0.00019997112549670725, "loss": 0.5125, "step": 14840 }, { "epoch": 0.17193071828833417, "grad_norm": 0.8630059957504272, "learning_rate": 0.0001999708180111346, "loss": 0.5138, "step": 14850 }, { "epoch": 0.1720464965498078, "grad_norm": 1.0792642831802368, "learning_rate": 0.00019997050889724842, "loss": 0.5013, "step": 14860 }, { "epoch": 0.17216227481128143, "grad_norm": 0.7956786155700684, "learning_rate": 0.00019997019815505378, "loss": 0.494, "step": 14870 }, { "epoch": 0.17227805307275507, "grad_norm": 1.120779037475586, "learning_rate": 0.00019996988578455574, "loss": 0.5057, "step": 14880 }, { "epoch": 0.1723938313342287, "grad_norm": 1.0774561166763306, "learning_rate": 0.0001999695717857594, "loss": 0.5308, "step": 14890 }, { "epoch": 0.1725096095957023, "grad_norm": 1.391444444656372, "learning_rate": 0.0001999692561586698, "loss": 0.4611, "step": 14900 }, { "epoch": 0.17262538785717593, "grad_norm": 1.1209179162979126, "learning_rate": 0.0001999689389032922, "loss": 0.5135, "step": 14910 }, { "epoch": 0.17274116611864956, "grad_norm": 0.8599587678909302, "learning_rate": 0.00019996862001963169, "loss": 0.5263, "step": 14920 }, { "epoch": 0.1728569443801232, "grad_norm": 1.065420389175415, "learning_rate": 0.00019996829950769346, "loss": 0.5095, "step": 14930 }, { "epoch": 0.17297272264159683, "grad_norm": 1.0574932098388672, "learning_rate": 0.00019996797736748277, "loss": 0.5101, "step": 14940 }, { "epoch": 0.17308850090307043, "grad_norm": 1.0462032556533813, "learning_rate": 0.0001999676535990048, "loss": 0.5052, "step": 14950 }, { "epoch": 0.17320427916454406, "grad_norm": 0.8948199152946472, "learning_rate": 0.00019996732820226493, "loss": 0.5418, "step": 14960 }, { "epoch": 0.1733200574260177, "grad_norm": 0.8862332701683044, "learning_rate": 0.00019996700117726836, "loss": 0.5027, "step": 14970 }, { "epoch": 0.17343583568749132, "grad_norm": 0.9259552359580994, "learning_rate": 0.00019996667252402048, "loss": 0.4958, "step": 14980 }, { "epoch": 0.17355161394896496, "grad_norm": 0.913835883140564, "learning_rate": 0.0001999663422425266, "loss": 0.5157, "step": 14990 }, { "epoch": 0.17366739221043856, "grad_norm": 0.9743481278419495, "learning_rate": 0.00019996601033279215, "loss": 0.509, "step": 15000 }, { "epoch": 0.17366739221043856, "eval_chrf": 85.47568328388861, "eval_loss": 0.7757729291915894, "eval_runtime": 105.9694, "eval_samples_per_second": 0.944, "eval_steps_per_second": 0.038, "step": 15000 }, { "epoch": 0.1737831704719122, "grad_norm": 0.8948907852172852, "learning_rate": 0.00019996567679482244, "loss": 0.5093, "step": 15010 }, { "epoch": 0.17389894873338582, "grad_norm": 0.8802430033683777, "learning_rate": 0.00019996534162862304, "loss": 0.5024, "step": 15020 }, { "epoch": 0.17401472699485945, "grad_norm": 0.9146379828453064, "learning_rate": 0.0001999650048341993, "loss": 0.5022, "step": 15030 }, { "epoch": 0.17413050525633308, "grad_norm": 0.8232753276824951, "learning_rate": 0.00019996466641155675, "loss": 0.4949, "step": 15040 }, { "epoch": 0.1742462835178067, "grad_norm": 0.8490627408027649, "learning_rate": 0.0001999643263607009, "loss": 0.5332, "step": 15050 }, { "epoch": 0.17436206177928032, "grad_norm": 1.0217550992965698, "learning_rate": 0.00019996398468163728, "loss": 0.5035, "step": 15060 }, { "epoch": 0.17447784004075395, "grad_norm": 0.8498371243476868, "learning_rate": 0.00019996364137437147, "loss": 0.4705, "step": 15070 }, { "epoch": 0.17459361830222758, "grad_norm": 1.047986388206482, "learning_rate": 0.000199963296438909, "loss": 0.5127, "step": 15080 }, { "epoch": 0.17470939656370119, "grad_norm": 1.009361982345581, "learning_rate": 0.00019996294987525558, "loss": 0.4933, "step": 15090 }, { "epoch": 0.17482517482517482, "grad_norm": 0.49553871154785156, "learning_rate": 0.00019996260168341683, "loss": 0.4879, "step": 15100 }, { "epoch": 0.17494095308664845, "grad_norm": 1.1071234941482544, "learning_rate": 0.0001999622518633984, "loss": 0.5134, "step": 15110 }, { "epoch": 0.17505673134812208, "grad_norm": 0.8918356895446777, "learning_rate": 0.00019996190041520596, "loss": 0.4804, "step": 15120 }, { "epoch": 0.1751725096095957, "grad_norm": 0.9107331037521362, "learning_rate": 0.0001999615473388453, "loss": 0.4847, "step": 15130 }, { "epoch": 0.17528828787106931, "grad_norm": 0.9363231062889099, "learning_rate": 0.0001999611926343221, "loss": 0.4694, "step": 15140 }, { "epoch": 0.17540406613254295, "grad_norm": 1.1326897144317627, "learning_rate": 0.00019996083630164217, "loss": 0.5201, "step": 15150 }, { "epoch": 0.17551984439401658, "grad_norm": 0.9087540507316589, "learning_rate": 0.00019996047834081138, "loss": 0.5203, "step": 15160 }, { "epoch": 0.1756356226554902, "grad_norm": 0.9782746434211731, "learning_rate": 0.00019996011875183544, "loss": 0.5282, "step": 15170 }, { "epoch": 0.17575140091696384, "grad_norm": 0.9152000546455383, "learning_rate": 0.0001999597575347203, "loss": 0.4961, "step": 15180 }, { "epoch": 0.17586717917843744, "grad_norm": 0.9302141070365906, "learning_rate": 0.0001999593946894718, "loss": 0.5099, "step": 15190 }, { "epoch": 0.17598295743991108, "grad_norm": 0.860459566116333, "learning_rate": 0.00019995903021609586, "loss": 0.5068, "step": 15200 }, { "epoch": 0.1760987357013847, "grad_norm": 0.8768830299377441, "learning_rate": 0.00019995866411459844, "loss": 0.5152, "step": 15210 }, { "epoch": 0.17621451396285834, "grad_norm": 0.6508819460868835, "learning_rate": 0.00019995829638498545, "loss": 0.4922, "step": 15220 }, { "epoch": 0.17633029222433197, "grad_norm": 0.9003118872642517, "learning_rate": 0.0001999579270272629, "loss": 0.4852, "step": 15230 }, { "epoch": 0.17644607048580557, "grad_norm": 0.8923856019973755, "learning_rate": 0.0001999575560414368, "loss": 0.4897, "step": 15240 }, { "epoch": 0.1765618487472792, "grad_norm": 1.007230520248413, "learning_rate": 0.00019995718342751326, "loss": 0.51, "step": 15250 }, { "epoch": 0.17667762700875284, "grad_norm": 1.056197166442871, "learning_rate": 0.00019995680918549828, "loss": 0.5056, "step": 15260 }, { "epoch": 0.17679340527022647, "grad_norm": 0.9003465175628662, "learning_rate": 0.00019995643331539796, "loss": 0.5005, "step": 15270 }, { "epoch": 0.1769091835317001, "grad_norm": 0.7962557673454285, "learning_rate": 0.00019995605581721844, "loss": 0.4852, "step": 15280 }, { "epoch": 0.1770249617931737, "grad_norm": 1.1030645370483398, "learning_rate": 0.00019995567669096585, "loss": 0.4941, "step": 15290 }, { "epoch": 0.17714074005464733, "grad_norm": 0.9637556672096252, "learning_rate": 0.00019995529593664642, "loss": 0.4518, "step": 15300 }, { "epoch": 0.17725651831612096, "grad_norm": 1.0759456157684326, "learning_rate": 0.00019995491355426624, "loss": 0.4993, "step": 15310 }, { "epoch": 0.1773722965775946, "grad_norm": 1.3181650638580322, "learning_rate": 0.00019995452954383168, "loss": 0.5236, "step": 15320 }, { "epoch": 0.17748807483906823, "grad_norm": 0.907558798789978, "learning_rate": 0.0001999541439053489, "loss": 0.4732, "step": 15330 }, { "epoch": 0.17760385310054183, "grad_norm": 0.8819958567619324, "learning_rate": 0.0001999537566388242, "loss": 0.481, "step": 15340 }, { "epoch": 0.17771963136201546, "grad_norm": 0.9222123622894287, "learning_rate": 0.00019995336774426386, "loss": 0.5429, "step": 15350 }, { "epoch": 0.1778354096234891, "grad_norm": 0.916992723941803, "learning_rate": 0.00019995297722167428, "loss": 0.5154, "step": 15360 }, { "epoch": 0.17795118788496272, "grad_norm": 0.9565227627754211, "learning_rate": 0.0001999525850710618, "loss": 0.5035, "step": 15370 }, { "epoch": 0.17806696614643636, "grad_norm": 1.0885106325149536, "learning_rate": 0.00019995219129243275, "loss": 0.5016, "step": 15380 }, { "epoch": 0.17818274440790996, "grad_norm": 0.9579272270202637, "learning_rate": 0.00019995179588579363, "loss": 0.5178, "step": 15390 }, { "epoch": 0.1782985226693836, "grad_norm": 0.8931837677955627, "learning_rate": 0.0001999513988511508, "loss": 0.4913, "step": 15400 }, { "epoch": 0.17841430093085722, "grad_norm": 0.842904269695282, "learning_rate": 0.0001999510001885108, "loss": 0.524, "step": 15410 }, { "epoch": 0.17853007919233085, "grad_norm": 1.0296918153762817, "learning_rate": 0.00019995059989788008, "loss": 0.5002, "step": 15420 }, { "epoch": 0.17864585745380449, "grad_norm": 0.8142427206039429, "learning_rate": 0.00019995019797926515, "loss": 0.489, "step": 15430 }, { "epoch": 0.1787616357152781, "grad_norm": 0.955742359161377, "learning_rate": 0.00019994979443267258, "loss": 0.5261, "step": 15440 }, { "epoch": 0.17887741397675172, "grad_norm": 0.9862329959869385, "learning_rate": 0.00019994938925810895, "loss": 0.4873, "step": 15450 }, { "epoch": 0.17899319223822535, "grad_norm": 1.1219494342803955, "learning_rate": 0.00019994898245558084, "loss": 0.5111, "step": 15460 }, { "epoch": 0.17910897049969898, "grad_norm": 0.8652596473693848, "learning_rate": 0.0001999485740250949, "loss": 0.5194, "step": 15470 }, { "epoch": 0.17922474876117261, "grad_norm": 0.9113269448280334, "learning_rate": 0.00019994816396665775, "loss": 0.5139, "step": 15480 }, { "epoch": 0.17934052702264622, "grad_norm": 1.0372772216796875, "learning_rate": 0.0001999477522802761, "loss": 0.4604, "step": 15490 }, { "epoch": 0.17945630528411985, "grad_norm": 0.8772414922714233, "learning_rate": 0.0001999473389659566, "loss": 0.4644, "step": 15500 }, { "epoch": 0.17957208354559348, "grad_norm": 0.9783409833908081, "learning_rate": 0.00019994692402370605, "loss": 0.521, "step": 15510 }, { "epoch": 0.1796878618070671, "grad_norm": 1.0312986373901367, "learning_rate": 0.00019994650745353115, "loss": 0.5288, "step": 15520 }, { "epoch": 0.17980364006854074, "grad_norm": 0.951167106628418, "learning_rate": 0.0001999460892554388, "loss": 0.5269, "step": 15530 }, { "epoch": 0.17991941833001435, "grad_norm": 0.9194570779800415, "learning_rate": 0.00019994566942943564, "loss": 0.5372, "step": 15540 }, { "epoch": 0.18003519659148798, "grad_norm": 0.8009816408157349, "learning_rate": 0.00019994524797552863, "loss": 0.5026, "step": 15550 }, { "epoch": 0.1801509748529616, "grad_norm": 0.7390750646591187, "learning_rate": 0.0001999448248937246, "loss": 0.493, "step": 15560 }, { "epoch": 0.18026675311443524, "grad_norm": 0.908737063407898, "learning_rate": 0.00019994440018403044, "loss": 0.4882, "step": 15570 }, { "epoch": 0.18038253137590887, "grad_norm": 0.838344931602478, "learning_rate": 0.00019994397384645306, "loss": 0.4914, "step": 15580 }, { "epoch": 0.18049830963738248, "grad_norm": 0.8958513736724854, "learning_rate": 0.00019994354588099943, "loss": 0.499, "step": 15590 }, { "epoch": 0.1806140878988561, "grad_norm": 0.8887691497802734, "learning_rate": 0.0001999431162876765, "loss": 0.5049, "step": 15600 }, { "epoch": 0.18072986616032974, "grad_norm": 0.8820658326148987, "learning_rate": 0.00019994268506649126, "loss": 0.4739, "step": 15610 }, { "epoch": 0.18084564442180337, "grad_norm": 0.8708551526069641, "learning_rate": 0.00019994225221745072, "loss": 0.498, "step": 15620 }, { "epoch": 0.180961422683277, "grad_norm": 0.5771028995513916, "learning_rate": 0.000199941817740562, "loss": 0.4589, "step": 15630 }, { "epoch": 0.1810772009447506, "grad_norm": 0.9449729919433594, "learning_rate": 0.00019994138163583212, "loss": 0.4865, "step": 15640 }, { "epoch": 0.18119297920622424, "grad_norm": 0.8463087677955627, "learning_rate": 0.0001999409439032682, "loss": 0.4967, "step": 15650 }, { "epoch": 0.18130875746769787, "grad_norm": 0.8847309350967407, "learning_rate": 0.00019994050454287733, "loss": 0.4768, "step": 15660 }, { "epoch": 0.1814245357291715, "grad_norm": 0.8302356600761414, "learning_rate": 0.00019994006355466672, "loss": 0.4865, "step": 15670 }, { "epoch": 0.18154031399064513, "grad_norm": 0.6166642904281616, "learning_rate": 0.00019993962093864355, "loss": 0.4882, "step": 15680 }, { "epoch": 0.18165609225211873, "grad_norm": 0.9548183083534241, "learning_rate": 0.000199939176694815, "loss": 0.513, "step": 15690 }, { "epoch": 0.18177187051359237, "grad_norm": 0.6811615228652954, "learning_rate": 0.0001999387308231883, "loss": 0.488, "step": 15700 }, { "epoch": 0.181887648775066, "grad_norm": 0.829233705997467, "learning_rate": 0.00019993828332377076, "loss": 0.4918, "step": 15710 }, { "epoch": 0.18200342703653963, "grad_norm": 0.8032342791557312, "learning_rate": 0.00019993783419656962, "loss": 0.4877, "step": 15720 }, { "epoch": 0.18211920529801323, "grad_norm": 1.0349808931350708, "learning_rate": 0.00019993738344159222, "loss": 0.5181, "step": 15730 }, { "epoch": 0.18223498355948686, "grad_norm": 0.9067845940589905, "learning_rate": 0.00019993693105884588, "loss": 0.5159, "step": 15740 }, { "epoch": 0.1823507618209605, "grad_norm": 0.8226310014724731, "learning_rate": 0.000199936477048338, "loss": 0.4973, "step": 15750 }, { "epoch": 0.18246654008243413, "grad_norm": 0.9729318618774414, "learning_rate": 0.00019993602141007597, "loss": 0.4934, "step": 15760 }, { "epoch": 0.18258231834390776, "grad_norm": 0.6730903387069702, "learning_rate": 0.0001999355641440672, "loss": 0.4999, "step": 15770 }, { "epoch": 0.18269809660538136, "grad_norm": 0.8104208707809448, "learning_rate": 0.00019993510525031914, "loss": 0.5241, "step": 15780 }, { "epoch": 0.182813874866855, "grad_norm": 0.7883507609367371, "learning_rate": 0.00019993464472883925, "loss": 0.4929, "step": 15790 }, { "epoch": 0.18292965312832862, "grad_norm": 0.7942865490913391, "learning_rate": 0.00019993418257963506, "loss": 0.4741, "step": 15800 }, { "epoch": 0.18304543138980225, "grad_norm": 0.7424351572990417, "learning_rate": 0.00019993371880271409, "loss": 0.4863, "step": 15810 }, { "epoch": 0.18316120965127589, "grad_norm": 0.9395301938056946, "learning_rate": 0.00019993325339808383, "loss": 0.5142, "step": 15820 }, { "epoch": 0.1832769879127495, "grad_norm": 1.0837334394454956, "learning_rate": 0.00019993278636575197, "loss": 0.5143, "step": 15830 }, { "epoch": 0.18339276617422312, "grad_norm": 0.8544259071350098, "learning_rate": 0.00019993231770572604, "loss": 0.4793, "step": 15840 }, { "epoch": 0.18350854443569675, "grad_norm": 0.9475684762001038, "learning_rate": 0.00019993184741801372, "loss": 0.4949, "step": 15850 }, { "epoch": 0.18362432269717038, "grad_norm": 1.132094144821167, "learning_rate": 0.00019993137550262263, "loss": 0.4985, "step": 15860 }, { "epoch": 0.18374010095864401, "grad_norm": 0.8986677527427673, "learning_rate": 0.00019993090195956044, "loss": 0.4791, "step": 15870 }, { "epoch": 0.18385587922011762, "grad_norm": 0.8654846549034119, "learning_rate": 0.00019993042678883492, "loss": 0.5135, "step": 15880 }, { "epoch": 0.18397165748159125, "grad_norm": 0.8354236483573914, "learning_rate": 0.00019992994999045382, "loss": 0.4945, "step": 15890 }, { "epoch": 0.18408743574306488, "grad_norm": 0.9234731197357178, "learning_rate": 0.00019992947156442485, "loss": 0.5155, "step": 15900 }, { "epoch": 0.1842032140045385, "grad_norm": 0.7724660038948059, "learning_rate": 0.00019992899151075584, "loss": 0.4996, "step": 15910 }, { "epoch": 0.18431899226601214, "grad_norm": 0.8467785716056824, "learning_rate": 0.00019992850982945455, "loss": 0.5351, "step": 15920 }, { "epoch": 0.18443477052748575, "grad_norm": 0.7764392495155334, "learning_rate": 0.0001999280265205289, "loss": 0.4691, "step": 15930 }, { "epoch": 0.18455054878895938, "grad_norm": 1.137548804283142, "learning_rate": 0.00019992754158398675, "loss": 0.5135, "step": 15940 }, { "epoch": 0.184666327050433, "grad_norm": 0.918748676776886, "learning_rate": 0.00019992705501983596, "loss": 0.505, "step": 15950 }, { "epoch": 0.18478210531190664, "grad_norm": 0.6120473742485046, "learning_rate": 0.00019992656682808443, "loss": 0.4766, "step": 15960 }, { "epoch": 0.18489788357338027, "grad_norm": 1.143615484237671, "learning_rate": 0.0001999260770087402, "loss": 0.4559, "step": 15970 }, { "epoch": 0.18501366183485388, "grad_norm": 0.8118082284927368, "learning_rate": 0.00019992558556181122, "loss": 0.4886, "step": 15980 }, { "epoch": 0.1851294400963275, "grad_norm": 0.7622039914131165, "learning_rate": 0.00019992509248730546, "loss": 0.4969, "step": 15990 }, { "epoch": 0.18524521835780114, "grad_norm": 1.1264930963516235, "learning_rate": 0.00019992459778523096, "loss": 0.5206, "step": 16000 }, { "epoch": 0.18524521835780114, "eval_chrf": 77.97940200596634, "eval_loss": 0.7463719248771667, "eval_runtime": 246.7953, "eval_samples_per_second": 0.405, "eval_steps_per_second": 0.016, "step": 16000 }, { "epoch": 0.18536099661927477, "grad_norm": 0.8578152060508728, "learning_rate": 0.0001999241014555958, "loss": 0.5409, "step": 16010 }, { "epoch": 0.1854767748807484, "grad_norm": 0.9739165902137756, "learning_rate": 0.00019992360349840803, "loss": 0.462, "step": 16020 }, { "epoch": 0.185592553142222, "grad_norm": 0.7403093576431274, "learning_rate": 0.00019992310391367583, "loss": 0.4987, "step": 16030 }, { "epoch": 0.18570833140369564, "grad_norm": 0.797931969165802, "learning_rate": 0.00019992260270140726, "loss": 0.5003, "step": 16040 }, { "epoch": 0.18582410966516927, "grad_norm": 0.9602776169776917, "learning_rate": 0.00019992209986161052, "loss": 0.4905, "step": 16050 }, { "epoch": 0.1859398879266429, "grad_norm": 0.8908615708351135, "learning_rate": 0.00019992159539429378, "loss": 0.4865, "step": 16060 }, { "epoch": 0.18605566618811653, "grad_norm": 0.9004862904548645, "learning_rate": 0.00019992108929946527, "loss": 0.5411, "step": 16070 }, { "epoch": 0.18617144444959013, "grad_norm": 0.8991667628288269, "learning_rate": 0.00019992058157713326, "loss": 0.4813, "step": 16080 }, { "epoch": 0.18628722271106377, "grad_norm": 0.7413689494132996, "learning_rate": 0.00019992007222730597, "loss": 0.5018, "step": 16090 }, { "epoch": 0.1864030009725374, "grad_norm": 0.931563138961792, "learning_rate": 0.00019991956124999175, "loss": 0.4659, "step": 16100 }, { "epoch": 0.18651877923401103, "grad_norm": 0.7988595366477966, "learning_rate": 0.00019991904864519885, "loss": 0.4937, "step": 16110 }, { "epoch": 0.18663455749548466, "grad_norm": 0.9304957389831543, "learning_rate": 0.0001999185344129357, "loss": 0.5235, "step": 16120 }, { "epoch": 0.18675033575695826, "grad_norm": 0.8325493931770325, "learning_rate": 0.0001999180185532106, "loss": 0.5058, "step": 16130 }, { "epoch": 0.1868661140184319, "grad_norm": 0.9377426505088806, "learning_rate": 0.000199917501066032, "loss": 0.5201, "step": 16140 }, { "epoch": 0.18698189227990553, "grad_norm": 0.876797616481781, "learning_rate": 0.00019991698195140836, "loss": 0.4893, "step": 16150 }, { "epoch": 0.18709767054137916, "grad_norm": 0.922610878944397, "learning_rate": 0.00019991646120934804, "loss": 0.5103, "step": 16160 }, { "epoch": 0.1872134488028528, "grad_norm": 0.8169971108436584, "learning_rate": 0.0001999159388398596, "loss": 0.486, "step": 16170 }, { "epoch": 0.1873292270643264, "grad_norm": 0.7925812602043152, "learning_rate": 0.00019991541484295152, "loss": 0.4962, "step": 16180 }, { "epoch": 0.18744500532580002, "grad_norm": 0.8858729004859924, "learning_rate": 0.00019991488921863236, "loss": 0.4933, "step": 16190 }, { "epoch": 0.18756078358727366, "grad_norm": 0.7224738001823425, "learning_rate": 0.0001999143619669106, "loss": 0.5051, "step": 16200 }, { "epoch": 0.1876765618487473, "grad_norm": 0.7886340618133545, "learning_rate": 0.00019991383308779495, "loss": 0.5091, "step": 16210 }, { "epoch": 0.18779234011022092, "grad_norm": 0.8229026794433594, "learning_rate": 0.00019991330258129395, "loss": 0.5004, "step": 16220 }, { "epoch": 0.18790811837169452, "grad_norm": 0.7977997660636902, "learning_rate": 0.00019991277044741624, "loss": 0.5029, "step": 16230 }, { "epoch": 0.18802389663316815, "grad_norm": 0.9360793828964233, "learning_rate": 0.0001999122366861705, "loss": 0.4849, "step": 16240 }, { "epoch": 0.18813967489464178, "grad_norm": 1.0242421627044678, "learning_rate": 0.0001999117012975654, "loss": 0.4977, "step": 16250 }, { "epoch": 0.18825545315611542, "grad_norm": 0.6850507259368896, "learning_rate": 0.00019991116428160974, "loss": 0.4827, "step": 16260 }, { "epoch": 0.18837123141758905, "grad_norm": 1.1516903638839722, "learning_rate": 0.00019991062563831218, "loss": 0.5193, "step": 16270 }, { "epoch": 0.18848700967906265, "grad_norm": 1.1835291385650635, "learning_rate": 0.00019991008536768149, "loss": 0.4797, "step": 16280 }, { "epoch": 0.18860278794053628, "grad_norm": 0.9349057078361511, "learning_rate": 0.00019990954346972656, "loss": 0.522, "step": 16290 }, { "epoch": 0.1887185662020099, "grad_norm": 0.9771071076393127, "learning_rate": 0.00019990899994445615, "loss": 0.5127, "step": 16300 }, { "epoch": 0.18883434446348354, "grad_norm": 0.801821231842041, "learning_rate": 0.00019990845479187912, "loss": 0.5034, "step": 16310 }, { "epoch": 0.18895012272495718, "grad_norm": 0.8890246748924255, "learning_rate": 0.00019990790801200435, "loss": 0.5167, "step": 16320 }, { "epoch": 0.18906590098643078, "grad_norm": 0.9110584855079651, "learning_rate": 0.00019990735960484073, "loss": 0.497, "step": 16330 }, { "epoch": 0.1891816792479044, "grad_norm": 0.8443339467048645, "learning_rate": 0.00019990680957039726, "loss": 0.4911, "step": 16340 }, { "epoch": 0.18929745750937804, "grad_norm": 0.9822980761528015, "learning_rate": 0.0001999062579086828, "loss": 0.4723, "step": 16350 }, { "epoch": 0.18941323577085167, "grad_norm": 1.1315723657608032, "learning_rate": 0.00019990570461970642, "loss": 0.4852, "step": 16360 }, { "epoch": 0.18952901403232528, "grad_norm": 0.9471389055252075, "learning_rate": 0.00019990514970347708, "loss": 0.4802, "step": 16370 }, { "epoch": 0.1896447922937989, "grad_norm": 0.9146233201026917, "learning_rate": 0.00019990459316000382, "loss": 0.5348, "step": 16380 }, { "epoch": 0.18976057055527254, "grad_norm": 0.8330798745155334, "learning_rate": 0.00019990403498929576, "loss": 0.5, "step": 16390 }, { "epoch": 0.18987634881674617, "grad_norm": 0.9550514221191406, "learning_rate": 0.00019990347519136193, "loss": 0.509, "step": 16400 }, { "epoch": 0.1899921270782198, "grad_norm": 1.097962498664856, "learning_rate": 0.00019990291376621147, "loss": 0.5237, "step": 16410 }, { "epoch": 0.1901079053396934, "grad_norm": 0.9184927940368652, "learning_rate": 0.00019990235071385353, "loss": 0.5036, "step": 16420 }, { "epoch": 0.19022368360116704, "grad_norm": 0.7849079966545105, "learning_rate": 0.00019990178603429727, "loss": 0.5163, "step": 16430 }, { "epoch": 0.19033946186264067, "grad_norm": 0.8647149205207825, "learning_rate": 0.0001999012197275519, "loss": 0.509, "step": 16440 }, { "epoch": 0.1904552401241143, "grad_norm": 0.9983821511268616, "learning_rate": 0.00019990065179362662, "loss": 0.4997, "step": 16450 }, { "epoch": 0.19057101838558793, "grad_norm": 0.7585316300392151, "learning_rate": 0.00019990008223253071, "loss": 0.5101, "step": 16460 }, { "epoch": 0.19068679664706153, "grad_norm": 1.0215591192245483, "learning_rate": 0.00019989951104427346, "loss": 0.4907, "step": 16470 }, { "epoch": 0.19080257490853517, "grad_norm": 0.8907660245895386, "learning_rate": 0.00019989893822886412, "loss": 0.5184, "step": 16480 }, { "epoch": 0.1909183531700088, "grad_norm": 0.8353692293167114, "learning_rate": 0.00019989836378631206, "loss": 0.478, "step": 16490 }, { "epoch": 0.19103413143148243, "grad_norm": 0.7806422114372253, "learning_rate": 0.0001998977877166266, "loss": 0.4648, "step": 16500 }, { "epoch": 0.19114990969295606, "grad_norm": 0.8553751111030579, "learning_rate": 0.00019989721001981717, "loss": 0.5173, "step": 16510 }, { "epoch": 0.19126568795442966, "grad_norm": 0.960426390171051, "learning_rate": 0.00019989663069589316, "loss": 0.4601, "step": 16520 }, { "epoch": 0.1913814662159033, "grad_norm": 0.8828798532485962, "learning_rate": 0.000199896049744864, "loss": 0.4783, "step": 16530 }, { "epoch": 0.19149724447737693, "grad_norm": 0.9241445660591125, "learning_rate": 0.00019989546716673911, "loss": 0.4973, "step": 16540 }, { "epoch": 0.19161302273885056, "grad_norm": 0.9370666742324829, "learning_rate": 0.00019989488296152808, "loss": 0.5126, "step": 16550 }, { "epoch": 0.1917288010003242, "grad_norm": 0.7989693880081177, "learning_rate": 0.00019989429712924034, "loss": 0.4761, "step": 16560 }, { "epoch": 0.1918445792617978, "grad_norm": 0.8280283212661743, "learning_rate": 0.0001998937096698855, "loss": 0.5093, "step": 16570 }, { "epoch": 0.19196035752327142, "grad_norm": 0.8971747159957886, "learning_rate": 0.00019989312058347304, "loss": 0.4789, "step": 16580 }, { "epoch": 0.19207613578474506, "grad_norm": 0.7944716215133667, "learning_rate": 0.00019989252987001263, "loss": 0.4637, "step": 16590 }, { "epoch": 0.1921919140462187, "grad_norm": 0.7768067121505737, "learning_rate": 0.00019989193752951388, "loss": 0.5141, "step": 16600 }, { "epoch": 0.19230769230769232, "grad_norm": 0.8733528256416321, "learning_rate": 0.0001998913435619864, "loss": 0.5265, "step": 16610 }, { "epoch": 0.19242347056916592, "grad_norm": 0.8538109064102173, "learning_rate": 0.0001998907479674399, "loss": 0.5208, "step": 16620 }, { "epoch": 0.19253924883063955, "grad_norm": 0.7162742614746094, "learning_rate": 0.00019989015074588408, "loss": 0.4527, "step": 16630 }, { "epoch": 0.19265502709211318, "grad_norm": 0.8274531364440918, "learning_rate": 0.00019988955189732864, "loss": 0.4602, "step": 16640 }, { "epoch": 0.19277080535358682, "grad_norm": 0.8357710242271423, "learning_rate": 0.00019988895142178335, "loss": 0.5099, "step": 16650 }, { "epoch": 0.19288658361506045, "grad_norm": 0.905566394329071, "learning_rate": 0.000199888349319258, "loss": 0.4664, "step": 16660 }, { "epoch": 0.19300236187653405, "grad_norm": 0.7365356683731079, "learning_rate": 0.0001998877455897624, "loss": 0.4684, "step": 16670 }, { "epoch": 0.19311814013800768, "grad_norm": 0.9022588729858398, "learning_rate": 0.00019988714023330638, "loss": 0.5147, "step": 16680 }, { "epoch": 0.1932339183994813, "grad_norm": 0.9667847156524658, "learning_rate": 0.00019988653324989975, "loss": 0.4792, "step": 16690 }, { "epoch": 0.19334969666095495, "grad_norm": 0.8369743227958679, "learning_rate": 0.00019988592463955244, "loss": 0.5217, "step": 16700 }, { "epoch": 0.19346547492242858, "grad_norm": 0.9116846323013306, "learning_rate": 0.0001998853144022744, "loss": 0.4942, "step": 16710 }, { "epoch": 0.19358125318390218, "grad_norm": 0.877573549747467, "learning_rate": 0.0001998847025380755, "loss": 0.5424, "step": 16720 }, { "epoch": 0.1936970314453758, "grad_norm": 0.9384279847145081, "learning_rate": 0.00019988408904696576, "loss": 0.4611, "step": 16730 }, { "epoch": 0.19381280970684944, "grad_norm": 0.8224205374717712, "learning_rate": 0.00019988347392895515, "loss": 0.4718, "step": 16740 }, { "epoch": 0.19392858796832307, "grad_norm": 0.7887263298034668, "learning_rate": 0.00019988285718405369, "loss": 0.4862, "step": 16750 }, { "epoch": 0.1940443662297967, "grad_norm": 0.882061243057251, "learning_rate": 0.0001998822388122714, "loss": 0.4502, "step": 16760 }, { "epoch": 0.1941601444912703, "grad_norm": 0.8657705783843994, "learning_rate": 0.0001998816188136184, "loss": 0.4894, "step": 16770 }, { "epoch": 0.19427592275274394, "grad_norm": 0.7500507235527039, "learning_rate": 0.00019988099718810476, "loss": 0.4899, "step": 16780 }, { "epoch": 0.19439170101421757, "grad_norm": 1.0131944417953491, "learning_rate": 0.0001998803739357406, "loss": 0.4917, "step": 16790 }, { "epoch": 0.1945074792756912, "grad_norm": 0.9293574690818787, "learning_rate": 0.0001998797490565361, "loss": 0.5272, "step": 16800 }, { "epoch": 0.19462325753716483, "grad_norm": 0.8435137867927551, "learning_rate": 0.00019987912255050138, "loss": 0.4913, "step": 16810 }, { "epoch": 0.19473903579863844, "grad_norm": 0.86867356300354, "learning_rate": 0.0001998784944176467, "loss": 0.5008, "step": 16820 }, { "epoch": 0.19485481406011207, "grad_norm": 0.7702768445014954, "learning_rate": 0.00019987786465798228, "loss": 0.4924, "step": 16830 }, { "epoch": 0.1949705923215857, "grad_norm": 0.930141806602478, "learning_rate": 0.0001998772332715184, "loss": 0.5379, "step": 16840 }, { "epoch": 0.19508637058305933, "grad_norm": 0.9449251294136047, "learning_rate": 0.0001998766002582653, "loss": 0.479, "step": 16850 }, { "epoch": 0.19520214884453296, "grad_norm": 0.8602539896965027, "learning_rate": 0.00019987596561823326, "loss": 0.5001, "step": 16860 }, { "epoch": 0.19531792710600657, "grad_norm": 0.8756052851676941, "learning_rate": 0.00019987532935143273, "loss": 0.5113, "step": 16870 }, { "epoch": 0.1954337053674802, "grad_norm": 1.0247206687927246, "learning_rate": 0.00019987469145787395, "loss": 0.5097, "step": 16880 }, { "epoch": 0.19554948362895383, "grad_norm": 0.7333729863166809, "learning_rate": 0.0001998740519375674, "loss": 0.4928, "step": 16890 }, { "epoch": 0.19566526189042746, "grad_norm": 0.8338857293128967, "learning_rate": 0.00019987341079052347, "loss": 0.5179, "step": 16900 }, { "epoch": 0.1957810401519011, "grad_norm": 0.9072408676147461, "learning_rate": 0.00019987276801675257, "loss": 0.5086, "step": 16910 }, { "epoch": 0.1958968184133747, "grad_norm": 0.7794625163078308, "learning_rate": 0.00019987212361626523, "loss": 0.466, "step": 16920 }, { "epoch": 0.19601259667484833, "grad_norm": 0.8542028069496155, "learning_rate": 0.00019987147758907187, "loss": 0.5003, "step": 16930 }, { "epoch": 0.19612837493632196, "grad_norm": 0.8598357439041138, "learning_rate": 0.00019987082993518307, "loss": 0.4752, "step": 16940 }, { "epoch": 0.1962441531977956, "grad_norm": 1.001768946647644, "learning_rate": 0.00019987018065460935, "loss": 0.4686, "step": 16950 }, { "epoch": 0.19635993145926922, "grad_norm": 0.9171622395515442, "learning_rate": 0.00019986952974736135, "loss": 0.51, "step": 16960 }, { "epoch": 0.19647570972074282, "grad_norm": 0.8419103622436523, "learning_rate": 0.00019986887721344955, "loss": 0.4809, "step": 16970 }, { "epoch": 0.19659148798221646, "grad_norm": 0.7191686034202576, "learning_rate": 0.0001998682230528847, "loss": 0.4762, "step": 16980 }, { "epoch": 0.1967072662436901, "grad_norm": 0.8218206167221069, "learning_rate": 0.00019986756726567739, "loss": 0.479, "step": 16990 }, { "epoch": 0.19682304450516372, "grad_norm": 0.9415873885154724, "learning_rate": 0.00019986690985183828, "loss": 0.4945, "step": 17000 }, { "epoch": 0.19682304450516372, "eval_chrf": 79.72724006061951, "eval_loss": 0.7424191832542419, "eval_runtime": 217.3798, "eval_samples_per_second": 0.46, "eval_steps_per_second": 0.018, "step": 17000 }, { "epoch": 0.19693882276663732, "grad_norm": 0.9736429452896118, "learning_rate": 0.00019986625081137816, "loss": 0.4739, "step": 17010 }, { "epoch": 0.19705460102811095, "grad_norm": 0.825629711151123, "learning_rate": 0.00019986559014430771, "loss": 0.4804, "step": 17020 }, { "epoch": 0.19717037928958459, "grad_norm": 0.8559385538101196, "learning_rate": 0.0001998649278506377, "loss": 0.4479, "step": 17030 }, { "epoch": 0.19728615755105822, "grad_norm": 0.812768816947937, "learning_rate": 0.0001998642639303789, "loss": 0.5129, "step": 17040 }, { "epoch": 0.19740193581253185, "grad_norm": 0.920117974281311, "learning_rate": 0.00019986359838354215, "loss": 0.5177, "step": 17050 }, { "epoch": 0.19751771407400545, "grad_norm": 0.7651894092559814, "learning_rate": 0.00019986293121013825, "loss": 0.4596, "step": 17060 }, { "epoch": 0.19763349233547908, "grad_norm": 0.9783793091773987, "learning_rate": 0.00019986226241017812, "loss": 0.5417, "step": 17070 }, { "epoch": 0.19774927059695271, "grad_norm": 0.88678377866745, "learning_rate": 0.0001998615919836726, "loss": 0.4947, "step": 17080 }, { "epoch": 0.19786504885842635, "grad_norm": 0.8654029965400696, "learning_rate": 0.00019986091993063267, "loss": 0.4614, "step": 17090 }, { "epoch": 0.19798082711989998, "grad_norm": 0.9105015993118286, "learning_rate": 0.00019986024625106926, "loss": 0.4645, "step": 17100 }, { "epoch": 0.19809660538137358, "grad_norm": 1.0340826511383057, "learning_rate": 0.0001998595709449933, "loss": 0.4776, "step": 17110 }, { "epoch": 0.1982123836428472, "grad_norm": 0.7128646969795227, "learning_rate": 0.00019985889401241585, "loss": 0.4806, "step": 17120 }, { "epoch": 0.19832816190432084, "grad_norm": 0.893755316734314, "learning_rate": 0.00019985821545334787, "loss": 0.4744, "step": 17130 }, { "epoch": 0.19844394016579447, "grad_norm": 0.7779598236083984, "learning_rate": 0.00019985753526780044, "loss": 0.475, "step": 17140 }, { "epoch": 0.1985597184272681, "grad_norm": 1.0338503122329712, "learning_rate": 0.00019985685345578466, "loss": 0.4962, "step": 17150 }, { "epoch": 0.1986754966887417, "grad_norm": 0.6588304042816162, "learning_rate": 0.00019985617001731164, "loss": 0.4717, "step": 17160 }, { "epoch": 0.19879127495021534, "grad_norm": 0.7454726696014404, "learning_rate": 0.00019985548495239246, "loss": 0.4616, "step": 17170 }, { "epoch": 0.19890705321168897, "grad_norm": 0.8275521397590637, "learning_rate": 0.0001998547982610383, "loss": 0.4672, "step": 17180 }, { "epoch": 0.1990228314731626, "grad_norm": 0.9495667815208435, "learning_rate": 0.0001998541099432604, "loss": 0.5069, "step": 17190 }, { "epoch": 0.19913860973463623, "grad_norm": 1.0234605073928833, "learning_rate": 0.0001998534199990699, "loss": 0.4999, "step": 17200 }, { "epoch": 0.19925438799610984, "grad_norm": 0.9439220428466797, "learning_rate": 0.00019985272842847808, "loss": 0.4952, "step": 17210 }, { "epoch": 0.19937016625758347, "grad_norm": 0.7583577036857605, "learning_rate": 0.00019985203523149617, "loss": 0.4943, "step": 17220 }, { "epoch": 0.1994859445190571, "grad_norm": 0.9253888726234436, "learning_rate": 0.00019985134040813546, "loss": 0.4831, "step": 17230 }, { "epoch": 0.19960172278053073, "grad_norm": 0.8285670876502991, "learning_rate": 0.00019985064395840733, "loss": 0.4872, "step": 17240 }, { "epoch": 0.19971750104200436, "grad_norm": 0.8756377100944519, "learning_rate": 0.00019984994588232306, "loss": 0.5228, "step": 17250 }, { "epoch": 0.19983327930347797, "grad_norm": 0.9798482060432434, "learning_rate": 0.00019984924617989403, "loss": 0.5004, "step": 17260 }, { "epoch": 0.1999490575649516, "grad_norm": 0.7798986434936523, "learning_rate": 0.00019984854485113166, "loss": 0.4693, "step": 17270 }, { "epoch": 0.20006483582642523, "grad_norm": 0.7864567041397095, "learning_rate": 0.00019984784189604736, "loss": 0.5029, "step": 17280 }, { "epoch": 0.20018061408789886, "grad_norm": 0.8364198207855225, "learning_rate": 0.00019984713731465255, "loss": 0.4862, "step": 17290 }, { "epoch": 0.2002963923493725, "grad_norm": 0.8812498450279236, "learning_rate": 0.00019984643110695878, "loss": 0.4693, "step": 17300 }, { "epoch": 0.2004121706108461, "grad_norm": 0.7478137016296387, "learning_rate": 0.0001998457232729775, "loss": 0.5226, "step": 17310 }, { "epoch": 0.20052794887231973, "grad_norm": 0.8203095197677612, "learning_rate": 0.00019984501381272024, "loss": 0.5393, "step": 17320 }, { "epoch": 0.20064372713379336, "grad_norm": 0.760674238204956, "learning_rate": 0.00019984430272619857, "loss": 0.4953, "step": 17330 }, { "epoch": 0.200759505395267, "grad_norm": 1.0259523391723633, "learning_rate": 0.00019984359001342403, "loss": 0.4814, "step": 17340 }, { "epoch": 0.20087528365674062, "grad_norm": 0.8328045010566711, "learning_rate": 0.0001998428756744083, "loss": 0.4711, "step": 17350 }, { "epoch": 0.20099106191821423, "grad_norm": 0.8433694839477539, "learning_rate": 0.00019984215970916296, "loss": 0.4699, "step": 17360 }, { "epoch": 0.20110684017968786, "grad_norm": 0.8189342021942139, "learning_rate": 0.00019984144211769968, "loss": 0.4815, "step": 17370 }, { "epoch": 0.2012226184411615, "grad_norm": 0.7954742908477783, "learning_rate": 0.00019984072290003019, "loss": 0.5222, "step": 17380 }, { "epoch": 0.20133839670263512, "grad_norm": 0.7419716715812683, "learning_rate": 0.00019984000205616615, "loss": 0.5511, "step": 17390 }, { "epoch": 0.20145417496410875, "grad_norm": 0.771791934967041, "learning_rate": 0.00019983927958611933, "loss": 0.4767, "step": 17400 }, { "epoch": 0.20156995322558235, "grad_norm": 0.7349338531494141, "learning_rate": 0.00019983855548990148, "loss": 0.4666, "step": 17410 }, { "epoch": 0.20168573148705599, "grad_norm": 0.6626814007759094, "learning_rate": 0.00019983782976752443, "loss": 0.4636, "step": 17420 }, { "epoch": 0.20180150974852962, "grad_norm": 0.7729905247688293, "learning_rate": 0.00019983710241899995, "loss": 0.471, "step": 17430 }, { "epoch": 0.20191728801000325, "grad_norm": 0.9113923907279968, "learning_rate": 0.00019983637344433993, "loss": 0.4673, "step": 17440 }, { "epoch": 0.20203306627147688, "grad_norm": 0.867956280708313, "learning_rate": 0.0001998356428435562, "loss": 0.4894, "step": 17450 }, { "epoch": 0.20214884453295048, "grad_norm": 0.9342520833015442, "learning_rate": 0.00019983491061666074, "loss": 0.4456, "step": 17460 }, { "epoch": 0.20226462279442411, "grad_norm": 0.7759660482406616, "learning_rate": 0.0001998341767636654, "loss": 0.494, "step": 17470 }, { "epoch": 0.20238040105589775, "grad_norm": 0.639121413230896, "learning_rate": 0.0001998334412845821, "loss": 0.5031, "step": 17480 }, { "epoch": 0.20249617931737138, "grad_norm": 0.8816165924072266, "learning_rate": 0.00019983270417942294, "loss": 0.4924, "step": 17490 }, { "epoch": 0.202611957578845, "grad_norm": 0.9371243715286255, "learning_rate": 0.00019983196544819985, "loss": 0.5222, "step": 17500 }, { "epoch": 0.2027277358403186, "grad_norm": 0.8375434875488281, "learning_rate": 0.00019983122509092485, "loss": 0.5014, "step": 17510 }, { "epoch": 0.20284351410179224, "grad_norm": 0.7366251349449158, "learning_rate": 0.00019983048310761003, "loss": 0.4943, "step": 17520 }, { "epoch": 0.20295929236326588, "grad_norm": 0.9275548458099365, "learning_rate": 0.00019982973949826747, "loss": 0.4966, "step": 17530 }, { "epoch": 0.2030750706247395, "grad_norm": 0.9041328430175781, "learning_rate": 0.0001998289942629093, "loss": 0.461, "step": 17540 }, { "epoch": 0.20319084888621314, "grad_norm": 0.8144553303718567, "learning_rate": 0.00019982824740154758, "loss": 0.5008, "step": 17550 }, { "epoch": 0.20330662714768674, "grad_norm": 0.8701579570770264, "learning_rate": 0.0001998274989141946, "loss": 0.5044, "step": 17560 }, { "epoch": 0.20342240540916037, "grad_norm": 0.8747161626815796, "learning_rate": 0.00019982674880086244, "loss": 0.5066, "step": 17570 }, { "epoch": 0.203538183670634, "grad_norm": 0.9712308645248413, "learning_rate": 0.00019982599706156338, "loss": 0.4881, "step": 17580 }, { "epoch": 0.20365396193210764, "grad_norm": 0.8672597408294678, "learning_rate": 0.00019982524369630965, "loss": 0.4826, "step": 17590 }, { "epoch": 0.20376974019358127, "grad_norm": 1.026075839996338, "learning_rate": 0.0001998244887051135, "loss": 0.5139, "step": 17600 }, { "epoch": 0.20388551845505487, "grad_norm": 0.7845612168312073, "learning_rate": 0.00019982373208798725, "loss": 0.4905, "step": 17610 }, { "epoch": 0.2040012967165285, "grad_norm": 0.8791155219078064, "learning_rate": 0.00019982297384494322, "loss": 0.4786, "step": 17620 }, { "epoch": 0.20411707497800213, "grad_norm": 0.8501670956611633, "learning_rate": 0.00019982221397599374, "loss": 0.5248, "step": 17630 }, { "epoch": 0.20423285323947576, "grad_norm": 0.7558290362358093, "learning_rate": 0.00019982145248115122, "loss": 0.4341, "step": 17640 }, { "epoch": 0.20434863150094937, "grad_norm": 0.9865317940711975, "learning_rate": 0.00019982068936042806, "loss": 0.4704, "step": 17650 }, { "epoch": 0.204464409762423, "grad_norm": 0.837709367275238, "learning_rate": 0.00019981992461383665, "loss": 0.4333, "step": 17660 }, { "epoch": 0.20458018802389663, "grad_norm": 0.9652982950210571, "learning_rate": 0.00019981915824138948, "loss": 0.4914, "step": 17670 }, { "epoch": 0.20469596628537026, "grad_norm": 0.9558224678039551, "learning_rate": 0.00019981839024309906, "loss": 0.4998, "step": 17680 }, { "epoch": 0.2048117445468439, "grad_norm": 0.8586015105247498, "learning_rate": 0.00019981762061897781, "loss": 0.532, "step": 17690 }, { "epoch": 0.2049275228083175, "grad_norm": 0.7897942662239075, "learning_rate": 0.00019981684936903836, "loss": 0.4904, "step": 17700 }, { "epoch": 0.20504330106979113, "grad_norm": 0.8042513728141785, "learning_rate": 0.0001998160764932932, "loss": 0.4754, "step": 17710 }, { "epoch": 0.20515907933126476, "grad_norm": 0.8123723268508911, "learning_rate": 0.00019981530199175498, "loss": 0.5266, "step": 17720 }, { "epoch": 0.2052748575927384, "grad_norm": 0.7880979180335999, "learning_rate": 0.00019981452586443626, "loss": 0.5238, "step": 17730 }, { "epoch": 0.20539063585421202, "grad_norm": 0.7563484907150269, "learning_rate": 0.00019981374811134974, "loss": 0.4434, "step": 17740 }, { "epoch": 0.20550641411568563, "grad_norm": 0.8079185485839844, "learning_rate": 0.000199812968732508, "loss": 0.469, "step": 17750 }, { "epoch": 0.20562219237715926, "grad_norm": 0.8503212928771973, "learning_rate": 0.00019981218772792384, "loss": 0.46, "step": 17760 }, { "epoch": 0.2057379706386329, "grad_norm": 0.7510192394256592, "learning_rate": 0.0001998114050976099, "loss": 0.4752, "step": 17770 }, { "epoch": 0.20585374890010652, "grad_norm": 0.7266948819160461, "learning_rate": 0.000199810620841579, "loss": 0.5079, "step": 17780 }, { "epoch": 0.20596952716158015, "grad_norm": 0.8466536402702332, "learning_rate": 0.00019980983495984384, "loss": 0.5255, "step": 17790 }, { "epoch": 0.20608530542305376, "grad_norm": 0.940623939037323, "learning_rate": 0.00019980904745241725, "loss": 0.5063, "step": 17800 }, { "epoch": 0.2062010836845274, "grad_norm": 0.5855724811553955, "learning_rate": 0.00019980825831931206, "loss": 0.4807, "step": 17810 }, { "epoch": 0.20631686194600102, "grad_norm": 0.8663539886474609, "learning_rate": 0.00019980746756054113, "loss": 0.483, "step": 17820 }, { "epoch": 0.20643264020747465, "grad_norm": 0.8698544502258301, "learning_rate": 0.00019980667517611732, "loss": 0.505, "step": 17830 }, { "epoch": 0.20654841846894828, "grad_norm": 0.8699235916137695, "learning_rate": 0.00019980588116605356, "loss": 0.4785, "step": 17840 }, { "epoch": 0.20666419673042188, "grad_norm": 0.8365659117698669, "learning_rate": 0.00019980508553036277, "loss": 0.5078, "step": 17850 }, { "epoch": 0.20677997499189552, "grad_norm": 0.905035674571991, "learning_rate": 0.00019980428826905792, "loss": 0.5099, "step": 17860 }, { "epoch": 0.20689575325336915, "grad_norm": 0.8892579078674316, "learning_rate": 0.00019980348938215196, "loss": 0.5173, "step": 17870 }, { "epoch": 0.20701153151484278, "grad_norm": 0.8087046146392822, "learning_rate": 0.00019980268886965794, "loss": 0.4651, "step": 17880 }, { "epoch": 0.2071273097763164, "grad_norm": 0.8616148233413696, "learning_rate": 0.0001998018867315889, "loss": 0.4701, "step": 17890 }, { "epoch": 0.20724308803779, "grad_norm": 0.8882416486740112, "learning_rate": 0.00019980108296795788, "loss": 0.4596, "step": 17900 }, { "epoch": 0.20735886629926364, "grad_norm": 0.9214577078819275, "learning_rate": 0.000199800277578778, "loss": 0.4697, "step": 17910 }, { "epoch": 0.20747464456073728, "grad_norm": 0.976055920124054, "learning_rate": 0.00019979947056406231, "loss": 0.509, "step": 17920 }, { "epoch": 0.2075904228222109, "grad_norm": 0.8006547689437866, "learning_rate": 0.00019979866192382408, "loss": 0.4616, "step": 17930 }, { "epoch": 0.20770620108368454, "grad_norm": 0.8892197608947754, "learning_rate": 0.00019979785165807635, "loss": 0.4711, "step": 17940 }, { "epoch": 0.20782197934515814, "grad_norm": 1.1781281232833862, "learning_rate": 0.0001997970397668324, "loss": 0.5026, "step": 17950 }, { "epoch": 0.20793775760663177, "grad_norm": 0.8484392762184143, "learning_rate": 0.00019979622625010543, "loss": 0.5454, "step": 17960 }, { "epoch": 0.2080535358681054, "grad_norm": 0.953849196434021, "learning_rate": 0.00019979541110790868, "loss": 0.5073, "step": 17970 }, { "epoch": 0.20816931412957904, "grad_norm": 0.8298689126968384, "learning_rate": 0.00019979459434025542, "loss": 0.5069, "step": 17980 }, { "epoch": 0.20828509239105267, "grad_norm": 0.8336108326911926, "learning_rate": 0.00019979377594715901, "loss": 0.4409, "step": 17990 }, { "epoch": 0.20840087065252627, "grad_norm": 0.8721938729286194, "learning_rate": 0.0001997929559286327, "loss": 0.4647, "step": 18000 }, { "epoch": 0.20840087065252627, "eval_chrf": 79.00803519103351, "eval_loss": 0.7419794201850891, "eval_runtime": 300.3688, "eval_samples_per_second": 0.333, "eval_steps_per_second": 0.013, "step": 18000 }, { "epoch": 0.2085166489139999, "grad_norm": 0.8734286427497864, "learning_rate": 0.0001997921342846899, "loss": 0.4872, "step": 18010 }, { "epoch": 0.20863242717547353, "grad_norm": 0.7984320521354675, "learning_rate": 0.00019979131101534398, "loss": 0.5083, "step": 18020 }, { "epoch": 0.20874820543694717, "grad_norm": 0.7219537496566772, "learning_rate": 0.00019979048612060835, "loss": 0.5315, "step": 18030 }, { "epoch": 0.2088639836984208, "grad_norm": 0.909748911857605, "learning_rate": 0.00019978965960049645, "loss": 0.4868, "step": 18040 }, { "epoch": 0.2089797619598944, "grad_norm": 0.8623420596122742, "learning_rate": 0.00019978883145502173, "loss": 0.4723, "step": 18050 }, { "epoch": 0.20909554022136803, "grad_norm": 0.7724198698997498, "learning_rate": 0.00019978800168419765, "loss": 0.4802, "step": 18060 }, { "epoch": 0.20921131848284166, "grad_norm": 0.806288480758667, "learning_rate": 0.0001997871702880378, "loss": 0.519, "step": 18070 }, { "epoch": 0.2093270967443153, "grad_norm": 0.8182432055473328, "learning_rate": 0.0001997863372665557, "loss": 0.4862, "step": 18080 }, { "epoch": 0.20944287500578893, "grad_norm": 1.0057041645050049, "learning_rate": 0.00019978550261976486, "loss": 0.481, "step": 18090 }, { "epoch": 0.20955865326726253, "grad_norm": 0.8643611669540405, "learning_rate": 0.00019978466634767894, "loss": 0.4607, "step": 18100 }, { "epoch": 0.20967443152873616, "grad_norm": 0.7916561961174011, "learning_rate": 0.0001997838284503115, "loss": 0.4953, "step": 18110 }, { "epoch": 0.2097902097902098, "grad_norm": 0.8533985018730164, "learning_rate": 0.00019978298892767626, "loss": 0.4907, "step": 18120 }, { "epoch": 0.20990598805168342, "grad_norm": 0.7255854606628418, "learning_rate": 0.00019978214777978685, "loss": 0.5264, "step": 18130 }, { "epoch": 0.21002176631315705, "grad_norm": 0.6964989304542542, "learning_rate": 0.00019978130500665698, "loss": 0.484, "step": 18140 }, { "epoch": 0.21013754457463066, "grad_norm": 0.8206535577774048, "learning_rate": 0.00019978046060830035, "loss": 0.4833, "step": 18150 }, { "epoch": 0.2102533228361043, "grad_norm": 0.9355806112289429, "learning_rate": 0.0001997796145847308, "loss": 0.4838, "step": 18160 }, { "epoch": 0.21036910109757792, "grad_norm": 0.9480417370796204, "learning_rate": 0.00019977876693596196, "loss": 0.4952, "step": 18170 }, { "epoch": 0.21048487935905155, "grad_norm": 0.7078197002410889, "learning_rate": 0.00019977791766200776, "loss": 0.4418, "step": 18180 }, { "epoch": 0.21060065762052518, "grad_norm": 0.9125195741653442, "learning_rate": 0.000199777066762882, "loss": 0.5197, "step": 18190 }, { "epoch": 0.2107164358819988, "grad_norm": 0.959878146648407, "learning_rate": 0.00019977621423859855, "loss": 0.4901, "step": 18200 }, { "epoch": 0.21083221414347242, "grad_norm": 0.9553471803665161, "learning_rate": 0.00019977536008917128, "loss": 0.4638, "step": 18210 }, { "epoch": 0.21094799240494605, "grad_norm": 0.744727611541748, "learning_rate": 0.0001997745043146141, "loss": 0.4888, "step": 18220 }, { "epoch": 0.21106377066641968, "grad_norm": 0.9068858623504639, "learning_rate": 0.00019977364691494095, "loss": 0.5268, "step": 18230 }, { "epoch": 0.2111795489278933, "grad_norm": 0.9972188472747803, "learning_rate": 0.0001997727878901658, "loss": 0.4882, "step": 18240 }, { "epoch": 0.21129532718936692, "grad_norm": 0.8237280249595642, "learning_rate": 0.00019977192724030264, "loss": 0.4895, "step": 18250 }, { "epoch": 0.21141110545084055, "grad_norm": 0.900851309299469, "learning_rate": 0.0001997710649653655, "loss": 0.4715, "step": 18260 }, { "epoch": 0.21152688371231418, "grad_norm": 0.9079288840293884, "learning_rate": 0.00019977020106536842, "loss": 0.4624, "step": 18270 }, { "epoch": 0.2116426619737878, "grad_norm": 0.8853783011436462, "learning_rate": 0.00019976933554032546, "loss": 0.486, "step": 18280 }, { "epoch": 0.2117584402352614, "grad_norm": 0.7894467115402222, "learning_rate": 0.00019976846839025067, "loss": 0.5234, "step": 18290 }, { "epoch": 0.21187421849673505, "grad_norm": 0.9138008952140808, "learning_rate": 0.00019976759961515828, "loss": 0.5016, "step": 18300 }, { "epoch": 0.21198999675820868, "grad_norm": 1.0169129371643066, "learning_rate": 0.00019976672921506237, "loss": 0.4866, "step": 18310 }, { "epoch": 0.2121057750196823, "grad_norm": 0.8770890831947327, "learning_rate": 0.00019976585718997714, "loss": 0.4903, "step": 18320 }, { "epoch": 0.21222155328115594, "grad_norm": 0.8056181073188782, "learning_rate": 0.00019976498353991678, "loss": 0.5208, "step": 18330 }, { "epoch": 0.21233733154262954, "grad_norm": 0.8492462635040283, "learning_rate": 0.0001997641082648955, "loss": 0.4765, "step": 18340 }, { "epoch": 0.21245310980410317, "grad_norm": 0.8984133005142212, "learning_rate": 0.00019976323136492761, "loss": 0.4735, "step": 18350 }, { "epoch": 0.2125688880655768, "grad_norm": 0.8451970815658569, "learning_rate": 0.0001997623528400274, "loss": 0.4831, "step": 18360 }, { "epoch": 0.21268466632705044, "grad_norm": 0.8115382194519043, "learning_rate": 0.00019976147269020912, "loss": 0.5128, "step": 18370 }, { "epoch": 0.21280044458852407, "grad_norm": 0.8258618712425232, "learning_rate": 0.0001997605909154871, "loss": 0.4956, "step": 18380 }, { "epoch": 0.21291622284999767, "grad_norm": 0.8482604622840881, "learning_rate": 0.00019975970751587578, "loss": 0.4774, "step": 18390 }, { "epoch": 0.2130320011114713, "grad_norm": 0.767049252986908, "learning_rate": 0.00019975882249138947, "loss": 0.5041, "step": 18400 }, { "epoch": 0.21314777937294493, "grad_norm": 0.8439600467681885, "learning_rate": 0.00019975793584204262, "loss": 0.478, "step": 18410 }, { "epoch": 0.21326355763441857, "grad_norm": 0.9389786720275879, "learning_rate": 0.0001997570475678497, "loss": 0.4666, "step": 18420 }, { "epoch": 0.2133793358958922, "grad_norm": 0.843723475933075, "learning_rate": 0.00019975615766882512, "loss": 0.4556, "step": 18430 }, { "epoch": 0.2134951141573658, "grad_norm": 0.931433379650116, "learning_rate": 0.0001997552661449834, "loss": 0.5002, "step": 18440 }, { "epoch": 0.21361089241883943, "grad_norm": 0.8732482194900513, "learning_rate": 0.00019975437299633907, "loss": 0.4442, "step": 18450 }, { "epoch": 0.21372667068031306, "grad_norm": 0.8885776400566101, "learning_rate": 0.0001997534782229067, "loss": 0.4902, "step": 18460 }, { "epoch": 0.2138424489417867, "grad_norm": 0.8338961601257324, "learning_rate": 0.00019975258182470079, "loss": 0.4869, "step": 18470 }, { "epoch": 0.21395822720326033, "grad_norm": 0.8058544993400574, "learning_rate": 0.00019975168380173598, "loss": 0.495, "step": 18480 }, { "epoch": 0.21407400546473393, "grad_norm": 0.8691881895065308, "learning_rate": 0.00019975078415402693, "loss": 0.4589, "step": 18490 }, { "epoch": 0.21418978372620756, "grad_norm": 0.8180102705955505, "learning_rate": 0.00019974988288158825, "loss": 0.5018, "step": 18500 }, { "epoch": 0.2143055619876812, "grad_norm": 0.7971519827842712, "learning_rate": 0.00019974897998443464, "loss": 0.4406, "step": 18510 }, { "epoch": 0.21442134024915482, "grad_norm": 0.9783874154090881, "learning_rate": 0.00019974807546258082, "loss": 0.485, "step": 18520 }, { "epoch": 0.21453711851062846, "grad_norm": 0.9014028310775757, "learning_rate": 0.00019974716931604148, "loss": 0.5166, "step": 18530 }, { "epoch": 0.21465289677210206, "grad_norm": 0.8176358342170715, "learning_rate": 0.0001997462615448314, "loss": 0.4718, "step": 18540 }, { "epoch": 0.2147686750335757, "grad_norm": 0.8621872663497925, "learning_rate": 0.00019974535214896538, "loss": 0.4862, "step": 18550 }, { "epoch": 0.21488445329504932, "grad_norm": 0.9799246191978455, "learning_rate": 0.00019974444112845821, "loss": 0.4704, "step": 18560 }, { "epoch": 0.21500023155652295, "grad_norm": 0.8795552253723145, "learning_rate": 0.00019974352848332476, "loss": 0.5188, "step": 18570 }, { "epoch": 0.21511600981799658, "grad_norm": 0.8586767315864563, "learning_rate": 0.00019974261421357986, "loss": 0.4803, "step": 18580 }, { "epoch": 0.2152317880794702, "grad_norm": 0.9071685075759888, "learning_rate": 0.0001997416983192384, "loss": 0.4896, "step": 18590 }, { "epoch": 0.21534756634094382, "grad_norm": 0.7288635969161987, "learning_rate": 0.00019974078080031534, "loss": 0.5119, "step": 18600 }, { "epoch": 0.21546334460241745, "grad_norm": 0.7109447717666626, "learning_rate": 0.00019973986165682555, "loss": 0.4497, "step": 18610 }, { "epoch": 0.21557912286389108, "grad_norm": 0.8410625457763672, "learning_rate": 0.0001997389408887841, "loss": 0.5218, "step": 18620 }, { "epoch": 0.2156949011253647, "grad_norm": 0.9533514380455017, "learning_rate": 0.00019973801849620595, "loss": 0.4755, "step": 18630 }, { "epoch": 0.21581067938683832, "grad_norm": 0.7994857430458069, "learning_rate": 0.00019973709447910606, "loss": 0.4778, "step": 18640 }, { "epoch": 0.21592645764831195, "grad_norm": 0.7716197371482849, "learning_rate": 0.00019973616883749956, "loss": 0.4969, "step": 18650 }, { "epoch": 0.21604223590978558, "grad_norm": 0.7360429167747498, "learning_rate": 0.0001997352415714015, "loss": 0.4673, "step": 18660 }, { "epoch": 0.2161580141712592, "grad_norm": 0.9170987606048584, "learning_rate": 0.00019973431268082697, "loss": 0.4777, "step": 18670 }, { "epoch": 0.21627379243273284, "grad_norm": 0.9852982759475708, "learning_rate": 0.00019973338216579113, "loss": 0.481, "step": 18680 }, { "epoch": 0.21638957069420645, "grad_norm": 0.9320043921470642, "learning_rate": 0.0001997324500263091, "loss": 0.4411, "step": 18690 }, { "epoch": 0.21650534895568008, "grad_norm": 0.836481511592865, "learning_rate": 0.0001997315162623961, "loss": 0.4544, "step": 18700 }, { "epoch": 0.2166211272171537, "grad_norm": 0.848663866519928, "learning_rate": 0.00019973058087406726, "loss": 0.4626, "step": 18710 }, { "epoch": 0.21673690547862734, "grad_norm": 0.8453987240791321, "learning_rate": 0.00019972964386133794, "loss": 0.4705, "step": 18720 }, { "epoch": 0.21685268374010097, "grad_norm": 0.7466351389884949, "learning_rate": 0.0001997287052242233, "loss": 0.4813, "step": 18730 }, { "epoch": 0.21696846200157457, "grad_norm": 0.8212053775787354, "learning_rate": 0.00019972776496273865, "loss": 0.4623, "step": 18740 }, { "epoch": 0.2170842402630482, "grad_norm": 0.8152648210525513, "learning_rate": 0.00019972682307689937, "loss": 0.4841, "step": 18750 }, { "epoch": 0.21720001852452184, "grad_norm": 0.6814244985580444, "learning_rate": 0.00019972587956672072, "loss": 0.4945, "step": 18760 }, { "epoch": 0.21731579678599547, "grad_norm": 0.8202094435691833, "learning_rate": 0.0001997249344322181, "loss": 0.4876, "step": 18770 }, { "epoch": 0.2174315750474691, "grad_norm": 1.152093529701233, "learning_rate": 0.00019972398767340689, "loss": 0.476, "step": 18780 }, { "epoch": 0.2175473533089427, "grad_norm": 0.9209386706352234, "learning_rate": 0.00019972303929030255, "loss": 0.516, "step": 18790 }, { "epoch": 0.21766313157041633, "grad_norm": 0.9491552710533142, "learning_rate": 0.00019972208928292046, "loss": 0.4581, "step": 18800 }, { "epoch": 0.21777890983188997, "grad_norm": 0.8183296322822571, "learning_rate": 0.00019972113765127615, "loss": 0.4965, "step": 18810 }, { "epoch": 0.2178946880933636, "grad_norm": 0.8461251854896545, "learning_rate": 0.00019972018439538513, "loss": 0.4803, "step": 18820 }, { "epoch": 0.21801046635483723, "grad_norm": 0.7568778991699219, "learning_rate": 0.00019971922951526286, "loss": 0.4944, "step": 18830 }, { "epoch": 0.21812624461631083, "grad_norm": 1.012679934501648, "learning_rate": 0.00019971827301092497, "loss": 0.4773, "step": 18840 }, { "epoch": 0.21824202287778446, "grad_norm": 0.7288202047348022, "learning_rate": 0.00019971731488238698, "loss": 0.4933, "step": 18850 }, { "epoch": 0.2183578011392581, "grad_norm": 0.799356997013092, "learning_rate": 0.00019971635512966454, "loss": 0.5043, "step": 18860 }, { "epoch": 0.21847357940073173, "grad_norm": 0.796561598777771, "learning_rate": 0.00019971539375277325, "loss": 0.5033, "step": 18870 }, { "epoch": 0.21858935766220536, "grad_norm": 0.9243367314338684, "learning_rate": 0.00019971443075172878, "loss": 0.4881, "step": 18880 }, { "epoch": 0.21870513592367896, "grad_norm": 0.8448047637939453, "learning_rate": 0.00019971346612654683, "loss": 0.4917, "step": 18890 }, { "epoch": 0.2188209141851526, "grad_norm": 0.8143089413642883, "learning_rate": 0.00019971249987724306, "loss": 0.4709, "step": 18900 }, { "epoch": 0.21893669244662622, "grad_norm": 0.7472609281539917, "learning_rate": 0.00019971153200383328, "loss": 0.462, "step": 18910 }, { "epoch": 0.21905247070809986, "grad_norm": 0.8620847463607788, "learning_rate": 0.00019971056250633318, "loss": 0.4915, "step": 18920 }, { "epoch": 0.21916824896957346, "grad_norm": 0.8203686475753784, "learning_rate": 0.0001997095913847586, "loss": 0.4868, "step": 18930 }, { "epoch": 0.2192840272310471, "grad_norm": 0.8799416422843933, "learning_rate": 0.0001997086186391254, "loss": 0.4491, "step": 18940 }, { "epoch": 0.21939980549252072, "grad_norm": 0.7626237869262695, "learning_rate": 0.00019970764426944932, "loss": 0.4582, "step": 18950 }, { "epoch": 0.21951558375399435, "grad_norm": 0.9030816555023193, "learning_rate": 0.00019970666827574628, "loss": 0.4721, "step": 18960 }, { "epoch": 0.21963136201546798, "grad_norm": 1.0223565101623535, "learning_rate": 0.00019970569065803223, "loss": 0.4669, "step": 18970 }, { "epoch": 0.2197471402769416, "grad_norm": 0.7570050358772278, "learning_rate": 0.000199704711416323, "loss": 0.5351, "step": 18980 }, { "epoch": 0.21986291853841522, "grad_norm": 0.8550995588302612, "learning_rate": 0.00019970373055063458, "loss": 0.4914, "step": 18990 }, { "epoch": 0.21997869679988885, "grad_norm": 0.7267685532569885, "learning_rate": 0.00019970274806098296, "loss": 0.4644, "step": 19000 }, { "epoch": 0.21997869679988885, "eval_chrf": 84.59165115715571, "eval_loss": 0.7306492924690247, "eval_runtime": 241.9136, "eval_samples_per_second": 0.413, "eval_steps_per_second": 0.017, "step": 19000 }, { "epoch": 0.22009447506136248, "grad_norm": 0.9555398225784302, "learning_rate": 0.00019970176394738413, "loss": 0.5118, "step": 19010 }, { "epoch": 0.2202102533228361, "grad_norm": 0.9046825766563416, "learning_rate": 0.00019970077820985414, "loss": 0.5087, "step": 19020 }, { "epoch": 0.22032603158430972, "grad_norm": 0.9978909492492676, "learning_rate": 0.000199699790848409, "loss": 0.4749, "step": 19030 }, { "epoch": 0.22044180984578335, "grad_norm": 0.753707230091095, "learning_rate": 0.00019969880186306482, "loss": 0.4486, "step": 19040 }, { "epoch": 0.22055758810725698, "grad_norm": 0.7626103758811951, "learning_rate": 0.00019969781125383772, "loss": 0.4875, "step": 19050 }, { "epoch": 0.2206733663687306, "grad_norm": 0.8806884288787842, "learning_rate": 0.00019969681902074382, "loss": 0.4667, "step": 19060 }, { "epoch": 0.22078914463020424, "grad_norm": 0.8446436524391174, "learning_rate": 0.00019969582516379927, "loss": 0.4508, "step": 19070 }, { "epoch": 0.22090492289167785, "grad_norm": 0.7920851111412048, "learning_rate": 0.0001996948296830203, "loss": 0.4924, "step": 19080 }, { "epoch": 0.22102070115315148, "grad_norm": 0.8616964221000671, "learning_rate": 0.00019969383257842303, "loss": 0.4902, "step": 19090 }, { "epoch": 0.2211364794146251, "grad_norm": 0.9056552052497864, "learning_rate": 0.00019969283385002384, "loss": 0.4603, "step": 19100 }, { "epoch": 0.22125225767609874, "grad_norm": 0.8576582670211792, "learning_rate": 0.00019969183349783889, "loss": 0.4778, "step": 19110 }, { "epoch": 0.22136803593757237, "grad_norm": 0.9580732583999634, "learning_rate": 0.00019969083152188453, "loss": 0.5163, "step": 19120 }, { "epoch": 0.22148381419904598, "grad_norm": 0.8541563153266907, "learning_rate": 0.00019968982792217702, "loss": 0.4718, "step": 19130 }, { "epoch": 0.2215995924605196, "grad_norm": 0.7276837825775146, "learning_rate": 0.00019968882269873274, "loss": 0.4637, "step": 19140 }, { "epoch": 0.22171537072199324, "grad_norm": 0.9170542359352112, "learning_rate": 0.0001996878158515681, "loss": 0.4708, "step": 19150 }, { "epoch": 0.22183114898346687, "grad_norm": 0.843813419342041, "learning_rate": 0.00019968680738069946, "loss": 0.4738, "step": 19160 }, { "epoch": 0.2219469272449405, "grad_norm": 0.8809876441955566, "learning_rate": 0.00019968579728614323, "loss": 0.4588, "step": 19170 }, { "epoch": 0.2220627055064141, "grad_norm": 0.9676681160926819, "learning_rate": 0.0001996847855679159, "loss": 0.4981, "step": 19180 }, { "epoch": 0.22217848376788774, "grad_norm": 0.9200544357299805, "learning_rate": 0.00019968377222603394, "loss": 0.4495, "step": 19190 }, { "epoch": 0.22229426202936137, "grad_norm": 0.6957087516784668, "learning_rate": 0.0001996827572605138, "loss": 0.4642, "step": 19200 }, { "epoch": 0.222410040290835, "grad_norm": 0.7724372744560242, "learning_rate": 0.0001996817406713721, "loss": 0.481, "step": 19210 }, { "epoch": 0.22252581855230863, "grad_norm": 0.7524381875991821, "learning_rate": 0.00019968072245862538, "loss": 0.4778, "step": 19220 }, { "epoch": 0.22264159681378223, "grad_norm": 0.8573737740516663, "learning_rate": 0.00019967970262229015, "loss": 0.4721, "step": 19230 }, { "epoch": 0.22275737507525586, "grad_norm": 0.7550068497657776, "learning_rate": 0.0001996786811623831, "loss": 0.4876, "step": 19240 }, { "epoch": 0.2228731533367295, "grad_norm": 0.7729037404060364, "learning_rate": 0.00019967765807892086, "loss": 0.4875, "step": 19250 }, { "epoch": 0.22298893159820313, "grad_norm": 0.7761761546134949, "learning_rate": 0.00019967663337192005, "loss": 0.4485, "step": 19260 }, { "epoch": 0.22310470985967676, "grad_norm": 0.9230116009712219, "learning_rate": 0.0001996756070413974, "loss": 0.4884, "step": 19270 }, { "epoch": 0.22322048812115036, "grad_norm": 0.928087592124939, "learning_rate": 0.00019967457908736958, "loss": 0.5024, "step": 19280 }, { "epoch": 0.223336266382624, "grad_norm": 0.7160900235176086, "learning_rate": 0.00019967354950985338, "loss": 0.4713, "step": 19290 }, { "epoch": 0.22345204464409762, "grad_norm": 0.8185821175575256, "learning_rate": 0.00019967251830886555, "loss": 0.4789, "step": 19300 }, { "epoch": 0.22356782290557126, "grad_norm": 0.8640700578689575, "learning_rate": 0.0001996714854844229, "loss": 0.4669, "step": 19310 }, { "epoch": 0.2236836011670449, "grad_norm": 0.9034169316291809, "learning_rate": 0.00019967045103654223, "loss": 0.5005, "step": 19320 }, { "epoch": 0.2237993794285185, "grad_norm": 0.6736032962799072, "learning_rate": 0.00019966941496524042, "loss": 0.496, "step": 19330 }, { "epoch": 0.22391515768999212, "grad_norm": 0.8957875370979309, "learning_rate": 0.00019966837727053433, "loss": 0.542, "step": 19340 }, { "epoch": 0.22403093595146575, "grad_norm": 0.8206246495246887, "learning_rate": 0.00019966733795244082, "loss": 0.4916, "step": 19350 }, { "epoch": 0.22414671421293939, "grad_norm": 0.694081723690033, "learning_rate": 0.0001996662970109769, "loss": 0.4897, "step": 19360 }, { "epoch": 0.22426249247441302, "grad_norm": 0.8788129091262817, "learning_rate": 0.00019966525444615948, "loss": 0.4645, "step": 19370 }, { "epoch": 0.22437827073588662, "grad_norm": 0.9289214015007019, "learning_rate": 0.00019966421025800554, "loss": 0.4688, "step": 19380 }, { "epoch": 0.22449404899736025, "grad_norm": 0.8340096473693848, "learning_rate": 0.00019966316444653206, "loss": 0.4795, "step": 19390 }, { "epoch": 0.22460982725883388, "grad_norm": 0.8196728229522705, "learning_rate": 0.00019966211701175613, "loss": 0.5069, "step": 19400 }, { "epoch": 0.22472560552030751, "grad_norm": 0.7735697031021118, "learning_rate": 0.00019966106795369477, "loss": 0.5033, "step": 19410 }, { "epoch": 0.22484138378178115, "grad_norm": 0.7703429460525513, "learning_rate": 0.0001996600172723651, "loss": 0.501, "step": 19420 }, { "epoch": 0.22495716204325475, "grad_norm": 0.9691808223724365, "learning_rate": 0.0001996589649677842, "loss": 0.4858, "step": 19430 }, { "epoch": 0.22507294030472838, "grad_norm": 0.9339166879653931, "learning_rate": 0.00019965791103996924, "loss": 0.4672, "step": 19440 }, { "epoch": 0.225188718566202, "grad_norm": 0.9605696797370911, "learning_rate": 0.00019965685548893738, "loss": 0.504, "step": 19450 }, { "epoch": 0.22530449682767564, "grad_norm": 0.7763170599937439, "learning_rate": 0.0001996557983147058, "loss": 0.4923, "step": 19460 }, { "epoch": 0.22542027508914927, "grad_norm": 0.7404714226722717, "learning_rate": 0.0001996547395172917, "loss": 0.4811, "step": 19470 }, { "epoch": 0.22553605335062288, "grad_norm": 0.9595430493354797, "learning_rate": 0.00019965367909671237, "loss": 0.4914, "step": 19480 }, { "epoch": 0.2256518316120965, "grad_norm": 1.0273886919021606, "learning_rate": 0.00019965261705298505, "loss": 0.4628, "step": 19490 }, { "epoch": 0.22576760987357014, "grad_norm": 0.7963073253631592, "learning_rate": 0.00019965155338612703, "loss": 0.4713, "step": 19500 }, { "epoch": 0.22588338813504377, "grad_norm": 0.7545601725578308, "learning_rate": 0.0001996504880961557, "loss": 0.4757, "step": 19510 }, { "epoch": 0.2259991663965174, "grad_norm": 0.9121971726417542, "learning_rate": 0.00019964942118308834, "loss": 0.4904, "step": 19520 }, { "epoch": 0.226114944657991, "grad_norm": 0.822879433631897, "learning_rate": 0.00019964835264694238, "loss": 0.4899, "step": 19530 }, { "epoch": 0.22623072291946464, "grad_norm": 1.001206636428833, "learning_rate": 0.00019964728248773517, "loss": 0.475, "step": 19540 }, { "epoch": 0.22634650118093827, "grad_norm": 0.7777689695358276, "learning_rate": 0.00019964621070548417, "loss": 0.493, "step": 19550 }, { "epoch": 0.2264622794424119, "grad_norm": 0.7859448194503784, "learning_rate": 0.00019964513730020685, "loss": 0.4725, "step": 19560 }, { "epoch": 0.2265780577038855, "grad_norm": 0.7579594850540161, "learning_rate": 0.00019964406227192068, "loss": 0.4634, "step": 19570 }, { "epoch": 0.22669383596535914, "grad_norm": 0.775324285030365, "learning_rate": 0.0001996429856206431, "loss": 0.5071, "step": 19580 }, { "epoch": 0.22680961422683277, "grad_norm": 1.0083378553390503, "learning_rate": 0.0001996419073463918, "loss": 0.4539, "step": 19590 }, { "epoch": 0.2269253924883064, "grad_norm": 0.823955237865448, "learning_rate": 0.00019964082744918424, "loss": 0.4959, "step": 19600 }, { "epoch": 0.22704117074978003, "grad_norm": 0.8164582252502441, "learning_rate": 0.00019963974592903804, "loss": 0.4872, "step": 19610 }, { "epoch": 0.22715694901125363, "grad_norm": 1.0181759595870972, "learning_rate": 0.00019963866278597078, "loss": 0.5058, "step": 19620 }, { "epoch": 0.22727272727272727, "grad_norm": 0.8415988683700562, "learning_rate": 0.00019963757802000012, "loss": 0.451, "step": 19630 }, { "epoch": 0.2273885055342009, "grad_norm": 0.8228923678398132, "learning_rate": 0.00019963649163114372, "loss": 0.4475, "step": 19640 }, { "epoch": 0.22750428379567453, "grad_norm": 0.7857764363288879, "learning_rate": 0.00019963540361941934, "loss": 0.4866, "step": 19650 }, { "epoch": 0.22762006205714816, "grad_norm": 0.7542510628700256, "learning_rate": 0.0001996343139848446, "loss": 0.4396, "step": 19660 }, { "epoch": 0.22773584031862176, "grad_norm": 0.811672031879425, "learning_rate": 0.00019963322272743733, "loss": 0.4919, "step": 19670 }, { "epoch": 0.2278516185800954, "grad_norm": 1.0724990367889404, "learning_rate": 0.00019963212984721524, "loss": 0.5029, "step": 19680 }, { "epoch": 0.22796739684156903, "grad_norm": 0.7121882438659668, "learning_rate": 0.00019963103534419618, "loss": 0.482, "step": 19690 }, { "epoch": 0.22808317510304266, "grad_norm": 0.8476919531822205, "learning_rate": 0.00019962993921839796, "loss": 0.4674, "step": 19700 }, { "epoch": 0.2281989533645163, "grad_norm": 0.7922258973121643, "learning_rate": 0.00019962884146983845, "loss": 0.4814, "step": 19710 }, { "epoch": 0.2283147316259899, "grad_norm": 0.884506344795227, "learning_rate": 0.00019962774209853553, "loss": 0.4585, "step": 19720 }, { "epoch": 0.22843050988746352, "grad_norm": 0.8416809439659119, "learning_rate": 0.00019962664110450705, "loss": 0.5042, "step": 19730 }, { "epoch": 0.22854628814893715, "grad_norm": 0.8013078570365906, "learning_rate": 0.000199625538487771, "loss": 0.498, "step": 19740 }, { "epoch": 0.22866206641041079, "grad_norm": 0.9373008608818054, "learning_rate": 0.0001996244342483453, "loss": 0.5011, "step": 19750 }, { "epoch": 0.22877784467188442, "grad_norm": 0.9532410502433777, "learning_rate": 0.00019962332838624797, "loss": 0.4613, "step": 19760 }, { "epoch": 0.22889362293335802, "grad_norm": 0.8349032402038574, "learning_rate": 0.000199622220901497, "loss": 0.4719, "step": 19770 }, { "epoch": 0.22900940119483165, "grad_norm": 0.9330806732177734, "learning_rate": 0.00019962111179411046, "loss": 0.4935, "step": 19780 }, { "epoch": 0.22912517945630528, "grad_norm": 0.9019778370857239, "learning_rate": 0.00019962000106410642, "loss": 0.4989, "step": 19790 }, { "epoch": 0.22924095771777891, "grad_norm": 0.7046825289726257, "learning_rate": 0.0001996188887115029, "loss": 0.4836, "step": 19800 }, { "epoch": 0.22935673597925255, "grad_norm": 0.8934338092803955, "learning_rate": 0.00019961777473631806, "loss": 0.468, "step": 19810 }, { "epoch": 0.22947251424072615, "grad_norm": 0.828784704208374, "learning_rate": 0.00019961665913857007, "loss": 0.4678, "step": 19820 }, { "epoch": 0.22958829250219978, "grad_norm": 0.718712329864502, "learning_rate": 0.00019961554191827706, "loss": 0.465, "step": 19830 }, { "epoch": 0.2297040707636734, "grad_norm": 0.8049870729446411, "learning_rate": 0.00019961442307545727, "loss": 0.4828, "step": 19840 }, { "epoch": 0.22981984902514704, "grad_norm": 0.7962097525596619, "learning_rate": 0.00019961330261012886, "loss": 0.5186, "step": 19850 }, { "epoch": 0.22993562728662068, "grad_norm": 0.697943925857544, "learning_rate": 0.00019961218052231015, "loss": 0.485, "step": 19860 }, { "epoch": 0.23005140554809428, "grad_norm": 0.7481710314750671, "learning_rate": 0.0001996110568120194, "loss": 0.504, "step": 19870 }, { "epoch": 0.2301671838095679, "grad_norm": 0.8054175972938538, "learning_rate": 0.00019960993147927488, "loss": 0.4685, "step": 19880 }, { "epoch": 0.23028296207104154, "grad_norm": 0.8510473966598511, "learning_rate": 0.0001996088045240949, "loss": 0.4976, "step": 19890 }, { "epoch": 0.23039874033251517, "grad_norm": 0.8469729423522949, "learning_rate": 0.0001996076759464979, "loss": 0.4591, "step": 19900 }, { "epoch": 0.2305145185939888, "grad_norm": 0.8469066619873047, "learning_rate": 0.00019960654574650223, "loss": 0.4941, "step": 19910 }, { "epoch": 0.2306302968554624, "grad_norm": 0.8097986578941345, "learning_rate": 0.00019960541392412625, "loss": 0.487, "step": 19920 }, { "epoch": 0.23074607511693604, "grad_norm": 0.8033235669136047, "learning_rate": 0.00019960428047938845, "loss": 0.4709, "step": 19930 }, { "epoch": 0.23086185337840967, "grad_norm": 0.9559374451637268, "learning_rate": 0.00019960314541230724, "loss": 0.4784, "step": 19940 }, { "epoch": 0.2309776316398833, "grad_norm": 0.7702792882919312, "learning_rate": 0.00019960200872290115, "loss": 0.4649, "step": 19950 }, { "epoch": 0.23109340990135693, "grad_norm": 0.8488996028900146, "learning_rate": 0.00019960087041118868, "loss": 0.4914, "step": 19960 }, { "epoch": 0.23120918816283054, "grad_norm": 0.8142027258872986, "learning_rate": 0.00019959973047718838, "loss": 0.507, "step": 19970 }, { "epoch": 0.23132496642430417, "grad_norm": 0.8428452014923096, "learning_rate": 0.00019959858892091884, "loss": 0.4734, "step": 19980 }, { "epoch": 0.2314407446857778, "grad_norm": 0.9110616445541382, "learning_rate": 0.0001995974457423986, "loss": 0.5022, "step": 19990 }, { "epoch": 0.23155652294725143, "grad_norm": 0.8447798490524292, "learning_rate": 0.00019959630094164627, "loss": 0.4502, "step": 20000 }, { "epoch": 0.23155652294725143, "eval_chrf": 80.23654691744446, "eval_loss": 0.7331246137619019, "eval_runtime": 286.9776, "eval_samples_per_second": 0.348, "eval_steps_per_second": 0.014, "step": 20000 }, { "epoch": 0.23167230120872506, "grad_norm": 0.8785196542739868, "learning_rate": 0.0001995951545186806, "loss": 0.5204, "step": 20010 }, { "epoch": 0.23178807947019867, "grad_norm": 0.8217412233352661, "learning_rate": 0.00019959400647352014, "loss": 0.4694, "step": 20020 }, { "epoch": 0.2319038577316723, "grad_norm": 0.8321861624717712, "learning_rate": 0.00019959285680618366, "loss": 0.4965, "step": 20030 }, { "epoch": 0.23201963599314593, "grad_norm": 0.9536188840866089, "learning_rate": 0.00019959170551668982, "loss": 0.4945, "step": 20040 }, { "epoch": 0.23213541425461956, "grad_norm": 0.8933611512184143, "learning_rate": 0.00019959055260505748, "loss": 0.4873, "step": 20050 }, { "epoch": 0.2322511925160932, "grad_norm": 0.768867552280426, "learning_rate": 0.00019958939807130532, "loss": 0.4659, "step": 20060 }, { "epoch": 0.2323669707775668, "grad_norm": 0.8929364085197449, "learning_rate": 0.00019958824191545222, "loss": 0.4981, "step": 20070 }, { "epoch": 0.23248274903904043, "grad_norm": 0.7275567054748535, "learning_rate": 0.00019958708413751698, "loss": 0.4691, "step": 20080 }, { "epoch": 0.23259852730051406, "grad_norm": 0.6579349637031555, "learning_rate": 0.00019958592473751842, "loss": 0.4903, "step": 20090 }, { "epoch": 0.2327143055619877, "grad_norm": 1.21674382686615, "learning_rate": 0.00019958476371547548, "loss": 0.4657, "step": 20100 }, { "epoch": 0.23283008382346132, "grad_norm": 0.8369050025939941, "learning_rate": 0.000199583601071407, "loss": 0.4864, "step": 20110 }, { "epoch": 0.23294586208493492, "grad_norm": 0.7792148590087891, "learning_rate": 0.00019958243680533202, "loss": 0.4734, "step": 20120 }, { "epoch": 0.23306164034640856, "grad_norm": 0.839080274105072, "learning_rate": 0.0001995812709172694, "loss": 0.4442, "step": 20130 }, { "epoch": 0.2331774186078822, "grad_norm": 0.8466188907623291, "learning_rate": 0.00019958010340723824, "loss": 0.4684, "step": 20140 }, { "epoch": 0.23329319686935582, "grad_norm": 0.8918906450271606, "learning_rate": 0.00019957893427525745, "loss": 0.4473, "step": 20150 }, { "epoch": 0.23340897513082945, "grad_norm": 0.9140417575836182, "learning_rate": 0.00019957776352134613, "loss": 0.4882, "step": 20160 }, { "epoch": 0.23352475339230305, "grad_norm": 0.762043833732605, "learning_rate": 0.00019957659114552332, "loss": 0.4965, "step": 20170 }, { "epoch": 0.23364053165377668, "grad_norm": 0.7834113240242004, "learning_rate": 0.00019957541714780814, "loss": 0.466, "step": 20180 }, { "epoch": 0.23375630991525032, "grad_norm": 0.813154935836792, "learning_rate": 0.0001995742415282197, "loss": 0.493, "step": 20190 }, { "epoch": 0.23387208817672395, "grad_norm": 0.618257462978363, "learning_rate": 0.00019957306428677716, "loss": 0.4677, "step": 20200 }, { "epoch": 0.23398786643819755, "grad_norm": 0.7799302935600281, "learning_rate": 0.00019957188542349966, "loss": 0.4666, "step": 20210 }, { "epoch": 0.23410364469967118, "grad_norm": 0.9057375192642212, "learning_rate": 0.0001995707049384065, "loss": 0.4742, "step": 20220 }, { "epoch": 0.2342194229611448, "grad_norm": 0.7182900309562683, "learning_rate": 0.00019956952283151674, "loss": 0.486, "step": 20230 }, { "epoch": 0.23433520122261844, "grad_norm": 0.7508489489555359, "learning_rate": 0.0001995683391028498, "loss": 0.478, "step": 20240 }, { "epoch": 0.23445097948409208, "grad_norm": 0.7437032461166382, "learning_rate": 0.00019956715375242485, "loss": 0.4787, "step": 20250 }, { "epoch": 0.23456675774556568, "grad_norm": 0.8082559108734131, "learning_rate": 0.00019956596678026125, "loss": 0.5177, "step": 20260 }, { "epoch": 0.2346825360070393, "grad_norm": 0.8698830008506775, "learning_rate": 0.00019956477818637832, "loss": 0.4927, "step": 20270 }, { "epoch": 0.23479831426851294, "grad_norm": 0.7740945219993591, "learning_rate": 0.0001995635879707954, "loss": 0.4821, "step": 20280 }, { "epoch": 0.23491409252998657, "grad_norm": 0.8962322473526001, "learning_rate": 0.00019956239613353193, "loss": 0.4709, "step": 20290 }, { "epoch": 0.2350298707914602, "grad_norm": 0.8895878195762634, "learning_rate": 0.00019956120267460728, "loss": 0.4994, "step": 20300 }, { "epoch": 0.2351456490529338, "grad_norm": 0.8879282474517822, "learning_rate": 0.00019956000759404087, "loss": 0.5123, "step": 20310 }, { "epoch": 0.23526142731440744, "grad_norm": 0.8099833130836487, "learning_rate": 0.00019955881089185222, "loss": 0.4671, "step": 20320 }, { "epoch": 0.23537720557588107, "grad_norm": 0.7878211140632629, "learning_rate": 0.00019955761256806078, "loss": 0.4717, "step": 20330 }, { "epoch": 0.2354929838373547, "grad_norm": 0.8423677086830139, "learning_rate": 0.00019955641262268609, "loss": 0.465, "step": 20340 }, { "epoch": 0.23560876209882833, "grad_norm": 0.9814093112945557, "learning_rate": 0.0001995552110557477, "loss": 0.4418, "step": 20350 }, { "epoch": 0.23572454036030194, "grad_norm": 0.8047503232955933, "learning_rate": 0.00019955400786726512, "loss": 0.4855, "step": 20360 }, { "epoch": 0.23584031862177557, "grad_norm": 0.6284241080284119, "learning_rate": 0.00019955280305725804, "loss": 0.4741, "step": 20370 }, { "epoch": 0.2359560968832492, "grad_norm": 0.6875554919242859, "learning_rate": 0.00019955159662574603, "loss": 0.4552, "step": 20380 }, { "epoch": 0.23607187514472283, "grad_norm": 0.732741117477417, "learning_rate": 0.00019955038857274874, "loss": 0.5087, "step": 20390 }, { "epoch": 0.23618765340619646, "grad_norm": 0.8414047956466675, "learning_rate": 0.00019954917889828587, "loss": 0.4742, "step": 20400 }, { "epoch": 0.23630343166767007, "grad_norm": 0.8171016573905945, "learning_rate": 0.00019954796760237708, "loss": 0.4601, "step": 20410 }, { "epoch": 0.2364192099291437, "grad_norm": 0.7645574808120728, "learning_rate": 0.00019954675468504215, "loss": 0.4893, "step": 20420 }, { "epoch": 0.23653498819061733, "grad_norm": 1.0034972429275513, "learning_rate": 0.0001995455401463008, "loss": 0.5233, "step": 20430 }, { "epoch": 0.23665076645209096, "grad_norm": 0.9084933996200562, "learning_rate": 0.00019954432398617284, "loss": 0.5087, "step": 20440 }, { "epoch": 0.2367665447135646, "grad_norm": 0.8002224564552307, "learning_rate": 0.00019954310620467808, "loss": 0.4835, "step": 20450 }, { "epoch": 0.2368823229750382, "grad_norm": 0.9463514089584351, "learning_rate": 0.0001995418868018363, "loss": 0.5134, "step": 20460 }, { "epoch": 0.23699810123651183, "grad_norm": 0.7522697448730469, "learning_rate": 0.00019954066577766743, "loss": 0.5045, "step": 20470 }, { "epoch": 0.23711387949798546, "grad_norm": 0.9325494766235352, "learning_rate": 0.00019953944313219133, "loss": 0.5108, "step": 20480 }, { "epoch": 0.2372296577594591, "grad_norm": 0.8868902325630188, "learning_rate": 0.00019953821886542792, "loss": 0.4505, "step": 20490 }, { "epoch": 0.23734543602093272, "grad_norm": 0.7130038142204285, "learning_rate": 0.0001995369929773971, "loss": 0.4787, "step": 20500 }, { "epoch": 0.23746121428240632, "grad_norm": 0.7780782580375671, "learning_rate": 0.00019953576546811888, "loss": 0.4649, "step": 20510 }, { "epoch": 0.23757699254387996, "grad_norm": 0.8343157768249512, "learning_rate": 0.00019953453633761328, "loss": 0.4744, "step": 20520 }, { "epoch": 0.2376927708053536, "grad_norm": 0.8360598683357239, "learning_rate": 0.00019953330558590025, "loss": 0.4615, "step": 20530 }, { "epoch": 0.23780854906682722, "grad_norm": 0.7804827690124512, "learning_rate": 0.00019953207321299988, "loss": 0.4845, "step": 20540 }, { "epoch": 0.23792432732830085, "grad_norm": 0.6212272047996521, "learning_rate": 0.00019953083921893226, "loss": 0.4591, "step": 20550 }, { "epoch": 0.23804010558977445, "grad_norm": 0.7534270286560059, "learning_rate": 0.00019952960360371743, "loss": 0.4961, "step": 20560 }, { "epoch": 0.23815588385124808, "grad_norm": 0.7674233317375183, "learning_rate": 0.00019952836636737557, "loss": 0.4365, "step": 20570 }, { "epoch": 0.23827166211272172, "grad_norm": 0.801550567150116, "learning_rate": 0.00019952712750992681, "loss": 0.4585, "step": 20580 }, { "epoch": 0.23838744037419535, "grad_norm": 0.7644691467285156, "learning_rate": 0.00019952588703139133, "loss": 0.4733, "step": 20590 }, { "epoch": 0.23850321863566898, "grad_norm": 0.8335167169570923, "learning_rate": 0.0001995246449317893, "loss": 0.45, "step": 20600 }, { "epoch": 0.23861899689714258, "grad_norm": 0.9844253659248352, "learning_rate": 0.000199523401211141, "loss": 0.4561, "step": 20610 }, { "epoch": 0.2387347751586162, "grad_norm": 0.7373968362808228, "learning_rate": 0.00019952215586946668, "loss": 0.4824, "step": 20620 }, { "epoch": 0.23885055342008985, "grad_norm": 0.9232361912727356, "learning_rate": 0.00019952090890678665, "loss": 0.4896, "step": 20630 }, { "epoch": 0.23896633168156348, "grad_norm": 0.7922347187995911, "learning_rate": 0.00019951966032312116, "loss": 0.4742, "step": 20640 }, { "epoch": 0.2390821099430371, "grad_norm": 0.9157789945602417, "learning_rate": 0.00019951841011849056, "loss": 0.4673, "step": 20650 }, { "epoch": 0.2391978882045107, "grad_norm": 0.7545085549354553, "learning_rate": 0.00019951715829291523, "loss": 0.4469, "step": 20660 }, { "epoch": 0.23931366646598434, "grad_norm": 0.8135688900947571, "learning_rate": 0.00019951590484641558, "loss": 0.461, "step": 20670 }, { "epoch": 0.23942944472745797, "grad_norm": 0.8085801601409912, "learning_rate": 0.000199514649779012, "loss": 0.4579, "step": 20680 }, { "epoch": 0.2395452229889316, "grad_norm": 0.8696886301040649, "learning_rate": 0.0001995133930907249, "loss": 0.4788, "step": 20690 }, { "epoch": 0.23966100125040524, "grad_norm": 0.9587036371231079, "learning_rate": 0.0001995121347815748, "loss": 0.4839, "step": 20700 }, { "epoch": 0.23977677951187884, "grad_norm": 0.9053393006324768, "learning_rate": 0.0001995108748515822, "loss": 0.4807, "step": 20710 }, { "epoch": 0.23989255777335247, "grad_norm": 0.8441702127456665, "learning_rate": 0.0001995096133007676, "loss": 0.4695, "step": 20720 }, { "epoch": 0.2400083360348261, "grad_norm": 0.8357120752334595, "learning_rate": 0.00019950835012915152, "loss": 0.4609, "step": 20730 }, { "epoch": 0.24012411429629973, "grad_norm": 0.7920693755149841, "learning_rate": 0.00019950708533675456, "loss": 0.4981, "step": 20740 }, { "epoch": 0.24023989255777337, "grad_norm": 0.835800290107727, "learning_rate": 0.00019950581892359736, "loss": 0.4719, "step": 20750 }, { "epoch": 0.24035567081924697, "grad_norm": 0.7899350523948669, "learning_rate": 0.0001995045508897005, "loss": 0.475, "step": 20760 }, { "epoch": 0.2404714490807206, "grad_norm": 0.8127520084381104, "learning_rate": 0.00019950328123508464, "loss": 0.4711, "step": 20770 }, { "epoch": 0.24058722734219423, "grad_norm": 0.895070493221283, "learning_rate": 0.00019950200995977045, "loss": 0.5064, "step": 20780 }, { "epoch": 0.24070300560366786, "grad_norm": 0.7311599850654602, "learning_rate": 0.0001995007370637787, "loss": 0.4539, "step": 20790 }, { "epoch": 0.2408187838651415, "grad_norm": 0.7276439070701599, "learning_rate": 0.00019949946254713003, "loss": 0.4945, "step": 20800 }, { "epoch": 0.2409345621266151, "grad_norm": 0.9493795037269592, "learning_rate": 0.00019949818640984526, "loss": 0.4795, "step": 20810 }, { "epoch": 0.24105034038808873, "grad_norm": 0.8072208762168884, "learning_rate": 0.00019949690865194515, "loss": 0.5143, "step": 20820 }, { "epoch": 0.24116611864956236, "grad_norm": 0.8493167757987976, "learning_rate": 0.0001994956292734505, "loss": 0.4468, "step": 20830 }, { "epoch": 0.241281896911036, "grad_norm": 0.7841184735298157, "learning_rate": 0.0001994943482743822, "loss": 0.4572, "step": 20840 }, { "epoch": 0.2413976751725096, "grad_norm": 0.8121345043182373, "learning_rate": 0.00019949306565476106, "loss": 0.4669, "step": 20850 }, { "epoch": 0.24151345343398323, "grad_norm": 0.8304721117019653, "learning_rate": 0.00019949178141460803, "loss": 0.4345, "step": 20860 }, { "epoch": 0.24162923169545686, "grad_norm": 0.7565070986747742, "learning_rate": 0.00019949049555394397, "loss": 0.4826, "step": 20870 }, { "epoch": 0.2417450099569305, "grad_norm": 0.8977930545806885, "learning_rate": 0.00019948920807278983, "loss": 0.5107, "step": 20880 }, { "epoch": 0.24186078821840412, "grad_norm": 0.8773626089096069, "learning_rate": 0.00019948791897116664, "loss": 0.4874, "step": 20890 }, { "epoch": 0.24197656647987772, "grad_norm": 0.7891585230827332, "learning_rate": 0.00019948662824909532, "loss": 0.5013, "step": 20900 }, { "epoch": 0.24209234474135136, "grad_norm": 1.0026624202728271, "learning_rate": 0.00019948533590659695, "loss": 0.4816, "step": 20910 }, { "epoch": 0.242208123002825, "grad_norm": 0.8887225389480591, "learning_rate": 0.00019948404194369251, "loss": 0.497, "step": 20920 }, { "epoch": 0.24232390126429862, "grad_norm": 0.7298649549484253, "learning_rate": 0.00019948274636040315, "loss": 0.491, "step": 20930 }, { "epoch": 0.24243967952577225, "grad_norm": 0.8127931356430054, "learning_rate": 0.00019948144915674996, "loss": 0.4472, "step": 20940 }, { "epoch": 0.24255545778724585, "grad_norm": 0.766441285610199, "learning_rate": 0.00019948015033275404, "loss": 0.4745, "step": 20950 }, { "epoch": 0.24267123604871949, "grad_norm": 0.8924943804740906, "learning_rate": 0.00019947884988843657, "loss": 0.4689, "step": 20960 }, { "epoch": 0.24278701431019312, "grad_norm": 0.9387590885162354, "learning_rate": 0.0001994775478238187, "loss": 0.4496, "step": 20970 }, { "epoch": 0.24290279257166675, "grad_norm": 0.7949775457382202, "learning_rate": 0.00019947624413892167, "loss": 0.475, "step": 20980 }, { "epoch": 0.24301857083314038, "grad_norm": 1.0228512287139893, "learning_rate": 0.00019947493883376672, "loss": 0.4646, "step": 20990 }, { "epoch": 0.24313434909461398, "grad_norm": 0.8532053232192993, "learning_rate": 0.00019947363190837508, "loss": 0.4906, "step": 21000 }, { "epoch": 0.24313434909461398, "eval_chrf": 72.27352234458223, "eval_loss": 0.7107462882995605, "eval_runtime": 349.0144, "eval_samples_per_second": 0.287, "eval_steps_per_second": 0.011, "step": 21000 }, { "epoch": 0.24325012735608761, "grad_norm": 0.9279664158821106, "learning_rate": 0.00019947232336276803, "loss": 0.4713, "step": 21010 }, { "epoch": 0.24336590561756125, "grad_norm": 0.8047536015510559, "learning_rate": 0.0001994710131969669, "loss": 0.4396, "step": 21020 }, { "epoch": 0.24348168387903488, "grad_norm": 0.7375866770744324, "learning_rate": 0.00019946970141099306, "loss": 0.4583, "step": 21030 }, { "epoch": 0.2435974621405085, "grad_norm": 0.9027757048606873, "learning_rate": 0.00019946838800486783, "loss": 0.4886, "step": 21040 }, { "epoch": 0.2437132404019821, "grad_norm": 0.6699516773223877, "learning_rate": 0.00019946707297861265, "loss": 0.4528, "step": 21050 }, { "epoch": 0.24382901866345574, "grad_norm": 0.7650065422058105, "learning_rate": 0.00019946575633224888, "loss": 0.502, "step": 21060 }, { "epoch": 0.24394479692492937, "grad_norm": 0.8151065111160278, "learning_rate": 0.000199464438065798, "loss": 0.5019, "step": 21070 }, { "epoch": 0.244060575186403, "grad_norm": 0.8434141874313354, "learning_rate": 0.00019946311817928148, "loss": 0.4799, "step": 21080 }, { "epoch": 0.24417635344787664, "grad_norm": 0.80101078748703, "learning_rate": 0.00019946179667272083, "loss": 0.4216, "step": 21090 }, { "epoch": 0.24429213170935024, "grad_norm": 0.5379853844642639, "learning_rate": 0.00019946047354613752, "loss": 0.4629, "step": 21100 }, { "epoch": 0.24440790997082387, "grad_norm": 0.8522878289222717, "learning_rate": 0.0001994591487995532, "loss": 0.4587, "step": 21110 }, { "epoch": 0.2445236882322975, "grad_norm": 1.004294991493225, "learning_rate": 0.00019945782243298936, "loss": 0.4456, "step": 21120 }, { "epoch": 0.24463946649377113, "grad_norm": 0.8788337111473083, "learning_rate": 0.00019945649444646764, "loss": 0.4678, "step": 21130 }, { "epoch": 0.24475524475524477, "grad_norm": 0.8699989914894104, "learning_rate": 0.00019945516484000964, "loss": 0.4558, "step": 21140 }, { "epoch": 0.24487102301671837, "grad_norm": 0.783763587474823, "learning_rate": 0.00019945383361363707, "loss": 0.4571, "step": 21150 }, { "epoch": 0.244986801278192, "grad_norm": 0.8254796266555786, "learning_rate": 0.00019945250076737155, "loss": 0.4845, "step": 21160 }, { "epoch": 0.24510257953966563, "grad_norm": 0.8270418643951416, "learning_rate": 0.00019945116630123484, "loss": 0.4934, "step": 21170 }, { "epoch": 0.24521835780113926, "grad_norm": 0.8202553987503052, "learning_rate": 0.0001994498302152487, "loss": 0.4605, "step": 21180 }, { "epoch": 0.2453341360626129, "grad_norm": 0.9639976024627686, "learning_rate": 0.00019944849250943478, "loss": 0.4349, "step": 21190 }, { "epoch": 0.2454499143240865, "grad_norm": 0.7821773290634155, "learning_rate": 0.00019944715318381495, "loss": 0.4975, "step": 21200 }, { "epoch": 0.24556569258556013, "grad_norm": 0.8731173872947693, "learning_rate": 0.00019944581223841102, "loss": 0.4909, "step": 21210 }, { "epoch": 0.24568147084703376, "grad_norm": 0.6022710204124451, "learning_rate": 0.00019944446967324483, "loss": 0.4654, "step": 21220 }, { "epoch": 0.2457972491085074, "grad_norm": 0.8561477661132812, "learning_rate": 0.00019944312548833825, "loss": 0.4781, "step": 21230 }, { "epoch": 0.24591302736998102, "grad_norm": 0.8511117696762085, "learning_rate": 0.00019944177968371314, "loss": 0.4597, "step": 21240 }, { "epoch": 0.24602880563145463, "grad_norm": 0.9651407599449158, "learning_rate": 0.00019944043225939148, "loss": 0.4877, "step": 21250 }, { "epoch": 0.24614458389292826, "grad_norm": 0.6892158389091492, "learning_rate": 0.00019943908321539515, "loss": 0.4843, "step": 21260 }, { "epoch": 0.2462603621544019, "grad_norm": 0.8957681655883789, "learning_rate": 0.00019943773255174616, "loss": 0.4981, "step": 21270 }, { "epoch": 0.24637614041587552, "grad_norm": 0.8759444355964661, "learning_rate": 0.00019943638026846653, "loss": 0.4702, "step": 21280 }, { "epoch": 0.24649191867734915, "grad_norm": 0.7360313534736633, "learning_rate": 0.00019943502636557824, "loss": 0.4502, "step": 21290 }, { "epoch": 0.24660769693882276, "grad_norm": 1.097824215888977, "learning_rate": 0.00019943367084310337, "loss": 0.4718, "step": 21300 }, { "epoch": 0.2467234752002964, "grad_norm": 0.8211607336997986, "learning_rate": 0.00019943231370106397, "loss": 0.4684, "step": 21310 }, { "epoch": 0.24683925346177002, "grad_norm": 0.9314284324645996, "learning_rate": 0.00019943095493948215, "loss": 0.4657, "step": 21320 }, { "epoch": 0.24695503172324365, "grad_norm": 0.925950825214386, "learning_rate": 0.0001994295945583801, "loss": 0.4793, "step": 21330 }, { "epoch": 0.24707080998471728, "grad_norm": 0.9872474670410156, "learning_rate": 0.00019942823255777992, "loss": 0.4526, "step": 21340 }, { "epoch": 0.24718658824619089, "grad_norm": 0.8325170278549194, "learning_rate": 0.00019942686893770383, "loss": 0.4783, "step": 21350 }, { "epoch": 0.24730236650766452, "grad_norm": 0.77191561460495, "learning_rate": 0.000199425503698174, "loss": 0.4882, "step": 21360 }, { "epoch": 0.24741814476913815, "grad_norm": 0.7750169634819031, "learning_rate": 0.00019942413683921267, "loss": 0.4581, "step": 21370 }, { "epoch": 0.24753392303061178, "grad_norm": 0.9309362769126892, "learning_rate": 0.00019942276836084214, "loss": 0.4524, "step": 21380 }, { "epoch": 0.2476497012920854, "grad_norm": 0.821103036403656, "learning_rate": 0.00019942139826308468, "loss": 0.4876, "step": 21390 }, { "epoch": 0.24776547955355901, "grad_norm": 0.8426516652107239, "learning_rate": 0.00019942002654596262, "loss": 0.4518, "step": 21400 }, { "epoch": 0.24788125781503265, "grad_norm": 0.9636382460594177, "learning_rate": 0.00019941865320949827, "loss": 0.4669, "step": 21410 }, { "epoch": 0.24799703607650628, "grad_norm": 0.7734730243682861, "learning_rate": 0.00019941727825371398, "loss": 0.4619, "step": 21420 }, { "epoch": 0.2481128143379799, "grad_norm": 0.9146222472190857, "learning_rate": 0.00019941590167863225, "loss": 0.489, "step": 21430 }, { "epoch": 0.24822859259945354, "grad_norm": 0.9050183296203613, "learning_rate": 0.00019941452348427538, "loss": 0.5036, "step": 21440 }, { "epoch": 0.24834437086092714, "grad_norm": 0.5671589970588684, "learning_rate": 0.0001994131436706659, "loss": 0.4298, "step": 21450 }, { "epoch": 0.24846014912240078, "grad_norm": 0.7186804413795471, "learning_rate": 0.00019941176223782625, "loss": 0.4757, "step": 21460 }, { "epoch": 0.2485759273838744, "grad_norm": 0.7497647404670715, "learning_rate": 0.00019941037918577893, "loss": 0.4913, "step": 21470 }, { "epoch": 0.24869170564534804, "grad_norm": 0.840472400188446, "learning_rate": 0.0001994089945145465, "loss": 0.4651, "step": 21480 }, { "epoch": 0.24880748390682164, "grad_norm": 0.764240026473999, "learning_rate": 0.00019940760822415146, "loss": 0.498, "step": 21490 }, { "epoch": 0.24892326216829527, "grad_norm": 0.8429006934165955, "learning_rate": 0.0001994062203146164, "loss": 0.4926, "step": 21500 }, { "epoch": 0.2490390404297689, "grad_norm": 0.702929675579071, "learning_rate": 0.00019940483078596398, "loss": 0.4713, "step": 21510 }, { "epoch": 0.24915481869124254, "grad_norm": 0.6671960949897766, "learning_rate": 0.00019940343963821677, "loss": 0.4451, "step": 21520 }, { "epoch": 0.24927059695271617, "grad_norm": 0.8115770816802979, "learning_rate": 0.00019940204687139744, "loss": 0.4823, "step": 21530 }, { "epoch": 0.24938637521418977, "grad_norm": 0.9203357696533203, "learning_rate": 0.00019940065248552874, "loss": 0.4935, "step": 21540 }, { "epoch": 0.2495021534756634, "grad_norm": 0.8061996698379517, "learning_rate": 0.0001993992564806333, "loss": 0.4614, "step": 21550 }, { "epoch": 0.24961793173713703, "grad_norm": 0.6012446284294128, "learning_rate": 0.0001993978588567339, "loss": 0.4713, "step": 21560 }, { "epoch": 0.24973370999861066, "grad_norm": 0.8243346214294434, "learning_rate": 0.00019939645961385326, "loss": 0.4648, "step": 21570 }, { "epoch": 0.2498494882600843, "grad_norm": 0.8228342533111572, "learning_rate": 0.0001993950587520142, "loss": 0.481, "step": 21580 }, { "epoch": 0.2499652665215579, "grad_norm": 0.8094422221183777, "learning_rate": 0.00019939365627123957, "loss": 0.4977, "step": 21590 }, { "epoch": 0.25008104478303156, "grad_norm": 0.7727957963943481, "learning_rate": 0.0001993922521715522, "loss": 0.4329, "step": 21600 }, { "epoch": 0.2501968230445052, "grad_norm": 0.8428878784179688, "learning_rate": 0.0001993908464529749, "loss": 0.4573, "step": 21610 }, { "epoch": 0.25031260130597877, "grad_norm": 0.8136334419250488, "learning_rate": 0.00019938943911553065, "loss": 0.4714, "step": 21620 }, { "epoch": 0.2504283795674524, "grad_norm": 0.6701997518539429, "learning_rate": 0.00019938803015924231, "loss": 0.5137, "step": 21630 }, { "epoch": 0.25054415782892603, "grad_norm": 0.7936267256736755, "learning_rate": 0.00019938661958413282, "loss": 0.4945, "step": 21640 }, { "epoch": 0.25065993609039966, "grad_norm": 0.8736158609390259, "learning_rate": 0.00019938520739022525, "loss": 0.497, "step": 21650 }, { "epoch": 0.2507757143518733, "grad_norm": 0.6858282089233398, "learning_rate": 0.00019938379357754252, "loss": 0.4433, "step": 21660 }, { "epoch": 0.2508914926133469, "grad_norm": 0.6970891356468201, "learning_rate": 0.00019938237814610764, "loss": 0.4865, "step": 21670 }, { "epoch": 0.25100727087482055, "grad_norm": 0.9730721712112427, "learning_rate": 0.00019938096109594375, "loss": 0.4848, "step": 21680 }, { "epoch": 0.2511230491362942, "grad_norm": 0.7481698393821716, "learning_rate": 0.00019937954242707387, "loss": 0.4829, "step": 21690 }, { "epoch": 0.2512388273977678, "grad_norm": 0.9864442348480225, "learning_rate": 0.0001993781221395211, "loss": 0.4484, "step": 21700 }, { "epoch": 0.25135460565924145, "grad_norm": 0.7727804183959961, "learning_rate": 0.0001993767002333086, "loss": 0.4431, "step": 21710 }, { "epoch": 0.251470383920715, "grad_norm": 0.8138746023178101, "learning_rate": 0.0001993752767084595, "loss": 0.4761, "step": 21720 }, { "epoch": 0.25158616218218866, "grad_norm": 0.7588403224945068, "learning_rate": 0.00019937385156499704, "loss": 0.4732, "step": 21730 }, { "epoch": 0.2517019404436623, "grad_norm": 0.8095316290855408, "learning_rate": 0.00019937242480294438, "loss": 0.509, "step": 21740 }, { "epoch": 0.2518177187051359, "grad_norm": 0.6787450313568115, "learning_rate": 0.0001993709964223248, "loss": 0.4875, "step": 21750 }, { "epoch": 0.25193349696660955, "grad_norm": 0.7589474320411682, "learning_rate": 0.00019936956642316154, "loss": 0.4774, "step": 21760 }, { "epoch": 0.2520492752280832, "grad_norm": 0.7602103352546692, "learning_rate": 0.0001993681348054779, "loss": 0.499, "step": 21770 }, { "epoch": 0.2521650534895568, "grad_norm": 0.8325792551040649, "learning_rate": 0.00019936670156929718, "loss": 0.4796, "step": 21780 }, { "epoch": 0.25228083175103044, "grad_norm": 0.7022053599357605, "learning_rate": 0.00019936526671464273, "loss": 0.4733, "step": 21790 }, { "epoch": 0.2523966100125041, "grad_norm": 0.8736443519592285, "learning_rate": 0.00019936383024153796, "loss": 0.487, "step": 21800 }, { "epoch": 0.25251238827397765, "grad_norm": 0.6411970853805542, "learning_rate": 0.0001993623921500062, "loss": 0.4415, "step": 21810 }, { "epoch": 0.2526281665354513, "grad_norm": 0.9100762605667114, "learning_rate": 0.00019936095244007095, "loss": 0.5196, "step": 21820 }, { "epoch": 0.2527439447969249, "grad_norm": 0.8505499362945557, "learning_rate": 0.0001993595111117556, "loss": 0.4791, "step": 21830 }, { "epoch": 0.25285972305839854, "grad_norm": 0.7599483132362366, "learning_rate": 0.00019935806816508364, "loss": 0.4553, "step": 21840 }, { "epoch": 0.2529755013198722, "grad_norm": 0.8440554141998291, "learning_rate": 0.00019935662360007862, "loss": 0.5143, "step": 21850 }, { "epoch": 0.2530912795813458, "grad_norm": 0.5842945575714111, "learning_rate": 0.00019935517741676397, "loss": 0.4722, "step": 21860 }, { "epoch": 0.25320705784281944, "grad_norm": 0.734700083732605, "learning_rate": 0.00019935372961516333, "loss": 0.4861, "step": 21870 }, { "epoch": 0.25332283610429307, "grad_norm": 0.8261541128158569, "learning_rate": 0.00019935228019530023, "loss": 0.4453, "step": 21880 }, { "epoch": 0.2534386143657667, "grad_norm": 0.8472097516059875, "learning_rate": 0.00019935082915719833, "loss": 0.4683, "step": 21890 }, { "epoch": 0.25355439262724033, "grad_norm": 0.8597475290298462, "learning_rate": 0.00019934937650088123, "loss": 0.4675, "step": 21900 }, { "epoch": 0.2536701708887139, "grad_norm": 0.7931632995605469, "learning_rate": 0.0001993479222263726, "loss": 0.4528, "step": 21910 }, { "epoch": 0.25378594915018754, "grad_norm": 0.7407145500183105, "learning_rate": 0.00019934646633369614, "loss": 0.4589, "step": 21920 }, { "epoch": 0.25390172741166117, "grad_norm": 0.7493576407432556, "learning_rate": 0.00019934500882287553, "loss": 0.4665, "step": 21930 }, { "epoch": 0.2540175056731348, "grad_norm": 0.7910557985305786, "learning_rate": 0.00019934354969393457, "loss": 0.4462, "step": 21940 }, { "epoch": 0.25413328393460843, "grad_norm": 0.617962658405304, "learning_rate": 0.00019934208894689692, "loss": 0.4332, "step": 21950 }, { "epoch": 0.25424906219608207, "grad_norm": 0.7557736039161682, "learning_rate": 0.00019934062658178648, "loss": 0.4714, "step": 21960 }, { "epoch": 0.2543648404575557, "grad_norm": 0.9161111116409302, "learning_rate": 0.00019933916259862698, "loss": 0.4622, "step": 21970 }, { "epoch": 0.25448061871902933, "grad_norm": 0.8381891846656799, "learning_rate": 0.00019933769699744235, "loss": 0.4812, "step": 21980 }, { "epoch": 0.25459639698050296, "grad_norm": 0.7691875696182251, "learning_rate": 0.00019933622977825644, "loss": 0.4555, "step": 21990 }, { "epoch": 0.2547121752419766, "grad_norm": 0.6883540749549866, "learning_rate": 0.00019933476094109308, "loss": 0.4753, "step": 22000 }, { "epoch": 0.2547121752419766, "eval_chrf": 75.9492548954396, "eval_loss": 0.7260739803314209, "eval_runtime": 345.7023, "eval_samples_per_second": 0.289, "eval_steps_per_second": 0.012, "step": 22000 }, { "epoch": 0.25482795350345017, "grad_norm": 0.7204933762550354, "learning_rate": 0.00019933329048597625, "loss": 0.4661, "step": 22010 }, { "epoch": 0.2549437317649238, "grad_norm": 0.7734494209289551, "learning_rate": 0.00019933181841292995, "loss": 0.4531, "step": 22020 }, { "epoch": 0.25505951002639743, "grad_norm": 0.8410596251487732, "learning_rate": 0.00019933034472197804, "loss": 0.4724, "step": 22030 }, { "epoch": 0.25517528828787106, "grad_norm": 0.8805966377258301, "learning_rate": 0.0001993288694131446, "loss": 0.505, "step": 22040 }, { "epoch": 0.2552910665493447, "grad_norm": 0.8467724323272705, "learning_rate": 0.00019932739248645367, "loss": 0.4869, "step": 22050 }, { "epoch": 0.2554068448108183, "grad_norm": 0.74871826171875, "learning_rate": 0.00019932591394192928, "loss": 0.4997, "step": 22060 }, { "epoch": 0.25552262307229195, "grad_norm": 0.6654328107833862, "learning_rate": 0.00019932443377959549, "loss": 0.4745, "step": 22070 }, { "epoch": 0.2556384013337656, "grad_norm": 0.7821851968765259, "learning_rate": 0.00019932295199947644, "loss": 0.4505, "step": 22080 }, { "epoch": 0.2557541795952392, "grad_norm": 0.8141744136810303, "learning_rate": 0.00019932146860159624, "loss": 0.4815, "step": 22090 }, { "epoch": 0.25586995785671285, "grad_norm": 0.9703858494758606, "learning_rate": 0.0001993199835859791, "loss": 0.4784, "step": 22100 }, { "epoch": 0.2559857361181864, "grad_norm": 0.6984914541244507, "learning_rate": 0.00019931849695264918, "loss": 0.4871, "step": 22110 }, { "epoch": 0.25610151437966006, "grad_norm": 0.7303805947303772, "learning_rate": 0.00019931700870163066, "loss": 0.4643, "step": 22120 }, { "epoch": 0.2562172926411337, "grad_norm": 1.0366698503494263, "learning_rate": 0.0001993155188329478, "loss": 0.4792, "step": 22130 }, { "epoch": 0.2563330709026073, "grad_norm": 0.7457212209701538, "learning_rate": 0.00019931402734662493, "loss": 0.4921, "step": 22140 }, { "epoch": 0.25644884916408095, "grad_norm": 0.7296402454376221, "learning_rate": 0.00019931253424268627, "loss": 0.4553, "step": 22150 }, { "epoch": 0.2565646274255546, "grad_norm": 0.9333901405334473, "learning_rate": 0.00019931103952115615, "loss": 0.4818, "step": 22160 }, { "epoch": 0.2566804056870282, "grad_norm": 0.7972484827041626, "learning_rate": 0.00019930954318205893, "loss": 0.4667, "step": 22170 }, { "epoch": 0.25679618394850184, "grad_norm": 0.7563261389732361, "learning_rate": 0.00019930804522541896, "loss": 0.5111, "step": 22180 }, { "epoch": 0.2569119622099755, "grad_norm": 0.7163794040679932, "learning_rate": 0.0001993065456512607, "loss": 0.4539, "step": 22190 }, { "epoch": 0.2570277404714491, "grad_norm": 0.7803443670272827, "learning_rate": 0.00019930504445960848, "loss": 0.4456, "step": 22200 }, { "epoch": 0.2571435187329227, "grad_norm": 0.8861815929412842, "learning_rate": 0.00019930354165048685, "loss": 0.4574, "step": 22210 }, { "epoch": 0.2572592969943963, "grad_norm": 0.84950190782547, "learning_rate": 0.00019930203722392023, "loss": 0.4711, "step": 22220 }, { "epoch": 0.25737507525586995, "grad_norm": 0.6551049947738647, "learning_rate": 0.0001993005311799331, "loss": 0.4649, "step": 22230 }, { "epoch": 0.2574908535173436, "grad_norm": 0.8828011155128479, "learning_rate": 0.00019929902351855007, "loss": 0.4679, "step": 22240 }, { "epoch": 0.2576066317788172, "grad_norm": 0.5688215494155884, "learning_rate": 0.0001992975142397956, "loss": 0.4583, "step": 22250 }, { "epoch": 0.25772241004029084, "grad_norm": 0.7960038781166077, "learning_rate": 0.0001992960033436944, "loss": 0.4614, "step": 22260 }, { "epoch": 0.25783818830176447, "grad_norm": 0.6251015663146973, "learning_rate": 0.00019929449083027094, "loss": 0.4495, "step": 22270 }, { "epoch": 0.2579539665632381, "grad_norm": 0.6125766634941101, "learning_rate": 0.00019929297669954993, "loss": 0.4775, "step": 22280 }, { "epoch": 0.25806974482471173, "grad_norm": 0.6822196245193481, "learning_rate": 0.00019929146095155607, "loss": 0.4689, "step": 22290 }, { "epoch": 0.25818552308618536, "grad_norm": 0.6716240048408508, "learning_rate": 0.00019928994358631394, "loss": 0.4899, "step": 22300 }, { "epoch": 0.25830130134765894, "grad_norm": 0.9194414019584656, "learning_rate": 0.00019928842460384833, "loss": 0.4918, "step": 22310 }, { "epoch": 0.25841707960913257, "grad_norm": 0.6665068864822388, "learning_rate": 0.000199286904004184, "loss": 0.4822, "step": 22320 }, { "epoch": 0.2585328578706062, "grad_norm": 0.77483731508255, "learning_rate": 0.00019928538178734563, "loss": 0.457, "step": 22330 }, { "epoch": 0.25864863613207983, "grad_norm": 0.7766614556312561, "learning_rate": 0.00019928385795335812, "loss": 0.4912, "step": 22340 }, { "epoch": 0.25876441439355347, "grad_norm": 0.7784126400947571, "learning_rate": 0.0001992823325022462, "loss": 0.4855, "step": 22350 }, { "epoch": 0.2588801926550271, "grad_norm": 0.6947225332260132, "learning_rate": 0.00019928080543403476, "loss": 0.496, "step": 22360 }, { "epoch": 0.25899597091650073, "grad_norm": 0.675093412399292, "learning_rate": 0.00019927927674874866, "loss": 0.4467, "step": 22370 }, { "epoch": 0.25911174917797436, "grad_norm": 0.6956536769866943, "learning_rate": 0.0001992777464464128, "loss": 0.4612, "step": 22380 }, { "epoch": 0.259227527439448, "grad_norm": 0.7089182734489441, "learning_rate": 0.00019927621452705215, "loss": 0.4629, "step": 22390 }, { "epoch": 0.2593433057009216, "grad_norm": 0.7213327884674072, "learning_rate": 0.0001992746809906916, "loss": 0.456, "step": 22400 }, { "epoch": 0.2594590839623952, "grad_norm": 0.847394585609436, "learning_rate": 0.0001992731458373561, "loss": 0.4935, "step": 22410 }, { "epoch": 0.25957486222386883, "grad_norm": 0.6759060025215149, "learning_rate": 0.00019927160906707075, "loss": 0.4527, "step": 22420 }, { "epoch": 0.25969064048534246, "grad_norm": 1.4248216152191162, "learning_rate": 0.00019927007067986055, "loss": 0.4705, "step": 22430 }, { "epoch": 0.2598064187468161, "grad_norm": 0.8298565745353699, "learning_rate": 0.0001992685306757505, "loss": 0.4621, "step": 22440 }, { "epoch": 0.2599221970082897, "grad_norm": 0.735045850276947, "learning_rate": 0.00019926698905476577, "loss": 0.4994, "step": 22450 }, { "epoch": 0.26003797526976336, "grad_norm": 0.7810381650924683, "learning_rate": 0.0001992654458169314, "loss": 0.4553, "step": 22460 }, { "epoch": 0.260153753531237, "grad_norm": 0.9417708516120911, "learning_rate": 0.00019926390096227257, "loss": 0.4583, "step": 22470 }, { "epoch": 0.2602695317927106, "grad_norm": 0.8525703549385071, "learning_rate": 0.0001992623544908144, "loss": 0.4621, "step": 22480 }, { "epoch": 0.26038531005418425, "grad_norm": 0.8209181427955627, "learning_rate": 0.00019926080640258215, "loss": 0.4768, "step": 22490 }, { "epoch": 0.2605010883156578, "grad_norm": 0.7920557260513306, "learning_rate": 0.00019925925669760092, "loss": 0.4422, "step": 22500 }, { "epoch": 0.26061686657713146, "grad_norm": 0.7790732383728027, "learning_rate": 0.00019925770537589606, "loss": 0.46, "step": 22510 }, { "epoch": 0.2607326448386051, "grad_norm": 0.7818410992622375, "learning_rate": 0.00019925615243749278, "loss": 0.4721, "step": 22520 }, { "epoch": 0.2608484231000787, "grad_norm": 0.9033719897270203, "learning_rate": 0.0001992545978824164, "loss": 0.4763, "step": 22530 }, { "epoch": 0.26096420136155235, "grad_norm": 0.8526149988174438, "learning_rate": 0.00019925304171069223, "loss": 0.4666, "step": 22540 }, { "epoch": 0.261079979623026, "grad_norm": 0.7692511677742004, "learning_rate": 0.00019925148392234562, "loss": 0.4616, "step": 22550 }, { "epoch": 0.2611957578844996, "grad_norm": 0.926516592502594, "learning_rate": 0.00019924992451740195, "loss": 0.4872, "step": 22560 }, { "epoch": 0.26131153614597324, "grad_norm": 0.9964279532432556, "learning_rate": 0.00019924836349588662, "loss": 0.4507, "step": 22570 }, { "epoch": 0.2614273144074469, "grad_norm": 0.7452640533447266, "learning_rate": 0.000199246800857825, "loss": 0.4398, "step": 22580 }, { "epoch": 0.2615430926689205, "grad_norm": 0.7037336826324463, "learning_rate": 0.0001992452366032426, "loss": 0.4625, "step": 22590 }, { "epoch": 0.2616588709303941, "grad_norm": 0.763759970664978, "learning_rate": 0.0001992436707321649, "loss": 0.4477, "step": 22600 }, { "epoch": 0.2617746491918677, "grad_norm": 0.7811423540115356, "learning_rate": 0.00019924210324461742, "loss": 0.4678, "step": 22610 }, { "epoch": 0.26189042745334135, "grad_norm": 0.8694429993629456, "learning_rate": 0.00019924053414062563, "loss": 0.4475, "step": 22620 }, { "epoch": 0.262006205714815, "grad_norm": 0.7192367911338806, "learning_rate": 0.0001992389634202151, "loss": 0.4471, "step": 22630 }, { "epoch": 0.2621219839762886, "grad_norm": 0.7533650994300842, "learning_rate": 0.00019923739108341148, "loss": 0.4707, "step": 22640 }, { "epoch": 0.26223776223776224, "grad_norm": 0.8486232757568359, "learning_rate": 0.00019923581713024032, "loss": 0.4706, "step": 22650 }, { "epoch": 0.26235354049923587, "grad_norm": 0.6653669476509094, "learning_rate": 0.00019923424156072724, "loss": 0.4486, "step": 22660 }, { "epoch": 0.2624693187607095, "grad_norm": 0.7561994194984436, "learning_rate": 0.00019923266437489798, "loss": 0.4873, "step": 22670 }, { "epoch": 0.26258509702218313, "grad_norm": 0.7854382395744324, "learning_rate": 0.00019923108557277813, "loss": 0.4639, "step": 22680 }, { "epoch": 0.26270087528365677, "grad_norm": 0.7397041320800781, "learning_rate": 0.00019922950515439348, "loss": 0.4456, "step": 22690 }, { "epoch": 0.26281665354513034, "grad_norm": 0.8341320753097534, "learning_rate": 0.0001992279231197698, "loss": 0.4848, "step": 22700 }, { "epoch": 0.262932431806604, "grad_norm": 0.8480237722396851, "learning_rate": 0.00019922633946893275, "loss": 0.4301, "step": 22710 }, { "epoch": 0.2630482100680776, "grad_norm": 1.00328528881073, "learning_rate": 0.0001992247542019082, "loss": 0.4899, "step": 22720 }, { "epoch": 0.26316398832955123, "grad_norm": 0.7801504731178284, "learning_rate": 0.00019922316731872192, "loss": 0.4937, "step": 22730 }, { "epoch": 0.26327976659102487, "grad_norm": 0.7921590805053711, "learning_rate": 0.00019922157881939982, "loss": 0.4949, "step": 22740 }, { "epoch": 0.2633955448524985, "grad_norm": 0.5391880869865417, "learning_rate": 0.00019921998870396775, "loss": 0.4723, "step": 22750 }, { "epoch": 0.26351132311397213, "grad_norm": 0.8103504180908203, "learning_rate": 0.00019921839697245158, "loss": 0.5043, "step": 22760 }, { "epoch": 0.26362710137544576, "grad_norm": 0.9150616526603699, "learning_rate": 0.00019921680362487728, "loss": 0.5382, "step": 22770 }, { "epoch": 0.2637428796369194, "grad_norm": 0.9048618078231812, "learning_rate": 0.00019921520866127078, "loss": 0.5033, "step": 22780 }, { "epoch": 0.263858657898393, "grad_norm": 0.8628024458885193, "learning_rate": 0.00019921361208165804, "loss": 0.4746, "step": 22790 }, { "epoch": 0.2639744361598666, "grad_norm": 0.7364067435264587, "learning_rate": 0.00019921201388606506, "loss": 0.4604, "step": 22800 }, { "epoch": 0.26409021442134023, "grad_norm": 0.8640881180763245, "learning_rate": 0.00019921041407451793, "loss": 0.4433, "step": 22810 }, { "epoch": 0.26420599268281386, "grad_norm": 0.8773605227470398, "learning_rate": 0.00019920881264704266, "loss": 0.4701, "step": 22820 }, { "epoch": 0.2643217709442875, "grad_norm": 0.8174883127212524, "learning_rate": 0.00019920720960366536, "loss": 0.4318, "step": 22830 }, { "epoch": 0.2644375492057611, "grad_norm": 0.7369967103004456, "learning_rate": 0.00019920560494441212, "loss": 0.4481, "step": 22840 }, { "epoch": 0.26455332746723476, "grad_norm": 0.8240845799446106, "learning_rate": 0.00019920399866930904, "loss": 0.4604, "step": 22850 }, { "epoch": 0.2646691057287084, "grad_norm": 0.5480002164840698, "learning_rate": 0.00019920239077838239, "loss": 0.4587, "step": 22860 }, { "epoch": 0.264784883990182, "grad_norm": 0.8237443566322327, "learning_rate": 0.00019920078127165825, "loss": 0.4721, "step": 22870 }, { "epoch": 0.26490066225165565, "grad_norm": 0.7899153232574463, "learning_rate": 0.00019919917014916294, "loss": 0.5098, "step": 22880 }, { "epoch": 0.2650164405131293, "grad_norm": 0.843370258808136, "learning_rate": 0.00019919755741092262, "loss": 0.4825, "step": 22890 }, { "epoch": 0.26513221877460286, "grad_norm": 0.8073774576187134, "learning_rate": 0.00019919594305696354, "loss": 0.417, "step": 22900 }, { "epoch": 0.2652479970360765, "grad_norm": 0.8770199418067932, "learning_rate": 0.00019919432708731205, "loss": 0.4449, "step": 22910 }, { "epoch": 0.2653637752975501, "grad_norm": 0.6097248196601868, "learning_rate": 0.00019919270950199445, "loss": 0.4898, "step": 22920 }, { "epoch": 0.26547955355902375, "grad_norm": 0.8626429438591003, "learning_rate": 0.00019919109030103714, "loss": 0.451, "step": 22930 }, { "epoch": 0.2655953318204974, "grad_norm": 0.9127733111381531, "learning_rate": 0.0001991894694844664, "loss": 0.4507, "step": 22940 }, { "epoch": 0.265711110081971, "grad_norm": 0.853125810623169, "learning_rate": 0.0001991878470523087, "loss": 0.4632, "step": 22950 }, { "epoch": 0.26582688834344465, "grad_norm": 0.8941962122917175, "learning_rate": 0.0001991862230045904, "loss": 0.4596, "step": 22960 }, { "epoch": 0.2659426666049183, "grad_norm": 0.7871774435043335, "learning_rate": 0.00019918459734133802, "loss": 0.4729, "step": 22970 }, { "epoch": 0.2660584448663919, "grad_norm": 0.806376576423645, "learning_rate": 0.000199182970062578, "loss": 0.4821, "step": 22980 }, { "epoch": 0.26617422312786554, "grad_norm": 0.8112010359764099, "learning_rate": 0.0001991813411683369, "loss": 0.458, "step": 22990 }, { "epoch": 0.2662900013893391, "grad_norm": 0.8459729552268982, "learning_rate": 0.00019917971065864119, "loss": 0.4379, "step": 23000 }, { "epoch": 0.2662900013893391, "eval_chrf": 83.42543709513681, "eval_loss": 0.7086561322212219, "eval_runtime": 195.9424, "eval_samples_per_second": 0.51, "eval_steps_per_second": 0.02, "step": 23000 }, { "epoch": 0.26640577965081275, "grad_norm": 0.8854068517684937, "learning_rate": 0.0001991780785335174, "loss": 0.4698, "step": 23010 }, { "epoch": 0.2665215579122864, "grad_norm": 0.7783320546150208, "learning_rate": 0.00019917644479299222, "loss": 0.4933, "step": 23020 }, { "epoch": 0.26663733617376, "grad_norm": 0.811390221118927, "learning_rate": 0.00019917480943709216, "loss": 0.4801, "step": 23030 }, { "epoch": 0.26675311443523364, "grad_norm": 0.8113588094711304, "learning_rate": 0.00019917317246584393, "loss": 0.4427, "step": 23040 }, { "epoch": 0.26686889269670727, "grad_norm": 0.8738912343978882, "learning_rate": 0.00019917153387927417, "loss": 0.438, "step": 23050 }, { "epoch": 0.2669846709581809, "grad_norm": 0.7134190201759338, "learning_rate": 0.00019916989367740952, "loss": 0.456, "step": 23060 }, { "epoch": 0.26710044921965453, "grad_norm": 0.7650332450866699, "learning_rate": 0.00019916825186027675, "loss": 0.4806, "step": 23070 }, { "epoch": 0.26721622748112817, "grad_norm": 0.8252532482147217, "learning_rate": 0.00019916660842790257, "loss": 0.4686, "step": 23080 }, { "epoch": 0.26733200574260174, "grad_norm": 0.8009850978851318, "learning_rate": 0.00019916496338031378, "loss": 0.4497, "step": 23090 }, { "epoch": 0.2674477840040754, "grad_norm": 0.7717449069023132, "learning_rate": 0.00019916331671753713, "loss": 0.4125, "step": 23100 }, { "epoch": 0.267563562265549, "grad_norm": 0.8198238015174866, "learning_rate": 0.00019916166843959952, "loss": 0.4651, "step": 23110 }, { "epoch": 0.26767934052702264, "grad_norm": 0.8147943019866943, "learning_rate": 0.00019916001854652771, "loss": 0.4538, "step": 23120 }, { "epoch": 0.26779511878849627, "grad_norm": 0.6511247158050537, "learning_rate": 0.00019915836703834861, "loss": 0.499, "step": 23130 }, { "epoch": 0.2679108970499699, "grad_norm": 0.7690098881721497, "learning_rate": 0.00019915671391508913, "loss": 0.4322, "step": 23140 }, { "epoch": 0.26802667531144353, "grad_norm": 0.8259420394897461, "learning_rate": 0.00019915505917677618, "loss": 0.4421, "step": 23150 }, { "epoch": 0.26814245357291716, "grad_norm": 0.7501933574676514, "learning_rate": 0.0001991534028234367, "loss": 0.4475, "step": 23160 }, { "epoch": 0.2682582318343908, "grad_norm": 0.6512454748153687, "learning_rate": 0.00019915174485509772, "loss": 0.4072, "step": 23170 }, { "epoch": 0.2683740100958644, "grad_norm": 0.806197464466095, "learning_rate": 0.0001991500852717862, "loss": 0.4716, "step": 23180 }, { "epoch": 0.268489788357338, "grad_norm": 0.7797796726226807, "learning_rate": 0.00019914842407352915, "loss": 0.4957, "step": 23190 }, { "epoch": 0.26860556661881163, "grad_norm": 0.7226470708847046, "learning_rate": 0.0001991467612603537, "loss": 0.4495, "step": 23200 }, { "epoch": 0.26872134488028526, "grad_norm": 0.8793154358863831, "learning_rate": 0.00019914509683228686, "loss": 0.4713, "step": 23210 }, { "epoch": 0.2688371231417589, "grad_norm": 0.6427780985832214, "learning_rate": 0.00019914343078935578, "loss": 0.4439, "step": 23220 }, { "epoch": 0.2689529014032325, "grad_norm": 0.7348723411560059, "learning_rate": 0.00019914176313158758, "loss": 0.4319, "step": 23230 }, { "epoch": 0.26906867966470616, "grad_norm": 0.7228298783302307, "learning_rate": 0.00019914009385900948, "loss": 0.4662, "step": 23240 }, { "epoch": 0.2691844579261798, "grad_norm": 0.8180272579193115, "learning_rate": 0.00019913842297164858, "loss": 0.4853, "step": 23250 }, { "epoch": 0.2693002361876534, "grad_norm": 0.5619539022445679, "learning_rate": 0.00019913675046953215, "loss": 0.4434, "step": 23260 }, { "epoch": 0.26941601444912705, "grad_norm": 0.7949888110160828, "learning_rate": 0.0001991350763526874, "loss": 0.4535, "step": 23270 }, { "epoch": 0.2695317927106007, "grad_norm": 0.804740309715271, "learning_rate": 0.0001991334006211416, "loss": 0.4743, "step": 23280 }, { "epoch": 0.26964757097207426, "grad_norm": 0.7593053579330444, "learning_rate": 0.0001991317232749221, "loss": 0.45, "step": 23290 }, { "epoch": 0.2697633492335479, "grad_norm": 0.7307462692260742, "learning_rate": 0.00019913004431405614, "loss": 0.4754, "step": 23300 }, { "epoch": 0.2698791274950215, "grad_norm": 0.8215537667274475, "learning_rate": 0.00019912836373857116, "loss": 0.4781, "step": 23310 }, { "epoch": 0.26999490575649515, "grad_norm": 0.8160101175308228, "learning_rate": 0.00019912668154849444, "loss": 0.4654, "step": 23320 }, { "epoch": 0.2701106840179688, "grad_norm": 0.8154664635658264, "learning_rate": 0.0001991249977438534, "loss": 0.4698, "step": 23330 }, { "epoch": 0.2702264622794424, "grad_norm": 0.7719538807868958, "learning_rate": 0.00019912331232467553, "loss": 0.457, "step": 23340 }, { "epoch": 0.27034224054091605, "grad_norm": 0.6792475581169128, "learning_rate": 0.00019912162529098822, "loss": 0.4638, "step": 23350 }, { "epoch": 0.2704580188023897, "grad_norm": 0.700428307056427, "learning_rate": 0.00019911993664281896, "loss": 0.4896, "step": 23360 }, { "epoch": 0.2705737970638633, "grad_norm": 0.6948319673538208, "learning_rate": 0.00019911824638019522, "loss": 0.4809, "step": 23370 }, { "epoch": 0.27068957532533694, "grad_norm": 0.8017446994781494, "learning_rate": 0.00019911655450314465, "loss": 0.4544, "step": 23380 }, { "epoch": 0.2708053535868105, "grad_norm": 0.8272501230239868, "learning_rate": 0.00019911486101169466, "loss": 0.4468, "step": 23390 }, { "epoch": 0.27092113184828415, "grad_norm": 0.7762539386749268, "learning_rate": 0.00019911316590587292, "loss": 0.4735, "step": 23400 }, { "epoch": 0.2710369101097578, "grad_norm": 0.7475146651268005, "learning_rate": 0.00019911146918570702, "loss": 0.4528, "step": 23410 }, { "epoch": 0.2711526883712314, "grad_norm": 0.6763512492179871, "learning_rate": 0.0001991097708512246, "loss": 0.4755, "step": 23420 }, { "epoch": 0.27126846663270504, "grad_norm": 0.933892011642456, "learning_rate": 0.00019910807090245332, "loss": 0.4846, "step": 23430 }, { "epoch": 0.2713842448941787, "grad_norm": 0.800424337387085, "learning_rate": 0.00019910636933942086, "loss": 0.4333, "step": 23440 }, { "epoch": 0.2715000231556523, "grad_norm": 0.9707953929901123, "learning_rate": 0.00019910466616215495, "loss": 0.4411, "step": 23450 }, { "epoch": 0.27161580141712593, "grad_norm": 0.8666991591453552, "learning_rate": 0.0001991029613706833, "loss": 0.481, "step": 23460 }, { "epoch": 0.27173157967859957, "grad_norm": 0.7029927968978882, "learning_rate": 0.00019910125496503374, "loss": 0.4613, "step": 23470 }, { "epoch": 0.2718473579400732, "grad_norm": 0.8219718337059021, "learning_rate": 0.00019909954694523398, "loss": 0.4427, "step": 23480 }, { "epoch": 0.2719631362015468, "grad_norm": 0.7498128414154053, "learning_rate": 0.00019909783731131191, "loss": 0.5028, "step": 23490 }, { "epoch": 0.2720789144630204, "grad_norm": 0.8823832869529724, "learning_rate": 0.00019909612606329536, "loss": 0.4675, "step": 23500 }, { "epoch": 0.27219469272449404, "grad_norm": 0.8480955958366394, "learning_rate": 0.00019909441320121215, "loss": 0.4718, "step": 23510 }, { "epoch": 0.27231047098596767, "grad_norm": 0.8292425870895386, "learning_rate": 0.00019909269872509024, "loss": 0.4632, "step": 23520 }, { "epoch": 0.2724262492474413, "grad_norm": 0.7262698411941528, "learning_rate": 0.00019909098263495755, "loss": 0.4736, "step": 23530 }, { "epoch": 0.27254202750891493, "grad_norm": 0.910237729549408, "learning_rate": 0.00019908926493084203, "loss": 0.4416, "step": 23540 }, { "epoch": 0.27265780577038856, "grad_norm": 0.9715549945831299, "learning_rate": 0.0001990875456127716, "loss": 0.4589, "step": 23550 }, { "epoch": 0.2727735840318622, "grad_norm": 0.767346203327179, "learning_rate": 0.00019908582468077435, "loss": 0.4341, "step": 23560 }, { "epoch": 0.2728893622933358, "grad_norm": 0.8054583668708801, "learning_rate": 0.00019908410213487825, "loss": 0.4563, "step": 23570 }, { "epoch": 0.27300514055480946, "grad_norm": 0.8324166536331177, "learning_rate": 0.0001990823779751114, "loss": 0.5082, "step": 23580 }, { "epoch": 0.27312091881628303, "grad_norm": 1.0959804058074951, "learning_rate": 0.00019908065220150184, "loss": 0.4788, "step": 23590 }, { "epoch": 0.27323669707775666, "grad_norm": 0.800231397151947, "learning_rate": 0.00019907892481407772, "loss": 0.4492, "step": 23600 }, { "epoch": 0.2733524753392303, "grad_norm": 0.789693295955658, "learning_rate": 0.00019907719581286712, "loss": 0.505, "step": 23610 }, { "epoch": 0.2734682536007039, "grad_norm": 0.719916582107544, "learning_rate": 0.00019907546519789827, "loss": 0.4735, "step": 23620 }, { "epoch": 0.27358403186217756, "grad_norm": 0.8244103789329529, "learning_rate": 0.0001990737329691993, "loss": 0.4967, "step": 23630 }, { "epoch": 0.2736998101236512, "grad_norm": 0.818156898021698, "learning_rate": 0.00019907199912679846, "loss": 0.4655, "step": 23640 }, { "epoch": 0.2738155883851248, "grad_norm": 0.7187866568565369, "learning_rate": 0.00019907026367072396, "loss": 0.4873, "step": 23650 }, { "epoch": 0.27393136664659845, "grad_norm": 0.8921357989311218, "learning_rate": 0.00019906852660100412, "loss": 0.445, "step": 23660 }, { "epoch": 0.2740471449080721, "grad_norm": 0.898240864276886, "learning_rate": 0.00019906678791766717, "loss": 0.4881, "step": 23670 }, { "epoch": 0.2741629231695457, "grad_norm": 0.8911371827125549, "learning_rate": 0.0001990650476207415, "loss": 0.4803, "step": 23680 }, { "epoch": 0.2742787014310193, "grad_norm": 0.9182499647140503, "learning_rate": 0.00019906330571025537, "loss": 0.4325, "step": 23690 }, { "epoch": 0.2743944796924929, "grad_norm": 0.7490956783294678, "learning_rate": 0.00019906156218623723, "loss": 0.4376, "step": 23700 }, { "epoch": 0.27451025795396655, "grad_norm": 0.739075243473053, "learning_rate": 0.0001990598170487154, "loss": 0.4593, "step": 23710 }, { "epoch": 0.2746260362154402, "grad_norm": 0.7499712705612183, "learning_rate": 0.00019905807029771842, "loss": 0.4695, "step": 23720 }, { "epoch": 0.2747418144769138, "grad_norm": 0.8054463863372803, "learning_rate": 0.0001990563219332746, "loss": 0.4476, "step": 23730 }, { "epoch": 0.27485759273838745, "grad_norm": 0.7969599366188049, "learning_rate": 0.00019905457195541252, "loss": 0.4675, "step": 23740 }, { "epoch": 0.2749733709998611, "grad_norm": 0.7260996699333191, "learning_rate": 0.00019905282036416065, "loss": 0.4654, "step": 23750 }, { "epoch": 0.2750891492613347, "grad_norm": 1.2067216634750366, "learning_rate": 0.00019905106715954754, "loss": 0.4651, "step": 23760 }, { "epoch": 0.27520492752280834, "grad_norm": 0.8120461702346802, "learning_rate": 0.00019904931234160172, "loss": 0.4659, "step": 23770 }, { "epoch": 0.2753207057842819, "grad_norm": 0.8080825805664062, "learning_rate": 0.00019904755591035176, "loss": 0.4726, "step": 23780 }, { "epoch": 0.27543648404575555, "grad_norm": 0.8611451983451843, "learning_rate": 0.0001990457978658263, "loss": 0.479, "step": 23790 }, { "epoch": 0.2755522623072292, "grad_norm": 0.7268285155296326, "learning_rate": 0.00019904403820805396, "loss": 0.4904, "step": 23800 }, { "epoch": 0.2756680405687028, "grad_norm": 0.5887938141822815, "learning_rate": 0.00019904227693706342, "loss": 0.4441, "step": 23810 }, { "epoch": 0.27578381883017644, "grad_norm": 0.75795978307724, "learning_rate": 0.00019904051405288331, "loss": 0.4746, "step": 23820 }, { "epoch": 0.2758995970916501, "grad_norm": 0.7585770487785339, "learning_rate": 0.0001990387495555424, "loss": 0.4754, "step": 23830 }, { "epoch": 0.2760153753531237, "grad_norm": 0.8079079985618591, "learning_rate": 0.00019903698344506942, "loss": 0.46, "step": 23840 }, { "epoch": 0.27613115361459734, "grad_norm": 0.7400149703025818, "learning_rate": 0.00019903521572149315, "loss": 0.4719, "step": 23850 }, { "epoch": 0.27624693187607097, "grad_norm": 0.6678773760795593, "learning_rate": 0.00019903344638484233, "loss": 0.4365, "step": 23860 }, { "epoch": 0.2763627101375446, "grad_norm": 0.828233540058136, "learning_rate": 0.00019903167543514581, "loss": 0.467, "step": 23870 }, { "epoch": 0.2764784883990182, "grad_norm": 0.7450108528137207, "learning_rate": 0.00019902990287243248, "loss": 0.4694, "step": 23880 }, { "epoch": 0.2765942666604918, "grad_norm": 0.8591787219047546, "learning_rate": 0.00019902812869673116, "loss": 0.4487, "step": 23890 }, { "epoch": 0.27671004492196544, "grad_norm": 0.7450703382492065, "learning_rate": 0.00019902635290807076, "loss": 0.4408, "step": 23900 }, { "epoch": 0.27682582318343907, "grad_norm": 0.850604236125946, "learning_rate": 0.00019902457550648018, "loss": 0.4954, "step": 23910 }, { "epoch": 0.2769416014449127, "grad_norm": 0.9734000563621521, "learning_rate": 0.00019902279649198836, "loss": 0.4977, "step": 23920 }, { "epoch": 0.27705737970638633, "grad_norm": 0.6951797604560852, "learning_rate": 0.00019902101586462436, "loss": 0.4522, "step": 23930 }, { "epoch": 0.27717315796785996, "grad_norm": 0.7991729378700256, "learning_rate": 0.00019901923362441708, "loss": 0.4731, "step": 23940 }, { "epoch": 0.2772889362293336, "grad_norm": 0.6922335028648376, "learning_rate": 0.00019901744977139565, "loss": 0.4691, "step": 23950 }, { "epoch": 0.2774047144908072, "grad_norm": 0.6763920187950134, "learning_rate": 0.00019901566430558903, "loss": 0.466, "step": 23960 }, { "epoch": 0.27752049275228086, "grad_norm": 0.7108004689216614, "learning_rate": 0.00019901387722702637, "loss": 0.4467, "step": 23970 }, { "epoch": 0.27763627101375443, "grad_norm": 0.7842297554016113, "learning_rate": 0.00019901208853573675, "loss": 0.445, "step": 23980 }, { "epoch": 0.27775204927522806, "grad_norm": 0.854042112827301, "learning_rate": 0.0001990102982317493, "loss": 0.4606, "step": 23990 }, { "epoch": 0.2778678275367017, "grad_norm": 0.7814891934394836, "learning_rate": 0.00019900850631509318, "loss": 0.4947, "step": 24000 }, { "epoch": 0.2778678275367017, "eval_chrf": 85.56038719629122, "eval_loss": 0.7117055654525757, "eval_runtime": 196.3131, "eval_samples_per_second": 0.509, "eval_steps_per_second": 0.02, "step": 24000 }, { "epoch": 0.2779836057981753, "grad_norm": 0.7856383919715881, "learning_rate": 0.00019900671278579761, "loss": 0.423, "step": 24010 }, { "epoch": 0.27809938405964896, "grad_norm": 0.7621785402297974, "learning_rate": 0.00019900491764389175, "loss": 0.4836, "step": 24020 }, { "epoch": 0.2782151623211226, "grad_norm": 0.789074718952179, "learning_rate": 0.00019900312088940488, "loss": 0.4815, "step": 24030 }, { "epoch": 0.2783309405825962, "grad_norm": 0.728230357170105, "learning_rate": 0.00019900132252236624, "loss": 0.4615, "step": 24040 }, { "epoch": 0.27844671884406985, "grad_norm": 0.8728474378585815, "learning_rate": 0.00019899952254280514, "loss": 0.4514, "step": 24050 }, { "epoch": 0.2785624971055435, "grad_norm": 0.8630943894386292, "learning_rate": 0.0001989977209507509, "loss": 0.4681, "step": 24060 }, { "epoch": 0.2786782753670171, "grad_norm": 0.931373655796051, "learning_rate": 0.00019899591774623284, "loss": 0.4931, "step": 24070 }, { "epoch": 0.2787940536284907, "grad_norm": 0.5404430031776428, "learning_rate": 0.00019899411292928032, "loss": 0.4285, "step": 24080 }, { "epoch": 0.2789098318899643, "grad_norm": 0.7951238751411438, "learning_rate": 0.0001989923064999228, "loss": 0.4866, "step": 24090 }, { "epoch": 0.27902561015143795, "grad_norm": 0.8958339691162109, "learning_rate": 0.00019899049845818964, "loss": 0.4791, "step": 24100 }, { "epoch": 0.2791413884129116, "grad_norm": 0.688094973564148, "learning_rate": 0.00019898868880411033, "loss": 0.4269, "step": 24110 }, { "epoch": 0.2792571666743852, "grad_norm": 0.7957499027252197, "learning_rate": 0.00019898687753771432, "loss": 0.4259, "step": 24120 }, { "epoch": 0.27937294493585885, "grad_norm": 0.6640834808349609, "learning_rate": 0.00019898506465903113, "loss": 0.4807, "step": 24130 }, { "epoch": 0.2794887231973325, "grad_norm": 0.6267457008361816, "learning_rate": 0.00019898325016809028, "loss": 0.4854, "step": 24140 }, { "epoch": 0.2796045014588061, "grad_norm": 0.7446527481079102, "learning_rate": 0.00019898143406492128, "loss": 0.4627, "step": 24150 }, { "epoch": 0.27972027972027974, "grad_norm": 0.787577211856842, "learning_rate": 0.0001989796163495538, "loss": 0.4501, "step": 24160 }, { "epoch": 0.27983605798175337, "grad_norm": 0.7797445058822632, "learning_rate": 0.0001989777970220174, "loss": 0.4859, "step": 24170 }, { "epoch": 0.27995183624322695, "grad_norm": 0.7492689490318298, "learning_rate": 0.00019897597608234171, "loss": 0.4472, "step": 24180 }, { "epoch": 0.2800676145047006, "grad_norm": 0.7863853573799133, "learning_rate": 0.00019897415353055636, "loss": 0.4835, "step": 24190 }, { "epoch": 0.2801833927661742, "grad_norm": 0.7232769727706909, "learning_rate": 0.0001989723293666911, "loss": 0.4603, "step": 24200 }, { "epoch": 0.28029917102764784, "grad_norm": 0.70110684633255, "learning_rate": 0.00019897050359077558, "loss": 0.4713, "step": 24210 }, { "epoch": 0.2804149492891215, "grad_norm": 0.8409924507141113, "learning_rate": 0.0001989686762028396, "loss": 0.4133, "step": 24220 }, { "epoch": 0.2805307275505951, "grad_norm": 0.8462908864021301, "learning_rate": 0.00019896684720291288, "loss": 0.4721, "step": 24230 }, { "epoch": 0.28064650581206874, "grad_norm": 0.8973506093025208, "learning_rate": 0.00019896501659102522, "loss": 0.4737, "step": 24240 }, { "epoch": 0.28076228407354237, "grad_norm": 0.9143046736717224, "learning_rate": 0.00019896318436720645, "loss": 0.4649, "step": 24250 }, { "epoch": 0.280878062335016, "grad_norm": 0.7367831468582153, "learning_rate": 0.00019896135053148637, "loss": 0.4433, "step": 24260 }, { "epoch": 0.28099384059648963, "grad_norm": 0.8570603132247925, "learning_rate": 0.00019895951508389493, "loss": 0.4962, "step": 24270 }, { "epoch": 0.2811096188579632, "grad_norm": 0.8422549962997437, "learning_rate": 0.00019895767802446192, "loss": 0.4869, "step": 24280 }, { "epoch": 0.28122539711943684, "grad_norm": 0.6377737522125244, "learning_rate": 0.00019895583935321736, "loss": 0.4469, "step": 24290 }, { "epoch": 0.28134117538091047, "grad_norm": 0.7327654361724854, "learning_rate": 0.00019895399907019114, "loss": 0.5111, "step": 24300 }, { "epoch": 0.2814569536423841, "grad_norm": 0.7901021242141724, "learning_rate": 0.00019895215717541327, "loss": 0.4488, "step": 24310 }, { "epoch": 0.28157273190385773, "grad_norm": 0.7474518418312073, "learning_rate": 0.0001989503136689137, "loss": 0.4546, "step": 24320 }, { "epoch": 0.28168851016533136, "grad_norm": 0.7942332625389099, "learning_rate": 0.00019894846855072253, "loss": 0.5048, "step": 24330 }, { "epoch": 0.281804288426805, "grad_norm": 0.7080331444740295, "learning_rate": 0.00019894662182086973, "loss": 0.4663, "step": 24340 }, { "epoch": 0.2819200666882786, "grad_norm": 0.7460561394691467, "learning_rate": 0.00019894477347938544, "loss": 0.4765, "step": 24350 }, { "epoch": 0.28203584494975226, "grad_norm": 0.8166491389274597, "learning_rate": 0.00019894292352629975, "loss": 0.4769, "step": 24360 }, { "epoch": 0.28215162321122583, "grad_norm": 0.7675840258598328, "learning_rate": 0.00019894107196164279, "loss": 0.473, "step": 24370 }, { "epoch": 0.28226740147269946, "grad_norm": 0.8223817348480225, "learning_rate": 0.00019893921878544473, "loss": 0.4489, "step": 24380 }, { "epoch": 0.2823831797341731, "grad_norm": 0.8057273626327515, "learning_rate": 0.0001989373639977357, "loss": 0.4679, "step": 24390 }, { "epoch": 0.2824989579956467, "grad_norm": 0.9901914000511169, "learning_rate": 0.00019893550759854598, "loss": 0.4766, "step": 24400 }, { "epoch": 0.28261473625712036, "grad_norm": 0.589401125907898, "learning_rate": 0.00019893364958790578, "loss": 0.4873, "step": 24410 }, { "epoch": 0.282730514518594, "grad_norm": 0.9426624774932861, "learning_rate": 0.00019893178996584533, "loss": 0.4433, "step": 24420 }, { "epoch": 0.2828462927800676, "grad_norm": 0.7681499123573303, "learning_rate": 0.000198929928732395, "loss": 0.4601, "step": 24430 }, { "epoch": 0.28296207104154125, "grad_norm": 0.6982399225234985, "learning_rate": 0.00019892806588758504, "loss": 0.4303, "step": 24440 }, { "epoch": 0.2830778493030149, "grad_norm": 0.8492203950881958, "learning_rate": 0.00019892620143144578, "loss": 0.494, "step": 24450 }, { "epoch": 0.2831936275644885, "grad_norm": 0.6173506379127502, "learning_rate": 0.00019892433536400762, "loss": 0.4477, "step": 24460 }, { "epoch": 0.2833094058259621, "grad_norm": 0.7925242185592651, "learning_rate": 0.00019892246768530097, "loss": 0.4468, "step": 24470 }, { "epoch": 0.2834251840874357, "grad_norm": 0.6825738549232483, "learning_rate": 0.00019892059839535624, "loss": 0.4374, "step": 24480 }, { "epoch": 0.28354096234890935, "grad_norm": 0.8898781538009644, "learning_rate": 0.00019891872749420384, "loss": 0.4671, "step": 24490 }, { "epoch": 0.283656740610383, "grad_norm": 0.8128108978271484, "learning_rate": 0.0001989168549818743, "loss": 0.429, "step": 24500 }, { "epoch": 0.2837725188718566, "grad_norm": 0.6619224548339844, "learning_rate": 0.00019891498085839807, "loss": 0.4636, "step": 24510 }, { "epoch": 0.28388829713333025, "grad_norm": 0.9081688523292542, "learning_rate": 0.0001989131051238057, "loss": 0.4666, "step": 24520 }, { "epoch": 0.2840040753948039, "grad_norm": 0.7981681227684021, "learning_rate": 0.0001989112277781277, "loss": 0.4642, "step": 24530 }, { "epoch": 0.2841198536562775, "grad_norm": 0.6607206463813782, "learning_rate": 0.00019890934882139477, "loss": 0.4868, "step": 24540 }, { "epoch": 0.28423563191775114, "grad_norm": 0.9970320463180542, "learning_rate": 0.00019890746825363737, "loss": 0.4686, "step": 24550 }, { "epoch": 0.2843514101792248, "grad_norm": 0.7873477935791016, "learning_rate": 0.00019890558607488622, "loss": 0.4781, "step": 24560 }, { "epoch": 0.28446718844069835, "grad_norm": 0.7801017165184021, "learning_rate": 0.0001989037022851719, "loss": 0.493, "step": 24570 }, { "epoch": 0.284582966702172, "grad_norm": 0.7829385995864868, "learning_rate": 0.0001989018168845252, "loss": 0.4723, "step": 24580 }, { "epoch": 0.2846987449636456, "grad_norm": 0.7387408018112183, "learning_rate": 0.0001988999298729767, "loss": 0.4633, "step": 24590 }, { "epoch": 0.28481452322511924, "grad_norm": 0.7763007283210754, "learning_rate": 0.00019889804125055726, "loss": 0.4604, "step": 24600 }, { "epoch": 0.2849303014865929, "grad_norm": 0.8328849673271179, "learning_rate": 0.00019889615101729755, "loss": 0.458, "step": 24610 }, { "epoch": 0.2850460797480665, "grad_norm": 0.7388263940811157, "learning_rate": 0.00019889425917322842, "loss": 0.4495, "step": 24620 }, { "epoch": 0.28516185800954014, "grad_norm": 0.658170223236084, "learning_rate": 0.00019889236571838065, "loss": 0.4284, "step": 24630 }, { "epoch": 0.28527763627101377, "grad_norm": 0.7450670599937439, "learning_rate": 0.00019889047065278507, "loss": 0.4401, "step": 24640 }, { "epoch": 0.2853934145324874, "grad_norm": 0.9235295653343201, "learning_rate": 0.0001988885739764726, "loss": 0.4443, "step": 24650 }, { "epoch": 0.28550919279396103, "grad_norm": 0.8104484677314758, "learning_rate": 0.00019888667568947405, "loss": 0.475, "step": 24660 }, { "epoch": 0.2856249710554346, "grad_norm": 0.7793791890144348, "learning_rate": 0.00019888477579182043, "loss": 0.4617, "step": 24670 }, { "epoch": 0.28574074931690824, "grad_norm": 0.8632597327232361, "learning_rate": 0.00019888287428354264, "loss": 0.4786, "step": 24680 }, { "epoch": 0.28585652757838187, "grad_norm": 0.9871675968170166, "learning_rate": 0.00019888097116467162, "loss": 0.4408, "step": 24690 }, { "epoch": 0.2859723058398555, "grad_norm": 0.7056323289871216, "learning_rate": 0.00019887906643523845, "loss": 0.4515, "step": 24700 }, { "epoch": 0.28608808410132913, "grad_norm": 0.7838559746742249, "learning_rate": 0.0001988771600952741, "loss": 0.4804, "step": 24710 }, { "epoch": 0.28620386236280276, "grad_norm": 1.0138752460479736, "learning_rate": 0.00019887525214480958, "loss": 0.4448, "step": 24720 }, { "epoch": 0.2863196406242764, "grad_norm": 0.740874171257019, "learning_rate": 0.00019887334258387603, "loss": 0.4401, "step": 24730 }, { "epoch": 0.28643541888575, "grad_norm": 0.8224324584007263, "learning_rate": 0.00019887143141250456, "loss": 0.4729, "step": 24740 }, { "epoch": 0.28655119714722366, "grad_norm": 0.6226980090141296, "learning_rate": 0.00019886951863072624, "loss": 0.4261, "step": 24750 }, { "epoch": 0.2866669754086973, "grad_norm": 0.6898742914199829, "learning_rate": 0.0001988676042385723, "loss": 0.4599, "step": 24760 }, { "epoch": 0.28678275367017086, "grad_norm": 0.792374312877655, "learning_rate": 0.00019886568823607385, "loss": 0.4299, "step": 24770 }, { "epoch": 0.2868985319316445, "grad_norm": 0.8704492449760437, "learning_rate": 0.0001988637706232621, "loss": 0.4329, "step": 24780 }, { "epoch": 0.2870143101931181, "grad_norm": 0.8298460245132446, "learning_rate": 0.00019886185140016838, "loss": 0.4673, "step": 24790 }, { "epoch": 0.28713008845459176, "grad_norm": 0.8129158020019531, "learning_rate": 0.00019885993056682384, "loss": 0.4697, "step": 24800 }, { "epoch": 0.2872458667160654, "grad_norm": 0.8497953414916992, "learning_rate": 0.00019885800812325982, "loss": 0.4758, "step": 24810 }, { "epoch": 0.287361644977539, "grad_norm": 0.7291585206985474, "learning_rate": 0.0001988560840695076, "loss": 0.4651, "step": 24820 }, { "epoch": 0.28747742323901265, "grad_norm": 0.7276846170425415, "learning_rate": 0.00019885415840559855, "loss": 0.4444, "step": 24830 }, { "epoch": 0.2875932015004863, "grad_norm": 0.8416005969047546, "learning_rate": 0.00019885223113156407, "loss": 0.4598, "step": 24840 }, { "epoch": 0.2877089797619599, "grad_norm": 0.7282233834266663, "learning_rate": 0.00019885030224743545, "loss": 0.455, "step": 24850 }, { "epoch": 0.28782475802343355, "grad_norm": 0.8418949842453003, "learning_rate": 0.00019884837175324421, "loss": 0.4815, "step": 24860 }, { "epoch": 0.2879405362849071, "grad_norm": 0.9172649383544922, "learning_rate": 0.0001988464396490217, "loss": 0.4585, "step": 24870 }, { "epoch": 0.28805631454638075, "grad_norm": 0.8532072305679321, "learning_rate": 0.00019884450593479948, "loss": 0.465, "step": 24880 }, { "epoch": 0.2881720928078544, "grad_norm": 0.6876764297485352, "learning_rate": 0.00019884257061060897, "loss": 0.4853, "step": 24890 }, { "epoch": 0.288287871069328, "grad_norm": 0.8229912519454956, "learning_rate": 0.00019884063367648172, "loss": 0.4282, "step": 24900 }, { "epoch": 0.28840364933080165, "grad_norm": 1.0079822540283203, "learning_rate": 0.00019883869513244928, "loss": 0.4752, "step": 24910 }, { "epoch": 0.2885194275922753, "grad_norm": 0.7636349201202393, "learning_rate": 0.00019883675497854326, "loss": 0.4775, "step": 24920 }, { "epoch": 0.2886352058537489, "grad_norm": 0.663273811340332, "learning_rate": 0.00019883481321479523, "loss": 0.4346, "step": 24930 }, { "epoch": 0.28875098411522254, "grad_norm": 0.8784067034721375, "learning_rate": 0.00019883286984123682, "loss": 0.4824, "step": 24940 }, { "epoch": 0.2888667623766962, "grad_norm": 0.7610532641410828, "learning_rate": 0.00019883092485789965, "loss": 0.437, "step": 24950 }, { "epoch": 0.2889825406381698, "grad_norm": 0.6961755752563477, "learning_rate": 0.00019882897826481544, "loss": 0.4494, "step": 24960 }, { "epoch": 0.2890983188996434, "grad_norm": 0.8274893760681152, "learning_rate": 0.00019882703006201592, "loss": 0.4395, "step": 24970 }, { "epoch": 0.289214097161117, "grad_norm": 0.7429479360580444, "learning_rate": 0.00019882508024953274, "loss": 0.492, "step": 24980 }, { "epoch": 0.28932987542259064, "grad_norm": 0.8372470140457153, "learning_rate": 0.00019882312882739771, "loss": 0.4698, "step": 24990 }, { "epoch": 0.2894456536840643, "grad_norm": 0.932794988155365, "learning_rate": 0.00019882117579564262, "loss": 0.4482, "step": 25000 }, { "epoch": 0.2894456536840643, "eval_chrf": 73.6551260485451, "eval_loss": 0.6995149254798889, "eval_runtime": 348.6979, "eval_samples_per_second": 0.287, "eval_steps_per_second": 0.011, "step": 25000 }, { "epoch": 0.2895614319455379, "grad_norm": 0.7529760599136353, "learning_rate": 0.00019881922115429928, "loss": 0.4535, "step": 25010 }, { "epoch": 0.28967721020701154, "grad_norm": 0.7579430341720581, "learning_rate": 0.00019881726490339954, "loss": 0.4411, "step": 25020 }, { "epoch": 0.28979298846848517, "grad_norm": 0.8080278635025024, "learning_rate": 0.00019881530704297522, "loss": 0.4636, "step": 25030 }, { "epoch": 0.2899087667299588, "grad_norm": 0.7168752551078796, "learning_rate": 0.00019881334757305822, "loss": 0.4464, "step": 25040 }, { "epoch": 0.29002454499143243, "grad_norm": 0.7604632377624512, "learning_rate": 0.00019881138649368048, "loss": 0.4738, "step": 25050 }, { "epoch": 0.290140323252906, "grad_norm": 0.7843213677406311, "learning_rate": 0.00019880942380487394, "loss": 0.4833, "step": 25060 }, { "epoch": 0.29025610151437964, "grad_norm": 0.9687686562538147, "learning_rate": 0.00019880745950667054, "loss": 0.4678, "step": 25070 }, { "epoch": 0.29037187977585327, "grad_norm": 0.8397155404090881, "learning_rate": 0.0001988054935991023, "loss": 0.4622, "step": 25080 }, { "epoch": 0.2904876580373269, "grad_norm": 0.9570215940475464, "learning_rate": 0.00019880352608220124, "loss": 0.4591, "step": 25090 }, { "epoch": 0.29060343629880053, "grad_norm": 0.85113126039505, "learning_rate": 0.00019880155695599938, "loss": 0.4422, "step": 25100 }, { "epoch": 0.29071921456027416, "grad_norm": 0.5425119996070862, "learning_rate": 0.00019879958622052882, "loss": 0.4483, "step": 25110 }, { "epoch": 0.2908349928217478, "grad_norm": 0.8262937664985657, "learning_rate": 0.00019879761387582164, "loss": 0.4712, "step": 25120 }, { "epoch": 0.2909507710832214, "grad_norm": 0.7128187417984009, "learning_rate": 0.00019879563992190996, "loss": 0.4689, "step": 25130 }, { "epoch": 0.29106654934469506, "grad_norm": 0.7753236293792725, "learning_rate": 0.00019879366435882601, "loss": 0.4646, "step": 25140 }, { "epoch": 0.2911823276061687, "grad_norm": 0.8457900285720825, "learning_rate": 0.00019879168718660184, "loss": 0.4736, "step": 25150 }, { "epoch": 0.29129810586764227, "grad_norm": 0.8182496428489685, "learning_rate": 0.00019878970840526977, "loss": 0.4717, "step": 25160 }, { "epoch": 0.2914138841291159, "grad_norm": 0.709197998046875, "learning_rate": 0.00019878772801486193, "loss": 0.4635, "step": 25170 }, { "epoch": 0.29152966239058953, "grad_norm": 0.7016478776931763, "learning_rate": 0.00019878574601541065, "loss": 0.4442, "step": 25180 }, { "epoch": 0.29164544065206316, "grad_norm": 0.7485529780387878, "learning_rate": 0.0001987837624069482, "loss": 0.4934, "step": 25190 }, { "epoch": 0.2917612189135368, "grad_norm": 1.0375511646270752, "learning_rate": 0.00019878177718950685, "loss": 0.4424, "step": 25200 }, { "epoch": 0.2918769971750104, "grad_norm": 0.7467118501663208, "learning_rate": 0.000198779790363119, "loss": 0.4706, "step": 25210 }, { "epoch": 0.29199277543648405, "grad_norm": 0.712745189666748, "learning_rate": 0.00019877780192781696, "loss": 0.457, "step": 25220 }, { "epoch": 0.2921085536979577, "grad_norm": 0.7153512835502625, "learning_rate": 0.0001987758118836331, "loss": 0.4567, "step": 25230 }, { "epoch": 0.2922243319594313, "grad_norm": 0.8477054834365845, "learning_rate": 0.0001987738202305999, "loss": 0.433, "step": 25240 }, { "epoch": 0.29234011022090495, "grad_norm": 1.225630521774292, "learning_rate": 0.00019877182696874973, "loss": 0.4437, "step": 25250 }, { "epoch": 0.2924558884823785, "grad_norm": 0.6832100749015808, "learning_rate": 0.0001987698320981151, "loss": 0.4771, "step": 25260 }, { "epoch": 0.29257166674385215, "grad_norm": 0.7465460896492004, "learning_rate": 0.0001987678356187285, "loss": 0.4749, "step": 25270 }, { "epoch": 0.2926874450053258, "grad_norm": 0.9117196798324585, "learning_rate": 0.00019876583753062246, "loss": 0.445, "step": 25280 }, { "epoch": 0.2928032232667994, "grad_norm": 0.7943907380104065, "learning_rate": 0.0001987638378338295, "loss": 0.4913, "step": 25290 }, { "epoch": 0.29291900152827305, "grad_norm": 0.8349709510803223, "learning_rate": 0.00019876183652838217, "loss": 0.4657, "step": 25300 }, { "epoch": 0.2930347797897467, "grad_norm": 0.7472047209739685, "learning_rate": 0.0001987598336143131, "loss": 0.4477, "step": 25310 }, { "epoch": 0.2931505580512203, "grad_norm": 0.7242029905319214, "learning_rate": 0.00019875782909165493, "loss": 0.4557, "step": 25320 }, { "epoch": 0.29326633631269394, "grad_norm": 0.5495631098747253, "learning_rate": 0.00019875582296044028, "loss": 0.4794, "step": 25330 }, { "epoch": 0.2933821145741676, "grad_norm": 0.7208079695701599, "learning_rate": 0.0001987538152207018, "loss": 0.4479, "step": 25340 }, { "epoch": 0.2934978928356412, "grad_norm": 0.7286500930786133, "learning_rate": 0.00019875180587247224, "loss": 0.4412, "step": 25350 }, { "epoch": 0.2936136710971148, "grad_norm": 0.7493752241134644, "learning_rate": 0.0001987497949157843, "loss": 0.4625, "step": 25360 }, { "epoch": 0.2937294493585884, "grad_norm": 0.7854233980178833, "learning_rate": 0.0001987477823506708, "loss": 0.4334, "step": 25370 }, { "epoch": 0.29384522762006204, "grad_norm": 0.6529062986373901, "learning_rate": 0.0001987457681771644, "loss": 0.4467, "step": 25380 }, { "epoch": 0.2939610058815357, "grad_norm": 0.7695010304450989, "learning_rate": 0.000198743752395298, "loss": 0.464, "step": 25390 }, { "epoch": 0.2940767841430093, "grad_norm": 0.70816969871521, "learning_rate": 0.00019874173500510441, "loss": 0.4354, "step": 25400 }, { "epoch": 0.29419256240448294, "grad_norm": 0.7394952178001404, "learning_rate": 0.0001987397160066165, "loss": 0.4595, "step": 25410 }, { "epoch": 0.29430834066595657, "grad_norm": 0.7608230113983154, "learning_rate": 0.0001987376953998671, "loss": 0.4819, "step": 25420 }, { "epoch": 0.2944241189274302, "grad_norm": 0.8459185361862183, "learning_rate": 0.0001987356731848892, "loss": 0.4563, "step": 25430 }, { "epoch": 0.29453989718890383, "grad_norm": 0.8780574202537537, "learning_rate": 0.00019873364936171567, "loss": 0.4759, "step": 25440 }, { "epoch": 0.29465567545037746, "grad_norm": 0.7876760363578796, "learning_rate": 0.0001987316239303795, "loss": 0.453, "step": 25450 }, { "epoch": 0.29477145371185104, "grad_norm": 0.7067634463310242, "learning_rate": 0.0001987295968909137, "loss": 0.4401, "step": 25460 }, { "epoch": 0.29488723197332467, "grad_norm": 0.9772910475730896, "learning_rate": 0.00019872756824335127, "loss": 0.4196, "step": 25470 }, { "epoch": 0.2950030102347983, "grad_norm": 0.7131596803665161, "learning_rate": 0.00019872553798772524, "loss": 0.4456, "step": 25480 }, { "epoch": 0.29511878849627193, "grad_norm": 0.6839360594749451, "learning_rate": 0.0001987235061240687, "loss": 0.4659, "step": 25490 }, { "epoch": 0.29523456675774556, "grad_norm": 0.7787697911262512, "learning_rate": 0.00019872147265241472, "loss": 0.4278, "step": 25500 }, { "epoch": 0.2953503450192192, "grad_norm": 0.9587351679801941, "learning_rate": 0.00019871943757279644, "loss": 0.4712, "step": 25510 }, { "epoch": 0.2954661232806928, "grad_norm": 0.8071374893188477, "learning_rate": 0.000198717400885247, "loss": 0.4035, "step": 25520 }, { "epoch": 0.29558190154216646, "grad_norm": 0.9236906170845032, "learning_rate": 0.0001987153625897996, "loss": 0.4902, "step": 25530 }, { "epoch": 0.2956976798036401, "grad_norm": 0.7164406776428223, "learning_rate": 0.00019871332268648741, "loss": 0.4584, "step": 25540 }, { "epoch": 0.2958134580651137, "grad_norm": 0.6880311965942383, "learning_rate": 0.00019871128117534363, "loss": 0.4598, "step": 25550 }, { "epoch": 0.2959292363265873, "grad_norm": 0.9487251043319702, "learning_rate": 0.00019870923805640156, "loss": 0.4638, "step": 25560 }, { "epoch": 0.29604501458806093, "grad_norm": 0.7381792068481445, "learning_rate": 0.00019870719332969446, "loss": 0.4835, "step": 25570 }, { "epoch": 0.29616079284953456, "grad_norm": 0.8834322094917297, "learning_rate": 0.00019870514699525565, "loss": 0.4818, "step": 25580 }, { "epoch": 0.2962765711110082, "grad_norm": 1.2315737009048462, "learning_rate": 0.00019870309905311846, "loss": 0.461, "step": 25590 }, { "epoch": 0.2963923493724818, "grad_norm": 0.8836092948913574, "learning_rate": 0.00019870104950331618, "loss": 0.461, "step": 25600 }, { "epoch": 0.29650812763395545, "grad_norm": 0.5545638203620911, "learning_rate": 0.00019869899834588228, "loss": 0.4154, "step": 25610 }, { "epoch": 0.2966239058954291, "grad_norm": 0.8589716553688049, "learning_rate": 0.00019869694558085013, "loss": 0.4476, "step": 25620 }, { "epoch": 0.2967396841569027, "grad_norm": 0.8191109895706177, "learning_rate": 0.00019869489120825318, "loss": 0.4439, "step": 25630 }, { "epoch": 0.29685546241837635, "grad_norm": 0.6284889578819275, "learning_rate": 0.00019869283522812485, "loss": 0.48, "step": 25640 }, { "epoch": 0.2969712406798499, "grad_norm": 0.7687880396842957, "learning_rate": 0.00019869077764049872, "loss": 0.4544, "step": 25650 }, { "epoch": 0.29708701894132356, "grad_norm": 0.6644411087036133, "learning_rate": 0.00019868871844540818, "loss": 0.4136, "step": 25660 }, { "epoch": 0.2972027972027972, "grad_norm": 0.5750458836555481, "learning_rate": 0.00019868665764288682, "loss": 0.4139, "step": 25670 }, { "epoch": 0.2973185754642708, "grad_norm": 0.7164966464042664, "learning_rate": 0.00019868459523296827, "loss": 0.507, "step": 25680 }, { "epoch": 0.29743435372574445, "grad_norm": 0.6462358236312866, "learning_rate": 0.00019868253121568604, "loss": 0.4783, "step": 25690 }, { "epoch": 0.2975501319872181, "grad_norm": 0.7665681838989258, "learning_rate": 0.0001986804655910738, "loss": 0.4546, "step": 25700 }, { "epoch": 0.2976659102486917, "grad_norm": 0.6653458476066589, "learning_rate": 0.00019867839835916516, "loss": 0.4491, "step": 25710 }, { "epoch": 0.29778168851016534, "grad_norm": 0.9308329224586487, "learning_rate": 0.00019867632951999382, "loss": 0.4757, "step": 25720 }, { "epoch": 0.297897466771639, "grad_norm": 0.7271718382835388, "learning_rate": 0.00019867425907359345, "loss": 0.4553, "step": 25730 }, { "epoch": 0.2980132450331126, "grad_norm": 0.8564931154251099, "learning_rate": 0.0001986721870199978, "loss": 0.4943, "step": 25740 }, { "epoch": 0.2981290232945862, "grad_norm": 0.782024621963501, "learning_rate": 0.00019867011335924058, "loss": 0.4281, "step": 25750 }, { "epoch": 0.2982448015560598, "grad_norm": 0.7796012163162231, "learning_rate": 0.0001986680380913556, "loss": 0.4844, "step": 25760 }, { "epoch": 0.29836057981753344, "grad_norm": 0.8314896821975708, "learning_rate": 0.00019866596121637667, "loss": 0.4414, "step": 25770 }, { "epoch": 0.2984763580790071, "grad_norm": 0.8576761484146118, "learning_rate": 0.00019866388273433758, "loss": 0.4927, "step": 25780 }, { "epoch": 0.2985921363404807, "grad_norm": 0.7437127232551575, "learning_rate": 0.0001986618026452722, "loss": 0.4408, "step": 25790 }, { "epoch": 0.29870791460195434, "grad_norm": 0.7944348454475403, "learning_rate": 0.00019865972094921443, "loss": 0.4267, "step": 25800 }, { "epoch": 0.29882369286342797, "grad_norm": 0.6385840177536011, "learning_rate": 0.00019865763764619814, "loss": 0.4587, "step": 25810 }, { "epoch": 0.2989394711249016, "grad_norm": 0.5998877882957458, "learning_rate": 0.0001986555527362573, "loss": 0.4418, "step": 25820 }, { "epoch": 0.29905524938637523, "grad_norm": 0.7375982999801636, "learning_rate": 0.00019865346621942586, "loss": 0.4457, "step": 25830 }, { "epoch": 0.29917102764784886, "grad_norm": 0.7713582515716553, "learning_rate": 0.00019865137809573776, "loss": 0.478, "step": 25840 }, { "epoch": 0.29928680590932244, "grad_norm": 0.7261648178100586, "learning_rate": 0.00019864928836522708, "loss": 0.4456, "step": 25850 }, { "epoch": 0.29940258417079607, "grad_norm": 0.6686517596244812, "learning_rate": 0.0001986471970279278, "loss": 0.4914, "step": 25860 }, { "epoch": 0.2995183624322697, "grad_norm": 0.7416560053825378, "learning_rate": 0.000198645104083874, "loss": 0.4685, "step": 25870 }, { "epoch": 0.29963414069374333, "grad_norm": 0.6594707369804382, "learning_rate": 0.00019864300953309982, "loss": 0.4418, "step": 25880 }, { "epoch": 0.29974991895521697, "grad_norm": 0.7810648083686829, "learning_rate": 0.00019864091337563932, "loss": 0.4894, "step": 25890 }, { "epoch": 0.2998656972166906, "grad_norm": 0.7893812656402588, "learning_rate": 0.00019863881561152664, "loss": 0.4651, "step": 25900 }, { "epoch": 0.29998147547816423, "grad_norm": 0.8130543231964111, "learning_rate": 0.000198636716240796, "loss": 0.4852, "step": 25910 }, { "epoch": 0.30009725373963786, "grad_norm": 0.7054315209388733, "learning_rate": 0.00019863461526348152, "loss": 0.4326, "step": 25920 }, { "epoch": 0.3002130320011115, "grad_norm": 0.9094119071960449, "learning_rate": 0.0001986325126796175, "loss": 0.4634, "step": 25930 }, { "epoch": 0.3003288102625851, "grad_norm": 0.8726437091827393, "learning_rate": 0.00019863040848923808, "loss": 0.4657, "step": 25940 }, { "epoch": 0.3004445885240587, "grad_norm": 0.7491232752799988, "learning_rate": 0.00019862830269237766, "loss": 0.4307, "step": 25950 }, { "epoch": 0.30056036678553233, "grad_norm": 0.6829655766487122, "learning_rate": 0.00019862619528907046, "loss": 0.4518, "step": 25960 }, { "epoch": 0.30067614504700596, "grad_norm": 0.4723912179470062, "learning_rate": 0.0001986240862793508, "loss": 0.4225, "step": 25970 }, { "epoch": 0.3007919233084796, "grad_norm": 0.7675802111625671, "learning_rate": 0.00019862197566325308, "loss": 0.4748, "step": 25980 }, { "epoch": 0.3009077015699532, "grad_norm": 0.8357563018798828, "learning_rate": 0.00019861986344081164, "loss": 0.4614, "step": 25990 }, { "epoch": 0.30102347983142685, "grad_norm": 0.7614332437515259, "learning_rate": 0.00019861774961206087, "loss": 0.4507, "step": 26000 }, { "epoch": 0.30102347983142685, "eval_chrf": 76.55202599732712, "eval_loss": 0.6856644153594971, "eval_runtime": 349.7237, "eval_samples_per_second": 0.286, "eval_steps_per_second": 0.011, "step": 26000 }, { "epoch": 0.3011392580929005, "grad_norm": 0.9072996377944946, "learning_rate": 0.0001986156341770353, "loss": 0.4475, "step": 26010 }, { "epoch": 0.3012550363543741, "grad_norm": 0.7568226456642151, "learning_rate": 0.00019861351713576923, "loss": 0.4469, "step": 26020 }, { "epoch": 0.30137081461584775, "grad_norm": 0.6582204103469849, "learning_rate": 0.00019861139848829725, "loss": 0.4668, "step": 26030 }, { "epoch": 0.3014865928773214, "grad_norm": 0.8352224826812744, "learning_rate": 0.00019860927823465386, "loss": 0.4859, "step": 26040 }, { "epoch": 0.30160237113879496, "grad_norm": 0.7534937262535095, "learning_rate": 0.00019860715637487356, "loss": 0.4927, "step": 26050 }, { "epoch": 0.3017181494002686, "grad_norm": 0.7749175429344177, "learning_rate": 0.00019860503290899093, "loss": 0.4535, "step": 26060 }, { "epoch": 0.3018339276617422, "grad_norm": 0.8303593993186951, "learning_rate": 0.00019860290783704058, "loss": 0.486, "step": 26070 }, { "epoch": 0.30194970592321585, "grad_norm": 0.7758985757827759, "learning_rate": 0.00019860078115905704, "loss": 0.4451, "step": 26080 }, { "epoch": 0.3020654841846895, "grad_norm": 0.8329030275344849, "learning_rate": 0.00019859865287507505, "loss": 0.4631, "step": 26090 }, { "epoch": 0.3021812624461631, "grad_norm": 0.541795551776886, "learning_rate": 0.0001985965229851292, "loss": 0.4667, "step": 26100 }, { "epoch": 0.30229704070763674, "grad_norm": 0.6967641115188599, "learning_rate": 0.0001985943914892542, "loss": 0.4491, "step": 26110 }, { "epoch": 0.3024128189691104, "grad_norm": 0.813157856464386, "learning_rate": 0.00019859225838748481, "loss": 0.5039, "step": 26120 }, { "epoch": 0.302528597230584, "grad_norm": 0.7288057804107666, "learning_rate": 0.00019859012367985575, "loss": 0.5061, "step": 26130 }, { "epoch": 0.30264437549205764, "grad_norm": 0.8384978175163269, "learning_rate": 0.0001985879873664018, "loss": 0.4781, "step": 26140 }, { "epoch": 0.3027601537535312, "grad_norm": 0.7077749371528625, "learning_rate": 0.00019858584944715767, "loss": 0.4413, "step": 26150 }, { "epoch": 0.30287593201500485, "grad_norm": 0.8025346398353577, "learning_rate": 0.0001985837099221583, "loss": 0.4518, "step": 26160 }, { "epoch": 0.3029917102764785, "grad_norm": 0.8154165148735046, "learning_rate": 0.00019858156879143848, "loss": 0.4544, "step": 26170 }, { "epoch": 0.3031074885379521, "grad_norm": 0.7080431580543518, "learning_rate": 0.0001985794260550331, "loss": 0.4692, "step": 26180 }, { "epoch": 0.30322326679942574, "grad_norm": 0.7371339201927185, "learning_rate": 0.00019857728171297705, "loss": 0.4654, "step": 26190 }, { "epoch": 0.30333904506089937, "grad_norm": 0.7605065703392029, "learning_rate": 0.00019857513576530524, "loss": 0.4424, "step": 26200 }, { "epoch": 0.303454823322373, "grad_norm": 0.8257567286491394, "learning_rate": 0.00019857298821205266, "loss": 0.4726, "step": 26210 }, { "epoch": 0.30357060158384663, "grad_norm": 0.827387809753418, "learning_rate": 0.00019857083905325428, "loss": 0.4483, "step": 26220 }, { "epoch": 0.30368637984532026, "grad_norm": 0.6968565583229065, "learning_rate": 0.0001985686882889451, "loss": 0.4263, "step": 26230 }, { "epoch": 0.3038021581067939, "grad_norm": 0.7551820874214172, "learning_rate": 0.00019856653591916013, "loss": 0.4492, "step": 26240 }, { "epoch": 0.30391793636826747, "grad_norm": 0.7242693305015564, "learning_rate": 0.0001985643819439345, "loss": 0.4898, "step": 26250 }, { "epoch": 0.3040337146297411, "grad_norm": 0.6734589338302612, "learning_rate": 0.00019856222636330318, "loss": 0.4778, "step": 26260 }, { "epoch": 0.30414949289121473, "grad_norm": 0.9003152847290039, "learning_rate": 0.00019856006917730136, "loss": 0.4381, "step": 26270 }, { "epoch": 0.30426527115268837, "grad_norm": 0.7741272449493408, "learning_rate": 0.00019855791038596418, "loss": 0.4827, "step": 26280 }, { "epoch": 0.304381049414162, "grad_norm": 0.5133253931999207, "learning_rate": 0.00019855574998932675, "loss": 0.416, "step": 26290 }, { "epoch": 0.30449682767563563, "grad_norm": 0.8481763005256653, "learning_rate": 0.0001985535879874243, "loss": 0.4661, "step": 26300 }, { "epoch": 0.30461260593710926, "grad_norm": 0.671328604221344, "learning_rate": 0.00019855142438029205, "loss": 0.477, "step": 26310 }, { "epoch": 0.3047283841985829, "grad_norm": 0.6453095078468323, "learning_rate": 0.00019854925916796518, "loss": 0.4579, "step": 26320 }, { "epoch": 0.3048441624600565, "grad_norm": 1.0940552949905396, "learning_rate": 0.00019854709235047902, "loss": 0.4694, "step": 26330 }, { "epoch": 0.3049599407215301, "grad_norm": 0.7535539269447327, "learning_rate": 0.00019854492392786885, "loss": 0.4686, "step": 26340 }, { "epoch": 0.30507571898300373, "grad_norm": 0.8653777837753296, "learning_rate": 0.00019854275390016998, "loss": 0.4336, "step": 26350 }, { "epoch": 0.30519149724447736, "grad_norm": 0.6675240397453308, "learning_rate": 0.00019854058226741777, "loss": 0.4464, "step": 26360 }, { "epoch": 0.305307275505951, "grad_norm": 0.7508164048194885, "learning_rate": 0.00019853840902964756, "loss": 0.4171, "step": 26370 }, { "epoch": 0.3054230537674246, "grad_norm": 0.7988144755363464, "learning_rate": 0.00019853623418689478, "loss": 0.4583, "step": 26380 }, { "epoch": 0.30553883202889826, "grad_norm": 0.8029592633247375, "learning_rate": 0.0001985340577391948, "loss": 0.4374, "step": 26390 }, { "epoch": 0.3056546102903719, "grad_norm": 0.8196548819541931, "learning_rate": 0.00019853187968658315, "loss": 0.4535, "step": 26400 }, { "epoch": 0.3057703885518455, "grad_norm": 0.708761990070343, "learning_rate": 0.00019852970002909526, "loss": 0.448, "step": 26410 }, { "epoch": 0.30588616681331915, "grad_norm": 0.6414585709571838, "learning_rate": 0.0001985275187667666, "loss": 0.4568, "step": 26420 }, { "epoch": 0.3060019450747928, "grad_norm": 0.7461490035057068, "learning_rate": 0.00019852533589963278, "loss": 0.4502, "step": 26430 }, { "epoch": 0.30611772333626636, "grad_norm": 0.7462594509124756, "learning_rate": 0.0001985231514277293, "loss": 0.46, "step": 26440 }, { "epoch": 0.30623350159774, "grad_norm": 0.7091456055641174, "learning_rate": 0.00019852096535109172, "loss": 0.4569, "step": 26450 }, { "epoch": 0.3063492798592136, "grad_norm": 0.7239569425582886, "learning_rate": 0.0001985187776697557, "loss": 0.4467, "step": 26460 }, { "epoch": 0.30646505812068725, "grad_norm": 0.7347016930580139, "learning_rate": 0.00019851658838375685, "loss": 0.4489, "step": 26470 }, { "epoch": 0.3065808363821609, "grad_norm": 0.8039568662643433, "learning_rate": 0.0001985143974931308, "loss": 0.4118, "step": 26480 }, { "epoch": 0.3066966146436345, "grad_norm": 0.7843155264854431, "learning_rate": 0.0001985122049979133, "loss": 0.4426, "step": 26490 }, { "epoch": 0.30681239290510814, "grad_norm": 0.5724373459815979, "learning_rate": 0.00019851001089814, "loss": 0.4835, "step": 26500 }, { "epoch": 0.3069281711665818, "grad_norm": 0.8228092193603516, "learning_rate": 0.0001985078151938467, "loss": 0.4574, "step": 26510 }, { "epoch": 0.3070439494280554, "grad_norm": 0.6921157240867615, "learning_rate": 0.0001985056178850691, "loss": 0.4658, "step": 26520 }, { "epoch": 0.30715972768952904, "grad_norm": 0.8331666588783264, "learning_rate": 0.000198503418971843, "loss": 0.467, "step": 26530 }, { "epoch": 0.3072755059510026, "grad_norm": 0.8443690538406372, "learning_rate": 0.00019850121845420423, "loss": 0.4681, "step": 26540 }, { "epoch": 0.30739128421247625, "grad_norm": 0.7651891112327576, "learning_rate": 0.00019849901633218862, "loss": 0.4576, "step": 26550 }, { "epoch": 0.3075070624739499, "grad_norm": 0.7527976632118225, "learning_rate": 0.0001984968126058321, "loss": 0.4611, "step": 26560 }, { "epoch": 0.3076228407354235, "grad_norm": 0.7612469792366028, "learning_rate": 0.00019849460727517046, "loss": 0.4512, "step": 26570 }, { "epoch": 0.30773861899689714, "grad_norm": 0.8089647889137268, "learning_rate": 0.0001984924003402397, "loss": 0.4182, "step": 26580 }, { "epoch": 0.30785439725837077, "grad_norm": 0.7524958848953247, "learning_rate": 0.00019849019180107573, "loss": 0.4364, "step": 26590 }, { "epoch": 0.3079701755198444, "grad_norm": 0.7265125513076782, "learning_rate": 0.00019848798165771455, "loss": 0.4515, "step": 26600 }, { "epoch": 0.30808595378131803, "grad_norm": 0.8231269121170044, "learning_rate": 0.0001984857699101921, "loss": 0.4867, "step": 26610 }, { "epoch": 0.30820173204279167, "grad_norm": 0.829472005367279, "learning_rate": 0.00019848355655854448, "loss": 0.4419, "step": 26620 }, { "epoch": 0.3083175103042653, "grad_norm": 0.7649688124656677, "learning_rate": 0.0001984813416028077, "loss": 0.4686, "step": 26630 }, { "epoch": 0.3084332885657389, "grad_norm": 0.5475059151649475, "learning_rate": 0.00019847912504301785, "loss": 0.4663, "step": 26640 }, { "epoch": 0.3085490668272125, "grad_norm": 0.7118447422981262, "learning_rate": 0.00019847690687921102, "loss": 0.4534, "step": 26650 }, { "epoch": 0.30866484508868614, "grad_norm": 0.7882592678070068, "learning_rate": 0.0001984746871114233, "loss": 0.441, "step": 26660 }, { "epoch": 0.30878062335015977, "grad_norm": 0.6085837483406067, "learning_rate": 0.00019847246573969095, "loss": 0.4329, "step": 26670 }, { "epoch": 0.3088964016116334, "grad_norm": 0.9138346910476685, "learning_rate": 0.0001984702427640501, "loss": 0.4463, "step": 26680 }, { "epoch": 0.30901217987310703, "grad_norm": 0.8461740612983704, "learning_rate": 0.0001984680181845369, "loss": 0.4341, "step": 26690 }, { "epoch": 0.30912795813458066, "grad_norm": 0.7021976113319397, "learning_rate": 0.0001984657920011877, "loss": 0.4718, "step": 26700 }, { "epoch": 0.3092437363960543, "grad_norm": 0.7682430148124695, "learning_rate": 0.00019846356421403862, "loss": 0.4582, "step": 26710 }, { "epoch": 0.3093595146575279, "grad_norm": 0.7685060501098633, "learning_rate": 0.00019846133482312605, "loss": 0.4395, "step": 26720 }, { "epoch": 0.30947529291900155, "grad_norm": 0.7221096158027649, "learning_rate": 0.0001984591038284863, "loss": 0.464, "step": 26730 }, { "epoch": 0.30959107118047513, "grad_norm": 0.67400062084198, "learning_rate": 0.00019845687123015563, "loss": 0.4644, "step": 26740 }, { "epoch": 0.30970684944194876, "grad_norm": 0.596010148525238, "learning_rate": 0.00019845463702817048, "loss": 0.4319, "step": 26750 }, { "epoch": 0.3098226277034224, "grad_norm": 0.8571289777755737, "learning_rate": 0.00019845240122256722, "loss": 0.4568, "step": 26760 }, { "epoch": 0.309938405964896, "grad_norm": 0.7959387302398682, "learning_rate": 0.00019845016381338224, "loss": 0.4596, "step": 26770 }, { "epoch": 0.31005418422636966, "grad_norm": 0.7275651693344116, "learning_rate": 0.00019844792480065203, "loss": 0.4402, "step": 26780 }, { "epoch": 0.3101699624878433, "grad_norm": 0.7132201194763184, "learning_rate": 0.000198445684184413, "loss": 0.4566, "step": 26790 }, { "epoch": 0.3102857407493169, "grad_norm": 0.6844136714935303, "learning_rate": 0.0001984434419647017, "loss": 0.4523, "step": 26800 }, { "epoch": 0.31040151901079055, "grad_norm": 0.9616363048553467, "learning_rate": 0.00019844119814155464, "loss": 0.4472, "step": 26810 }, { "epoch": 0.3105172972722642, "grad_norm": 0.6819164752960205, "learning_rate": 0.0001984389527150083, "loss": 0.46, "step": 26820 }, { "epoch": 0.3106330755337378, "grad_norm": 0.7731338739395142, "learning_rate": 0.00019843670568509935, "loss": 0.4441, "step": 26830 }, { "epoch": 0.3107488537952114, "grad_norm": 0.6755056381225586, "learning_rate": 0.00019843445705186436, "loss": 0.4542, "step": 26840 }, { "epoch": 0.310864632056685, "grad_norm": 0.8113489151000977, "learning_rate": 0.00019843220681533993, "loss": 0.48, "step": 26850 }, { "epoch": 0.31098041031815865, "grad_norm": 0.8629299998283386, "learning_rate": 0.00019842995497556273, "loss": 0.4458, "step": 26860 }, { "epoch": 0.3110961885796323, "grad_norm": 0.7523194551467896, "learning_rate": 0.0001984277015325694, "loss": 0.4346, "step": 26870 }, { "epoch": 0.3112119668411059, "grad_norm": 0.647796630859375, "learning_rate": 0.00019842544648639675, "loss": 0.4773, "step": 26880 }, { "epoch": 0.31132774510257955, "grad_norm": 0.8183492422103882, "learning_rate": 0.00019842318983708136, "loss": 0.457, "step": 26890 }, { "epoch": 0.3114435233640532, "grad_norm": 0.7981014847755432, "learning_rate": 0.0001984209315846601, "loss": 0.4778, "step": 26900 }, { "epoch": 0.3115593016255268, "grad_norm": 0.5752872824668884, "learning_rate": 0.0001984186717291697, "loss": 0.4416, "step": 26910 }, { "epoch": 0.31167507988700044, "grad_norm": 0.8017776012420654, "learning_rate": 0.000198416410270647, "loss": 0.4837, "step": 26920 }, { "epoch": 0.311790858148474, "grad_norm": 0.7454376816749573, "learning_rate": 0.0001984141472091288, "loss": 0.443, "step": 26930 }, { "epoch": 0.31190663640994765, "grad_norm": 0.7297793626785278, "learning_rate": 0.000198411882544652, "loss": 0.4415, "step": 26940 }, { "epoch": 0.3120224146714213, "grad_norm": 0.7995197772979736, "learning_rate": 0.00019840961627725344, "loss": 0.4308, "step": 26950 }, { "epoch": 0.3121381929328949, "grad_norm": 0.698663055896759, "learning_rate": 0.00019840734840697008, "loss": 0.4123, "step": 26960 }, { "epoch": 0.31225397119436854, "grad_norm": 0.7404296398162842, "learning_rate": 0.00019840507893383882, "loss": 0.4399, "step": 26970 }, { "epoch": 0.31236974945584217, "grad_norm": 0.8033159375190735, "learning_rate": 0.00019840280785789666, "loss": 0.4575, "step": 26980 }, { "epoch": 0.3124855277173158, "grad_norm": 0.7322561740875244, "learning_rate": 0.00019840053517918054, "loss": 0.4495, "step": 26990 }, { "epoch": 0.31260130597878943, "grad_norm": 0.7145832180976868, "learning_rate": 0.00019839826089772751, "loss": 0.4385, "step": 27000 }, { "epoch": 0.31260130597878943, "eval_chrf": 81.7699740110989, "eval_loss": 0.693875253200531, "eval_runtime": 218.5509, "eval_samples_per_second": 0.458, "eval_steps_per_second": 0.018, "step": 27000 }, { "epoch": 0.31271708424026307, "grad_norm": 0.5868543982505798, "learning_rate": 0.00019839598501357464, "loss": 0.4697, "step": 27010 }, { "epoch": 0.3128328625017367, "grad_norm": 1.0674078464508057, "learning_rate": 0.00019839370752675896, "loss": 0.4471, "step": 27020 }, { "epoch": 0.3129486407632103, "grad_norm": 0.832271158695221, "learning_rate": 0.00019839142843731757, "loss": 0.4725, "step": 27030 }, { "epoch": 0.3130644190246839, "grad_norm": 0.8049610257148743, "learning_rate": 0.00019838914774528762, "loss": 0.4375, "step": 27040 }, { "epoch": 0.31318019728615754, "grad_norm": 0.720533013343811, "learning_rate": 0.0001983868654507062, "loss": 0.4732, "step": 27050 }, { "epoch": 0.31329597554763117, "grad_norm": 0.7142242789268494, "learning_rate": 0.00019838458155361054, "loss": 0.4555, "step": 27060 }, { "epoch": 0.3134117538091048, "grad_norm": 0.7077202796936035, "learning_rate": 0.00019838229605403785, "loss": 0.482, "step": 27070 }, { "epoch": 0.31352753207057843, "grad_norm": 0.7336899638175964, "learning_rate": 0.0001983800089520253, "loss": 0.4545, "step": 27080 }, { "epoch": 0.31364331033205206, "grad_norm": 0.8226677775382996, "learning_rate": 0.00019837772024761016, "loss": 0.4723, "step": 27090 }, { "epoch": 0.3137590885935257, "grad_norm": 0.6755543947219849, "learning_rate": 0.00019837542994082972, "loss": 0.4627, "step": 27100 }, { "epoch": 0.3138748668549993, "grad_norm": 0.6180155873298645, "learning_rate": 0.00019837313803172129, "loss": 0.4595, "step": 27110 }, { "epoch": 0.31399064511647296, "grad_norm": 0.8046608567237854, "learning_rate": 0.00019837084452032218, "loss": 0.4628, "step": 27120 }, { "epoch": 0.31410642337794653, "grad_norm": 0.8131647109985352, "learning_rate": 0.00019836854940666974, "loss": 0.4545, "step": 27130 }, { "epoch": 0.31422220163942016, "grad_norm": 0.7022969722747803, "learning_rate": 0.0001983662526908014, "loss": 0.5048, "step": 27140 }, { "epoch": 0.3143379799008938, "grad_norm": 0.779664158821106, "learning_rate": 0.00019836395437275456, "loss": 0.4742, "step": 27150 }, { "epoch": 0.3144537581623674, "grad_norm": 0.9005945324897766, "learning_rate": 0.00019836165445256658, "loss": 0.4638, "step": 27160 }, { "epoch": 0.31456953642384106, "grad_norm": 0.6903424859046936, "learning_rate": 0.00019835935293027501, "loss": 0.4421, "step": 27170 }, { "epoch": 0.3146853146853147, "grad_norm": 0.703633189201355, "learning_rate": 0.00019835704980591728, "loss": 0.432, "step": 27180 }, { "epoch": 0.3148010929467883, "grad_norm": 0.819511353969574, "learning_rate": 0.00019835474507953093, "loss": 0.5039, "step": 27190 }, { "epoch": 0.31491687120826195, "grad_norm": 0.8665363788604736, "learning_rate": 0.0001983524387511535, "loss": 0.474, "step": 27200 }, { "epoch": 0.3150326494697356, "grad_norm": 0.7022074460983276, "learning_rate": 0.00019835013082082251, "loss": 0.4564, "step": 27210 }, { "epoch": 0.3151484277312092, "grad_norm": 0.6639718413352966, "learning_rate": 0.00019834782128857564, "loss": 0.4455, "step": 27220 }, { "epoch": 0.3152642059926828, "grad_norm": 0.7446359395980835, "learning_rate": 0.00019834551015445048, "loss": 0.4513, "step": 27230 }, { "epoch": 0.3153799842541564, "grad_norm": 0.7960302233695984, "learning_rate": 0.00019834319741848458, "loss": 0.4572, "step": 27240 }, { "epoch": 0.31549576251563005, "grad_norm": 0.7684571743011475, "learning_rate": 0.00019834088308071573, "loss": 0.4554, "step": 27250 }, { "epoch": 0.3156115407771037, "grad_norm": 0.7665683627128601, "learning_rate": 0.00019833856714118154, "loss": 0.4246, "step": 27260 }, { "epoch": 0.3157273190385773, "grad_norm": 0.7351424694061279, "learning_rate": 0.0001983362495999198, "loss": 0.4202, "step": 27270 }, { "epoch": 0.31584309730005095, "grad_norm": 0.8117152452468872, "learning_rate": 0.0001983339304569682, "loss": 0.4568, "step": 27280 }, { "epoch": 0.3159588755615246, "grad_norm": 0.709430456161499, "learning_rate": 0.00019833160971236455, "loss": 0.4897, "step": 27290 }, { "epoch": 0.3160746538229982, "grad_norm": 0.7107734680175781, "learning_rate": 0.00019832928736614663, "loss": 0.468, "step": 27300 }, { "epoch": 0.31619043208447184, "grad_norm": 0.6033035516738892, "learning_rate": 0.0001983269634183523, "loss": 0.4446, "step": 27310 }, { "epoch": 0.31630621034594547, "grad_norm": 0.7143062949180603, "learning_rate": 0.00019832463786901937, "loss": 0.443, "step": 27320 }, { "epoch": 0.31642198860741905, "grad_norm": 0.8292445540428162, "learning_rate": 0.00019832231071818574, "loss": 0.4701, "step": 27330 }, { "epoch": 0.3165377668688927, "grad_norm": 0.6561749577522278, "learning_rate": 0.0001983199819658893, "loss": 0.4481, "step": 27340 }, { "epoch": 0.3166535451303663, "grad_norm": 0.8589171171188354, "learning_rate": 0.000198317651612168, "loss": 0.4562, "step": 27350 }, { "epoch": 0.31676932339183994, "grad_norm": 0.5710141658782959, "learning_rate": 0.0001983153196570598, "loss": 0.4779, "step": 27360 }, { "epoch": 0.3168851016533136, "grad_norm": 0.8408932685852051, "learning_rate": 0.00019831298610060264, "loss": 0.4754, "step": 27370 }, { "epoch": 0.3170008799147872, "grad_norm": 0.8772223591804504, "learning_rate": 0.0001983106509428346, "loss": 0.4604, "step": 27380 }, { "epoch": 0.31711665817626083, "grad_norm": 0.7065361738204956, "learning_rate": 0.00019830831418379363, "loss": 0.4164, "step": 27390 }, { "epoch": 0.31723243643773447, "grad_norm": 0.7624000310897827, "learning_rate": 0.00019830597582351788, "loss": 0.4288, "step": 27400 }, { "epoch": 0.3173482146992081, "grad_norm": 0.7695420384407043, "learning_rate": 0.00019830363586204535, "loss": 0.4787, "step": 27410 }, { "epoch": 0.31746399296068173, "grad_norm": 0.7112919092178345, "learning_rate": 0.0001983012942994142, "loss": 0.439, "step": 27420 }, { "epoch": 0.3175797712221553, "grad_norm": 0.9614130854606628, "learning_rate": 0.0001982989511356626, "loss": 0.4717, "step": 27430 }, { "epoch": 0.31769554948362894, "grad_norm": 0.8063520193099976, "learning_rate": 0.00019829660637082864, "loss": 0.4551, "step": 27440 }, { "epoch": 0.31781132774510257, "grad_norm": 0.7403252720832825, "learning_rate": 0.00019829426000495055, "loss": 0.4603, "step": 27450 }, { "epoch": 0.3179271060065762, "grad_norm": 0.5429824590682983, "learning_rate": 0.00019829191203806656, "loss": 0.4419, "step": 27460 }, { "epoch": 0.31804288426804983, "grad_norm": 0.654305100440979, "learning_rate": 0.00019828956247021492, "loss": 0.4816, "step": 27470 }, { "epoch": 0.31815866252952346, "grad_norm": 0.5370567440986633, "learning_rate": 0.00019828721130143385, "loss": 0.4399, "step": 27480 }, { "epoch": 0.3182744407909971, "grad_norm": 0.8804116249084473, "learning_rate": 0.00019828485853176167, "loss": 0.4632, "step": 27490 }, { "epoch": 0.3183902190524707, "grad_norm": 0.8157013654708862, "learning_rate": 0.00019828250416123675, "loss": 0.4136, "step": 27500 }, { "epoch": 0.31850599731394436, "grad_norm": 0.7248451709747314, "learning_rate": 0.00019828014818989734, "loss": 0.449, "step": 27510 }, { "epoch": 0.318621775575418, "grad_norm": 0.7211676239967346, "learning_rate": 0.00019827779061778192, "loss": 0.4805, "step": 27520 }, { "epoch": 0.31873755383689156, "grad_norm": 0.7188708782196045, "learning_rate": 0.00019827543144492878, "loss": 0.4383, "step": 27530 }, { "epoch": 0.3188533320983652, "grad_norm": 0.8172465562820435, "learning_rate": 0.00019827307067137647, "loss": 0.4597, "step": 27540 }, { "epoch": 0.3189691103598388, "grad_norm": 0.8475064039230347, "learning_rate": 0.00019827070829716336, "loss": 0.456, "step": 27550 }, { "epoch": 0.31908488862131246, "grad_norm": 0.7453060150146484, "learning_rate": 0.00019826834432232793, "loss": 0.442, "step": 27560 }, { "epoch": 0.3192006668827861, "grad_norm": 0.8786172270774841, "learning_rate": 0.00019826597874690867, "loss": 0.4642, "step": 27570 }, { "epoch": 0.3193164451442597, "grad_norm": 0.739477813243866, "learning_rate": 0.0001982636115709442, "loss": 0.4445, "step": 27580 }, { "epoch": 0.31943222340573335, "grad_norm": 0.5354799032211304, "learning_rate": 0.000198261242794473, "loss": 0.4527, "step": 27590 }, { "epoch": 0.319548001667207, "grad_norm": 0.7867555618286133, "learning_rate": 0.00019825887241753365, "loss": 0.4462, "step": 27600 }, { "epoch": 0.3196637799286806, "grad_norm": 0.90069979429245, "learning_rate": 0.00019825650044016477, "loss": 0.4519, "step": 27610 }, { "epoch": 0.3197795581901542, "grad_norm": 0.8946885466575623, "learning_rate": 0.000198254126862405, "loss": 0.4782, "step": 27620 }, { "epoch": 0.3198953364516278, "grad_norm": 0.8960312604904175, "learning_rate": 0.00019825175168429305, "loss": 0.4645, "step": 27630 }, { "epoch": 0.32001111471310145, "grad_norm": 0.7747102379798889, "learning_rate": 0.0001982493749058675, "loss": 0.411, "step": 27640 }, { "epoch": 0.3201268929745751, "grad_norm": 0.7728378772735596, "learning_rate": 0.00019824699652716714, "loss": 0.44, "step": 27650 }, { "epoch": 0.3202426712360487, "grad_norm": 0.7578223347663879, "learning_rate": 0.0001982446165482307, "loss": 0.4576, "step": 27660 }, { "epoch": 0.32035844949752235, "grad_norm": 0.71628338098526, "learning_rate": 0.00019824223496909692, "loss": 0.4697, "step": 27670 }, { "epoch": 0.320474227758996, "grad_norm": 0.7491822838783264, "learning_rate": 0.0001982398517898046, "loss": 0.4641, "step": 27680 }, { "epoch": 0.3205900060204696, "grad_norm": 0.8317740559577942, "learning_rate": 0.00019823746701039258, "loss": 0.4566, "step": 27690 }, { "epoch": 0.32070578428194324, "grad_norm": 0.8668158650398254, "learning_rate": 0.0001982350806308997, "loss": 0.4647, "step": 27700 }, { "epoch": 0.32082156254341687, "grad_norm": 0.8157652616500854, "learning_rate": 0.00019823269265136477, "loss": 0.4393, "step": 27710 }, { "epoch": 0.32093734080489045, "grad_norm": 0.7637067437171936, "learning_rate": 0.00019823030307182678, "loss": 0.4794, "step": 27720 }, { "epoch": 0.3210531190663641, "grad_norm": 0.6508481502532959, "learning_rate": 0.00019822791189232457, "loss": 0.434, "step": 27730 }, { "epoch": 0.3211688973278377, "grad_norm": 0.7629069089889526, "learning_rate": 0.00019822551911289713, "loss": 0.4675, "step": 27740 }, { "epoch": 0.32128467558931134, "grad_norm": 0.7749221324920654, "learning_rate": 0.0001982231247335834, "loss": 0.462, "step": 27750 }, { "epoch": 0.321400453850785, "grad_norm": 0.8904512524604797, "learning_rate": 0.0001982207287544224, "loss": 0.4898, "step": 27760 }, { "epoch": 0.3215162321122586, "grad_norm": 0.7557669281959534, "learning_rate": 0.0001982183311754532, "loss": 0.4494, "step": 27770 }, { "epoch": 0.32163201037373224, "grad_norm": 0.8687517642974854, "learning_rate": 0.0001982159319967148, "loss": 0.442, "step": 27780 }, { "epoch": 0.32174778863520587, "grad_norm": 0.6912224888801575, "learning_rate": 0.0001982135312182462, "loss": 0.4746, "step": 27790 }, { "epoch": 0.3218635668966795, "grad_norm": 0.7170769572257996, "learning_rate": 0.0001982111288400867, "loss": 0.4567, "step": 27800 }, { "epoch": 0.32197934515815313, "grad_norm": 0.7778599262237549, "learning_rate": 0.00019820872486227525, "loss": 0.4493, "step": 27810 }, { "epoch": 0.3220951234196267, "grad_norm": 0.8060257434844971, "learning_rate": 0.0001982063192848511, "loss": 0.439, "step": 27820 }, { "epoch": 0.32221090168110034, "grad_norm": 0.8419181704521179, "learning_rate": 0.0001982039121078534, "loss": 0.438, "step": 27830 }, { "epoch": 0.32232667994257397, "grad_norm": 1.0731698274612427, "learning_rate": 0.00019820150333132137, "loss": 0.4705, "step": 27840 }, { "epoch": 0.3224424582040476, "grad_norm": 0.7142056822776794, "learning_rate": 0.0001981990929552942, "loss": 0.4596, "step": 27850 }, { "epoch": 0.32255823646552123, "grad_norm": 0.690332293510437, "learning_rate": 0.00019819668097981123, "loss": 0.461, "step": 27860 }, { "epoch": 0.32267401472699486, "grad_norm": 0.7570613622665405, "learning_rate": 0.00019819426740491172, "loss": 0.4323, "step": 27870 }, { "epoch": 0.3227897929884685, "grad_norm": 0.7912685871124268, "learning_rate": 0.00019819185223063493, "loss": 0.4661, "step": 27880 }, { "epoch": 0.3229055712499421, "grad_norm": 0.6318713426589966, "learning_rate": 0.00019818943545702022, "loss": 0.4537, "step": 27890 }, { "epoch": 0.32302134951141576, "grad_norm": 0.7996380925178528, "learning_rate": 0.000198187017084107, "loss": 0.4708, "step": 27900 }, { "epoch": 0.3231371277728894, "grad_norm": 0.7950218319892883, "learning_rate": 0.00019818459711193462, "loss": 0.4341, "step": 27910 }, { "epoch": 0.32325290603436296, "grad_norm": 0.8847008347511292, "learning_rate": 0.0001981821755405425, "loss": 0.4937, "step": 27920 }, { "epoch": 0.3233686842958366, "grad_norm": 0.5279824733734131, "learning_rate": 0.00019817975236997008, "loss": 0.4492, "step": 27930 }, { "epoch": 0.3234844625573102, "grad_norm": 0.7658901810646057, "learning_rate": 0.00019817732760025686, "loss": 0.4335, "step": 27940 }, { "epoch": 0.32360024081878386, "grad_norm": 0.9087826013565063, "learning_rate": 0.0001981749012314423, "loss": 0.4623, "step": 27950 }, { "epoch": 0.3237160190802575, "grad_norm": 0.7327570915222168, "learning_rate": 0.0001981724732635659, "loss": 0.4456, "step": 27960 }, { "epoch": 0.3238317973417311, "grad_norm": 0.8276339173316956, "learning_rate": 0.00019817004369666728, "loss": 0.4617, "step": 27970 }, { "epoch": 0.32394757560320475, "grad_norm": 0.6314492225646973, "learning_rate": 0.00019816761253078596, "loss": 0.4099, "step": 27980 }, { "epoch": 0.3240633538646784, "grad_norm": 0.9477747678756714, "learning_rate": 0.0001981651797659615, "loss": 0.4657, "step": 27990 }, { "epoch": 0.324179132126152, "grad_norm": 0.7269591689109802, "learning_rate": 0.00019816274540223363, "loss": 0.4677, "step": 28000 }, { "epoch": 0.324179132126152, "eval_chrf": 72.20928585918801, "eval_loss": 0.6838599443435669, "eval_runtime": 279.063, "eval_samples_per_second": 0.358, "eval_steps_per_second": 0.014, "step": 28000 }, { "epoch": 0.32429491038762565, "grad_norm": 0.7083125710487366, "learning_rate": 0.0001981603094396419, "loss": 0.4615, "step": 28010 }, { "epoch": 0.3244106886490992, "grad_norm": 0.7080761790275574, "learning_rate": 0.00019815787187822605, "loss": 0.4653, "step": 28020 }, { "epoch": 0.32452646691057285, "grad_norm": 0.711959719657898, "learning_rate": 0.00019815543271802576, "loss": 0.419, "step": 28030 }, { "epoch": 0.3246422451720465, "grad_norm": 0.8714075088500977, "learning_rate": 0.00019815299195908076, "loss": 0.4829, "step": 28040 }, { "epoch": 0.3247580234335201, "grad_norm": 0.6999655365943909, "learning_rate": 0.00019815054960143082, "loss": 0.4281, "step": 28050 }, { "epoch": 0.32487380169499375, "grad_norm": 0.8160327076911926, "learning_rate": 0.0001981481056451157, "loss": 0.4191, "step": 28060 }, { "epoch": 0.3249895799564674, "grad_norm": 0.7244841456413269, "learning_rate": 0.0001981456600901752, "loss": 0.4435, "step": 28070 }, { "epoch": 0.325105358217941, "grad_norm": 0.7441781163215637, "learning_rate": 0.00019814321293664918, "loss": 0.4232, "step": 28080 }, { "epoch": 0.32522113647941464, "grad_norm": 0.8062184453010559, "learning_rate": 0.00019814076418457746, "loss": 0.4353, "step": 28090 }, { "epoch": 0.32533691474088827, "grad_norm": 0.8151717782020569, "learning_rate": 0.00019813831383399996, "loss": 0.4546, "step": 28100 }, { "epoch": 0.3254526930023619, "grad_norm": 0.6115370988845825, "learning_rate": 0.00019813586188495658, "loss": 0.4454, "step": 28110 }, { "epoch": 0.3255684712638355, "grad_norm": 0.5664581060409546, "learning_rate": 0.00019813340833748727, "loss": 0.463, "step": 28120 }, { "epoch": 0.3256842495253091, "grad_norm": 0.8573354482650757, "learning_rate": 0.000198130953191632, "loss": 0.4688, "step": 28130 }, { "epoch": 0.32580002778678274, "grad_norm": 0.5428782105445862, "learning_rate": 0.0001981284964474307, "loss": 0.453, "step": 28140 }, { "epoch": 0.3259158060482564, "grad_norm": 0.7835739850997925, "learning_rate": 0.00019812603810492343, "loss": 0.4764, "step": 28150 }, { "epoch": 0.32603158430973, "grad_norm": 0.6520804762840271, "learning_rate": 0.00019812357816415026, "loss": 0.4029, "step": 28160 }, { "epoch": 0.32614736257120364, "grad_norm": 0.7318693995475769, "learning_rate": 0.0001981211166251512, "loss": 0.4532, "step": 28170 }, { "epoch": 0.32626314083267727, "grad_norm": 0.8157362937927246, "learning_rate": 0.00019811865348796635, "loss": 0.4572, "step": 28180 }, { "epoch": 0.3263789190941509, "grad_norm": 0.8469082117080688, "learning_rate": 0.0001981161887526359, "loss": 0.4679, "step": 28190 }, { "epoch": 0.32649469735562453, "grad_norm": 0.7483899593353271, "learning_rate": 0.0001981137224191999, "loss": 0.4418, "step": 28200 }, { "epoch": 0.3266104756170981, "grad_norm": 0.7446368932723999, "learning_rate": 0.00019811125448769857, "loss": 0.4408, "step": 28210 }, { "epoch": 0.32672625387857174, "grad_norm": 0.8507893681526184, "learning_rate": 0.00019810878495817212, "loss": 0.4732, "step": 28220 }, { "epoch": 0.32684203214004537, "grad_norm": 0.8535634875297546, "learning_rate": 0.00019810631383066074, "loss": 0.4414, "step": 28230 }, { "epoch": 0.326957810401519, "grad_norm": 0.8075177073478699, "learning_rate": 0.00019810384110520467, "loss": 0.494, "step": 28240 }, { "epoch": 0.32707358866299263, "grad_norm": 0.7646968364715576, "learning_rate": 0.00019810136678184424, "loss": 0.426, "step": 28250 }, { "epoch": 0.32718936692446626, "grad_norm": 0.6302253007888794, "learning_rate": 0.0001980988908606197, "loss": 0.4815, "step": 28260 }, { "epoch": 0.3273051451859399, "grad_norm": 0.8549550771713257, "learning_rate": 0.00019809641334157143, "loss": 0.4603, "step": 28270 }, { "epoch": 0.3274209234474135, "grad_norm": 0.7948998808860779, "learning_rate": 0.0001980939342247397, "loss": 0.4556, "step": 28280 }, { "epoch": 0.32753670170888716, "grad_norm": 0.8027181029319763, "learning_rate": 0.000198091453510165, "loss": 0.4684, "step": 28290 }, { "epoch": 0.3276524799703608, "grad_norm": 0.7599784135818481, "learning_rate": 0.00019808897119788762, "loss": 0.4443, "step": 28300 }, { "epoch": 0.32776825823183436, "grad_norm": 0.8203538656234741, "learning_rate": 0.0001980864872879481, "loss": 0.4707, "step": 28310 }, { "epoch": 0.327884036493308, "grad_norm": 0.8016511797904968, "learning_rate": 0.00019808400178038683, "loss": 0.4688, "step": 28320 }, { "epoch": 0.3279998147547816, "grad_norm": 0.6890262961387634, "learning_rate": 0.0001980815146752443, "loss": 0.4154, "step": 28330 }, { "epoch": 0.32811559301625526, "grad_norm": 0.8005924820899963, "learning_rate": 0.00019807902597256103, "loss": 0.4742, "step": 28340 }, { "epoch": 0.3282313712777289, "grad_norm": 0.7315450310707092, "learning_rate": 0.00019807653567237757, "loss": 0.4368, "step": 28350 }, { "epoch": 0.3283471495392025, "grad_norm": 0.7996712327003479, "learning_rate": 0.00019807404377473445, "loss": 0.4893, "step": 28360 }, { "epoch": 0.32846292780067615, "grad_norm": 0.794567883014679, "learning_rate": 0.00019807155027967225, "loss": 0.4438, "step": 28370 }, { "epoch": 0.3285787060621498, "grad_norm": 0.9396027326583862, "learning_rate": 0.00019806905518723167, "loss": 0.4588, "step": 28380 }, { "epoch": 0.3286944843236234, "grad_norm": 0.7226946949958801, "learning_rate": 0.00019806655849745325, "loss": 0.4672, "step": 28390 }, { "epoch": 0.32881026258509705, "grad_norm": 0.7486585974693298, "learning_rate": 0.0001980640602103777, "loss": 0.4401, "step": 28400 }, { "epoch": 0.3289260408465706, "grad_norm": 0.7915642857551575, "learning_rate": 0.0001980615603260457, "loss": 0.451, "step": 28410 }, { "epoch": 0.32904181910804425, "grad_norm": 0.7100210785865784, "learning_rate": 0.00019805905884449798, "loss": 0.4529, "step": 28420 }, { "epoch": 0.3291575973695179, "grad_norm": 0.7301798462867737, "learning_rate": 0.00019805655576577528, "loss": 0.4579, "step": 28430 }, { "epoch": 0.3292733756309915, "grad_norm": 0.8199750185012817, "learning_rate": 0.00019805405108991837, "loss": 0.4611, "step": 28440 }, { "epoch": 0.32938915389246515, "grad_norm": 0.7680374383926392, "learning_rate": 0.00019805154481696805, "loss": 0.4536, "step": 28450 }, { "epoch": 0.3295049321539388, "grad_norm": 0.7141996622085571, "learning_rate": 0.0001980490369469651, "loss": 0.4659, "step": 28460 }, { "epoch": 0.3296207104154124, "grad_norm": 0.8633837103843689, "learning_rate": 0.00019804652747995043, "loss": 0.458, "step": 28470 }, { "epoch": 0.32973648867688604, "grad_norm": 0.8147624731063843, "learning_rate": 0.0001980440164159649, "loss": 0.491, "step": 28480 }, { "epoch": 0.3298522669383597, "grad_norm": 0.741216778755188, "learning_rate": 0.00019804150375504938, "loss": 0.4706, "step": 28490 }, { "epoch": 0.3299680451998333, "grad_norm": 0.7062682509422302, "learning_rate": 0.0001980389894972448, "loss": 0.4395, "step": 28500 }, { "epoch": 0.3300838234613069, "grad_norm": 0.6140871047973633, "learning_rate": 0.00019803647364259215, "loss": 0.4383, "step": 28510 }, { "epoch": 0.3301996017227805, "grad_norm": 0.8302459716796875, "learning_rate": 0.00019803395619113237, "loss": 0.449, "step": 28520 }, { "epoch": 0.33031537998425414, "grad_norm": 0.7300112247467041, "learning_rate": 0.00019803143714290647, "loss": 0.4883, "step": 28530 }, { "epoch": 0.3304311582457278, "grad_norm": 0.7336609959602356, "learning_rate": 0.00019802891649795549, "loss": 0.4883, "step": 28540 }, { "epoch": 0.3305469365072014, "grad_norm": 0.5155927538871765, "learning_rate": 0.00019802639425632048, "loss": 0.4536, "step": 28550 }, { "epoch": 0.33066271476867504, "grad_norm": 0.7415698766708374, "learning_rate": 0.00019802387041804254, "loss": 0.4322, "step": 28560 }, { "epoch": 0.33077849303014867, "grad_norm": 0.7414401769638062, "learning_rate": 0.00019802134498316277, "loss": 0.4622, "step": 28570 }, { "epoch": 0.3308942712916223, "grad_norm": 0.8409310579299927, "learning_rate": 0.00019801881795172225, "loss": 0.4454, "step": 28580 }, { "epoch": 0.33101004955309593, "grad_norm": 0.6393105983734131, "learning_rate": 0.0001980162893237622, "loss": 0.4252, "step": 28590 }, { "epoch": 0.33112582781456956, "grad_norm": 0.7128394842147827, "learning_rate": 0.00019801375909932385, "loss": 0.4631, "step": 28600 }, { "epoch": 0.33124160607604314, "grad_norm": 0.7944321036338806, "learning_rate": 0.0001980112272784483, "loss": 0.4658, "step": 28610 }, { "epoch": 0.33135738433751677, "grad_norm": 0.6957277655601501, "learning_rate": 0.00019800869386117685, "loss": 0.4486, "step": 28620 }, { "epoch": 0.3314731625989904, "grad_norm": 1.0048245191574097, "learning_rate": 0.00019800615884755076, "loss": 0.4235, "step": 28630 }, { "epoch": 0.33158894086046403, "grad_norm": 0.7389762997627258, "learning_rate": 0.00019800362223761133, "loss": 0.426, "step": 28640 }, { "epoch": 0.33170471912193766, "grad_norm": 0.8358013033866882, "learning_rate": 0.00019800108403139984, "loss": 0.4481, "step": 28650 }, { "epoch": 0.3318204973834113, "grad_norm": 0.7752150297164917, "learning_rate": 0.00019799854422895767, "loss": 0.4602, "step": 28660 }, { "epoch": 0.3319362756448849, "grad_norm": 0.7261896133422852, "learning_rate": 0.00019799600283032618, "loss": 0.4502, "step": 28670 }, { "epoch": 0.33205205390635856, "grad_norm": 0.8711327314376831, "learning_rate": 0.00019799345983554677, "loss": 0.4487, "step": 28680 }, { "epoch": 0.3321678321678322, "grad_norm": 0.6738938689231873, "learning_rate": 0.00019799091524466081, "loss": 0.4594, "step": 28690 }, { "epoch": 0.3322836104293058, "grad_norm": 0.7297458648681641, "learning_rate": 0.00019798836905770978, "loss": 0.4177, "step": 28700 }, { "epoch": 0.3323993886907794, "grad_norm": 0.831659734249115, "learning_rate": 0.0001979858212747352, "loss": 0.4731, "step": 28710 }, { "epoch": 0.332515166952253, "grad_norm": 0.7975872159004211, "learning_rate": 0.0001979832718957785, "loss": 0.4703, "step": 28720 }, { "epoch": 0.33263094521372666, "grad_norm": 0.843238115310669, "learning_rate": 0.00019798072092088123, "loss": 0.4705, "step": 28730 }, { "epoch": 0.3327467234752003, "grad_norm": 0.8644689917564392, "learning_rate": 0.00019797816835008492, "loss": 0.4634, "step": 28740 }, { "epoch": 0.3328625017366739, "grad_norm": 0.724216639995575, "learning_rate": 0.00019797561418343117, "loss": 0.4474, "step": 28750 }, { "epoch": 0.33297827999814755, "grad_norm": 0.8634474873542786, "learning_rate": 0.00019797305842096156, "loss": 0.4737, "step": 28760 }, { "epoch": 0.3330940582596212, "grad_norm": 0.7459732890129089, "learning_rate": 0.00019797050106271774, "loss": 0.4482, "step": 28770 }, { "epoch": 0.3332098365210948, "grad_norm": 0.8599674105644226, "learning_rate": 0.0001979679421087414, "loss": 0.4792, "step": 28780 }, { "epoch": 0.33332561478256845, "grad_norm": 0.7562803030014038, "learning_rate": 0.0001979653815590741, "loss": 0.4429, "step": 28790 }, { "epoch": 0.3334413930440421, "grad_norm": 0.5722392797470093, "learning_rate": 0.0001979628194137577, "loss": 0.4477, "step": 28800 }, { "epoch": 0.33355717130551565, "grad_norm": 0.9015145301818848, "learning_rate": 0.0001979602556728338, "loss": 0.4436, "step": 28810 }, { "epoch": 0.3336729495669893, "grad_norm": 0.708286702632904, "learning_rate": 0.00019795769033634426, "loss": 0.4375, "step": 28820 }, { "epoch": 0.3337887278284629, "grad_norm": 0.7045858502388, "learning_rate": 0.00019795512340433077, "loss": 0.4309, "step": 28830 }, { "epoch": 0.33390450608993655, "grad_norm": 0.5952958464622498, "learning_rate": 0.0001979525548768352, "loss": 0.4651, "step": 28840 }, { "epoch": 0.3340202843514102, "grad_norm": 0.8038861751556396, "learning_rate": 0.00019794998475389938, "loss": 0.4359, "step": 28850 }, { "epoch": 0.3341360626128838, "grad_norm": 0.750450849533081, "learning_rate": 0.00019794741303556516, "loss": 0.4545, "step": 28860 }, { "epoch": 0.33425184087435744, "grad_norm": 0.7617290019989014, "learning_rate": 0.00019794483972187442, "loss": 0.4659, "step": 28870 }, { "epoch": 0.3343676191358311, "grad_norm": 0.7738581299781799, "learning_rate": 0.00019794226481286914, "loss": 0.4279, "step": 28880 }, { "epoch": 0.3344833973973047, "grad_norm": 0.6191737651824951, "learning_rate": 0.00019793968830859114, "loss": 0.4197, "step": 28890 }, { "epoch": 0.3345991756587783, "grad_norm": 0.7525450587272644, "learning_rate": 0.00019793711020908247, "loss": 0.4557, "step": 28900 }, { "epoch": 0.3347149539202519, "grad_norm": 0.8681761622428894, "learning_rate": 0.00019793453051438514, "loss": 0.4452, "step": 28910 }, { "epoch": 0.33483073218172554, "grad_norm": 0.8026580810546875, "learning_rate": 0.0001979319492245411, "loss": 0.4687, "step": 28920 }, { "epoch": 0.3349465104431992, "grad_norm": 0.7069211602210999, "learning_rate": 0.00019792936633959243, "loss": 0.4483, "step": 28930 }, { "epoch": 0.3350622887046728, "grad_norm": 0.7879505157470703, "learning_rate": 0.0001979267818595812, "loss": 0.439, "step": 28940 }, { "epoch": 0.33517806696614644, "grad_norm": 0.8035557866096497, "learning_rate": 0.00019792419578454947, "loss": 0.4152, "step": 28950 }, { "epoch": 0.33529384522762007, "grad_norm": 0.8120391964912415, "learning_rate": 0.0001979216081145394, "loss": 0.4531, "step": 28960 }, { "epoch": 0.3354096234890937, "grad_norm": 0.8428208231925964, "learning_rate": 0.00019791901884959315, "loss": 0.4238, "step": 28970 }, { "epoch": 0.33552540175056733, "grad_norm": 0.7718029022216797, "learning_rate": 0.00019791642798975287, "loss": 0.4463, "step": 28980 }, { "epoch": 0.33564118001204096, "grad_norm": 0.7140524983406067, "learning_rate": 0.00019791383553506076, "loss": 0.4389, "step": 28990 }, { "epoch": 0.33575695827351454, "grad_norm": 0.7626215219497681, "learning_rate": 0.00019791124148555903, "loss": 0.4059, "step": 29000 }, { "epoch": 0.33575695827351454, "eval_chrf": 79.30603599493149, "eval_loss": 0.7022404670715332, "eval_runtime": 348.5992, "eval_samples_per_second": 0.287, "eval_steps_per_second": 0.011, "step": 29000 }, { "epoch": 0.33587273653498817, "grad_norm": 0.7247495055198669, "learning_rate": 0.00019790864584128998, "loss": 0.3932, "step": 29010 }, { "epoch": 0.3359885147964618, "grad_norm": 0.8484256863594055, "learning_rate": 0.00019790604860229585, "loss": 0.4566, "step": 29020 }, { "epoch": 0.33610429305793543, "grad_norm": 0.7210016250610352, "learning_rate": 0.00019790344976861895, "loss": 0.4445, "step": 29030 }, { "epoch": 0.33622007131940906, "grad_norm": 0.6728957891464233, "learning_rate": 0.0001979008493403016, "loss": 0.4357, "step": 29040 }, { "epoch": 0.3363358495808827, "grad_norm": 0.7190279364585876, "learning_rate": 0.00019789824731738617, "loss": 0.4679, "step": 29050 }, { "epoch": 0.3364516278423563, "grad_norm": 0.7486171722412109, "learning_rate": 0.00019789564369991505, "loss": 0.479, "step": 29060 }, { "epoch": 0.33656740610382996, "grad_norm": 0.7148741483688354, "learning_rate": 0.00019789303848793063, "loss": 0.4361, "step": 29070 }, { "epoch": 0.3366831843653036, "grad_norm": 0.6511372327804565, "learning_rate": 0.0001978904316814753, "loss": 0.4669, "step": 29080 }, { "epoch": 0.3367989626267772, "grad_norm": 0.9360610246658325, "learning_rate": 0.00019788782328059165, "loss": 0.4719, "step": 29090 }, { "epoch": 0.3369147408882508, "grad_norm": 0.900389552116394, "learning_rate": 0.00019788521328532204, "loss": 0.4254, "step": 29100 }, { "epoch": 0.33703051914972443, "grad_norm": 0.7916105389595032, "learning_rate": 0.00019788260169570902, "loss": 0.4634, "step": 29110 }, { "epoch": 0.33714629741119806, "grad_norm": 0.6842415928840637, "learning_rate": 0.00019787998851179513, "loss": 0.4727, "step": 29120 }, { "epoch": 0.3372620756726717, "grad_norm": 1.0879751443862915, "learning_rate": 0.00019787737373362296, "loss": 0.4772, "step": 29130 }, { "epoch": 0.3373778539341453, "grad_norm": 0.742527186870575, "learning_rate": 0.00019787475736123507, "loss": 0.4523, "step": 29140 }, { "epoch": 0.33749363219561895, "grad_norm": 0.9419659376144409, "learning_rate": 0.00019787213939467406, "loss": 0.4492, "step": 29150 }, { "epoch": 0.3376094104570926, "grad_norm": 0.6919956803321838, "learning_rate": 0.00019786951983398262, "loss": 0.4202, "step": 29160 }, { "epoch": 0.3377251887185662, "grad_norm": 0.6615548133850098, "learning_rate": 0.00019786689867920334, "loss": 0.4369, "step": 29170 }, { "epoch": 0.33784096698003985, "grad_norm": 0.8853069543838501, "learning_rate": 0.00019786427593037898, "loss": 0.4187, "step": 29180 }, { "epoch": 0.3379567452415135, "grad_norm": 0.699215292930603, "learning_rate": 0.00019786165158755227, "loss": 0.4412, "step": 29190 }, { "epoch": 0.33807252350298705, "grad_norm": 0.7400662899017334, "learning_rate": 0.0001978590256507659, "loss": 0.4448, "step": 29200 }, { "epoch": 0.3381883017644607, "grad_norm": 0.4990818500518799, "learning_rate": 0.00019785639812006268, "loss": 0.451, "step": 29210 }, { "epoch": 0.3383040800259343, "grad_norm": 0.8322715163230896, "learning_rate": 0.00019785376899548538, "loss": 0.4405, "step": 29220 }, { "epoch": 0.33841985828740795, "grad_norm": 0.854572057723999, "learning_rate": 0.00019785113827707685, "loss": 0.4601, "step": 29230 }, { "epoch": 0.3385356365488816, "grad_norm": 0.7197252511978149, "learning_rate": 0.0001978485059648799, "loss": 0.4834, "step": 29240 }, { "epoch": 0.3386514148103552, "grad_norm": 0.8339878916740417, "learning_rate": 0.00019784587205893742, "loss": 0.4524, "step": 29250 }, { "epoch": 0.33876719307182884, "grad_norm": 0.728815495967865, "learning_rate": 0.00019784323655929237, "loss": 0.4471, "step": 29260 }, { "epoch": 0.3388829713333025, "grad_norm": 0.845120370388031, "learning_rate": 0.00019784059946598758, "loss": 0.468, "step": 29270 }, { "epoch": 0.3389987495947761, "grad_norm": 0.8616262674331665, "learning_rate": 0.00019783796077906608, "loss": 0.4688, "step": 29280 }, { "epoch": 0.33911452785624974, "grad_norm": 0.7504338622093201, "learning_rate": 0.00019783532049857082, "loss": 0.4663, "step": 29290 }, { "epoch": 0.3392303061177233, "grad_norm": 0.7446357607841492, "learning_rate": 0.00019783267862454477, "loss": 0.4626, "step": 29300 }, { "epoch": 0.33934608437919694, "grad_norm": 0.7037805318832397, "learning_rate": 0.00019783003515703104, "loss": 0.4518, "step": 29310 }, { "epoch": 0.3394618626406706, "grad_norm": 0.7374095916748047, "learning_rate": 0.00019782739009607258, "loss": 0.4527, "step": 29320 }, { "epoch": 0.3395776409021442, "grad_norm": 0.6088375449180603, "learning_rate": 0.00019782474344171258, "loss": 0.4402, "step": 29330 }, { "epoch": 0.33969341916361784, "grad_norm": 0.8041601777076721, "learning_rate": 0.00019782209519399408, "loss": 0.4567, "step": 29340 }, { "epoch": 0.33980919742509147, "grad_norm": 0.7952395677566528, "learning_rate": 0.00019781944535296025, "loss": 0.4533, "step": 29350 }, { "epoch": 0.3399249756865651, "grad_norm": 0.7202016711235046, "learning_rate": 0.00019781679391865423, "loss": 0.4331, "step": 29360 }, { "epoch": 0.34004075394803873, "grad_norm": 0.8369538187980652, "learning_rate": 0.0001978141408911192, "loss": 0.4491, "step": 29370 }, { "epoch": 0.34015653220951236, "grad_norm": 0.6768138408660889, "learning_rate": 0.00019781148627039837, "loss": 0.4605, "step": 29380 }, { "epoch": 0.340272310470986, "grad_norm": 0.7428684234619141, "learning_rate": 0.000197808830056535, "loss": 0.4917, "step": 29390 }, { "epoch": 0.34038808873245957, "grad_norm": 0.973679780960083, "learning_rate": 0.00019780617224957238, "loss": 0.457, "step": 29400 }, { "epoch": 0.3405038669939332, "grad_norm": 0.785365879535675, "learning_rate": 0.00019780351284955376, "loss": 0.4575, "step": 29410 }, { "epoch": 0.34061964525540683, "grad_norm": 0.6418831944465637, "learning_rate": 0.00019780085185652245, "loss": 0.4742, "step": 29420 }, { "epoch": 0.34073542351688046, "grad_norm": 0.7536094188690186, "learning_rate": 0.0001977981892705218, "loss": 0.4387, "step": 29430 }, { "epoch": 0.3408512017783541, "grad_norm": 0.7309443950653076, "learning_rate": 0.0001977955250915952, "loss": 0.4493, "step": 29440 }, { "epoch": 0.3409669800398277, "grad_norm": 0.7243247628211975, "learning_rate": 0.00019779285931978598, "loss": 0.4198, "step": 29450 }, { "epoch": 0.34108275830130136, "grad_norm": 0.7006673216819763, "learning_rate": 0.00019779019195513762, "loss": 0.4267, "step": 29460 }, { "epoch": 0.341198536562775, "grad_norm": 0.6837512254714966, "learning_rate": 0.0001977875229976936, "loss": 0.4587, "step": 29470 }, { "epoch": 0.3413143148242486, "grad_norm": 0.8737258315086365, "learning_rate": 0.00019778485244749728, "loss": 0.4792, "step": 29480 }, { "epoch": 0.3414300930857222, "grad_norm": 0.7804811000823975, "learning_rate": 0.00019778218030459225, "loss": 0.4481, "step": 29490 }, { "epoch": 0.34154587134719583, "grad_norm": 0.774608314037323, "learning_rate": 0.000197779506569022, "loss": 0.4453, "step": 29500 }, { "epoch": 0.34166164960866946, "grad_norm": 0.5803654789924622, "learning_rate": 0.00019777683124083009, "loss": 0.4517, "step": 29510 }, { "epoch": 0.3417774278701431, "grad_norm": 0.7635154724121094, "learning_rate": 0.0001977741543200601, "loss": 0.4569, "step": 29520 }, { "epoch": 0.3418932061316167, "grad_norm": 0.7757710814476013, "learning_rate": 0.00019777147580675558, "loss": 0.432, "step": 29530 }, { "epoch": 0.34200898439309035, "grad_norm": 0.7158822417259216, "learning_rate": 0.00019776879570096022, "loss": 0.4493, "step": 29540 }, { "epoch": 0.342124762654564, "grad_norm": 0.7578673362731934, "learning_rate": 0.00019776611400271768, "loss": 0.452, "step": 29550 }, { "epoch": 0.3422405409160376, "grad_norm": 0.9474319219589233, "learning_rate": 0.00019776343071207158, "loss": 0.4704, "step": 29560 }, { "epoch": 0.34235631917751125, "grad_norm": 0.7483941316604614, "learning_rate": 0.00019776074582906564, "loss": 0.4652, "step": 29570 }, { "epoch": 0.3424720974389849, "grad_norm": 0.6780919432640076, "learning_rate": 0.00019775805935374365, "loss": 0.4699, "step": 29580 }, { "epoch": 0.34258787570045846, "grad_norm": 0.7262126803398132, "learning_rate": 0.0001977553712861493, "loss": 0.4865, "step": 29590 }, { "epoch": 0.3427036539619321, "grad_norm": 0.7847318649291992, "learning_rate": 0.00019775268162632639, "loss": 0.4735, "step": 29600 }, { "epoch": 0.3428194322234057, "grad_norm": 0.7372470498085022, "learning_rate": 0.00019774999037431873, "loss": 0.4448, "step": 29610 }, { "epoch": 0.34293521048487935, "grad_norm": 0.7142220139503479, "learning_rate": 0.0001977472975301702, "loss": 0.4192, "step": 29620 }, { "epoch": 0.343050988746353, "grad_norm": 0.633639395236969, "learning_rate": 0.0001977446030939246, "loss": 0.4212, "step": 29630 }, { "epoch": 0.3431667670078266, "grad_norm": 0.8264889717102051, "learning_rate": 0.00019774190706562582, "loss": 0.4497, "step": 29640 }, { "epoch": 0.34328254526930024, "grad_norm": 0.6413941979408264, "learning_rate": 0.00019773920944531778, "loss": 0.4604, "step": 29650 }, { "epoch": 0.3433983235307739, "grad_norm": 0.9008981585502625, "learning_rate": 0.00019773651023304446, "loss": 0.4372, "step": 29660 }, { "epoch": 0.3435141017922475, "grad_norm": 0.7054181098937988, "learning_rate": 0.0001977338094288498, "loss": 0.4449, "step": 29670 }, { "epoch": 0.34362988005372114, "grad_norm": 0.6859874129295349, "learning_rate": 0.00019773110703277778, "loss": 0.4227, "step": 29680 }, { "epoch": 0.3437456583151947, "grad_norm": 0.8675273656845093, "learning_rate": 0.0001977284030448724, "loss": 0.4179, "step": 29690 }, { "epoch": 0.34386143657666834, "grad_norm": 0.9087483882904053, "learning_rate": 0.00019772569746517773, "loss": 0.4253, "step": 29700 }, { "epoch": 0.343977214838142, "grad_norm": 0.6598452925682068, "learning_rate": 0.00019772299029373783, "loss": 0.4568, "step": 29710 }, { "epoch": 0.3440929930996156, "grad_norm": 0.7858065962791443, "learning_rate": 0.0001977202815305968, "loss": 0.4544, "step": 29720 }, { "epoch": 0.34420877136108924, "grad_norm": 0.8278815150260925, "learning_rate": 0.00019771757117579872, "loss": 0.4619, "step": 29730 }, { "epoch": 0.34432454962256287, "grad_norm": 0.7692482471466064, "learning_rate": 0.00019771485922938782, "loss": 0.4659, "step": 29740 }, { "epoch": 0.3444403278840365, "grad_norm": 0.7624439001083374, "learning_rate": 0.00019771214569140822, "loss": 0.445, "step": 29750 }, { "epoch": 0.34455610614551013, "grad_norm": 0.739372193813324, "learning_rate": 0.0001977094305619041, "loss": 0.4529, "step": 29760 }, { "epoch": 0.34467188440698376, "grad_norm": 0.6535555124282837, "learning_rate": 0.0001977067138409197, "loss": 0.503, "step": 29770 }, { "epoch": 0.3447876626684574, "grad_norm": 0.8062235116958618, "learning_rate": 0.00019770399552849926, "loss": 0.4537, "step": 29780 }, { "epoch": 0.34490344092993097, "grad_norm": 0.7946727275848389, "learning_rate": 0.0001977012756246871, "loss": 0.4443, "step": 29790 }, { "epoch": 0.3450192191914046, "grad_norm": 0.7691386938095093, "learning_rate": 0.00019769855412952748, "loss": 0.4302, "step": 29800 }, { "epoch": 0.34513499745287823, "grad_norm": 0.7168875336647034, "learning_rate": 0.00019769583104306472, "loss": 0.4599, "step": 29810 }, { "epoch": 0.34525077571435187, "grad_norm": 0.7042225003242493, "learning_rate": 0.00019769310636534322, "loss": 0.46, "step": 29820 }, { "epoch": 0.3453665539758255, "grad_norm": 0.7086291909217834, "learning_rate": 0.0001976903800964073, "loss": 0.4163, "step": 29830 }, { "epoch": 0.34548233223729913, "grad_norm": 0.8071205615997314, "learning_rate": 0.00019768765223630145, "loss": 0.4245, "step": 29840 }, { "epoch": 0.34559811049877276, "grad_norm": 0.9719737768173218, "learning_rate": 0.00019768492278506998, "loss": 0.4434, "step": 29850 }, { "epoch": 0.3457138887602464, "grad_norm": 0.7370936274528503, "learning_rate": 0.00019768219174275746, "loss": 0.4269, "step": 29860 }, { "epoch": 0.34582966702172, "grad_norm": 0.6700521111488342, "learning_rate": 0.00019767945910940832, "loss": 0.3961, "step": 29870 }, { "epoch": 0.34594544528319365, "grad_norm": 0.6146963238716125, "learning_rate": 0.00019767672488506705, "loss": 0.4928, "step": 29880 }, { "epoch": 0.34606122354466723, "grad_norm": 0.6936514377593994, "learning_rate": 0.00019767398906977824, "loss": 0.4466, "step": 29890 }, { "epoch": 0.34617700180614086, "grad_norm": 0.9286683797836304, "learning_rate": 0.00019767125166358639, "loss": 0.4621, "step": 29900 }, { "epoch": 0.3462927800676145, "grad_norm": 0.8040321469306946, "learning_rate": 0.00019766851266653617, "loss": 0.4254, "step": 29910 }, { "epoch": 0.3464085583290881, "grad_norm": 0.6007877588272095, "learning_rate": 0.0001976657720786721, "loss": 0.4359, "step": 29920 }, { "epoch": 0.34652433659056175, "grad_norm": 0.691734790802002, "learning_rate": 0.00019766302990003887, "loss": 0.4522, "step": 29930 }, { "epoch": 0.3466401148520354, "grad_norm": 0.778103768825531, "learning_rate": 0.0001976602861306811, "loss": 0.421, "step": 29940 }, { "epoch": 0.346755893113509, "grad_norm": 0.8104856014251709, "learning_rate": 0.00019765754077064355, "loss": 0.4594, "step": 29950 }, { "epoch": 0.34687167137498265, "grad_norm": 0.7215676307678223, "learning_rate": 0.00019765479381997087, "loss": 0.4426, "step": 29960 }, { "epoch": 0.3469874496364563, "grad_norm": 0.7769370675086975, "learning_rate": 0.00019765204527870788, "loss": 0.4394, "step": 29970 }, { "epoch": 0.3471032278979299, "grad_norm": 0.6785605549812317, "learning_rate": 0.00019764929514689926, "loss": 0.4404, "step": 29980 }, { "epoch": 0.3472190061594035, "grad_norm": 0.6804041862487793, "learning_rate": 0.00019764654342458986, "loss": 0.4735, "step": 29990 }, { "epoch": 0.3473347844208771, "grad_norm": 0.6676384210586548, "learning_rate": 0.00019764379011182447, "loss": 0.4283, "step": 30000 }, { "epoch": 0.3473347844208771, "eval_chrf": 80.61622692012071, "eval_loss": 0.6926522254943848, "eval_runtime": 197.0152, "eval_samples_per_second": 0.508, "eval_steps_per_second": 0.02, "step": 30000 }, { "epoch": 0.34745056268235075, "grad_norm": 0.9586856961250305, "learning_rate": 0.00019764103520864796, "loss": 0.4792, "step": 30010 }, { "epoch": 0.3475663409438244, "grad_norm": 0.5639489889144897, "learning_rate": 0.00019763827871510517, "loss": 0.4224, "step": 30020 }, { "epoch": 0.347682119205298, "grad_norm": 0.9713011980056763, "learning_rate": 0.00019763552063124101, "loss": 0.4641, "step": 30030 }, { "epoch": 0.34779789746677164, "grad_norm": 0.5006837248802185, "learning_rate": 0.00019763276095710044, "loss": 0.4235, "step": 30040 }, { "epoch": 0.3479136757282453, "grad_norm": 0.8714874982833862, "learning_rate": 0.0001976299996927284, "loss": 0.4555, "step": 30050 }, { "epoch": 0.3480294539897189, "grad_norm": 0.8021594882011414, "learning_rate": 0.0001976272368381698, "loss": 0.4542, "step": 30060 }, { "epoch": 0.34814523225119254, "grad_norm": 0.7853084802627563, "learning_rate": 0.0001976244723934697, "loss": 0.4639, "step": 30070 }, { "epoch": 0.34826101051266617, "grad_norm": 0.8078621625900269, "learning_rate": 0.00019762170635867312, "loss": 0.4639, "step": 30080 }, { "epoch": 0.34837678877413975, "grad_norm": 0.6852286458015442, "learning_rate": 0.0001976189387338251, "loss": 0.4596, "step": 30090 }, { "epoch": 0.3484925670356134, "grad_norm": 0.8198529481887817, "learning_rate": 0.0001976161695189707, "loss": 0.4224, "step": 30100 }, { "epoch": 0.348608345297087, "grad_norm": 0.8080971837043762, "learning_rate": 0.00019761339871415508, "loss": 0.4252, "step": 30110 }, { "epoch": 0.34872412355856064, "grad_norm": 0.8420395851135254, "learning_rate": 0.00019761062631942333, "loss": 0.4187, "step": 30120 }, { "epoch": 0.34883990182003427, "grad_norm": 0.6990167498588562, "learning_rate": 0.00019760785233482065, "loss": 0.4797, "step": 30130 }, { "epoch": 0.3489556800815079, "grad_norm": 0.6790205240249634, "learning_rate": 0.00019760507676039213, "loss": 0.4332, "step": 30140 }, { "epoch": 0.34907145834298153, "grad_norm": 0.6785823106765747, "learning_rate": 0.00019760229959618308, "loss": 0.4605, "step": 30150 }, { "epoch": 0.34918723660445516, "grad_norm": 0.8260369300842285, "learning_rate": 0.00019759952084223869, "loss": 0.4402, "step": 30160 }, { "epoch": 0.3493030148659288, "grad_norm": 0.7020462155342102, "learning_rate": 0.0001975967404986042, "loss": 0.4645, "step": 30170 }, { "epoch": 0.34941879312740237, "grad_norm": 0.8621097803115845, "learning_rate": 0.0001975939585653249, "loss": 0.4818, "step": 30180 }, { "epoch": 0.349534571388876, "grad_norm": 0.9369802474975586, "learning_rate": 0.00019759117504244617, "loss": 0.4194, "step": 30190 }, { "epoch": 0.34965034965034963, "grad_norm": 0.7883701920509338, "learning_rate": 0.00019758838993001328, "loss": 0.487, "step": 30200 }, { "epoch": 0.34976612791182327, "grad_norm": 0.7517625689506531, "learning_rate": 0.0001975856032280716, "loss": 0.4145, "step": 30210 }, { "epoch": 0.3498819061732969, "grad_norm": 0.7183372974395752, "learning_rate": 0.00019758281493666654, "loss": 0.4513, "step": 30220 }, { "epoch": 0.34999768443477053, "grad_norm": 1.0402839183807373, "learning_rate": 0.00019758002505584346, "loss": 0.3996, "step": 30230 }, { "epoch": 0.35011346269624416, "grad_norm": 0.7967755794525146, "learning_rate": 0.0001975772335856479, "loss": 0.3919, "step": 30240 }, { "epoch": 0.3502292409577178, "grad_norm": 0.7831071615219116, "learning_rate": 0.00019757444052612524, "loss": 0.4312, "step": 30250 }, { "epoch": 0.3503450192191914, "grad_norm": 0.7764838933944702, "learning_rate": 0.000197571645877321, "loss": 0.4604, "step": 30260 }, { "epoch": 0.35046079748066505, "grad_norm": 0.8191924691200256, "learning_rate": 0.00019756884963928074, "loss": 0.4338, "step": 30270 }, { "epoch": 0.35057657574213863, "grad_norm": 0.8969576954841614, "learning_rate": 0.00019756605181204997, "loss": 0.4791, "step": 30280 }, { "epoch": 0.35069235400361226, "grad_norm": 0.5914546847343445, "learning_rate": 0.00019756325239567426, "loss": 0.432, "step": 30290 }, { "epoch": 0.3508081322650859, "grad_norm": 0.7140315175056458, "learning_rate": 0.0001975604513901992, "loss": 0.4279, "step": 30300 }, { "epoch": 0.3509239105265595, "grad_norm": 0.7911560535430908, "learning_rate": 0.00019755764879567038, "loss": 0.4474, "step": 30310 }, { "epoch": 0.35103968878803316, "grad_norm": 0.7750484943389893, "learning_rate": 0.00019755484461213353, "loss": 0.4625, "step": 30320 }, { "epoch": 0.3511554670495068, "grad_norm": 0.8546374440193176, "learning_rate": 0.0001975520388396343, "loss": 0.505, "step": 30330 }, { "epoch": 0.3512712453109804, "grad_norm": 1.0121471881866455, "learning_rate": 0.00019754923147821834, "loss": 0.4737, "step": 30340 }, { "epoch": 0.35138702357245405, "grad_norm": 0.7756264805793762, "learning_rate": 0.0001975464225279314, "loss": 0.4451, "step": 30350 }, { "epoch": 0.3515028018339277, "grad_norm": 0.7790165543556213, "learning_rate": 0.00019754361198881924, "loss": 0.4125, "step": 30360 }, { "epoch": 0.3516185800954013, "grad_norm": 0.9156092405319214, "learning_rate": 0.0001975407998609277, "loss": 0.4547, "step": 30370 }, { "epoch": 0.3517343583568749, "grad_norm": 0.7473875284194946, "learning_rate": 0.0001975379861443025, "loss": 0.421, "step": 30380 }, { "epoch": 0.3518501366183485, "grad_norm": 0.6192530393600464, "learning_rate": 0.00019753517083898942, "loss": 0.4271, "step": 30390 }, { "epoch": 0.35196591487982215, "grad_norm": 0.749930202960968, "learning_rate": 0.00019753235394503443, "loss": 0.4719, "step": 30400 }, { "epoch": 0.3520816931412958, "grad_norm": 0.8089101314544678, "learning_rate": 0.0001975295354624834, "loss": 0.4234, "step": 30410 }, { "epoch": 0.3521974714027694, "grad_norm": 0.7470836043357849, "learning_rate": 0.00019752671539138218, "loss": 0.4627, "step": 30420 }, { "epoch": 0.35231324966424304, "grad_norm": 0.7350556254386902, "learning_rate": 0.00019752389373177672, "loss": 0.4667, "step": 30430 }, { "epoch": 0.3524290279257167, "grad_norm": 0.8652623891830444, "learning_rate": 0.00019752107048371298, "loss": 0.4819, "step": 30440 }, { "epoch": 0.3525448061871903, "grad_norm": 0.6465603113174438, "learning_rate": 0.00019751824564723697, "loss": 0.4403, "step": 30450 }, { "epoch": 0.35266058444866394, "grad_norm": 0.7292660474777222, "learning_rate": 0.0001975154192223947, "loss": 0.4178, "step": 30460 }, { "epoch": 0.35277636271013757, "grad_norm": 0.9069141149520874, "learning_rate": 0.00019751259120923215, "loss": 0.4381, "step": 30470 }, { "epoch": 0.35289214097161115, "grad_norm": 0.8609533905982971, "learning_rate": 0.00019750976160779545, "loss": 0.423, "step": 30480 }, { "epoch": 0.3530079192330848, "grad_norm": 0.7562017440795898, "learning_rate": 0.00019750693041813063, "loss": 0.4364, "step": 30490 }, { "epoch": 0.3531236974945584, "grad_norm": 0.9907569885253906, "learning_rate": 0.00019750409764028386, "loss": 0.4564, "step": 30500 }, { "epoch": 0.35323947575603204, "grad_norm": 0.7624794840812683, "learning_rate": 0.00019750126327430125, "loss": 0.4497, "step": 30510 }, { "epoch": 0.35335525401750567, "grad_norm": 0.6962045431137085, "learning_rate": 0.00019749842732022895, "loss": 0.4709, "step": 30520 }, { "epoch": 0.3534710322789793, "grad_norm": 0.6380375027656555, "learning_rate": 0.0001974955897781132, "loss": 0.4577, "step": 30530 }, { "epoch": 0.35358681054045293, "grad_norm": 0.8284725546836853, "learning_rate": 0.00019749275064800018, "loss": 0.4691, "step": 30540 }, { "epoch": 0.35370258880192657, "grad_norm": 0.7660990953445435, "learning_rate": 0.00019748990992993613, "loss": 0.436, "step": 30550 }, { "epoch": 0.3538183670634002, "grad_norm": 0.8985053896903992, "learning_rate": 0.00019748706762396734, "loss": 0.4626, "step": 30560 }, { "epoch": 0.35393414532487383, "grad_norm": 0.7087849974632263, "learning_rate": 0.0001974842237301401, "loss": 0.418, "step": 30570 }, { "epoch": 0.3540499235863474, "grad_norm": 0.7461625933647156, "learning_rate": 0.00019748137824850072, "loss": 0.4462, "step": 30580 }, { "epoch": 0.35416570184782104, "grad_norm": 0.9012613892555237, "learning_rate": 0.00019747853117909556, "loss": 0.4615, "step": 30590 }, { "epoch": 0.35428148010929467, "grad_norm": 0.9633703827857971, "learning_rate": 0.00019747568252197098, "loss": 0.4091, "step": 30600 }, { "epoch": 0.3543972583707683, "grad_norm": 0.7843459844589233, "learning_rate": 0.00019747283227717336, "loss": 0.4461, "step": 30610 }, { "epoch": 0.35451303663224193, "grad_norm": 0.7528501152992249, "learning_rate": 0.0001974699804447492, "loss": 0.5251, "step": 30620 }, { "epoch": 0.35462881489371556, "grad_norm": 0.726162850856781, "learning_rate": 0.0001974671270247449, "loss": 0.4266, "step": 30630 }, { "epoch": 0.3547445931551892, "grad_norm": 0.729716956615448, "learning_rate": 0.0001974642720172069, "loss": 0.4808, "step": 30640 }, { "epoch": 0.3548603714166628, "grad_norm": 0.7938385605812073, "learning_rate": 0.00019746141542218175, "loss": 0.4584, "step": 30650 }, { "epoch": 0.35497614967813645, "grad_norm": 0.7267888188362122, "learning_rate": 0.00019745855723971598, "loss": 0.4256, "step": 30660 }, { "epoch": 0.3550919279396101, "grad_norm": 0.7421509027481079, "learning_rate": 0.00019745569746985612, "loss": 0.4851, "step": 30670 }, { "epoch": 0.35520770620108366, "grad_norm": 0.7564364671707153, "learning_rate": 0.00019745283611264877, "loss": 0.4532, "step": 30680 }, { "epoch": 0.3553234844625573, "grad_norm": 0.5512920618057251, "learning_rate": 0.00019744997316814051, "loss": 0.4214, "step": 30690 }, { "epoch": 0.3554392627240309, "grad_norm": 0.7705976963043213, "learning_rate": 0.000197447108636378, "loss": 0.4598, "step": 30700 }, { "epoch": 0.35555504098550456, "grad_norm": 1.0558487176895142, "learning_rate": 0.00019744424251740786, "loss": 0.4019, "step": 30710 }, { "epoch": 0.3556708192469782, "grad_norm": 0.7541359066963196, "learning_rate": 0.00019744137481127685, "loss": 0.4967, "step": 30720 }, { "epoch": 0.3557865975084518, "grad_norm": 0.6676609516143799, "learning_rate": 0.00019743850551803157, "loss": 0.4654, "step": 30730 }, { "epoch": 0.35590237576992545, "grad_norm": 0.792260468006134, "learning_rate": 0.00019743563463771882, "loss": 0.4302, "step": 30740 }, { "epoch": 0.3560181540313991, "grad_norm": 0.7691541910171509, "learning_rate": 0.0001974327621703854, "loss": 0.4515, "step": 30750 }, { "epoch": 0.3561339322928727, "grad_norm": 0.7016454339027405, "learning_rate": 0.00019742988811607802, "loss": 0.4616, "step": 30760 }, { "epoch": 0.3562497105543463, "grad_norm": 0.7144368886947632, "learning_rate": 0.0001974270124748435, "loss": 0.4486, "step": 30770 }, { "epoch": 0.3563654888158199, "grad_norm": 0.7607747912406921, "learning_rate": 0.00019742413524672872, "loss": 0.4339, "step": 30780 }, { "epoch": 0.35648126707729355, "grad_norm": 0.7062933444976807, "learning_rate": 0.0001974212564317805, "loss": 0.4376, "step": 30790 }, { "epoch": 0.3565970453387672, "grad_norm": 0.6070752739906311, "learning_rate": 0.00019741837603004576, "loss": 0.4301, "step": 30800 }, { "epoch": 0.3567128236002408, "grad_norm": 0.7491345405578613, "learning_rate": 0.0001974154940415714, "loss": 0.4838, "step": 30810 }, { "epoch": 0.35682860186171445, "grad_norm": 0.9208494424819946, "learning_rate": 0.00019741261046640437, "loss": 0.4285, "step": 30820 }, { "epoch": 0.3569443801231881, "grad_norm": 0.7147670984268188, "learning_rate": 0.00019740972530459168, "loss": 0.4393, "step": 30830 }, { "epoch": 0.3570601583846617, "grad_norm": 0.7651496529579163, "learning_rate": 0.00019740683855618024, "loss": 0.4606, "step": 30840 }, { "epoch": 0.35717593664613534, "grad_norm": 0.7650686502456665, "learning_rate": 0.00019740395022121708, "loss": 0.4356, "step": 30850 }, { "epoch": 0.35729171490760897, "grad_norm": 0.7028475403785706, "learning_rate": 0.00019740106029974934, "loss": 0.488, "step": 30860 }, { "epoch": 0.35740749316908255, "grad_norm": 0.7773790955543518, "learning_rate": 0.000197398168791824, "loss": 0.433, "step": 30870 }, { "epoch": 0.3575232714305562, "grad_norm": 0.8732051849365234, "learning_rate": 0.00019739527569748817, "loss": 0.4675, "step": 30880 }, { "epoch": 0.3576390496920298, "grad_norm": 0.7385003566741943, "learning_rate": 0.00019739238101678898, "loss": 0.4893, "step": 30890 }, { "epoch": 0.35775482795350344, "grad_norm": 0.6992689967155457, "learning_rate": 0.0001973894847497736, "loss": 0.4346, "step": 30900 }, { "epoch": 0.35787060621497707, "grad_norm": 0.6133136749267578, "learning_rate": 0.00019738658689648917, "loss": 0.482, "step": 30910 }, { "epoch": 0.3579863844764507, "grad_norm": 0.7630688548088074, "learning_rate": 0.0001973836874569829, "loss": 0.4392, "step": 30920 }, { "epoch": 0.35810216273792433, "grad_norm": 0.8155734539031982, "learning_rate": 0.00019738078643130204, "loss": 0.4649, "step": 30930 }, { "epoch": 0.35821794099939797, "grad_norm": 0.9518517255783081, "learning_rate": 0.0001973778838194938, "loss": 0.4562, "step": 30940 }, { "epoch": 0.3583337192608716, "grad_norm": 0.7084271311759949, "learning_rate": 0.00019737497962160549, "loss": 0.4329, "step": 30950 }, { "epoch": 0.35844949752234523, "grad_norm": 0.5387693047523499, "learning_rate": 0.00019737207383768441, "loss": 0.4397, "step": 30960 }, { "epoch": 0.3585652757838188, "grad_norm": 0.6200643181800842, "learning_rate": 0.00019736916646777788, "loss": 0.4187, "step": 30970 }, { "epoch": 0.35868105404529244, "grad_norm": 0.5123502612113953, "learning_rate": 0.00019736625751193328, "loss": 0.4131, "step": 30980 }, { "epoch": 0.35879683230676607, "grad_norm": 0.5148366093635559, "learning_rate": 0.00019736334697019792, "loss": 0.4398, "step": 30990 }, { "epoch": 0.3589126105682397, "grad_norm": 0.6812059879302979, "learning_rate": 0.0001973604348426193, "loss": 0.4133, "step": 31000 }, { "epoch": 0.3589126105682397, "eval_chrf": 83.24967828273468, "eval_loss": 0.6759328842163086, "eval_runtime": 216.1219, "eval_samples_per_second": 0.463, "eval_steps_per_second": 0.019, "step": 31000 }, { "epoch": 0.35902838882971333, "grad_norm": 0.7028791904449463, "learning_rate": 0.00019735752112924477, "loss": 0.4171, "step": 31010 }, { "epoch": 0.35914416709118696, "grad_norm": 0.6313734650611877, "learning_rate": 0.00019735460583012184, "loss": 0.477, "step": 31020 }, { "epoch": 0.3592599453526606, "grad_norm": 0.8126435875892639, "learning_rate": 0.000197351688945298, "loss": 0.4533, "step": 31030 }, { "epoch": 0.3593757236141342, "grad_norm": 0.8385278582572937, "learning_rate": 0.0001973487704748207, "loss": 0.4409, "step": 31040 }, { "epoch": 0.35949150187560786, "grad_norm": 0.6164735555648804, "learning_rate": 0.00019734585041873755, "loss": 0.4543, "step": 31050 }, { "epoch": 0.3596072801370815, "grad_norm": 0.8127566576004028, "learning_rate": 0.00019734292877709604, "loss": 0.4384, "step": 31060 }, { "epoch": 0.35972305839855506, "grad_norm": 0.7247339487075806, "learning_rate": 0.00019734000554994384, "loss": 0.4416, "step": 31070 }, { "epoch": 0.3598388366600287, "grad_norm": 0.7585310339927673, "learning_rate": 0.00019733708073732848, "loss": 0.442, "step": 31080 }, { "epoch": 0.3599546149215023, "grad_norm": 0.7440115213394165, "learning_rate": 0.00019733415433929765, "loss": 0.4422, "step": 31090 }, { "epoch": 0.36007039318297596, "grad_norm": 0.59879070520401, "learning_rate": 0.000197331226355899, "loss": 0.4436, "step": 31100 }, { "epoch": 0.3601861714444496, "grad_norm": 0.6699204444885254, "learning_rate": 0.0001973282967871802, "loss": 0.4383, "step": 31110 }, { "epoch": 0.3603019497059232, "grad_norm": 0.8452684879302979, "learning_rate": 0.00019732536563318905, "loss": 0.447, "step": 31120 }, { "epoch": 0.36041772796739685, "grad_norm": 0.6436376571655273, "learning_rate": 0.00019732243289397316, "loss": 0.4109, "step": 31130 }, { "epoch": 0.3605335062288705, "grad_norm": 0.6838762760162354, "learning_rate": 0.0001973194985695804, "loss": 0.4367, "step": 31140 }, { "epoch": 0.3606492844903441, "grad_norm": 0.5743063688278198, "learning_rate": 0.00019731656266005855, "loss": 0.4134, "step": 31150 }, { "epoch": 0.36076506275181774, "grad_norm": 0.8025876879692078, "learning_rate": 0.0001973136251654554, "loss": 0.4349, "step": 31160 }, { "epoch": 0.3608808410132913, "grad_norm": 0.8272978067398071, "learning_rate": 0.0001973106860858188, "loss": 0.4532, "step": 31170 }, { "epoch": 0.36099661927476495, "grad_norm": 0.7785543203353882, "learning_rate": 0.00019730774542119664, "loss": 0.4558, "step": 31180 }, { "epoch": 0.3611123975362386, "grad_norm": 0.6963177919387817, "learning_rate": 0.00019730480317163676, "loss": 0.4561, "step": 31190 }, { "epoch": 0.3612281757977122, "grad_norm": 0.7514274716377258, "learning_rate": 0.0001973018593371872, "loss": 0.4569, "step": 31200 }, { "epoch": 0.36134395405918585, "grad_norm": 0.8091863393783569, "learning_rate": 0.00019729891391789578, "loss": 0.4404, "step": 31210 }, { "epoch": 0.3614597323206595, "grad_norm": 0.7302409410476685, "learning_rate": 0.00019729596691381058, "loss": 0.4278, "step": 31220 }, { "epoch": 0.3615755105821331, "grad_norm": 0.8030725121498108, "learning_rate": 0.00019729301832497952, "loss": 0.4661, "step": 31230 }, { "epoch": 0.36169128884360674, "grad_norm": 0.6427509188652039, "learning_rate": 0.00019729006815145069, "loss": 0.424, "step": 31240 }, { "epoch": 0.36180706710508037, "grad_norm": 0.6757525205612183, "learning_rate": 0.0001972871163932721, "loss": 0.4417, "step": 31250 }, { "epoch": 0.361922845366554, "grad_norm": 0.7824217677116394, "learning_rate": 0.00019728416305049183, "loss": 0.4366, "step": 31260 }, { "epoch": 0.3620386236280276, "grad_norm": 0.7894025444984436, "learning_rate": 0.00019728120812315797, "loss": 0.473, "step": 31270 }, { "epoch": 0.3621544018895012, "grad_norm": 0.7695927023887634, "learning_rate": 0.0001972782516113187, "loss": 0.4689, "step": 31280 }, { "epoch": 0.36227018015097484, "grad_norm": 0.6761317849159241, "learning_rate": 0.00019727529351502217, "loss": 0.4713, "step": 31290 }, { "epoch": 0.3623859584124485, "grad_norm": 0.7752683162689209, "learning_rate": 0.00019727233383431652, "loss": 0.459, "step": 31300 }, { "epoch": 0.3625017366739221, "grad_norm": 0.6869176626205444, "learning_rate": 0.00019726937256924994, "loss": 0.4621, "step": 31310 }, { "epoch": 0.36261751493539573, "grad_norm": 0.7642596960067749, "learning_rate": 0.00019726640971987075, "loss": 0.4613, "step": 31320 }, { "epoch": 0.36273329319686937, "grad_norm": 0.9158223271369934, "learning_rate": 0.00019726344528622715, "loss": 0.4443, "step": 31330 }, { "epoch": 0.362849071458343, "grad_norm": 0.8089000582695007, "learning_rate": 0.00019726047926836744, "loss": 0.4498, "step": 31340 }, { "epoch": 0.36296484971981663, "grad_norm": 0.8005766868591309, "learning_rate": 0.0001972575116663399, "loss": 0.4333, "step": 31350 }, { "epoch": 0.36308062798129026, "grad_norm": 0.7373596429824829, "learning_rate": 0.0001972545424801929, "loss": 0.4364, "step": 31360 }, { "epoch": 0.36319640624276384, "grad_norm": 0.5547783970832825, "learning_rate": 0.00019725157170997476, "loss": 0.4477, "step": 31370 }, { "epoch": 0.36331218450423747, "grad_norm": 0.8195983171463013, "learning_rate": 0.00019724859935573395, "loss": 0.4236, "step": 31380 }, { "epoch": 0.3634279627657111, "grad_norm": 0.741077184677124, "learning_rate": 0.0001972456254175188, "loss": 0.4064, "step": 31390 }, { "epoch": 0.36354374102718473, "grad_norm": 0.69661545753479, "learning_rate": 0.00019724264989537778, "loss": 0.4881, "step": 31400 }, { "epoch": 0.36365951928865836, "grad_norm": 0.7796928882598877, "learning_rate": 0.00019723967278935937, "loss": 0.4407, "step": 31410 }, { "epoch": 0.363775297550132, "grad_norm": 0.6568746566772461, "learning_rate": 0.00019723669409951202, "loss": 0.449, "step": 31420 }, { "epoch": 0.3638910758116056, "grad_norm": 0.6616988182067871, "learning_rate": 0.00019723371382588428, "loss": 0.4122, "step": 31430 }, { "epoch": 0.36400685407307926, "grad_norm": 0.7161182761192322, "learning_rate": 0.0001972307319685247, "loss": 0.4274, "step": 31440 }, { "epoch": 0.3641226323345529, "grad_norm": 0.7637435793876648, "learning_rate": 0.00019722774852748182, "loss": 0.4568, "step": 31450 }, { "epoch": 0.36423841059602646, "grad_norm": 0.6790422201156616, "learning_rate": 0.00019722476350280427, "loss": 0.4373, "step": 31460 }, { "epoch": 0.3643541888575001, "grad_norm": 0.8845119476318359, "learning_rate": 0.00019722177689454062, "loss": 0.4239, "step": 31470 }, { "epoch": 0.3644699671189737, "grad_norm": 0.7735661268234253, "learning_rate": 0.00019721878870273955, "loss": 0.4393, "step": 31480 }, { "epoch": 0.36458574538044736, "grad_norm": 0.6819901466369629, "learning_rate": 0.0001972157989274497, "loss": 0.4684, "step": 31490 }, { "epoch": 0.364701523641921, "grad_norm": 0.5607456564903259, "learning_rate": 0.00019721280756871984, "loss": 0.4635, "step": 31500 }, { "epoch": 0.3648173019033946, "grad_norm": 0.8919089436531067, "learning_rate": 0.0001972098146265986, "loss": 0.443, "step": 31510 }, { "epoch": 0.36493308016486825, "grad_norm": 0.905838668346405, "learning_rate": 0.0001972068201011348, "loss": 0.4554, "step": 31520 }, { "epoch": 0.3650488584263419, "grad_norm": 0.6962574124336243, "learning_rate": 0.00019720382399237717, "loss": 0.492, "step": 31530 }, { "epoch": 0.3651646366878155, "grad_norm": 0.8887657523155212, "learning_rate": 0.00019720082630037455, "loss": 0.4637, "step": 31540 }, { "epoch": 0.36528041494928915, "grad_norm": 0.8585859537124634, "learning_rate": 0.00019719782702517572, "loss": 0.4687, "step": 31550 }, { "epoch": 0.3653961932107627, "grad_norm": 0.644352912902832, "learning_rate": 0.0001971948261668296, "loss": 0.4414, "step": 31560 }, { "epoch": 0.36551197147223635, "grad_norm": 0.7631421089172363, "learning_rate": 0.00019719182372538496, "loss": 0.4304, "step": 31570 }, { "epoch": 0.36562774973371, "grad_norm": 0.8129732012748718, "learning_rate": 0.0001971888197008908, "loss": 0.4148, "step": 31580 }, { "epoch": 0.3657435279951836, "grad_norm": 0.7444393038749695, "learning_rate": 0.00019718581409339597, "loss": 0.4036, "step": 31590 }, { "epoch": 0.36585930625665725, "grad_norm": 0.7733768224716187, "learning_rate": 0.00019718280690294955, "loss": 0.4546, "step": 31600 }, { "epoch": 0.3659750845181309, "grad_norm": 0.571118950843811, "learning_rate": 0.00019717979812960038, "loss": 0.4128, "step": 31610 }, { "epoch": 0.3660908627796045, "grad_norm": 0.8078710436820984, "learning_rate": 0.00019717678777339756, "loss": 0.4316, "step": 31620 }, { "epoch": 0.36620664104107814, "grad_norm": 0.7164070010185242, "learning_rate": 0.00019717377583439006, "loss": 0.5278, "step": 31630 }, { "epoch": 0.36632241930255177, "grad_norm": 0.7216572761535645, "learning_rate": 0.00019717076231262702, "loss": 0.458, "step": 31640 }, { "epoch": 0.3664381975640254, "grad_norm": 0.5927894115447998, "learning_rate": 0.0001971677472081574, "loss": 0.4314, "step": 31650 }, { "epoch": 0.366553975825499, "grad_norm": 0.7894261479377747, "learning_rate": 0.0001971647305210304, "loss": 0.4654, "step": 31660 }, { "epoch": 0.3666697540869726, "grad_norm": 0.8569123148918152, "learning_rate": 0.0001971617122512952, "loss": 0.4324, "step": 31670 }, { "epoch": 0.36678553234844624, "grad_norm": 0.8199725151062012, "learning_rate": 0.00019715869239900083, "loss": 0.4436, "step": 31680 }, { "epoch": 0.3669013106099199, "grad_norm": 0.8042467832565308, "learning_rate": 0.00019715567096419658, "loss": 0.4123, "step": 31690 }, { "epoch": 0.3670170888713935, "grad_norm": 0.6773293614387512, "learning_rate": 0.00019715264794693162, "loss": 0.4533, "step": 31700 }, { "epoch": 0.36713286713286714, "grad_norm": 0.7476542592048645, "learning_rate": 0.0001971496233472552, "loss": 0.4428, "step": 31710 }, { "epoch": 0.36724864539434077, "grad_norm": 0.7265835404396057, "learning_rate": 0.00019714659716521657, "loss": 0.45, "step": 31720 }, { "epoch": 0.3673644236558144, "grad_norm": 0.6283068656921387, "learning_rate": 0.00019714356940086502, "loss": 0.4523, "step": 31730 }, { "epoch": 0.36748020191728803, "grad_norm": 0.6916100382804871, "learning_rate": 0.0001971405400542499, "loss": 0.4246, "step": 31740 }, { "epoch": 0.36759598017876166, "grad_norm": 0.8223037719726562, "learning_rate": 0.00019713750912542054, "loss": 0.445, "step": 31750 }, { "epoch": 0.36771175844023524, "grad_norm": 0.7205837965011597, "learning_rate": 0.00019713447661442627, "loss": 0.4268, "step": 31760 }, { "epoch": 0.36782753670170887, "grad_norm": 0.8070530891418457, "learning_rate": 0.0001971314425213165, "loss": 0.4504, "step": 31770 }, { "epoch": 0.3679433149631825, "grad_norm": 0.6983947157859802, "learning_rate": 0.0001971284068461407, "loss": 0.4372, "step": 31780 }, { "epoch": 0.36805909322465613, "grad_norm": 0.8791850209236145, "learning_rate": 0.00019712536958894824, "loss": 0.4422, "step": 31790 }, { "epoch": 0.36817487148612976, "grad_norm": 0.7085838913917542, "learning_rate": 0.00019712233074978866, "loss": 0.4371, "step": 31800 }, { "epoch": 0.3682906497476034, "grad_norm": 0.8415221571922302, "learning_rate": 0.0001971192903287114, "loss": 0.4451, "step": 31810 }, { "epoch": 0.368406428009077, "grad_norm": 0.838901698589325, "learning_rate": 0.00019711624832576598, "loss": 0.4456, "step": 31820 }, { "epoch": 0.36852220627055066, "grad_norm": 0.6288304924964905, "learning_rate": 0.000197113204741002, "loss": 0.4579, "step": 31830 }, { "epoch": 0.3686379845320243, "grad_norm": 0.7653651833534241, "learning_rate": 0.00019711015957446897, "loss": 0.4392, "step": 31840 }, { "epoch": 0.3687537627934979, "grad_norm": 0.8340009450912476, "learning_rate": 0.00019710711282621657, "loss": 0.4904, "step": 31850 }, { "epoch": 0.3688695410549715, "grad_norm": 0.6676978468894958, "learning_rate": 0.00019710406449629435, "loss": 0.4324, "step": 31860 }, { "epoch": 0.3689853193164451, "grad_norm": 0.7971967458724976, "learning_rate": 0.00019710101458475195, "loss": 0.449, "step": 31870 }, { "epoch": 0.36910109757791876, "grad_norm": 0.6887031197547913, "learning_rate": 0.00019709796309163915, "loss": 0.4467, "step": 31880 }, { "epoch": 0.3692168758393924, "grad_norm": 0.818535566329956, "learning_rate": 0.00019709491001700555, "loss": 0.4103, "step": 31890 }, { "epoch": 0.369332654100866, "grad_norm": 0.6199958920478821, "learning_rate": 0.00019709185536090094, "loss": 0.4392, "step": 31900 }, { "epoch": 0.36944843236233965, "grad_norm": 0.8440913558006287, "learning_rate": 0.00019708879912337502, "loss": 0.4703, "step": 31910 }, { "epoch": 0.3695642106238133, "grad_norm": 0.64154052734375, "learning_rate": 0.00019708574130447763, "loss": 0.4427, "step": 31920 }, { "epoch": 0.3696799888852869, "grad_norm": 0.7256616353988647, "learning_rate": 0.00019708268190425852, "loss": 0.4704, "step": 31930 }, { "epoch": 0.36979576714676055, "grad_norm": 0.862727701663971, "learning_rate": 0.00019707962092276756, "loss": 0.4578, "step": 31940 }, { "epoch": 0.3699115454082342, "grad_norm": 0.7893557548522949, "learning_rate": 0.00019707655836005458, "loss": 0.4331, "step": 31950 }, { "epoch": 0.37002732366970775, "grad_norm": 0.7825723886489868, "learning_rate": 0.0001970734942161695, "loss": 0.4292, "step": 31960 }, { "epoch": 0.3701431019311814, "grad_norm": 0.7858340740203857, "learning_rate": 0.00019707042849116218, "loss": 0.4705, "step": 31970 }, { "epoch": 0.370258880192655, "grad_norm": 0.8253327012062073, "learning_rate": 0.0001970673611850826, "loss": 0.4501, "step": 31980 }, { "epoch": 0.37037465845412865, "grad_norm": 0.8884454369544983, "learning_rate": 0.00019706429229798068, "loss": 0.4702, "step": 31990 }, { "epoch": 0.3704904367156023, "grad_norm": 0.6838307976722717, "learning_rate": 0.00019706122182990642, "loss": 0.4772, "step": 32000 }, { "epoch": 0.3704904367156023, "eval_chrf": 75.47916311219912, "eval_loss": 0.6640332341194153, "eval_runtime": 279.4112, "eval_samples_per_second": 0.358, "eval_steps_per_second": 0.014, "step": 32000 }, { "epoch": 0.3706062149770759, "grad_norm": 0.747809112071991, "learning_rate": 0.00019705814978090985, "loss": 0.4442, "step": 32010 }, { "epoch": 0.37072199323854954, "grad_norm": 0.7655125856399536, "learning_rate": 0.000197055076151041, "loss": 0.4248, "step": 32020 }, { "epoch": 0.37083777150002317, "grad_norm": 0.6934143304824829, "learning_rate": 0.0001970520009403499, "loss": 0.4384, "step": 32030 }, { "epoch": 0.3709535497614968, "grad_norm": 0.8173362016677856, "learning_rate": 0.0001970489241488867, "loss": 0.4171, "step": 32040 }, { "epoch": 0.3710693280229704, "grad_norm": 0.49783217906951904, "learning_rate": 0.00019704584577670144, "loss": 0.4432, "step": 32050 }, { "epoch": 0.371185106284444, "grad_norm": 0.7017600536346436, "learning_rate": 0.0001970427658238443, "loss": 0.4431, "step": 32060 }, { "epoch": 0.37130088454591764, "grad_norm": 0.8327655792236328, "learning_rate": 0.00019703968429036548, "loss": 0.4391, "step": 32070 }, { "epoch": 0.3714166628073913, "grad_norm": 0.7152509689331055, "learning_rate": 0.0001970366011763151, "loss": 0.479, "step": 32080 }, { "epoch": 0.3715324410688649, "grad_norm": 0.7732742428779602, "learning_rate": 0.00019703351648174344, "loss": 0.4484, "step": 32090 }, { "epoch": 0.37164821933033854, "grad_norm": 0.8258805871009827, "learning_rate": 0.00019703043020670072, "loss": 0.4442, "step": 32100 }, { "epoch": 0.37176399759181217, "grad_norm": 0.8022509813308716, "learning_rate": 0.00019702734235123716, "loss": 0.4484, "step": 32110 }, { "epoch": 0.3718797758532858, "grad_norm": 0.7884577512741089, "learning_rate": 0.00019702425291540313, "loss": 0.4445, "step": 32120 }, { "epoch": 0.37199555411475943, "grad_norm": 0.8316375017166138, "learning_rate": 0.00019702116189924894, "loss": 0.4605, "step": 32130 }, { "epoch": 0.37211133237623306, "grad_norm": 0.8084789514541626, "learning_rate": 0.0001970180693028249, "loss": 0.4421, "step": 32140 }, { "epoch": 0.37222711063770664, "grad_norm": 0.7886109352111816, "learning_rate": 0.0001970149751261814, "loss": 0.4409, "step": 32150 }, { "epoch": 0.37234288889918027, "grad_norm": 0.798734188079834, "learning_rate": 0.00019701187936936883, "loss": 0.4362, "step": 32160 }, { "epoch": 0.3724586671606539, "grad_norm": 0.6926020383834839, "learning_rate": 0.0001970087820324376, "loss": 0.4391, "step": 32170 }, { "epoch": 0.37257444542212753, "grad_norm": 0.698354959487915, "learning_rate": 0.00019700568311543823, "loss": 0.4347, "step": 32180 }, { "epoch": 0.37269022368360116, "grad_norm": 0.7995192408561707, "learning_rate": 0.00019700258261842111, "loss": 0.432, "step": 32190 }, { "epoch": 0.3728060019450748, "grad_norm": 0.813062310218811, "learning_rate": 0.00019699948054143678, "loss": 0.4462, "step": 32200 }, { "epoch": 0.3729217802065484, "grad_norm": 0.8295624852180481, "learning_rate": 0.00019699637688453577, "loss": 0.4467, "step": 32210 }, { "epoch": 0.37303755846802206, "grad_norm": 0.7042912244796753, "learning_rate": 0.00019699327164776859, "loss": 0.4246, "step": 32220 }, { "epoch": 0.3731533367294957, "grad_norm": 0.8360041975975037, "learning_rate": 0.00019699016483118586, "loss": 0.4786, "step": 32230 }, { "epoch": 0.3732691149909693, "grad_norm": 0.6229578852653503, "learning_rate": 0.00019698705643483822, "loss": 0.4356, "step": 32240 }, { "epoch": 0.3733848932524429, "grad_norm": 0.7104181051254272, "learning_rate": 0.0001969839464587762, "loss": 0.4642, "step": 32250 }, { "epoch": 0.3735006715139165, "grad_norm": 0.9683583974838257, "learning_rate": 0.00019698083490305053, "loss": 0.5241, "step": 32260 }, { "epoch": 0.37361644977539016, "grad_norm": 0.8482490181922913, "learning_rate": 0.00019697772176771188, "loss": 0.436, "step": 32270 }, { "epoch": 0.3737322280368638, "grad_norm": 0.7092196345329285, "learning_rate": 0.0001969746070528109, "loss": 0.4535, "step": 32280 }, { "epoch": 0.3738480062983374, "grad_norm": 0.6221027374267578, "learning_rate": 0.00019697149075839843, "loss": 0.4378, "step": 32290 }, { "epoch": 0.37396378455981105, "grad_norm": 0.8272190093994141, "learning_rate": 0.00019696837288452513, "loss": 0.4173, "step": 32300 }, { "epoch": 0.3740795628212847, "grad_norm": 0.761194109916687, "learning_rate": 0.0001969652534312418, "loss": 0.4563, "step": 32310 }, { "epoch": 0.3741953410827583, "grad_norm": 0.7332513332366943, "learning_rate": 0.0001969621323985993, "loss": 0.4401, "step": 32320 }, { "epoch": 0.37431111934423195, "grad_norm": 0.9964307546615601, "learning_rate": 0.0001969590097866484, "loss": 0.4308, "step": 32330 }, { "epoch": 0.3744268976057056, "grad_norm": 0.7437024116516113, "learning_rate": 0.00019695588559544, "loss": 0.4622, "step": 32340 }, { "epoch": 0.37454267586717915, "grad_norm": 0.7568386793136597, "learning_rate": 0.00019695275982502502, "loss": 0.459, "step": 32350 }, { "epoch": 0.3746584541286528, "grad_norm": 0.851673424243927, "learning_rate": 0.00019694963247545433, "loss": 0.4281, "step": 32360 }, { "epoch": 0.3747742323901264, "grad_norm": 0.6231158375740051, "learning_rate": 0.00019694650354677883, "loss": 0.4399, "step": 32370 }, { "epoch": 0.37489001065160005, "grad_norm": 0.7695604562759399, "learning_rate": 0.00019694337303904954, "loss": 0.4552, "step": 32380 }, { "epoch": 0.3750057889130737, "grad_norm": 0.8278711438179016, "learning_rate": 0.00019694024095231742, "loss": 0.4657, "step": 32390 }, { "epoch": 0.3751215671745473, "grad_norm": 0.6331350207328796, "learning_rate": 0.00019693710728663352, "loss": 0.4239, "step": 32400 }, { "epoch": 0.37523734543602094, "grad_norm": 0.6943710446357727, "learning_rate": 0.00019693397204204884, "loss": 0.4451, "step": 32410 }, { "epoch": 0.3753531236974946, "grad_norm": 0.8185776472091675, "learning_rate": 0.0001969308352186145, "loss": 0.4592, "step": 32420 }, { "epoch": 0.3754689019589682, "grad_norm": 0.9405238628387451, "learning_rate": 0.0001969276968163815, "loss": 0.4507, "step": 32430 }, { "epoch": 0.37558468022044184, "grad_norm": 0.8298096656799316, "learning_rate": 0.00019692455683540107, "loss": 0.4665, "step": 32440 }, { "epoch": 0.3757004584819154, "grad_norm": 0.7867631316184998, "learning_rate": 0.00019692141527572428, "loss": 0.4354, "step": 32450 }, { "epoch": 0.37581623674338904, "grad_norm": 0.9160256385803223, "learning_rate": 0.00019691827213740232, "loss": 0.4528, "step": 32460 }, { "epoch": 0.3759320150048627, "grad_norm": 0.7393675446510315, "learning_rate": 0.0001969151274204864, "loss": 0.4249, "step": 32470 }, { "epoch": 0.3760477932663363, "grad_norm": 0.7604638338088989, "learning_rate": 0.00019691198112502768, "loss": 0.4408, "step": 32480 }, { "epoch": 0.37616357152780994, "grad_norm": 0.8976063132286072, "learning_rate": 0.00019690883325107746, "loss": 0.466, "step": 32490 }, { "epoch": 0.37627934978928357, "grad_norm": 0.7089973092079163, "learning_rate": 0.00019690568379868704, "loss": 0.4348, "step": 32500 }, { "epoch": 0.3763951280507572, "grad_norm": 0.7004563808441162, "learning_rate": 0.00019690253276790766, "loss": 0.4449, "step": 32510 }, { "epoch": 0.37651090631223083, "grad_norm": 0.7332916259765625, "learning_rate": 0.00019689938015879068, "loss": 0.4376, "step": 32520 }, { "epoch": 0.37662668457370446, "grad_norm": 0.9055464267730713, "learning_rate": 0.00019689622597138741, "loss": 0.4795, "step": 32530 }, { "epoch": 0.3767424628351781, "grad_norm": 0.9704527854919434, "learning_rate": 0.00019689307020574924, "loss": 0.4415, "step": 32540 }, { "epoch": 0.37685824109665167, "grad_norm": 0.6158437728881836, "learning_rate": 0.00019688991286192763, "loss": 0.4087, "step": 32550 }, { "epoch": 0.3769740193581253, "grad_norm": 0.7678256630897522, "learning_rate": 0.0001968867539399739, "loss": 0.4239, "step": 32560 }, { "epoch": 0.37708979761959893, "grad_norm": 0.8120213150978088, "learning_rate": 0.0001968835934399396, "loss": 0.4626, "step": 32570 }, { "epoch": 0.37720557588107256, "grad_norm": 0.6679076552391052, "learning_rate": 0.00019688043136187618, "loss": 0.446, "step": 32580 }, { "epoch": 0.3773213541425462, "grad_norm": 0.8104445934295654, "learning_rate": 0.00019687726770583507, "loss": 0.4415, "step": 32590 }, { "epoch": 0.3774371324040198, "grad_norm": 0.5499981045722961, "learning_rate": 0.00019687410247186792, "loss": 0.4334, "step": 32600 }, { "epoch": 0.37755291066549346, "grad_norm": 0.6923993229866028, "learning_rate": 0.0001968709356600262, "loss": 0.4479, "step": 32610 }, { "epoch": 0.3776686889269671, "grad_norm": 0.6233457326889038, "learning_rate": 0.0001968677672703615, "loss": 0.4332, "step": 32620 }, { "epoch": 0.3777844671884407, "grad_norm": 0.8203330636024475, "learning_rate": 0.00019686459730292545, "loss": 0.4345, "step": 32630 }, { "epoch": 0.37790024544991435, "grad_norm": 0.7847434878349304, "learning_rate": 0.00019686142575776967, "loss": 0.4357, "step": 32640 }, { "epoch": 0.3780160237113879, "grad_norm": 0.7158651351928711, "learning_rate": 0.00019685825263494585, "loss": 0.4883, "step": 32650 }, { "epoch": 0.37813180197286156, "grad_norm": 0.8530229926109314, "learning_rate": 0.00019685507793450562, "loss": 0.4687, "step": 32660 }, { "epoch": 0.3782475802343352, "grad_norm": 0.7206749320030212, "learning_rate": 0.00019685190165650069, "loss": 0.4869, "step": 32670 }, { "epoch": 0.3783633584958088, "grad_norm": 0.581028401851654, "learning_rate": 0.00019684872380098284, "loss": 0.4264, "step": 32680 }, { "epoch": 0.37847913675728245, "grad_norm": 0.7559664845466614, "learning_rate": 0.00019684554436800382, "loss": 0.4444, "step": 32690 }, { "epoch": 0.3785949150187561, "grad_norm": 0.7686482071876526, "learning_rate": 0.00019684236335761542, "loss": 0.4581, "step": 32700 }, { "epoch": 0.3787106932802297, "grad_norm": 0.7029942870140076, "learning_rate": 0.0001968391807698694, "loss": 0.4493, "step": 32710 }, { "epoch": 0.37882647154170335, "grad_norm": 0.7855258584022522, "learning_rate": 0.00019683599660481765, "loss": 0.4119, "step": 32720 }, { "epoch": 0.378942249803177, "grad_norm": 1.0246684551239014, "learning_rate": 0.000196832810862512, "loss": 0.4379, "step": 32730 }, { "epoch": 0.37905802806465055, "grad_norm": 0.8838835954666138, "learning_rate": 0.0001968296235430044, "loss": 0.4478, "step": 32740 }, { "epoch": 0.3791738063261242, "grad_norm": 0.7728408575057983, "learning_rate": 0.0001968264346463467, "loss": 0.4323, "step": 32750 }, { "epoch": 0.3792895845875978, "grad_norm": 0.8997697830200195, "learning_rate": 0.0001968232441725909, "loss": 0.432, "step": 32760 }, { "epoch": 0.37940536284907145, "grad_norm": 0.750637412071228, "learning_rate": 0.00019682005212178888, "loss": 0.3989, "step": 32770 }, { "epoch": 0.3795211411105451, "grad_norm": 0.742658257484436, "learning_rate": 0.00019681685849399269, "loss": 0.4544, "step": 32780 }, { "epoch": 0.3796369193720187, "grad_norm": 0.7581350803375244, "learning_rate": 0.00019681366328925433, "loss": 0.4632, "step": 32790 }, { "epoch": 0.37975269763349234, "grad_norm": 0.7807260155677795, "learning_rate": 0.00019681046650762586, "loss": 0.4561, "step": 32800 }, { "epoch": 0.379868475894966, "grad_norm": 0.7902418375015259, "learning_rate": 0.00019680726814915933, "loss": 0.4227, "step": 32810 }, { "epoch": 0.3799842541564396, "grad_norm": 0.7732905745506287, "learning_rate": 0.00019680406821390688, "loss": 0.4632, "step": 32820 }, { "epoch": 0.38010003241791324, "grad_norm": 0.7653182744979858, "learning_rate": 0.00019680086670192056, "loss": 0.4516, "step": 32830 }, { "epoch": 0.3802158106793868, "grad_norm": 0.8937899470329285, "learning_rate": 0.00019679766361325256, "loss": 0.4608, "step": 32840 }, { "epoch": 0.38033158894086044, "grad_norm": 0.6731113791465759, "learning_rate": 0.00019679445894795503, "loss": 0.4805, "step": 32850 }, { "epoch": 0.3804473672023341, "grad_norm": 0.6992696523666382, "learning_rate": 0.0001967912527060802, "loss": 0.4506, "step": 32860 }, { "epoch": 0.3805631454638077, "grad_norm": 0.6685723662376404, "learning_rate": 0.00019678804488768026, "loss": 0.4709, "step": 32870 }, { "epoch": 0.38067892372528134, "grad_norm": 0.6553595066070557, "learning_rate": 0.00019678483549280746, "loss": 0.4332, "step": 32880 }, { "epoch": 0.38079470198675497, "grad_norm": 0.8781308531761169, "learning_rate": 0.0001967816245215141, "loss": 0.4297, "step": 32890 }, { "epoch": 0.3809104802482286, "grad_norm": 0.8023228645324707, "learning_rate": 0.00019677841197385249, "loss": 0.4486, "step": 32900 }, { "epoch": 0.38102625850970223, "grad_norm": 0.8838029503822327, "learning_rate": 0.0001967751978498749, "loss": 0.4223, "step": 32910 }, { "epoch": 0.38114203677117586, "grad_norm": 1.0198270082473755, "learning_rate": 0.0001967719821496337, "loss": 0.4528, "step": 32920 }, { "epoch": 0.3812578150326495, "grad_norm": 0.7790926098823547, "learning_rate": 0.0001967687648731813, "loss": 0.4405, "step": 32930 }, { "epoch": 0.38137359329412307, "grad_norm": 0.7534083724021912, "learning_rate": 0.0001967655460205701, "loss": 0.4656, "step": 32940 }, { "epoch": 0.3814893715555967, "grad_norm": 0.7717673778533936, "learning_rate": 0.00019676232559185245, "loss": 0.4749, "step": 32950 }, { "epoch": 0.38160514981707033, "grad_norm": 0.8359370827674866, "learning_rate": 0.0001967591035870809, "loss": 0.466, "step": 32960 }, { "epoch": 0.38172092807854396, "grad_norm": 0.653666615486145, "learning_rate": 0.00019675588000630788, "loss": 0.4603, "step": 32970 }, { "epoch": 0.3818367063400176, "grad_norm": 0.7439927458763123, "learning_rate": 0.00019675265484958595, "loss": 0.4681, "step": 32980 }, { "epoch": 0.3819524846014912, "grad_norm": 0.5780789852142334, "learning_rate": 0.00019674942811696757, "loss": 0.3726, "step": 32990 }, { "epoch": 0.38206826286296486, "grad_norm": 0.8285802602767944, "learning_rate": 0.00019674619980850533, "loss": 0.4609, "step": 33000 }, { "epoch": 0.38206826286296486, "eval_chrf": 70.65395075493964, "eval_loss": 0.6838914752006531, "eval_runtime": 348.4953, "eval_samples_per_second": 0.287, "eval_steps_per_second": 0.011, "step": 33000 } ], "logging_steps": 10, "max_steps": 259116, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.593740795633992e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }