{ "best_global_step": 39000, "best_metric": 84.03459253857896, "best_model_checkpoint": "checkpoints_7B_lora_translated/en-chv-final/checkpoint-39000", "epoch": 0.7027900766041183, "eval_steps": 1000, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00014055801532082368, "grad_norm": 2.6348493099212646, "learning_rate": 5.05902192242833e-07, "loss": 2.5918, "step": 10 }, { "epoch": 0.00028111603064164735, "grad_norm": 2.4136617183685303, "learning_rate": 1.0680157391793143e-06, "loss": 2.4399, "step": 20 }, { "epoch": 0.000421674045962471, "grad_norm": 2.4269416332244873, "learning_rate": 1.6301292861157953e-06, "loss": 2.5371, "step": 30 }, { "epoch": 0.0005622320612832947, "grad_norm": 2.088914394378662, "learning_rate": 2.1922428330522765e-06, "loss": 2.5674, "step": 40 }, { "epoch": 0.0007027900766041184, "grad_norm": 2.4543466567993164, "learning_rate": 2.7543563799887578e-06, "loss": 2.4914, "step": 50 }, { "epoch": 0.000843348091924942, "grad_norm": 2.4548532962799072, "learning_rate": 3.316469926925239e-06, "loss": 2.4309, "step": 60 }, { "epoch": 0.0009839061072457658, "grad_norm": 2.680846691131592, "learning_rate": 3.878583473861721e-06, "loss": 2.3076, "step": 70 }, { "epoch": 0.0011244641225665894, "grad_norm": 2.1085638999938965, "learning_rate": 4.440697020798201e-06, "loss": 2.1619, "step": 80 }, { "epoch": 0.001265022137887413, "grad_norm": 2.349139928817749, "learning_rate": 5.002810567734682e-06, "loss": 2.1182, "step": 90 }, { "epoch": 0.0014055801532082367, "grad_norm": 2.2252542972564697, "learning_rate": 5.564924114671164e-06, "loss": 1.9373, "step": 100 }, { "epoch": 0.0015461381685290604, "grad_norm": 1.816213846206665, "learning_rate": 6.1270376616076454e-06, "loss": 1.7312, "step": 110 }, { "epoch": 0.001686696183849884, "grad_norm": 1.467105507850647, "learning_rate": 6.689151208544127e-06, "loss": 1.6066, "step": 120 }, { "epoch": 0.0018272541991707076, "grad_norm": 1.057456612586975, "learning_rate": 7.251264755480608e-06, "loss": 1.399, "step": 130 }, { "epoch": 0.0019678122144915315, "grad_norm": 0.7919467091560364, "learning_rate": 7.813378302417088e-06, "loss": 1.2557, "step": 140 }, { "epoch": 0.002108370229812355, "grad_norm": 0.7170568108558655, "learning_rate": 8.37549184935357e-06, "loss": 1.1852, "step": 150 }, { "epoch": 0.002248928245133179, "grad_norm": 0.6167587637901306, "learning_rate": 8.93760539629005e-06, "loss": 1.0936, "step": 160 }, { "epoch": 0.0023894862604540022, "grad_norm": 0.5959345698356628, "learning_rate": 9.499718943226533e-06, "loss": 1.04, "step": 170 }, { "epoch": 0.002530044275774826, "grad_norm": 0.6112813949584961, "learning_rate": 1.0061832490163013e-05, "loss": 1.0254, "step": 180 }, { "epoch": 0.0026706022910956495, "grad_norm": 0.45967984199523926, "learning_rate": 1.0623946037099493e-05, "loss": 0.9459, "step": 190 }, { "epoch": 0.0028111603064164734, "grad_norm": 0.4986984431743622, "learning_rate": 1.1186059584035975e-05, "loss": 0.8789, "step": 200 }, { "epoch": 0.0029517183217372973, "grad_norm": 0.5307064056396484, "learning_rate": 1.1748173130972456e-05, "loss": 0.9222, "step": 210 }, { "epoch": 0.0030922763370581207, "grad_norm": 0.5603976249694824, "learning_rate": 1.231028667790894e-05, "loss": 0.8335, "step": 220 }, { "epoch": 0.0032328343523789446, "grad_norm": 0.5682457089424133, "learning_rate": 1.2872400224845418e-05, "loss": 0.8723, "step": 230 }, { "epoch": 0.003373392367699768, "grad_norm": 0.5278539657592773, "learning_rate": 1.34345137717819e-05, "loss": 0.8296, "step": 240 }, { "epoch": 0.003513950383020592, "grad_norm": 0.5293506383895874, "learning_rate": 1.399662731871838e-05, "loss": 0.8298, "step": 250 }, { "epoch": 0.0036545083983414153, "grad_norm": 0.5066598653793335, "learning_rate": 1.4558740865654863e-05, "loss": 0.7801, "step": 260 }, { "epoch": 0.003795066413662239, "grad_norm": 0.617091178894043, "learning_rate": 1.5120854412591345e-05, "loss": 0.7985, "step": 270 }, { "epoch": 0.003935624428983063, "grad_norm": 0.5896926522254944, "learning_rate": 1.5682967959527825e-05, "loss": 0.8191, "step": 280 }, { "epoch": 0.0040761824443038865, "grad_norm": 0.5436782836914062, "learning_rate": 1.624508150646431e-05, "loss": 0.7488, "step": 290 }, { "epoch": 0.00421674045962471, "grad_norm": 0.6122232675552368, "learning_rate": 1.6807195053400786e-05, "loss": 0.7994, "step": 300 }, { "epoch": 0.004357298474945534, "grad_norm": 0.6472449898719788, "learning_rate": 1.736930860033727e-05, "loss": 0.7276, "step": 310 }, { "epoch": 0.004497856490266358, "grad_norm": 0.6526391506195068, "learning_rate": 1.793142214727375e-05, "loss": 0.7184, "step": 320 }, { "epoch": 0.004638414505587181, "grad_norm": 1.2312449216842651, "learning_rate": 1.849353569421023e-05, "loss": 0.7544, "step": 330 }, { "epoch": 0.0047789725209080045, "grad_norm": 0.6525812745094299, "learning_rate": 1.905564924114671e-05, "loss": 0.694, "step": 340 }, { "epoch": 0.004919530536228829, "grad_norm": 0.7462595701217651, "learning_rate": 1.9617762788083194e-05, "loss": 0.7105, "step": 350 }, { "epoch": 0.005060088551549652, "grad_norm": 0.7727270126342773, "learning_rate": 2.0179876335019675e-05, "loss": 0.6715, "step": 360 }, { "epoch": 0.005200646566870476, "grad_norm": 0.715003252029419, "learning_rate": 2.0741989881956155e-05, "loss": 0.6575, "step": 370 }, { "epoch": 0.005341204582191299, "grad_norm": 0.7247048020362854, "learning_rate": 2.130410342889264e-05, "loss": 0.6635, "step": 380 }, { "epoch": 0.005481762597512123, "grad_norm": 0.8433679938316345, "learning_rate": 2.186621697582912e-05, "loss": 0.6494, "step": 390 }, { "epoch": 0.005622320612832947, "grad_norm": 0.9507123827934265, "learning_rate": 2.24283305227656e-05, "loss": 0.6629, "step": 400 }, { "epoch": 0.00576287862815377, "grad_norm": 0.9074307084083557, "learning_rate": 2.299044406970208e-05, "loss": 0.7001, "step": 410 }, { "epoch": 0.0059034366434745945, "grad_norm": 0.9715143442153931, "learning_rate": 2.3552557616638564e-05, "loss": 0.6768, "step": 420 }, { "epoch": 0.006043994658795418, "grad_norm": 0.9060778617858887, "learning_rate": 2.411467116357504e-05, "loss": 0.6512, "step": 430 }, { "epoch": 0.006184552674116241, "grad_norm": 0.9930410385131836, "learning_rate": 2.4676784710511524e-05, "loss": 0.6667, "step": 440 }, { "epoch": 0.006325110689437065, "grad_norm": 1.1297030448913574, "learning_rate": 2.5238898257448008e-05, "loss": 0.631, "step": 450 }, { "epoch": 0.006465668704757889, "grad_norm": 1.0275930166244507, "learning_rate": 2.580101180438449e-05, "loss": 0.5607, "step": 460 }, { "epoch": 0.006606226720078713, "grad_norm": 1.1458061933517456, "learning_rate": 2.6363125351320966e-05, "loss": 0.6091, "step": 470 }, { "epoch": 0.006746784735399536, "grad_norm": 1.185503363609314, "learning_rate": 2.692523889825745e-05, "loss": 0.6215, "step": 480 }, { "epoch": 0.006887342750720359, "grad_norm": 1.04978346824646, "learning_rate": 2.748735244519393e-05, "loss": 0.6037, "step": 490 }, { "epoch": 0.007027900766041184, "grad_norm": 1.1438580751419067, "learning_rate": 2.8049465992130413e-05, "loss": 0.5887, "step": 500 }, { "epoch": 0.007168458781362007, "grad_norm": 0.9910487532615662, "learning_rate": 2.861157953906689e-05, "loss": 0.5972, "step": 510 }, { "epoch": 0.007309016796682831, "grad_norm": 1.381588339805603, "learning_rate": 2.9173693086003374e-05, "loss": 0.5942, "step": 520 }, { "epoch": 0.007449574812003655, "grad_norm": 0.9777755737304688, "learning_rate": 2.9735806632939855e-05, "loss": 0.5574, "step": 530 }, { "epoch": 0.007590132827324478, "grad_norm": 1.1398427486419678, "learning_rate": 3.0297920179876338e-05, "loss": 0.566, "step": 540 }, { "epoch": 0.007730690842645302, "grad_norm": 1.6577353477478027, "learning_rate": 3.086003372681282e-05, "loss": 0.5462, "step": 550 }, { "epoch": 0.007871248857966126, "grad_norm": 1.4741021394729614, "learning_rate": 3.14221472737493e-05, "loss": 0.573, "step": 560 }, { "epoch": 0.00801180687328695, "grad_norm": 1.2243646383285522, "learning_rate": 3.198426082068578e-05, "loss": 0.5566, "step": 570 }, { "epoch": 0.008152364888607773, "grad_norm": 1.2751414775848389, "learning_rate": 3.254637436762226e-05, "loss": 0.5994, "step": 580 }, { "epoch": 0.008292922903928596, "grad_norm": 1.2607536315917969, "learning_rate": 3.310848791455875e-05, "loss": 0.5064, "step": 590 }, { "epoch": 0.00843348091924942, "grad_norm": 1.5125900506973267, "learning_rate": 3.367060146149522e-05, "loss": 0.5533, "step": 600 }, { "epoch": 0.008574038934570243, "grad_norm": 1.3755593299865723, "learning_rate": 3.42327150084317e-05, "loss": 0.5528, "step": 610 }, { "epoch": 0.008714596949891068, "grad_norm": 1.279312252998352, "learning_rate": 3.479482855536819e-05, "loss": 0.5626, "step": 620 }, { "epoch": 0.008855154965211892, "grad_norm": 1.4999586343765259, "learning_rate": 3.535694210230467e-05, "loss": 0.5571, "step": 630 }, { "epoch": 0.008995712980532715, "grad_norm": 1.3935269117355347, "learning_rate": 3.591905564924115e-05, "loss": 0.5743, "step": 640 }, { "epoch": 0.009136270995853539, "grad_norm": 1.4465786218643188, "learning_rate": 3.648116919617763e-05, "loss": 0.5431, "step": 650 }, { "epoch": 0.009276829011174362, "grad_norm": 1.6643967628479004, "learning_rate": 3.704328274311411e-05, "loss": 0.546, "step": 660 }, { "epoch": 0.009417387026495186, "grad_norm": 1.3728482723236084, "learning_rate": 3.760539629005059e-05, "loss": 0.546, "step": 670 }, { "epoch": 0.009557945041816009, "grad_norm": 1.3901290893554688, "learning_rate": 3.816750983698708e-05, "loss": 0.5309, "step": 680 }, { "epoch": 0.009698503057136832, "grad_norm": 1.4969929456710815, "learning_rate": 3.872962338392355e-05, "loss": 0.5615, "step": 690 }, { "epoch": 0.009839061072457658, "grad_norm": 1.3987517356872559, "learning_rate": 3.929173693086004e-05, "loss": 0.504, "step": 700 }, { "epoch": 0.009979619087778481, "grad_norm": 1.2550172805786133, "learning_rate": 3.985385047779652e-05, "loss": 0.5662, "step": 710 }, { "epoch": 0.010120177103099304, "grad_norm": 1.4774284362792969, "learning_rate": 4.0415964024733e-05, "loss": 0.4979, "step": 720 }, { "epoch": 0.010260735118420128, "grad_norm": 1.6103997230529785, "learning_rate": 4.097807757166948e-05, "loss": 0.5223, "step": 730 }, { "epoch": 0.010401293133740951, "grad_norm": 1.2553802728652954, "learning_rate": 4.154019111860596e-05, "loss": 0.5115, "step": 740 }, { "epoch": 0.010541851149061775, "grad_norm": 1.5846428871154785, "learning_rate": 4.210230466554244e-05, "loss": 0.52, "step": 750 }, { "epoch": 0.010682409164382598, "grad_norm": 1.5420085191726685, "learning_rate": 4.2664418212478927e-05, "loss": 0.5173, "step": 760 }, { "epoch": 0.010822967179703423, "grad_norm": 1.3390594720840454, "learning_rate": 4.322653175941541e-05, "loss": 0.4842, "step": 770 }, { "epoch": 0.010963525195024247, "grad_norm": 1.249771237373352, "learning_rate": 4.378864530635188e-05, "loss": 0.5459, "step": 780 }, { "epoch": 0.01110408321034507, "grad_norm": 1.3949081897735596, "learning_rate": 4.435075885328837e-05, "loss": 0.5317, "step": 790 }, { "epoch": 0.011244641225665894, "grad_norm": 1.27957022190094, "learning_rate": 4.491287240022485e-05, "loss": 0.5237, "step": 800 }, { "epoch": 0.011385199240986717, "grad_norm": 1.7414036989212036, "learning_rate": 4.547498594716133e-05, "loss": 0.4906, "step": 810 }, { "epoch": 0.01152575725630754, "grad_norm": 1.4559071063995361, "learning_rate": 4.603709949409781e-05, "loss": 0.5103, "step": 820 }, { "epoch": 0.011666315271628364, "grad_norm": 1.3347688913345337, "learning_rate": 4.659921304103429e-05, "loss": 0.5145, "step": 830 }, { "epoch": 0.011806873286949189, "grad_norm": 1.288408875465393, "learning_rate": 4.716132658797077e-05, "loss": 0.5062, "step": 840 }, { "epoch": 0.011947431302270013, "grad_norm": 1.7022039890289307, "learning_rate": 4.7723440134907257e-05, "loss": 0.5169, "step": 850 }, { "epoch": 0.012087989317590836, "grad_norm": 1.4527498483657837, "learning_rate": 4.828555368184374e-05, "loss": 0.4784, "step": 860 }, { "epoch": 0.01222854733291166, "grad_norm": 1.1472194194793701, "learning_rate": 4.884766722878022e-05, "loss": 0.4804, "step": 870 }, { "epoch": 0.012369105348232483, "grad_norm": 1.1323292255401611, "learning_rate": 4.94097807757167e-05, "loss": 0.4463, "step": 880 }, { "epoch": 0.012509663363553306, "grad_norm": 1.5541162490844727, "learning_rate": 4.997189432265318e-05, "loss": 0.5003, "step": 890 }, { "epoch": 0.01265022137887413, "grad_norm": 1.435592532157898, "learning_rate": 5.053400786958966e-05, "loss": 0.5199, "step": 900 }, { "epoch": 0.012790779394194953, "grad_norm": 1.5248143672943115, "learning_rate": 5.1096121416526146e-05, "loss": 0.4724, "step": 910 }, { "epoch": 0.012931337409515778, "grad_norm": 1.5094494819641113, "learning_rate": 5.1658234963462626e-05, "loss": 0.4463, "step": 920 }, { "epoch": 0.013071895424836602, "grad_norm": 1.7850382328033447, "learning_rate": 5.2220348510399106e-05, "loss": 0.5005, "step": 930 }, { "epoch": 0.013212453440157425, "grad_norm": 1.5300614833831787, "learning_rate": 5.278246205733558e-05, "loss": 0.4515, "step": 940 }, { "epoch": 0.013353011455478249, "grad_norm": 1.3888568878173828, "learning_rate": 5.334457560427206e-05, "loss": 0.4704, "step": 950 }, { "epoch": 0.013493569470799072, "grad_norm": 1.5508018732070923, "learning_rate": 5.390668915120855e-05, "loss": 0.5048, "step": 960 }, { "epoch": 0.013634127486119895, "grad_norm": 1.7522460222244263, "learning_rate": 5.446880269814503e-05, "loss": 0.4869, "step": 970 }, { "epoch": 0.013774685501440719, "grad_norm": 1.1714389324188232, "learning_rate": 5.503091624508151e-05, "loss": 0.4906, "step": 980 }, { "epoch": 0.013915243516761544, "grad_norm": 1.3763507604599, "learning_rate": 5.5593029792017995e-05, "loss": 0.4758, "step": 990 }, { "epoch": 0.014055801532082367, "grad_norm": 1.4176082611083984, "learning_rate": 5.6155143338954476e-05, "loss": 0.5115, "step": 1000 }, { "epoch": 0.014055801532082367, "eval_chrf": 79.5330761969255, "eval_loss": 0.8439362049102783, "eval_runtime": 327.6679, "eval_samples_per_second": 0.305, "eval_steps_per_second": 0.012, "step": 1000 }, { "epoch": 0.014196359547403191, "grad_norm": 1.3907123804092407, "learning_rate": 5.6717256885890956e-05, "loss": 0.5051, "step": 1010 }, { "epoch": 0.014336917562724014, "grad_norm": 1.5536024570465088, "learning_rate": 5.7279370432827436e-05, "loss": 0.4984, "step": 1020 }, { "epoch": 0.014477475578044838, "grad_norm": 1.141889214515686, "learning_rate": 5.784148397976391e-05, "loss": 0.4467, "step": 1030 }, { "epoch": 0.014618033593365661, "grad_norm": 1.4408601522445679, "learning_rate": 5.840359752670039e-05, "loss": 0.4604, "step": 1040 }, { "epoch": 0.014758591608686485, "grad_norm": 1.8335459232330322, "learning_rate": 5.896571107363688e-05, "loss": 0.4617, "step": 1050 }, { "epoch": 0.01489914962400731, "grad_norm": 1.133646845817566, "learning_rate": 5.952782462057336e-05, "loss": 0.4644, "step": 1060 }, { "epoch": 0.015039707639328133, "grad_norm": 1.1528613567352295, "learning_rate": 6.008993816750984e-05, "loss": 0.4685, "step": 1070 }, { "epoch": 0.015180265654648957, "grad_norm": 1.8868284225463867, "learning_rate": 6.0652051714446325e-05, "loss": 0.4715, "step": 1080 }, { "epoch": 0.01532082366996978, "grad_norm": 1.3340322971343994, "learning_rate": 6.12141652613828e-05, "loss": 0.4428, "step": 1090 }, { "epoch": 0.015461381685290604, "grad_norm": 1.3002533912658691, "learning_rate": 6.177627880831929e-05, "loss": 0.4071, "step": 1100 }, { "epoch": 0.015601939700611427, "grad_norm": 1.4698681831359863, "learning_rate": 6.233839235525576e-05, "loss": 0.4395, "step": 1110 }, { "epoch": 0.015742497715932252, "grad_norm": 1.5502619743347168, "learning_rate": 6.290050590219225e-05, "loss": 0.4712, "step": 1120 }, { "epoch": 0.015883055731253076, "grad_norm": 1.1336027383804321, "learning_rate": 6.346261944912872e-05, "loss": 0.4367, "step": 1130 }, { "epoch": 0.0160236137465739, "grad_norm": 1.5595983266830444, "learning_rate": 6.402473299606521e-05, "loss": 0.4312, "step": 1140 }, { "epoch": 0.016164171761894722, "grad_norm": 1.5467026233673096, "learning_rate": 6.45868465430017e-05, "loss": 0.439, "step": 1150 }, { "epoch": 0.016304729777215546, "grad_norm": 1.471920371055603, "learning_rate": 6.514896008993817e-05, "loss": 0.4728, "step": 1160 }, { "epoch": 0.01644528779253637, "grad_norm": 1.8791810274124146, "learning_rate": 6.571107363687466e-05, "loss": 0.4584, "step": 1170 }, { "epoch": 0.016585845807857193, "grad_norm": 1.5976896286010742, "learning_rate": 6.627318718381114e-05, "loss": 0.463, "step": 1180 }, { "epoch": 0.016726403823178016, "grad_norm": 1.1342140436172485, "learning_rate": 6.683530073074762e-05, "loss": 0.4611, "step": 1190 }, { "epoch": 0.01686696183849884, "grad_norm": 1.3907729387283325, "learning_rate": 6.739741427768409e-05, "loss": 0.4471, "step": 1200 }, { "epoch": 0.017007519853819663, "grad_norm": 1.6492199897766113, "learning_rate": 6.795952782462058e-05, "loss": 0.4637, "step": 1210 }, { "epoch": 0.017148077869140486, "grad_norm": 1.5096964836120605, "learning_rate": 6.852164137155705e-05, "loss": 0.4498, "step": 1220 }, { "epoch": 0.01728863588446131, "grad_norm": 1.5577164888381958, "learning_rate": 6.908375491849354e-05, "loss": 0.4747, "step": 1230 }, { "epoch": 0.017429193899782137, "grad_norm": 1.2539633512496948, "learning_rate": 6.964586846543002e-05, "loss": 0.4391, "step": 1240 }, { "epoch": 0.01756975191510296, "grad_norm": 1.2233649492263794, "learning_rate": 7.02079820123665e-05, "loss": 0.4138, "step": 1250 }, { "epoch": 0.017710309930423784, "grad_norm": 1.5597176551818848, "learning_rate": 7.077009555930299e-05, "loss": 0.4144, "step": 1260 }, { "epoch": 0.017850867945744607, "grad_norm": 1.2913601398468018, "learning_rate": 7.133220910623947e-05, "loss": 0.4579, "step": 1270 }, { "epoch": 0.01799142596106543, "grad_norm": 1.2796281576156616, "learning_rate": 7.189432265317595e-05, "loss": 0.441, "step": 1280 }, { "epoch": 0.018131983976386254, "grad_norm": 1.7188844680786133, "learning_rate": 7.245643620011242e-05, "loss": 0.4527, "step": 1290 }, { "epoch": 0.018272541991707077, "grad_norm": 1.4409807920455933, "learning_rate": 7.30185497470489e-05, "loss": 0.4398, "step": 1300 }, { "epoch": 0.0184131000070279, "grad_norm": 1.4035305976867676, "learning_rate": 7.358066329398538e-05, "loss": 0.4337, "step": 1310 }, { "epoch": 0.018553658022348724, "grad_norm": 1.3769419193267822, "learning_rate": 7.414277684092187e-05, "loss": 0.4226, "step": 1320 }, { "epoch": 0.018694216037669548, "grad_norm": 1.3842592239379883, "learning_rate": 7.470489038785835e-05, "loss": 0.4376, "step": 1330 }, { "epoch": 0.01883477405299037, "grad_norm": 1.2802128791809082, "learning_rate": 7.526700393479483e-05, "loss": 0.4434, "step": 1340 }, { "epoch": 0.018975332068311195, "grad_norm": 1.3373446464538574, "learning_rate": 7.582911748173132e-05, "loss": 0.4339, "step": 1350 }, { "epoch": 0.019115890083632018, "grad_norm": 1.5376042127609253, "learning_rate": 7.63912310286678e-05, "loss": 0.4371, "step": 1360 }, { "epoch": 0.01925644809895284, "grad_norm": 1.5754722356796265, "learning_rate": 7.695334457560428e-05, "loss": 0.4052, "step": 1370 }, { "epoch": 0.019397006114273665, "grad_norm": 1.1984570026397705, "learning_rate": 7.751545812254075e-05, "loss": 0.433, "step": 1380 }, { "epoch": 0.01953756412959449, "grad_norm": 1.137647271156311, "learning_rate": 7.807757166947724e-05, "loss": 0.4159, "step": 1390 }, { "epoch": 0.019678122144915315, "grad_norm": 1.257773995399475, "learning_rate": 7.863968521641371e-05, "loss": 0.4287, "step": 1400 }, { "epoch": 0.01981868016023614, "grad_norm": 1.1570932865142822, "learning_rate": 7.92017987633502e-05, "loss": 0.4532, "step": 1410 }, { "epoch": 0.019959238175556962, "grad_norm": 1.426266074180603, "learning_rate": 7.976391231028668e-05, "loss": 0.4345, "step": 1420 }, { "epoch": 0.020099796190877785, "grad_norm": 1.1181676387786865, "learning_rate": 8.032602585722316e-05, "loss": 0.4368, "step": 1430 }, { "epoch": 0.02024035420619861, "grad_norm": 1.487839937210083, "learning_rate": 8.088813940415965e-05, "loss": 0.4388, "step": 1440 }, { "epoch": 0.020380912221519432, "grad_norm": 1.7598848342895508, "learning_rate": 8.145025295109613e-05, "loss": 0.437, "step": 1450 }, { "epoch": 0.020521470236840256, "grad_norm": 2.225933313369751, "learning_rate": 8.20123664980326e-05, "loss": 0.4562, "step": 1460 }, { "epoch": 0.02066202825216108, "grad_norm": 1.6490896940231323, "learning_rate": 8.257448004496908e-05, "loss": 0.4389, "step": 1470 }, { "epoch": 0.020802586267481903, "grad_norm": 1.0282951593399048, "learning_rate": 8.313659359190557e-05, "loss": 0.4185, "step": 1480 }, { "epoch": 0.020943144282802726, "grad_norm": 1.0933427810668945, "learning_rate": 8.369870713884205e-05, "loss": 0.3744, "step": 1490 }, { "epoch": 0.02108370229812355, "grad_norm": 1.3399862051010132, "learning_rate": 8.426082068577853e-05, "loss": 0.4078, "step": 1500 }, { "epoch": 0.021224260313444373, "grad_norm": 0.9295111894607544, "learning_rate": 8.482293423271501e-05, "loss": 0.3846, "step": 1510 }, { "epoch": 0.021364818328765196, "grad_norm": 1.304292917251587, "learning_rate": 8.538504777965149e-05, "loss": 0.3944, "step": 1520 }, { "epoch": 0.021505376344086023, "grad_norm": 1.3154966831207275, "learning_rate": 8.594716132658798e-05, "loss": 0.4197, "step": 1530 }, { "epoch": 0.021645934359406847, "grad_norm": 1.1910078525543213, "learning_rate": 8.650927487352446e-05, "loss": 0.4154, "step": 1540 }, { "epoch": 0.02178649237472767, "grad_norm": 1.4511432647705078, "learning_rate": 8.707138842046094e-05, "loss": 0.3892, "step": 1550 }, { "epoch": 0.021927050390048494, "grad_norm": 1.166874647140503, "learning_rate": 8.763350196739741e-05, "loss": 0.4098, "step": 1560 }, { "epoch": 0.022067608405369317, "grad_norm": 1.02645742893219, "learning_rate": 8.81956155143339e-05, "loss": 0.4274, "step": 1570 }, { "epoch": 0.02220816642069014, "grad_norm": 1.072724461555481, "learning_rate": 8.875772906127038e-05, "loss": 0.3831, "step": 1580 }, { "epoch": 0.022348724436010964, "grad_norm": 1.0810933113098145, "learning_rate": 8.931984260820686e-05, "loss": 0.4282, "step": 1590 }, { "epoch": 0.022489282451331787, "grad_norm": 1.1835025548934937, "learning_rate": 8.988195615514334e-05, "loss": 0.3466, "step": 1600 }, { "epoch": 0.02262984046665261, "grad_norm": 1.2185114622116089, "learning_rate": 9.044406970207983e-05, "loss": 0.3761, "step": 1610 }, { "epoch": 0.022770398481973434, "grad_norm": 1.1819682121276855, "learning_rate": 9.10061832490163e-05, "loss": 0.3896, "step": 1620 }, { "epoch": 0.022910956497294258, "grad_norm": 1.5933457612991333, "learning_rate": 9.156829679595279e-05, "loss": 0.416, "step": 1630 }, { "epoch": 0.02305151451261508, "grad_norm": 1.5445760488510132, "learning_rate": 9.213041034288927e-05, "loss": 0.4051, "step": 1640 }, { "epoch": 0.023192072527935904, "grad_norm": 1.101395606994629, "learning_rate": 9.269252388982574e-05, "loss": 0.4149, "step": 1650 }, { "epoch": 0.023332630543256728, "grad_norm": 1.1006306409835815, "learning_rate": 9.325463743676223e-05, "loss": 0.4041, "step": 1660 }, { "epoch": 0.02347318855857755, "grad_norm": 1.545573115348816, "learning_rate": 9.381675098369871e-05, "loss": 0.3858, "step": 1670 }, { "epoch": 0.023613746573898378, "grad_norm": 1.2185055017471313, "learning_rate": 9.437886453063519e-05, "loss": 0.4642, "step": 1680 }, { "epoch": 0.0237543045892192, "grad_norm": 1.1222918033599854, "learning_rate": 9.494097807757167e-05, "loss": 0.3786, "step": 1690 }, { "epoch": 0.023894862604540025, "grad_norm": 1.0804522037506104, "learning_rate": 9.550309162450816e-05, "loss": 0.4327, "step": 1700 }, { "epoch": 0.02403542061986085, "grad_norm": 1.225407361984253, "learning_rate": 9.606520517144464e-05, "loss": 0.3724, "step": 1710 }, { "epoch": 0.024175978635181672, "grad_norm": 1.0895999670028687, "learning_rate": 9.662731871838112e-05, "loss": 0.3786, "step": 1720 }, { "epoch": 0.024316536650502495, "grad_norm": 1.0843249559402466, "learning_rate": 9.718943226531761e-05, "loss": 0.3818, "step": 1730 }, { "epoch": 0.02445709466582332, "grad_norm": 1.1851115226745605, "learning_rate": 9.775154581225407e-05, "loss": 0.4002, "step": 1740 }, { "epoch": 0.024597652681144142, "grad_norm": 1.2212036848068237, "learning_rate": 9.831365935919056e-05, "loss": 0.4062, "step": 1750 }, { "epoch": 0.024738210696464966, "grad_norm": 1.0576879978179932, "learning_rate": 9.887577290612704e-05, "loss": 0.3912, "step": 1760 }, { "epoch": 0.02487876871178579, "grad_norm": 0.8810908198356628, "learning_rate": 9.943788645306352e-05, "loss": 0.384, "step": 1770 }, { "epoch": 0.025019326727106612, "grad_norm": 1.2164630889892578, "learning_rate": 0.0001, "loss": 0.3911, "step": 1780 }, { "epoch": 0.025159884742427436, "grad_norm": 1.2411495447158813, "learning_rate": 0.00010056211354693648, "loss": 0.3891, "step": 1790 }, { "epoch": 0.02530044275774826, "grad_norm": 1.175451397895813, "learning_rate": 0.00010112422709387297, "loss": 0.3795, "step": 1800 }, { "epoch": 0.025441000773069083, "grad_norm": 0.9957692623138428, "learning_rate": 0.00010168634064080944, "loss": 0.3749, "step": 1810 }, { "epoch": 0.025581558788389906, "grad_norm": 0.9644479751586914, "learning_rate": 0.00010224845418774594, "loss": 0.3787, "step": 1820 }, { "epoch": 0.025722116803710733, "grad_norm": 0.9513217210769653, "learning_rate": 0.0001028105677346824, "loss": 0.4283, "step": 1830 }, { "epoch": 0.025862674819031557, "grad_norm": 0.9092191457748413, "learning_rate": 0.0001033726812816189, "loss": 0.3784, "step": 1840 }, { "epoch": 0.02600323283435238, "grad_norm": 1.4808021783828735, "learning_rate": 0.00010393479482855537, "loss": 0.4201, "step": 1850 }, { "epoch": 0.026143790849673203, "grad_norm": 1.2115821838378906, "learning_rate": 0.00010449690837549186, "loss": 0.4046, "step": 1860 }, { "epoch": 0.026284348864994027, "grad_norm": 1.122286081314087, "learning_rate": 0.00010505902192242833, "loss": 0.384, "step": 1870 }, { "epoch": 0.02642490688031485, "grad_norm": 1.0566567182540894, "learning_rate": 0.00010562113546936481, "loss": 0.3915, "step": 1880 }, { "epoch": 0.026565464895635674, "grad_norm": 0.9356314539909363, "learning_rate": 0.0001061832490163013, "loss": 0.3931, "step": 1890 }, { "epoch": 0.026706022910956497, "grad_norm": 0.906581461429596, "learning_rate": 0.00010674536256323777, "loss": 0.3777, "step": 1900 }, { "epoch": 0.02684658092627732, "grad_norm": 1.248866319656372, "learning_rate": 0.00010730747611017427, "loss": 0.3682, "step": 1910 }, { "epoch": 0.026987138941598144, "grad_norm": 1.0143498182296753, "learning_rate": 0.00010786958965711074, "loss": 0.3706, "step": 1920 }, { "epoch": 0.027127696956918967, "grad_norm": 1.083511471748352, "learning_rate": 0.00010843170320404723, "loss": 0.3921, "step": 1930 }, { "epoch": 0.02726825497223979, "grad_norm": 1.1626296043395996, "learning_rate": 0.0001089938167509837, "loss": 0.4035, "step": 1940 }, { "epoch": 0.027408812987560614, "grad_norm": 1.2879900932312012, "learning_rate": 0.00010955593029792019, "loss": 0.4166, "step": 1950 }, { "epoch": 0.027549371002881438, "grad_norm": 1.0682278871536255, "learning_rate": 0.00011011804384485666, "loss": 0.3695, "step": 1960 }, { "epoch": 0.027689929018202265, "grad_norm": 0.9039905667304993, "learning_rate": 0.00011068015739179314, "loss": 0.3632, "step": 1970 }, { "epoch": 0.027830487033523088, "grad_norm": 1.5052381753921509, "learning_rate": 0.00011124227093872963, "loss": 0.3791, "step": 1980 }, { "epoch": 0.02797104504884391, "grad_norm": 0.8607678413391113, "learning_rate": 0.0001118043844856661, "loss": 0.363, "step": 1990 }, { "epoch": 0.028111603064164735, "grad_norm": 1.0287585258483887, "learning_rate": 0.0001123664980326026, "loss": 0.3258, "step": 2000 }, { "epoch": 0.028111603064164735, "eval_chrf": 79.95813802049287, "eval_loss": 0.6827706098556519, "eval_runtime": 328.1457, "eval_samples_per_second": 0.305, "eval_steps_per_second": 0.012, "step": 2000 }, { "epoch": 0.02825216107948556, "grad_norm": 0.9364410638809204, "learning_rate": 0.00011292861157953907, "loss": 0.3226, "step": 2010 }, { "epoch": 0.028392719094806382, "grad_norm": 0.9163166284561157, "learning_rate": 0.00011349072512647556, "loss": 0.3789, "step": 2020 }, { "epoch": 0.028533277110127205, "grad_norm": 0.9576098322868347, "learning_rate": 0.00011405283867341203, "loss": 0.385, "step": 2030 }, { "epoch": 0.02867383512544803, "grad_norm": 1.0045305490493774, "learning_rate": 0.00011461495222034852, "loss": 0.3638, "step": 2040 }, { "epoch": 0.028814393140768852, "grad_norm": 0.879092276096344, "learning_rate": 0.000115177065767285, "loss": 0.3359, "step": 2050 }, { "epoch": 0.028954951156089676, "grad_norm": 1.1309394836425781, "learning_rate": 0.00011573917931422147, "loss": 0.3656, "step": 2060 }, { "epoch": 0.0290955091714105, "grad_norm": 0.8896138668060303, "learning_rate": 0.00011630129286115796, "loss": 0.3606, "step": 2070 }, { "epoch": 0.029236067186731322, "grad_norm": 0.968147873878479, "learning_rate": 0.00011686340640809443, "loss": 0.3619, "step": 2080 }, { "epoch": 0.029376625202052146, "grad_norm": 0.9790638089179993, "learning_rate": 0.00011742551995503093, "loss": 0.3823, "step": 2090 }, { "epoch": 0.02951718321737297, "grad_norm": 0.9956974983215332, "learning_rate": 0.0001179876335019674, "loss": 0.35, "step": 2100 }, { "epoch": 0.029657741232693793, "grad_norm": 0.8596989512443542, "learning_rate": 0.00011854974704890389, "loss": 0.3352, "step": 2110 }, { "epoch": 0.02979829924801462, "grad_norm": 0.8734012842178345, "learning_rate": 0.00011911186059584036, "loss": 0.3636, "step": 2120 }, { "epoch": 0.029938857263335443, "grad_norm": 0.9950189590454102, "learning_rate": 0.00011967397414277685, "loss": 0.3519, "step": 2130 }, { "epoch": 0.030079415278656266, "grad_norm": 0.8868091106414795, "learning_rate": 0.00012023608768971332, "loss": 0.35, "step": 2140 }, { "epoch": 0.03021997329397709, "grad_norm": 1.1114511489868164, "learning_rate": 0.0001207982012366498, "loss": 0.3787, "step": 2150 }, { "epoch": 0.030360531309297913, "grad_norm": 0.9478281736373901, "learning_rate": 0.0001213603147835863, "loss": 0.3804, "step": 2160 }, { "epoch": 0.030501089324618737, "grad_norm": 1.0067881345748901, "learning_rate": 0.00012192242833052276, "loss": 0.3483, "step": 2170 }, { "epoch": 0.03064164733993956, "grad_norm": 0.8068206906318665, "learning_rate": 0.00012248454187745926, "loss": 0.346, "step": 2180 }, { "epoch": 0.030782205355260384, "grad_norm": 1.019374966621399, "learning_rate": 0.00012304665542439572, "loss": 0.3719, "step": 2190 }, { "epoch": 0.030922763370581207, "grad_norm": 0.9281585812568665, "learning_rate": 0.00012360876897133223, "loss": 0.3848, "step": 2200 }, { "epoch": 0.03106332138590203, "grad_norm": 1.1539422273635864, "learning_rate": 0.0001241708825182687, "loss": 0.3375, "step": 2210 }, { "epoch": 0.031203879401222854, "grad_norm": 0.738135814666748, "learning_rate": 0.00012473299606520518, "loss": 0.3362, "step": 2220 }, { "epoch": 0.03134443741654368, "grad_norm": 0.9841747879981995, "learning_rate": 0.00012529510961214167, "loss": 0.345, "step": 2230 }, { "epoch": 0.031484995431864504, "grad_norm": 1.066952109336853, "learning_rate": 0.00012585722315907813, "loss": 0.3728, "step": 2240 }, { "epoch": 0.031625553447185324, "grad_norm": 0.8559726476669312, "learning_rate": 0.00012641933670601462, "loss": 0.3485, "step": 2250 }, { "epoch": 0.03176611146250615, "grad_norm": 0.8930100798606873, "learning_rate": 0.0001269814502529511, "loss": 0.3533, "step": 2260 }, { "epoch": 0.03190666947782697, "grad_norm": 0.862795889377594, "learning_rate": 0.0001275435637998876, "loss": 0.3372, "step": 2270 }, { "epoch": 0.0320472274931478, "grad_norm": 1.2475395202636719, "learning_rate": 0.00012810567734682405, "loss": 0.334, "step": 2280 }, { "epoch": 0.03218778550846862, "grad_norm": 1.1247025728225708, "learning_rate": 0.00012866779089376056, "loss": 0.3321, "step": 2290 }, { "epoch": 0.032328343523789445, "grad_norm": 0.8312082290649414, "learning_rate": 0.00012922990444069702, "loss": 0.3547, "step": 2300 }, { "epoch": 0.032468901539110265, "grad_norm": 1.132459044456482, "learning_rate": 0.0001297920179876335, "loss": 0.346, "step": 2310 }, { "epoch": 0.03260945955443109, "grad_norm": 1.0509437322616577, "learning_rate": 0.00013035413153457, "loss": 0.353, "step": 2320 }, { "epoch": 0.03275001756975191, "grad_norm": 0.6921985745429993, "learning_rate": 0.00013091624508150646, "loss": 0.3266, "step": 2330 }, { "epoch": 0.03289057558507274, "grad_norm": 1.0222564935684204, "learning_rate": 0.00013147835862844295, "loss": 0.3283, "step": 2340 }, { "epoch": 0.033031133600393565, "grad_norm": 0.9538044929504395, "learning_rate": 0.00013204047217537943, "loss": 0.3507, "step": 2350 }, { "epoch": 0.033171691615714385, "grad_norm": 0.9025123715400696, "learning_rate": 0.00013260258572231592, "loss": 0.3235, "step": 2360 }, { "epoch": 0.03331224963103521, "grad_norm": 0.8488820195198059, "learning_rate": 0.00013316469926925238, "loss": 0.3101, "step": 2370 }, { "epoch": 0.03345280764635603, "grad_norm": 0.8986126780509949, "learning_rate": 0.0001337268128161889, "loss": 0.3591, "step": 2380 }, { "epoch": 0.03359336566167686, "grad_norm": 0.9442368149757385, "learning_rate": 0.00013428892636312535, "loss": 0.3341, "step": 2390 }, { "epoch": 0.03373392367699768, "grad_norm": 0.8823990225791931, "learning_rate": 0.00013485103991006184, "loss": 0.3431, "step": 2400 }, { "epoch": 0.033874481692318506, "grad_norm": 0.9334458112716675, "learning_rate": 0.00013541315345699833, "loss": 0.3524, "step": 2410 }, { "epoch": 0.034015039707639326, "grad_norm": 1.0057344436645508, "learning_rate": 0.0001359752670039348, "loss": 0.4021, "step": 2420 }, { "epoch": 0.03415559772296015, "grad_norm": 0.6858983635902405, "learning_rate": 0.00013653738055087128, "loss": 0.3678, "step": 2430 }, { "epoch": 0.03429615573828097, "grad_norm": 1.089292287826538, "learning_rate": 0.00013709949409780776, "loss": 0.3315, "step": 2440 }, { "epoch": 0.0344367137536018, "grad_norm": 0.9135834574699402, "learning_rate": 0.00013766160764474425, "loss": 0.3675, "step": 2450 }, { "epoch": 0.03457727176892262, "grad_norm": 0.9899437427520752, "learning_rate": 0.0001382237211916807, "loss": 0.3682, "step": 2460 }, { "epoch": 0.03471782978424345, "grad_norm": 1.0549930334091187, "learning_rate": 0.00013878583473861722, "loss": 0.3739, "step": 2470 }, { "epoch": 0.034858387799564274, "grad_norm": 0.9098493456840515, "learning_rate": 0.00013934794828555368, "loss": 0.3201, "step": 2480 }, { "epoch": 0.034998945814885093, "grad_norm": 0.8151450157165527, "learning_rate": 0.00013991006183249017, "loss": 0.3572, "step": 2490 }, { "epoch": 0.03513950383020592, "grad_norm": 0.8182278871536255, "learning_rate": 0.00014047217537942666, "loss": 0.347, "step": 2500 }, { "epoch": 0.03528006184552674, "grad_norm": 0.8542600870132446, "learning_rate": 0.00014103428892636312, "loss": 0.3541, "step": 2510 }, { "epoch": 0.03542061986084757, "grad_norm": 0.7973415851593018, "learning_rate": 0.0001415964024732996, "loss": 0.3006, "step": 2520 }, { "epoch": 0.03556117787616839, "grad_norm": 0.8098814487457275, "learning_rate": 0.0001421585160202361, "loss": 0.3527, "step": 2530 }, { "epoch": 0.035701735891489214, "grad_norm": 1.0114139318466187, "learning_rate": 0.00014272062956717258, "loss": 0.3239, "step": 2540 }, { "epoch": 0.035842293906810034, "grad_norm": 0.9623722434043884, "learning_rate": 0.00014328274311410904, "loss": 0.3452, "step": 2550 }, { "epoch": 0.03598285192213086, "grad_norm": 0.7429597973823547, "learning_rate": 0.00014384485666104555, "loss": 0.3391, "step": 2560 }, { "epoch": 0.03612340993745168, "grad_norm": 0.9132286906242371, "learning_rate": 0.00014440697020798201, "loss": 0.3569, "step": 2570 }, { "epoch": 0.03626396795277251, "grad_norm": 0.8367570638656616, "learning_rate": 0.0001449690837549185, "loss": 0.3389, "step": 2580 }, { "epoch": 0.03640452596809333, "grad_norm": 0.6398904919624329, "learning_rate": 0.000145531197301855, "loss": 0.3537, "step": 2590 }, { "epoch": 0.036545083983414155, "grad_norm": 0.720858097076416, "learning_rate": 0.00014609331084879145, "loss": 0.3445, "step": 2600 }, { "epoch": 0.036685641998734975, "grad_norm": 0.7617928385734558, "learning_rate": 0.00014665542439572794, "loss": 0.3368, "step": 2610 }, { "epoch": 0.0368262000140558, "grad_norm": 0.7096990942955017, "learning_rate": 0.00014721753794266442, "loss": 0.3279, "step": 2620 }, { "epoch": 0.03696675802937663, "grad_norm": 0.8983321189880371, "learning_rate": 0.0001477796514896009, "loss": 0.3285, "step": 2630 }, { "epoch": 0.03710731604469745, "grad_norm": 0.7671912908554077, "learning_rate": 0.00014834176503653737, "loss": 0.3237, "step": 2640 }, { "epoch": 0.037247874060018275, "grad_norm": 0.9126039147377014, "learning_rate": 0.00014890387858347388, "loss": 0.3819, "step": 2650 }, { "epoch": 0.037388432075339095, "grad_norm": 0.8447277545928955, "learning_rate": 0.00014946599213041034, "loss": 0.3585, "step": 2660 }, { "epoch": 0.03752899009065992, "grad_norm": 0.8905502557754517, "learning_rate": 0.00015002810567734683, "loss": 0.3018, "step": 2670 }, { "epoch": 0.03766954810598074, "grad_norm": 0.8705244064331055, "learning_rate": 0.00015059021922428332, "loss": 0.3162, "step": 2680 }, { "epoch": 0.03781010612130157, "grad_norm": 0.8502803444862366, "learning_rate": 0.00015115233277121978, "loss": 0.3344, "step": 2690 }, { "epoch": 0.03795066413662239, "grad_norm": 0.7822072505950928, "learning_rate": 0.00015171444631815627, "loss": 0.3248, "step": 2700 }, { "epoch": 0.038091222151943216, "grad_norm": 0.7527584433555603, "learning_rate": 0.00015227655986509275, "loss": 0.3294, "step": 2710 }, { "epoch": 0.038231780167264036, "grad_norm": 1.0729758739471436, "learning_rate": 0.00015283867341202924, "loss": 0.3372, "step": 2720 }, { "epoch": 0.03837233818258486, "grad_norm": 0.8660837411880493, "learning_rate": 0.00015340078695896573, "loss": 0.3804, "step": 2730 }, { "epoch": 0.03851289619790568, "grad_norm": 1.1277107000350952, "learning_rate": 0.00015396290050590221, "loss": 0.3386, "step": 2740 }, { "epoch": 0.03865345421322651, "grad_norm": 0.769478976726532, "learning_rate": 0.00015452501405283867, "loss": 0.303, "step": 2750 }, { "epoch": 0.03879401222854733, "grad_norm": 0.8349766135215759, "learning_rate": 0.00015508712759977516, "loss": 0.3209, "step": 2760 }, { "epoch": 0.038934570243868157, "grad_norm": 1.119290828704834, "learning_rate": 0.00015564924114671165, "loss": 0.3496, "step": 2770 }, { "epoch": 0.03907512825918898, "grad_norm": 0.9543491005897522, "learning_rate": 0.0001562113546936481, "loss": 0.3111, "step": 2780 }, { "epoch": 0.0392156862745098, "grad_norm": 0.7911608219146729, "learning_rate": 0.0001567734682405846, "loss": 0.3173, "step": 2790 }, { "epoch": 0.03935624428983063, "grad_norm": 0.8939515352249146, "learning_rate": 0.00015733558178752108, "loss": 0.3213, "step": 2800 }, { "epoch": 0.03949680230515145, "grad_norm": 0.6804023385047913, "learning_rate": 0.00015789769533445757, "loss": 0.2831, "step": 2810 }, { "epoch": 0.03963736032047228, "grad_norm": 0.894201934337616, "learning_rate": 0.00015845980888139406, "loss": 0.3261, "step": 2820 }, { "epoch": 0.0397779183357931, "grad_norm": 0.6332712173461914, "learning_rate": 0.00015902192242833054, "loss": 0.3138, "step": 2830 }, { "epoch": 0.039918476351113924, "grad_norm": 0.8150140643119812, "learning_rate": 0.000159584035975267, "loss": 0.3085, "step": 2840 }, { "epoch": 0.040059034366434744, "grad_norm": 1.022434115409851, "learning_rate": 0.0001601461495222035, "loss": 0.3481, "step": 2850 }, { "epoch": 0.04019959238175557, "grad_norm": 0.8622433543205261, "learning_rate": 0.00016070826306913998, "loss": 0.2984, "step": 2860 }, { "epoch": 0.04034015039707639, "grad_norm": 0.7442618608474731, "learning_rate": 0.00016127037661607644, "loss": 0.3327, "step": 2870 }, { "epoch": 0.04048070841239722, "grad_norm": 0.7326914668083191, "learning_rate": 0.00016183249016301295, "loss": 0.3162, "step": 2880 }, { "epoch": 0.04062126642771804, "grad_norm": 1.0088212490081787, "learning_rate": 0.0001623946037099494, "loss": 0.3155, "step": 2890 }, { "epoch": 0.040761824443038865, "grad_norm": 0.6482020616531372, "learning_rate": 0.0001629567172568859, "loss": 0.3216, "step": 2900 }, { "epoch": 0.040902382458359685, "grad_norm": 0.5968291759490967, "learning_rate": 0.0001635188308038224, "loss": 0.2942, "step": 2910 }, { "epoch": 0.04104294047368051, "grad_norm": 0.715417206287384, "learning_rate": 0.00016408094435075887, "loss": 0.3336, "step": 2920 }, { "epoch": 0.04118349848900134, "grad_norm": 0.7715390920639038, "learning_rate": 0.00016464305789769533, "loss": 0.3328, "step": 2930 }, { "epoch": 0.04132405650432216, "grad_norm": 0.8046168684959412, "learning_rate": 0.00016520517144463182, "loss": 0.3092, "step": 2940 }, { "epoch": 0.041464614519642985, "grad_norm": 0.8180148005485535, "learning_rate": 0.0001657672849915683, "loss": 0.3111, "step": 2950 }, { "epoch": 0.041605172534963805, "grad_norm": 0.664538562297821, "learning_rate": 0.00016632939853850477, "loss": 0.3258, "step": 2960 }, { "epoch": 0.04174573055028463, "grad_norm": 0.8169111013412476, "learning_rate": 0.00016689151208544128, "loss": 0.3313, "step": 2970 }, { "epoch": 0.04188628856560545, "grad_norm": 0.7573257684707642, "learning_rate": 0.00016745362563237774, "loss": 0.34, "step": 2980 }, { "epoch": 0.04202684658092628, "grad_norm": 0.9846084117889404, "learning_rate": 0.00016801573917931423, "loss": 0.3267, "step": 2990 }, { "epoch": 0.0421674045962471, "grad_norm": 0.5704976916313171, "learning_rate": 0.00016857785272625072, "loss": 0.3344, "step": 3000 }, { "epoch": 0.0421674045962471, "eval_chrf": 72.83505853609785, "eval_loss": 0.5984305739402771, "eval_runtime": 340.9131, "eval_samples_per_second": 0.293, "eval_steps_per_second": 0.012, "step": 3000 }, { "epoch": 0.042307962611567926, "grad_norm": 0.7193232774734497, "learning_rate": 0.0001691399662731872, "loss": 0.3326, "step": 3010 }, { "epoch": 0.042448520626888746, "grad_norm": 0.6804803013801575, "learning_rate": 0.00016970207982012366, "loss": 0.2982, "step": 3020 }, { "epoch": 0.04258907864220957, "grad_norm": 0.8106788396835327, "learning_rate": 0.00017026419336706015, "loss": 0.3429, "step": 3030 }, { "epoch": 0.04272963665753039, "grad_norm": 0.6692339777946472, "learning_rate": 0.00017082630691399664, "loss": 0.289, "step": 3040 }, { "epoch": 0.04287019467285122, "grad_norm": 0.61186683177948, "learning_rate": 0.0001713884204609331, "loss": 0.3138, "step": 3050 }, { "epoch": 0.043010752688172046, "grad_norm": 0.8928340077400208, "learning_rate": 0.0001719505340078696, "loss": 0.2933, "step": 3060 }, { "epoch": 0.043151310703492866, "grad_norm": 0.6500282287597656, "learning_rate": 0.00017251264755480607, "loss": 0.3044, "step": 3070 }, { "epoch": 0.04329186871881369, "grad_norm": 0.6658849716186523, "learning_rate": 0.00017307476110174256, "loss": 0.2989, "step": 3080 }, { "epoch": 0.04343242673413451, "grad_norm": 0.5688410997390747, "learning_rate": 0.00017363687464867905, "loss": 0.3307, "step": 3090 }, { "epoch": 0.04357298474945534, "grad_norm": 0.7900064587593079, "learning_rate": 0.00017419898819561553, "loss": 0.3258, "step": 3100 }, { "epoch": 0.04371354276477616, "grad_norm": 0.5709797739982605, "learning_rate": 0.000174761101742552, "loss": 0.3107, "step": 3110 }, { "epoch": 0.04385410078009699, "grad_norm": 0.7320407629013062, "learning_rate": 0.0001753232152894885, "loss": 0.2967, "step": 3120 }, { "epoch": 0.04399465879541781, "grad_norm": 0.6004393696784973, "learning_rate": 0.00017588532883642497, "loss": 0.2797, "step": 3130 }, { "epoch": 0.044135216810738634, "grad_norm": 0.6750929951667786, "learning_rate": 0.00017644744238336143, "loss": 0.2963, "step": 3140 }, { "epoch": 0.044275774826059454, "grad_norm": 0.8092765212059021, "learning_rate": 0.00017700955593029794, "loss": 0.3199, "step": 3150 }, { "epoch": 0.04441633284138028, "grad_norm": 0.712920069694519, "learning_rate": 0.0001775716694772344, "loss": 0.3357, "step": 3160 }, { "epoch": 0.0445568908567011, "grad_norm": 0.7346781492233276, "learning_rate": 0.0001781337830241709, "loss": 0.2914, "step": 3170 }, { "epoch": 0.04469744887202193, "grad_norm": 0.8555288314819336, "learning_rate": 0.00017869589657110738, "loss": 0.3262, "step": 3180 }, { "epoch": 0.04483800688734275, "grad_norm": 0.6750013828277588, "learning_rate": 0.00017925801011804386, "loss": 0.276, "step": 3190 }, { "epoch": 0.044978564902663574, "grad_norm": 0.7254294157028198, "learning_rate": 0.00017982012366498032, "loss": 0.3026, "step": 3200 }, { "epoch": 0.0451191229179844, "grad_norm": 0.7228041887283325, "learning_rate": 0.00018038223721191684, "loss": 0.3006, "step": 3210 }, { "epoch": 0.04525968093330522, "grad_norm": 0.7393637299537659, "learning_rate": 0.0001809443507588533, "loss": 0.2931, "step": 3220 }, { "epoch": 0.04540023894862605, "grad_norm": 0.6972795724868774, "learning_rate": 0.00018150646430578976, "loss": 0.3016, "step": 3230 }, { "epoch": 0.04554079696394687, "grad_norm": 0.6396249532699585, "learning_rate": 0.00018206857785272627, "loss": 0.3473, "step": 3240 }, { "epoch": 0.045681354979267695, "grad_norm": 0.6952264308929443, "learning_rate": 0.00018263069139966273, "loss": 0.2949, "step": 3250 }, { "epoch": 0.045821912994588515, "grad_norm": 0.701374888420105, "learning_rate": 0.00018319280494659922, "loss": 0.2838, "step": 3260 }, { "epoch": 0.04596247100990934, "grad_norm": 0.7950251698493958, "learning_rate": 0.0001837549184935357, "loss": 0.3195, "step": 3270 }, { "epoch": 0.04610302902523016, "grad_norm": 0.6929548978805542, "learning_rate": 0.0001843170320404722, "loss": 0.317, "step": 3280 }, { "epoch": 0.04624358704055099, "grad_norm": 0.8195067644119263, "learning_rate": 0.00018487914558740865, "loss": 0.2885, "step": 3290 }, { "epoch": 0.04638414505587181, "grad_norm": 0.7615912556648254, "learning_rate": 0.00018544125913434514, "loss": 0.3118, "step": 3300 }, { "epoch": 0.046524703071192636, "grad_norm": 0.7470051050186157, "learning_rate": 0.00018600337268128163, "loss": 0.3003, "step": 3310 }, { "epoch": 0.046665261086513456, "grad_norm": 0.6888974905014038, "learning_rate": 0.0001865654862282181, "loss": 0.3051, "step": 3320 }, { "epoch": 0.04680581910183428, "grad_norm": 0.7712236046791077, "learning_rate": 0.0001871275997751546, "loss": 0.3053, "step": 3330 }, { "epoch": 0.0469463771171551, "grad_norm": 0.7824040651321411, "learning_rate": 0.00018768971332209106, "loss": 0.2838, "step": 3340 }, { "epoch": 0.04708693513247593, "grad_norm": 0.7765060663223267, "learning_rate": 0.00018825182686902755, "loss": 0.2985, "step": 3350 }, { "epoch": 0.047227493147796756, "grad_norm": 0.6892460584640503, "learning_rate": 0.00018881394041596404, "loss": 0.3083, "step": 3360 }, { "epoch": 0.047368051163117576, "grad_norm": 0.8150136470794678, "learning_rate": 0.00018937605396290052, "loss": 0.2859, "step": 3370 }, { "epoch": 0.0475086091784384, "grad_norm": 0.743232250213623, "learning_rate": 0.00018993816750983698, "loss": 0.2857, "step": 3380 }, { "epoch": 0.04764916719375922, "grad_norm": 0.7354878783226013, "learning_rate": 0.00019050028105677347, "loss": 0.3026, "step": 3390 }, { "epoch": 0.04778972520908005, "grad_norm": 0.5742655396461487, "learning_rate": 0.00019106239460370996, "loss": 0.3129, "step": 3400 }, { "epoch": 0.04793028322440087, "grad_norm": 0.6180288791656494, "learning_rate": 0.00019162450815064642, "loss": 0.2948, "step": 3410 }, { "epoch": 0.0480708412397217, "grad_norm": 0.6718013882637024, "learning_rate": 0.00019218662169758293, "loss": 0.2985, "step": 3420 }, { "epoch": 0.04821139925504252, "grad_norm": 0.5934156179428101, "learning_rate": 0.0001927487352445194, "loss": 0.2858, "step": 3430 }, { "epoch": 0.048351957270363344, "grad_norm": 0.5521450638771057, "learning_rate": 0.00019331084879145588, "loss": 0.3007, "step": 3440 }, { "epoch": 0.048492515285684164, "grad_norm": 0.678540825843811, "learning_rate": 0.00019387296233839237, "loss": 0.2795, "step": 3450 }, { "epoch": 0.04863307330100499, "grad_norm": 0.6061440706253052, "learning_rate": 0.00019443507588532885, "loss": 0.2996, "step": 3460 }, { "epoch": 0.04877363131632581, "grad_norm": 0.6506759524345398, "learning_rate": 0.00019499718943226531, "loss": 0.3141, "step": 3470 }, { "epoch": 0.04891418933164664, "grad_norm": 0.5880570411682129, "learning_rate": 0.0001955593029792018, "loss": 0.3123, "step": 3480 }, { "epoch": 0.04905474734696746, "grad_norm": 0.8206682205200195, "learning_rate": 0.0001961214165261383, "loss": 0.3009, "step": 3490 }, { "epoch": 0.049195305362288284, "grad_norm": 0.7400031089782715, "learning_rate": 0.00019668353007307475, "loss": 0.2944, "step": 3500 }, { "epoch": 0.04933586337760911, "grad_norm": 0.6021711826324463, "learning_rate": 0.00019724564362001126, "loss": 0.3242, "step": 3510 }, { "epoch": 0.04947642139292993, "grad_norm": 0.6773003339767456, "learning_rate": 0.00019780775716694772, "loss": 0.3115, "step": 3520 }, { "epoch": 0.04961697940825076, "grad_norm": 0.600722074508667, "learning_rate": 0.0001983698707138842, "loss": 0.2828, "step": 3530 }, { "epoch": 0.04975753742357158, "grad_norm": 0.6942675113677979, "learning_rate": 0.0001989319842608207, "loss": 0.2918, "step": 3540 }, { "epoch": 0.049898095438892405, "grad_norm": 0.7262158393859863, "learning_rate": 0.00019949409780775718, "loss": 0.2892, "step": 3550 }, { "epoch": 0.050038653454213225, "grad_norm": 0.7155596613883972, "learning_rate": 0.00019999999989197024, "loss": 0.2958, "step": 3560 }, { "epoch": 0.05017921146953405, "grad_norm": 0.6224256157875061, "learning_rate": 0.00019999998692839935, "loss": 0.2975, "step": 3570 }, { "epoch": 0.05031976948485487, "grad_norm": 0.6881932020187378, "learning_rate": 0.00019999995235887974, "loss": 0.3063, "step": 3580 }, { "epoch": 0.0504603275001757, "grad_norm": 0.595230221748352, "learning_rate": 0.00019999989618341886, "loss": 0.2761, "step": 3590 }, { "epoch": 0.05060088551549652, "grad_norm": 0.7748909592628479, "learning_rate": 0.00019999981840202884, "loss": 0.2684, "step": 3600 }, { "epoch": 0.050741443530817346, "grad_norm": 0.6825932264328003, "learning_rate": 0.0001999997190147265, "loss": 0.3061, "step": 3610 }, { "epoch": 0.050882001546138166, "grad_norm": 0.7129770517349243, "learning_rate": 0.0001999995980215333, "loss": 0.3035, "step": 3620 }, { "epoch": 0.05102255956145899, "grad_norm": 0.7043442130088806, "learning_rate": 0.00019999945542247541, "loss": 0.2797, "step": 3630 }, { "epoch": 0.05116311757677981, "grad_norm": 0.5520175695419312, "learning_rate": 0.0001999992912175836, "loss": 0.2845, "step": 3640 }, { "epoch": 0.05130367559210064, "grad_norm": 0.5965823531150818, "learning_rate": 0.00019999910540689339, "loss": 0.3074, "step": 3650 }, { "epoch": 0.051444233607421466, "grad_norm": 0.578132152557373, "learning_rate": 0.0001999988979904449, "loss": 0.285, "step": 3660 }, { "epoch": 0.051584791622742286, "grad_norm": 0.6404640674591064, "learning_rate": 0.0001999986689682829, "loss": 0.3044, "step": 3670 }, { "epoch": 0.05172534963806311, "grad_norm": 0.6365982294082642, "learning_rate": 0.00019999841834045698, "loss": 0.2775, "step": 3680 }, { "epoch": 0.05186590765338393, "grad_norm": 0.7316439747810364, "learning_rate": 0.0001999981461070212, "loss": 0.3128, "step": 3690 }, { "epoch": 0.05200646566870476, "grad_norm": 0.5798088312149048, "learning_rate": 0.00019999785226803446, "loss": 0.2856, "step": 3700 }, { "epoch": 0.05214702368402558, "grad_norm": 0.5912203192710876, "learning_rate": 0.00019999753682356012, "loss": 0.3062, "step": 3710 }, { "epoch": 0.05228758169934641, "grad_norm": 0.8300290107727051, "learning_rate": 0.00019999719977366648, "loss": 0.293, "step": 3720 }, { "epoch": 0.05242813971466723, "grad_norm": 0.6406461000442505, "learning_rate": 0.00019999684111842627, "loss": 0.2573, "step": 3730 }, { "epoch": 0.052568697729988054, "grad_norm": 0.6481155157089233, "learning_rate": 0.00019999646085791701, "loss": 0.2763, "step": 3740 }, { "epoch": 0.052709255745308874, "grad_norm": 0.6483648419380188, "learning_rate": 0.00019999605899222083, "loss": 0.2845, "step": 3750 }, { "epoch": 0.0528498137606297, "grad_norm": 0.5151050686836243, "learning_rate": 0.00019999563552142461, "loss": 0.2493, "step": 3760 }, { "epoch": 0.05299037177595052, "grad_norm": 0.6392170190811157, "learning_rate": 0.00019999519044561982, "loss": 0.2946, "step": 3770 }, { "epoch": 0.05313092979127135, "grad_norm": 0.573092520236969, "learning_rate": 0.0001999947237649026, "loss": 0.2933, "step": 3780 }, { "epoch": 0.053271487806592174, "grad_norm": 0.6438156366348267, "learning_rate": 0.00019999423547937382, "loss": 0.2462, "step": 3790 }, { "epoch": 0.053412045821912994, "grad_norm": 0.6364404559135437, "learning_rate": 0.00019999372558913894, "loss": 0.3092, "step": 3800 }, { "epoch": 0.05355260383723382, "grad_norm": 0.5950109958648682, "learning_rate": 0.00019999319409430815, "loss": 0.2821, "step": 3810 }, { "epoch": 0.05369316185255464, "grad_norm": 0.5111678242683411, "learning_rate": 0.00019999264099499628, "loss": 0.3083, "step": 3820 }, { "epoch": 0.05383371986787547, "grad_norm": 0.5594717264175415, "learning_rate": 0.00019999206629132282, "loss": 0.3032, "step": 3830 }, { "epoch": 0.05397427788319629, "grad_norm": 0.7607744932174683, "learning_rate": 0.000199991469983412, "loss": 0.3019, "step": 3840 }, { "epoch": 0.054114835898517115, "grad_norm": 1.16470468044281, "learning_rate": 0.0001999908520713926, "loss": 0.287, "step": 3850 }, { "epoch": 0.054255393913837935, "grad_norm": 0.6292256712913513, "learning_rate": 0.0001999902125553981, "loss": 0.2816, "step": 3860 }, { "epoch": 0.05439595192915876, "grad_norm": 0.49813827872276306, "learning_rate": 0.0001999895514355667, "loss": 0.2729, "step": 3870 }, { "epoch": 0.05453650994447958, "grad_norm": 0.8331692814826965, "learning_rate": 0.0001999888687120413, "loss": 0.3057, "step": 3880 }, { "epoch": 0.05467706795980041, "grad_norm": 0.5014657974243164, "learning_rate": 0.00019998816438496935, "loss": 0.2808, "step": 3890 }, { "epoch": 0.05481762597512123, "grad_norm": 0.7092079520225525, "learning_rate": 0.000199987438454503, "loss": 0.2895, "step": 3900 }, { "epoch": 0.054958183990442055, "grad_norm": 0.7176849246025085, "learning_rate": 0.00019998669092079915, "loss": 0.2883, "step": 3910 }, { "epoch": 0.055098742005762875, "grad_norm": 0.7874364852905273, "learning_rate": 0.0001999859217840193, "loss": 0.2901, "step": 3920 }, { "epoch": 0.0552393000210837, "grad_norm": 0.6541767120361328, "learning_rate": 0.00019998513104432962, "loss": 0.3062, "step": 3930 }, { "epoch": 0.05537985803640453, "grad_norm": 0.6360107064247131, "learning_rate": 0.00019998431870190097, "loss": 0.2651, "step": 3940 }, { "epoch": 0.05552041605172535, "grad_norm": 0.6698468327522278, "learning_rate": 0.00019998348475690883, "loss": 0.2763, "step": 3950 }, { "epoch": 0.055660974067046176, "grad_norm": 0.6481212973594666, "learning_rate": 0.00019998262920953343, "loss": 0.2844, "step": 3960 }, { "epoch": 0.055801532082366996, "grad_norm": 0.6569493412971497, "learning_rate": 0.0001999817520599596, "loss": 0.2694, "step": 3970 }, { "epoch": 0.05594209009768782, "grad_norm": 0.6541939377784729, "learning_rate": 0.0001999808533083768, "loss": 0.3063, "step": 3980 }, { "epoch": 0.05608264811300864, "grad_norm": 0.6222134232521057, "learning_rate": 0.00019997993295497933, "loss": 0.2811, "step": 3990 }, { "epoch": 0.05622320612832947, "grad_norm": 0.537887454032898, "learning_rate": 0.00019997899099996594, "loss": 0.2882, "step": 4000 }, { "epoch": 0.05622320612832947, "eval_chrf": 80.12473807899819, "eval_loss": 0.5646688342094421, "eval_runtime": 269.6909, "eval_samples_per_second": 0.371, "eval_steps_per_second": 0.015, "step": 4000 }, { "epoch": 0.05636376414365029, "grad_norm": 0.7329326868057251, "learning_rate": 0.00019997802744354019, "loss": 0.2727, "step": 4010 }, { "epoch": 0.05650432215897112, "grad_norm": 0.5730016827583313, "learning_rate": 0.00019997704228591023, "loss": 0.3122, "step": 4020 }, { "epoch": 0.05664488017429194, "grad_norm": 0.6560089588165283, "learning_rate": 0.000199976035527289, "loss": 0.2894, "step": 4030 }, { "epoch": 0.056785438189612764, "grad_norm": 0.6476408243179321, "learning_rate": 0.0001999750071678939, "loss": 0.2553, "step": 4040 }, { "epoch": 0.056925996204933584, "grad_norm": 0.4794682562351227, "learning_rate": 0.00019997395720794723, "loss": 0.2737, "step": 4050 }, { "epoch": 0.05706655422025441, "grad_norm": 0.5600661635398865, "learning_rate": 0.00019997288564767578, "loss": 0.2772, "step": 4060 }, { "epoch": 0.05720711223557523, "grad_norm": 0.7425886392593384, "learning_rate": 0.00019997179248731106, "loss": 0.3064, "step": 4070 }, { "epoch": 0.05734767025089606, "grad_norm": 0.7069060206413269, "learning_rate": 0.0001999706777270893, "loss": 0.3061, "step": 4080 }, { "epoch": 0.057488228266216884, "grad_norm": 0.6282569766044617, "learning_rate": 0.00019996954136725133, "loss": 0.2908, "step": 4090 }, { "epoch": 0.057628786281537704, "grad_norm": 0.7161028385162354, "learning_rate": 0.00019996838340804267, "loss": 0.2728, "step": 4100 }, { "epoch": 0.05776934429685853, "grad_norm": 0.7045167684555054, "learning_rate": 0.00019996720384971354, "loss": 0.2568, "step": 4110 }, { "epoch": 0.05790990231217935, "grad_norm": 0.569190502166748, "learning_rate": 0.00019996600269251875, "loss": 0.2459, "step": 4120 }, { "epoch": 0.05805046032750018, "grad_norm": 0.6955873370170593, "learning_rate": 0.00019996477993671781, "loss": 0.2681, "step": 4130 }, { "epoch": 0.058191018342821, "grad_norm": 0.6311067938804626, "learning_rate": 0.000199963535582575, "loss": 0.2687, "step": 4140 }, { "epoch": 0.058331576358141825, "grad_norm": 0.5590523481369019, "learning_rate": 0.0001999622696303591, "loss": 0.313, "step": 4150 }, { "epoch": 0.058472134373462645, "grad_norm": 0.6355190277099609, "learning_rate": 0.00019996098208034365, "loss": 0.2935, "step": 4160 }, { "epoch": 0.05861269238878347, "grad_norm": 0.7000370025634766, "learning_rate": 0.00019995967293280677, "loss": 0.2969, "step": 4170 }, { "epoch": 0.05875325040410429, "grad_norm": 0.7441065311431885, "learning_rate": 0.00019995834218803144, "loss": 0.2705, "step": 4180 }, { "epoch": 0.05889380841942512, "grad_norm": 0.6410252451896667, "learning_rate": 0.0001999569898463051, "loss": 0.2813, "step": 4190 }, { "epoch": 0.05903436643474594, "grad_norm": 0.5887627005577087, "learning_rate": 0.00019995561590791994, "loss": 0.3098, "step": 4200 }, { "epoch": 0.059174924450066765, "grad_norm": 0.6581453680992126, "learning_rate": 0.00019995422037317285, "loss": 0.2945, "step": 4210 }, { "epoch": 0.059315482465387585, "grad_norm": 0.8213226199150085, "learning_rate": 0.0001999528032423653, "loss": 0.3026, "step": 4220 }, { "epoch": 0.05945604048070841, "grad_norm": 0.6150140762329102, "learning_rate": 0.0001999513645158035, "loss": 0.2856, "step": 4230 }, { "epoch": 0.05959659849602924, "grad_norm": 0.7967432737350464, "learning_rate": 0.0001999499041937983, "loss": 0.3084, "step": 4240 }, { "epoch": 0.05973715651135006, "grad_norm": 0.7099254131317139, "learning_rate": 0.00019994842227666524, "loss": 0.323, "step": 4250 }, { "epoch": 0.059877714526670886, "grad_norm": 0.6161561608314514, "learning_rate": 0.00019994691876472446, "loss": 0.3072, "step": 4260 }, { "epoch": 0.060018272541991706, "grad_norm": 0.5760515332221985, "learning_rate": 0.00019994539365830082, "loss": 0.2331, "step": 4270 }, { "epoch": 0.06015883055731253, "grad_norm": 0.619286060333252, "learning_rate": 0.00019994384695772385, "loss": 0.275, "step": 4280 }, { "epoch": 0.06029938857263335, "grad_norm": 0.5424856543540955, "learning_rate": 0.0001999422786633277, "loss": 0.2968, "step": 4290 }, { "epoch": 0.06043994658795418, "grad_norm": 0.5366678237915039, "learning_rate": 0.00019994068877545124, "loss": 0.2816, "step": 4300 }, { "epoch": 0.060580504603275, "grad_norm": 0.6595004796981812, "learning_rate": 0.000199939077294438, "loss": 0.2827, "step": 4310 }, { "epoch": 0.06072106261859583, "grad_norm": 0.6365996599197388, "learning_rate": 0.0001999374442206361, "loss": 0.2903, "step": 4320 }, { "epoch": 0.06086162063391665, "grad_norm": 0.6086582541465759, "learning_rate": 0.0001999357895543984, "loss": 0.2812, "step": 4330 }, { "epoch": 0.06100217864923747, "grad_norm": 0.623997151851654, "learning_rate": 0.00019993411329608245, "loss": 0.2985, "step": 4340 }, { "epoch": 0.06114273666455829, "grad_norm": 0.5988753437995911, "learning_rate": 0.00019993241544605036, "loss": 0.2632, "step": 4350 }, { "epoch": 0.06128329467987912, "grad_norm": 0.5856814384460449, "learning_rate": 0.00019993069600466904, "loss": 0.2703, "step": 4360 }, { "epoch": 0.06142385269519995, "grad_norm": 0.6997985243797302, "learning_rate": 0.00019992895497230993, "loss": 0.2943, "step": 4370 }, { "epoch": 0.06156441071052077, "grad_norm": 0.6265032887458801, "learning_rate": 0.00019992719234934917, "loss": 0.278, "step": 4380 }, { "epoch": 0.061704968725841594, "grad_norm": 0.5652262568473816, "learning_rate": 0.00019992540813616768, "loss": 0.2548, "step": 4390 }, { "epoch": 0.061845526741162414, "grad_norm": 0.5852500796318054, "learning_rate": 0.0001999236023331509, "loss": 0.3365, "step": 4400 }, { "epoch": 0.06198608475648324, "grad_norm": 0.7313827276229858, "learning_rate": 0.000199921774940689, "loss": 0.287, "step": 4410 }, { "epoch": 0.06212664277180406, "grad_norm": 0.6800966858863831, "learning_rate": 0.0001999199259591768, "loss": 0.2714, "step": 4420 }, { "epoch": 0.06226720078712489, "grad_norm": 0.563739538192749, "learning_rate": 0.00019991805538901382, "loss": 0.2744, "step": 4430 }, { "epoch": 0.06240775880244571, "grad_norm": 0.5896499752998352, "learning_rate": 0.00019991616323060418, "loss": 0.2732, "step": 4440 }, { "epoch": 0.06254831681776653, "grad_norm": 0.5427751541137695, "learning_rate": 0.00019991424948435671, "loss": 0.2753, "step": 4450 }, { "epoch": 0.06268887483308735, "grad_norm": 0.6296198964118958, "learning_rate": 0.0001999123141506849, "loss": 0.3029, "step": 4460 }, { "epoch": 0.06282943284840818, "grad_norm": 0.6063293218612671, "learning_rate": 0.0001999103572300069, "loss": 0.2979, "step": 4470 }, { "epoch": 0.06296999086372901, "grad_norm": 0.5386064052581787, "learning_rate": 0.00019990837872274552, "loss": 0.2657, "step": 4480 }, { "epoch": 0.06311054887904982, "grad_norm": 0.6669312715530396, "learning_rate": 0.00019990637862932816, "loss": 0.2939, "step": 4490 }, { "epoch": 0.06325110689437065, "grad_norm": 0.5777736306190491, "learning_rate": 0.0001999043569501871, "loss": 0.2665, "step": 4500 }, { "epoch": 0.06339166490969148, "grad_norm": 0.6691046357154846, "learning_rate": 0.00019990231368575905, "loss": 0.2958, "step": 4510 }, { "epoch": 0.0635322229250123, "grad_norm": 0.7404462695121765, "learning_rate": 0.0001999002488364855, "loss": 0.3013, "step": 4520 }, { "epoch": 0.06367278094033313, "grad_norm": 0.7259801626205444, "learning_rate": 0.00019989816240281255, "loss": 0.327, "step": 4530 }, { "epoch": 0.06381333895565394, "grad_norm": 0.5758121013641357, "learning_rate": 0.00019989605438519106, "loss": 0.2552, "step": 4540 }, { "epoch": 0.06395389697097477, "grad_norm": 0.6627341508865356, "learning_rate": 0.00019989392478407642, "loss": 0.2732, "step": 4550 }, { "epoch": 0.0640944549862956, "grad_norm": 0.6391485929489136, "learning_rate": 0.0001998917735999288, "loss": 0.2782, "step": 4560 }, { "epoch": 0.06423501300161642, "grad_norm": 0.6049222946166992, "learning_rate": 0.00019988960083321296, "loss": 0.2793, "step": 4570 }, { "epoch": 0.06437557101693724, "grad_norm": 0.5777444839477539, "learning_rate": 0.00019988740648439835, "loss": 0.2675, "step": 4580 }, { "epoch": 0.06451612903225806, "grad_norm": 0.43069472908973694, "learning_rate": 0.00019988519055395907, "loss": 0.2698, "step": 4590 }, { "epoch": 0.06465668704757889, "grad_norm": 0.5715444087982178, "learning_rate": 0.0001998829530423739, "loss": 0.2706, "step": 4600 }, { "epoch": 0.06479724506289972, "grad_norm": 0.7473022937774658, "learning_rate": 0.00019988069395012628, "loss": 0.2802, "step": 4610 }, { "epoch": 0.06493780307822053, "grad_norm": 0.5839947462081909, "learning_rate": 0.0001998784132777043, "loss": 0.2734, "step": 4620 }, { "epoch": 0.06507836109354136, "grad_norm": 0.7734289169311523, "learning_rate": 0.00019987611102560075, "loss": 0.2694, "step": 4630 }, { "epoch": 0.06521891910886218, "grad_norm": 0.5843231081962585, "learning_rate": 0.00019987378719431303, "loss": 0.2721, "step": 4640 }, { "epoch": 0.06535947712418301, "grad_norm": 0.7259225249290466, "learning_rate": 0.0001998714417843432, "loss": 0.2629, "step": 4650 }, { "epoch": 0.06550003513950382, "grad_norm": 0.8221696615219116, "learning_rate": 0.00019986907479619808, "loss": 0.2801, "step": 4660 }, { "epoch": 0.06564059315482465, "grad_norm": 0.4843771159648895, "learning_rate": 0.00019986668623038901, "loss": 0.2645, "step": 4670 }, { "epoch": 0.06578115117014548, "grad_norm": 0.3744613826274872, "learning_rate": 0.0001998642760874321, "loss": 0.2492, "step": 4680 }, { "epoch": 0.0659217091854663, "grad_norm": 0.5203977227210999, "learning_rate": 0.00019986184436784804, "loss": 0.2524, "step": 4690 }, { "epoch": 0.06606226720078713, "grad_norm": 0.5358824729919434, "learning_rate": 0.00019985939107216227, "loss": 0.2906, "step": 4700 }, { "epoch": 0.06620282521610794, "grad_norm": 0.6105577945709229, "learning_rate": 0.00019985691620090488, "loss": 0.2964, "step": 4710 }, { "epoch": 0.06634338323142877, "grad_norm": 0.6519573330879211, "learning_rate": 0.00019985441975461048, "loss": 0.2943, "step": 4720 }, { "epoch": 0.0664839412467496, "grad_norm": 0.6661750674247742, "learning_rate": 0.00019985190173381855, "loss": 0.2872, "step": 4730 }, { "epoch": 0.06662449926207042, "grad_norm": 0.5077973008155823, "learning_rate": 0.00019984936213907311, "loss": 0.2424, "step": 4740 }, { "epoch": 0.06676505727739124, "grad_norm": 0.5132244825363159, "learning_rate": 0.00019984680097092283, "loss": 0.2675, "step": 4750 }, { "epoch": 0.06690561529271206, "grad_norm": 0.6807255148887634, "learning_rate": 0.00019984421822992113, "loss": 0.2519, "step": 4760 }, { "epoch": 0.06704617330803289, "grad_norm": 0.6630058288574219, "learning_rate": 0.00019984161391662598, "loss": 0.2565, "step": 4770 }, { "epoch": 0.06718673132335372, "grad_norm": 0.5398477911949158, "learning_rate": 0.00019983898803160006, "loss": 0.2892, "step": 4780 }, { "epoch": 0.06732728933867453, "grad_norm": 0.4638122022151947, "learning_rate": 0.0001998363405754108, "loss": 0.3054, "step": 4790 }, { "epoch": 0.06746784735399536, "grad_norm": 0.6484492421150208, "learning_rate": 0.00019983367154863014, "loss": 0.287, "step": 4800 }, { "epoch": 0.06760840536931619, "grad_norm": 0.6471605896949768, "learning_rate": 0.00019983098095183475, "loss": 0.2659, "step": 4810 }, { "epoch": 0.06774896338463701, "grad_norm": 0.5113816857337952, "learning_rate": 0.00019982826878560598, "loss": 0.2951, "step": 4820 }, { "epoch": 0.06788952139995784, "grad_norm": 0.6426228880882263, "learning_rate": 0.0001998255350505298, "loss": 0.2928, "step": 4830 }, { "epoch": 0.06803007941527865, "grad_norm": 0.5491995811462402, "learning_rate": 0.0001998227797471969, "loss": 0.2524, "step": 4840 }, { "epoch": 0.06817063743059948, "grad_norm": 0.5573206543922424, "learning_rate": 0.00019982000287620255, "loss": 0.2792, "step": 4850 }, { "epoch": 0.0683111954459203, "grad_norm": 0.605418860912323, "learning_rate": 0.00019981720443814672, "loss": 0.2673, "step": 4860 }, { "epoch": 0.06845175346124113, "grad_norm": 0.4459189474582672, "learning_rate": 0.00019981438443363408, "loss": 0.2716, "step": 4870 }, { "epoch": 0.06859231147656195, "grad_norm": 0.5312105417251587, "learning_rate": 0.00019981154286327385, "loss": 0.2874, "step": 4880 }, { "epoch": 0.06873286949188277, "grad_norm": 0.5258371233940125, "learning_rate": 0.00019980867972768006, "loss": 0.2656, "step": 4890 }, { "epoch": 0.0688734275072036, "grad_norm": 0.6958247423171997, "learning_rate": 0.00019980579502747124, "loss": 0.2722, "step": 4900 }, { "epoch": 0.06901398552252443, "grad_norm": 0.6064774394035339, "learning_rate": 0.0001998028887632707, "loss": 0.2788, "step": 4910 }, { "epoch": 0.06915454353784524, "grad_norm": 0.5372047424316406, "learning_rate": 0.00019979996093570635, "loss": 0.2554, "step": 4920 }, { "epoch": 0.06929510155316607, "grad_norm": 0.42729809880256653, "learning_rate": 0.0001997970115454108, "loss": 0.2599, "step": 4930 }, { "epoch": 0.0694356595684869, "grad_norm": 0.394705206155777, "learning_rate": 0.00019979404059302126, "loss": 0.2753, "step": 4940 }, { "epoch": 0.06957621758380772, "grad_norm": 0.6101125478744507, "learning_rate": 0.00019979104807917966, "loss": 0.258, "step": 4950 }, { "epoch": 0.06971677559912855, "grad_norm": 0.5726629495620728, "learning_rate": 0.00019978803400453254, "loss": 0.2968, "step": 4960 }, { "epoch": 0.06985733361444936, "grad_norm": 0.5277143120765686, "learning_rate": 0.00019978499836973115, "loss": 0.2384, "step": 4970 }, { "epoch": 0.06999789162977019, "grad_norm": 0.501110851764679, "learning_rate": 0.00019978194117543132, "loss": 0.2515, "step": 4980 }, { "epoch": 0.07013844964509101, "grad_norm": 0.5731441378593445, "learning_rate": 0.00019977886242229364, "loss": 0.2585, "step": 4990 }, { "epoch": 0.07027900766041184, "grad_norm": 0.46598953008651733, "learning_rate": 0.00019977576211098325, "loss": 0.2707, "step": 5000 }, { "epoch": 0.07027900766041184, "eval_chrf": 75.03560060246959, "eval_loss": 0.5062581300735474, "eval_runtime": 277.3737, "eval_samples_per_second": 0.361, "eval_steps_per_second": 0.014, "step": 5000 }, { "epoch": 0.07041956567573265, "grad_norm": 0.6802473664283752, "learning_rate": 0.00019977264024217007, "loss": 0.2533, "step": 5010 }, { "epoch": 0.07056012369105348, "grad_norm": 0.5293552875518799, "learning_rate": 0.00019976949681652854, "loss": 0.2683, "step": 5020 }, { "epoch": 0.07070068170637431, "grad_norm": 0.48123079538345337, "learning_rate": 0.00019976633183473784, "loss": 0.258, "step": 5030 }, { "epoch": 0.07084123972169513, "grad_norm": 0.5725906491279602, "learning_rate": 0.00019976314529748183, "loss": 0.2627, "step": 5040 }, { "epoch": 0.07098179773701595, "grad_norm": 0.5668321847915649, "learning_rate": 0.000199759937205449, "loss": 0.2823, "step": 5050 }, { "epoch": 0.07112235575233677, "grad_norm": 0.934905469417572, "learning_rate": 0.00019975670755933243, "loss": 0.2561, "step": 5060 }, { "epoch": 0.0712629137676576, "grad_norm": 0.6161137819290161, "learning_rate": 0.00019975345635982994, "loss": 0.2741, "step": 5070 }, { "epoch": 0.07140347178297843, "grad_norm": 0.6338168978691101, "learning_rate": 0.00019975018360764398, "loss": 0.2402, "step": 5080 }, { "epoch": 0.07154402979829924, "grad_norm": 0.48079991340637207, "learning_rate": 0.0001997468893034817, "loss": 0.2948, "step": 5090 }, { "epoch": 0.07168458781362007, "grad_norm": 0.5044981241226196, "learning_rate": 0.00019974357344805483, "loss": 0.2754, "step": 5100 }, { "epoch": 0.0718251458289409, "grad_norm": 0.6133807897567749, "learning_rate": 0.0001997402360420798, "loss": 0.2572, "step": 5110 }, { "epoch": 0.07196570384426172, "grad_norm": 0.500830888748169, "learning_rate": 0.00019973687708627766, "loss": 0.2973, "step": 5120 }, { "epoch": 0.07210626185958255, "grad_norm": 0.5794868469238281, "learning_rate": 0.0001997334965813742, "loss": 0.2631, "step": 5130 }, { "epoch": 0.07224681987490336, "grad_norm": 0.562156081199646, "learning_rate": 0.00019973009452809974, "loss": 0.2863, "step": 5140 }, { "epoch": 0.07238737789022419, "grad_norm": 0.5636243224143982, "learning_rate": 0.0001997266709271894, "loss": 0.2801, "step": 5150 }, { "epoch": 0.07252793590554502, "grad_norm": 0.5850925445556641, "learning_rate": 0.0001997232257793828, "loss": 0.2621, "step": 5160 }, { "epoch": 0.07266849392086584, "grad_norm": 0.4883113503456116, "learning_rate": 0.00019971975908542438, "loss": 0.2549, "step": 5170 }, { "epoch": 0.07280905193618666, "grad_norm": 0.6881444454193115, "learning_rate": 0.0001997162708460631, "loss": 0.2476, "step": 5180 }, { "epoch": 0.07294960995150748, "grad_norm": 0.6542561054229736, "learning_rate": 0.00019971276106205268, "loss": 0.2735, "step": 5190 }, { "epoch": 0.07309016796682831, "grad_norm": 0.5345896482467651, "learning_rate": 0.00019970922973415136, "loss": 0.2419, "step": 5200 }, { "epoch": 0.07323072598214914, "grad_norm": 0.5475381016731262, "learning_rate": 0.0001997056768631222, "loss": 0.278, "step": 5210 }, { "epoch": 0.07337128399746995, "grad_norm": 0.5337420105934143, "learning_rate": 0.00019970210244973276, "loss": 0.2463, "step": 5220 }, { "epoch": 0.07351184201279078, "grad_norm": 0.8173430562019348, "learning_rate": 0.00019969850649475537, "loss": 0.293, "step": 5230 }, { "epoch": 0.0736524000281116, "grad_norm": 0.5612601637840271, "learning_rate": 0.000199694888998967, "loss": 0.2689, "step": 5240 }, { "epoch": 0.07379295804343243, "grad_norm": 0.47143304347991943, "learning_rate": 0.00019969124996314913, "loss": 0.2485, "step": 5250 }, { "epoch": 0.07393351605875326, "grad_norm": 0.6005493402481079, "learning_rate": 0.00019968758938808813, "loss": 0.2505, "step": 5260 }, { "epoch": 0.07407407407407407, "grad_norm": 0.49098819494247437, "learning_rate": 0.00019968390727457486, "loss": 0.2411, "step": 5270 }, { "epoch": 0.0742146320893949, "grad_norm": 0.5847885608673096, "learning_rate": 0.00019968020362340485, "loss": 0.2698, "step": 5280 }, { "epoch": 0.07435519010471572, "grad_norm": 0.5103782415390015, "learning_rate": 0.00019967647843537837, "loss": 0.2587, "step": 5290 }, { "epoch": 0.07449574812003655, "grad_norm": 0.6858464479446411, "learning_rate": 0.0001996727317113002, "loss": 0.262, "step": 5300 }, { "epoch": 0.07463630613535736, "grad_norm": 0.7934888005256653, "learning_rate": 0.00019966896345197989, "loss": 0.2364, "step": 5310 }, { "epoch": 0.07477686415067819, "grad_norm": 0.5066108107566833, "learning_rate": 0.00019966517365823164, "loss": 0.2535, "step": 5320 }, { "epoch": 0.07491742216599902, "grad_norm": 0.6084860563278198, "learning_rate": 0.00019966136233087425, "loss": 0.2713, "step": 5330 }, { "epoch": 0.07505798018131984, "grad_norm": 0.5973907709121704, "learning_rate": 0.00019965752947073115, "loss": 0.286, "step": 5340 }, { "epoch": 0.07519853819664066, "grad_norm": 0.5671558380126953, "learning_rate": 0.00019965367507863055, "loss": 0.2694, "step": 5350 }, { "epoch": 0.07533909621196148, "grad_norm": 0.6024190783500671, "learning_rate": 0.0001996497991554052, "loss": 0.2651, "step": 5360 }, { "epoch": 0.07547965422728231, "grad_norm": 0.5919899940490723, "learning_rate": 0.00019964590170189246, "loss": 0.2575, "step": 5370 }, { "epoch": 0.07562021224260314, "grad_norm": 0.6757079362869263, "learning_rate": 0.00019964198271893452, "loss": 0.24, "step": 5380 }, { "epoch": 0.07576077025792397, "grad_norm": 0.5828556418418884, "learning_rate": 0.000199638042207378, "loss": 0.2384, "step": 5390 }, { "epoch": 0.07590132827324478, "grad_norm": 0.6026638746261597, "learning_rate": 0.00019963408016807437, "loss": 0.2532, "step": 5400 }, { "epoch": 0.0760418862885656, "grad_norm": 0.5327057838439941, "learning_rate": 0.00019963009660187966, "loss": 0.2867, "step": 5410 }, { "epoch": 0.07618244430388643, "grad_norm": 0.6412426829338074, "learning_rate": 0.0001996260915096545, "loss": 0.2697, "step": 5420 }, { "epoch": 0.07632300231920726, "grad_norm": 0.5836541652679443, "learning_rate": 0.00019962206489226433, "loss": 0.2459, "step": 5430 }, { "epoch": 0.07646356033452807, "grad_norm": 0.7453212141990662, "learning_rate": 0.00019961801675057902, "loss": 0.2481, "step": 5440 }, { "epoch": 0.0766041183498489, "grad_norm": 0.5052883625030518, "learning_rate": 0.0001996139470854733, "loss": 0.278, "step": 5450 }, { "epoch": 0.07674467636516973, "grad_norm": 0.5511698722839355, "learning_rate": 0.00019960985589782641, "loss": 0.2621, "step": 5460 }, { "epoch": 0.07688523438049055, "grad_norm": 0.7070469856262207, "learning_rate": 0.0001996057431885223, "loss": 0.2965, "step": 5470 }, { "epoch": 0.07702579239581137, "grad_norm": 0.5122061371803284, "learning_rate": 0.00019960160895844957, "loss": 0.2566, "step": 5480 }, { "epoch": 0.07716635041113219, "grad_norm": 0.6440472602844238, "learning_rate": 0.00019959745320850144, "loss": 0.2799, "step": 5490 }, { "epoch": 0.07730690842645302, "grad_norm": 0.5466797351837158, "learning_rate": 0.00019959327593957585, "loss": 0.2383, "step": 5500 }, { "epoch": 0.07744746644177385, "grad_norm": 0.5561037659645081, "learning_rate": 0.00019958907715257528, "loss": 0.266, "step": 5510 }, { "epoch": 0.07758802445709466, "grad_norm": 0.6058019399642944, "learning_rate": 0.00019958485684840694, "loss": 0.2586, "step": 5520 }, { "epoch": 0.07772858247241549, "grad_norm": 0.48346906900405884, "learning_rate": 0.00019958061502798268, "loss": 0.2653, "step": 5530 }, { "epoch": 0.07786914048773631, "grad_norm": 0.5441072583198547, "learning_rate": 0.00019957635169221894, "loss": 0.2466, "step": 5540 }, { "epoch": 0.07800969850305714, "grad_norm": 0.6066705584526062, "learning_rate": 0.00019957206684203694, "loss": 0.2736, "step": 5550 }, { "epoch": 0.07815025651837797, "grad_norm": 0.5180489420890808, "learning_rate": 0.00019956776047836238, "loss": 0.235, "step": 5560 }, { "epoch": 0.07829081453369878, "grad_norm": 0.5487321019172668, "learning_rate": 0.0001995634326021257, "loss": 0.2201, "step": 5570 }, { "epoch": 0.0784313725490196, "grad_norm": 0.5634502172470093, "learning_rate": 0.000199559083214262, "loss": 0.2468, "step": 5580 }, { "epoch": 0.07857193056434043, "grad_norm": 0.5380911827087402, "learning_rate": 0.00019955471231571106, "loss": 0.2681, "step": 5590 }, { "epoch": 0.07871248857966126, "grad_norm": 0.5714244842529297, "learning_rate": 0.00019955031990741718, "loss": 0.2692, "step": 5600 }, { "epoch": 0.07885304659498207, "grad_norm": 0.5239103436470032, "learning_rate": 0.0001995459059903294, "loss": 0.2682, "step": 5610 }, { "epoch": 0.0789936046103029, "grad_norm": 0.6180918216705322, "learning_rate": 0.0001995414705654014, "loss": 0.2551, "step": 5620 }, { "epoch": 0.07913416262562373, "grad_norm": 0.5177292823791504, "learning_rate": 0.00019953701363359145, "loss": 0.2817, "step": 5630 }, { "epoch": 0.07927472064094455, "grad_norm": 0.48171669244766235, "learning_rate": 0.00019953253519586258, "loss": 0.2391, "step": 5640 }, { "epoch": 0.07941527865626537, "grad_norm": 0.6228054761886597, "learning_rate": 0.00019952803525318235, "loss": 0.2723, "step": 5650 }, { "epoch": 0.0795558366715862, "grad_norm": 0.41678664088249207, "learning_rate": 0.00019952351380652305, "loss": 0.2701, "step": 5660 }, { "epoch": 0.07969639468690702, "grad_norm": 0.44831570982933044, "learning_rate": 0.00019951897085686158, "loss": 0.2713, "step": 5670 }, { "epoch": 0.07983695270222785, "grad_norm": 0.5627865195274353, "learning_rate": 0.00019951440640517945, "loss": 0.2689, "step": 5680 }, { "epoch": 0.07997751071754867, "grad_norm": 0.5584320425987244, "learning_rate": 0.00019950982045246286, "loss": 0.2636, "step": 5690 }, { "epoch": 0.08011806873286949, "grad_norm": 0.4371662735939026, "learning_rate": 0.0001995052129997027, "loss": 0.2666, "step": 5700 }, { "epoch": 0.08025862674819031, "grad_norm": 0.6560251116752625, "learning_rate": 0.00019950058404789438, "loss": 0.2774, "step": 5710 }, { "epoch": 0.08039918476351114, "grad_norm": 0.6930556297302246, "learning_rate": 0.0001994959335980381, "loss": 0.2758, "step": 5720 }, { "epoch": 0.08053974277883197, "grad_norm": 0.5577618479728699, "learning_rate": 0.0001994912616511386, "loss": 0.258, "step": 5730 }, { "epoch": 0.08068030079415278, "grad_norm": 0.45983192324638367, "learning_rate": 0.0001994865682082053, "loss": 0.269, "step": 5740 }, { "epoch": 0.08082085880947361, "grad_norm": 0.5181092023849487, "learning_rate": 0.00019948185327025225, "loss": 0.2596, "step": 5750 }, { "epoch": 0.08096141682479444, "grad_norm": 0.5390303730964661, "learning_rate": 0.0001994771168382982, "loss": 0.2678, "step": 5760 }, { "epoch": 0.08110197484011526, "grad_norm": 0.48422420024871826, "learning_rate": 0.00019947235891336644, "loss": 0.253, "step": 5770 }, { "epoch": 0.08124253285543608, "grad_norm": 0.5115424990653992, "learning_rate": 0.000199467579496485, "loss": 0.2384, "step": 5780 }, { "epoch": 0.0813830908707569, "grad_norm": 0.507433295249939, "learning_rate": 0.00019946277858868655, "loss": 0.2822, "step": 5790 }, { "epoch": 0.08152364888607773, "grad_norm": 0.45908185839653015, "learning_rate": 0.0001994579561910083, "loss": 0.243, "step": 5800 }, { "epoch": 0.08166420690139856, "grad_norm": 0.6661983728408813, "learning_rate": 0.00019945311230449222, "loss": 0.2263, "step": 5810 }, { "epoch": 0.08180476491671937, "grad_norm": 0.6793397665023804, "learning_rate": 0.00019944824693018487, "loss": 0.2618, "step": 5820 }, { "epoch": 0.0819453229320402, "grad_norm": 0.5420805811882019, "learning_rate": 0.00019944336006913748, "loss": 0.2581, "step": 5830 }, { "epoch": 0.08208588094736102, "grad_norm": 0.542273759841919, "learning_rate": 0.00019943845172240584, "loss": 0.2634, "step": 5840 }, { "epoch": 0.08222643896268185, "grad_norm": 0.5444084405899048, "learning_rate": 0.00019943352189105053, "loss": 0.2698, "step": 5850 }, { "epoch": 0.08236699697800268, "grad_norm": 0.43644988536834717, "learning_rate": 0.00019942857057613663, "loss": 0.25, "step": 5860 }, { "epoch": 0.08250755499332349, "grad_norm": 0.39292803406715393, "learning_rate": 0.00019942359777873393, "loss": 0.2566, "step": 5870 }, { "epoch": 0.08264811300864432, "grad_norm": 0.736021101474762, "learning_rate": 0.00019941860349991688, "loss": 0.256, "step": 5880 }, { "epoch": 0.08278867102396514, "grad_norm": 0.5877601504325867, "learning_rate": 0.0001994135877407645, "loss": 0.274, "step": 5890 }, { "epoch": 0.08292922903928597, "grad_norm": 0.6260401010513306, "learning_rate": 0.00019940855050236048, "loss": 0.2566, "step": 5900 }, { "epoch": 0.08306978705460678, "grad_norm": 0.6187877655029297, "learning_rate": 0.00019940349178579326, "loss": 0.249, "step": 5910 }, { "epoch": 0.08321034506992761, "grad_norm": 0.5491911768913269, "learning_rate": 0.0001993984115921557, "loss": 0.2561, "step": 5920 }, { "epoch": 0.08335090308524844, "grad_norm": 0.6714176535606384, "learning_rate": 0.0001993933099225455, "loss": 0.259, "step": 5930 }, { "epoch": 0.08349146110056926, "grad_norm": 0.5543636083602905, "learning_rate": 0.00019938818677806488, "loss": 0.2774, "step": 5940 }, { "epoch": 0.08363201911589008, "grad_norm": 0.582870602607727, "learning_rate": 0.0001993830421598208, "loss": 0.2503, "step": 5950 }, { "epoch": 0.0837725771312109, "grad_norm": 0.5580389499664307, "learning_rate": 0.00019937787606892475, "loss": 0.2302, "step": 5960 }, { "epoch": 0.08391313514653173, "grad_norm": 0.5439727902412415, "learning_rate": 0.00019937268850649294, "loss": 0.2659, "step": 5970 }, { "epoch": 0.08405369316185256, "grad_norm": 0.4882200062274933, "learning_rate": 0.0001993674794736462, "loss": 0.2467, "step": 5980 }, { "epoch": 0.08419425117717338, "grad_norm": 0.5599921345710754, "learning_rate": 0.00019936224897150996, "loss": 0.2742, "step": 5990 }, { "epoch": 0.0843348091924942, "grad_norm": 0.728784441947937, "learning_rate": 0.00019935699700121431, "loss": 0.2446, "step": 6000 }, { "epoch": 0.0843348091924942, "eval_chrf": 77.78453910676699, "eval_loss": 0.502712607383728, "eval_runtime": 272.5964, "eval_samples_per_second": 0.367, "eval_steps_per_second": 0.015, "step": 6000 }, { "epoch": 0.08447536720781502, "grad_norm": 0.42536473274230957, "learning_rate": 0.00019935172356389406, "loss": 0.2493, "step": 6010 }, { "epoch": 0.08461592522313585, "grad_norm": 0.4811732769012451, "learning_rate": 0.00019934642866068853, "loss": 0.2551, "step": 6020 }, { "epoch": 0.08475648323845668, "grad_norm": 0.4817690849304199, "learning_rate": 0.00019934111229274174, "loss": 0.2505, "step": 6030 }, { "epoch": 0.08489704125377749, "grad_norm": 0.515292227268219, "learning_rate": 0.00019933577446120236, "loss": 0.2572, "step": 6040 }, { "epoch": 0.08503759926909832, "grad_norm": 0.5563884377479553, "learning_rate": 0.00019933041516722368, "loss": 0.2403, "step": 6050 }, { "epoch": 0.08517815728441915, "grad_norm": 0.47406917810440063, "learning_rate": 0.00019932503441196357, "loss": 0.2698, "step": 6060 }, { "epoch": 0.08531871529973997, "grad_norm": 0.6517256498336792, "learning_rate": 0.00019931963219658468, "loss": 0.2986, "step": 6070 }, { "epoch": 0.08545927331506079, "grad_norm": 0.5201681852340698, "learning_rate": 0.00019931420852225415, "loss": 0.2511, "step": 6080 }, { "epoch": 0.08559983133038161, "grad_norm": 0.42335227131843567, "learning_rate": 0.00019930876339014383, "loss": 0.2451, "step": 6090 }, { "epoch": 0.08574038934570244, "grad_norm": 0.5125184655189514, "learning_rate": 0.0001993032968014302, "loss": 0.2551, "step": 6100 }, { "epoch": 0.08588094736102327, "grad_norm": 0.6325479745864868, "learning_rate": 0.00019929780875729437, "loss": 0.262, "step": 6110 }, { "epoch": 0.08602150537634409, "grad_norm": 0.4180940091609955, "learning_rate": 0.0001992922992589221, "loss": 0.2548, "step": 6120 }, { "epoch": 0.0861620633916649, "grad_norm": 0.64287269115448, "learning_rate": 0.0001992867683075037, "loss": 0.2479, "step": 6130 }, { "epoch": 0.08630262140698573, "grad_norm": 0.523990273475647, "learning_rate": 0.0001992812159042343, "loss": 0.2832, "step": 6140 }, { "epoch": 0.08644317942230656, "grad_norm": 0.5508052706718445, "learning_rate": 0.00019927564205031342, "loss": 0.2494, "step": 6150 }, { "epoch": 0.08658373743762739, "grad_norm": 0.6613081097602844, "learning_rate": 0.00019927004674694543, "loss": 0.2536, "step": 6160 }, { "epoch": 0.0867242954529482, "grad_norm": 0.627884030342102, "learning_rate": 0.00019926442999533924, "loss": 0.272, "step": 6170 }, { "epoch": 0.08686485346826903, "grad_norm": 0.511834442615509, "learning_rate": 0.00019925879179670837, "loss": 0.2475, "step": 6180 }, { "epoch": 0.08700541148358985, "grad_norm": 0.7373877167701721, "learning_rate": 0.00019925313215227102, "loss": 0.2579, "step": 6190 }, { "epoch": 0.08714596949891068, "grad_norm": 0.5701001286506653, "learning_rate": 0.00019924745106325004, "loss": 0.2461, "step": 6200 }, { "epoch": 0.0872865275142315, "grad_norm": 0.41175466775894165, "learning_rate": 0.00019924174853087282, "loss": 0.2366, "step": 6210 }, { "epoch": 0.08742708552955232, "grad_norm": 0.676011323928833, "learning_rate": 0.00019923602455637152, "loss": 0.2642, "step": 6220 }, { "epoch": 0.08756764354487315, "grad_norm": 0.5405893325805664, "learning_rate": 0.0001992302791409828, "loss": 0.2344, "step": 6230 }, { "epoch": 0.08770820156019397, "grad_norm": 0.5449483394622803, "learning_rate": 0.00019922451228594805, "loss": 0.2557, "step": 6240 }, { "epoch": 0.08784875957551479, "grad_norm": 0.6101166605949402, "learning_rate": 0.00019921872399251321, "loss": 0.232, "step": 6250 }, { "epoch": 0.08798931759083561, "grad_norm": 0.5353050827980042, "learning_rate": 0.00019921291426192898, "loss": 0.2606, "step": 6260 }, { "epoch": 0.08812987560615644, "grad_norm": 0.4918290376663208, "learning_rate": 0.00019920708309545054, "loss": 0.2597, "step": 6270 }, { "epoch": 0.08827043362147727, "grad_norm": 0.5629746913909912, "learning_rate": 0.00019920123049433776, "loss": 0.2756, "step": 6280 }, { "epoch": 0.0884109916367981, "grad_norm": 0.5814275145530701, "learning_rate": 0.00019919535645985517, "loss": 0.2447, "step": 6290 }, { "epoch": 0.08855154965211891, "grad_norm": 0.5752664804458618, "learning_rate": 0.00019918946099327194, "loss": 0.254, "step": 6300 }, { "epoch": 0.08869210766743973, "grad_norm": 0.5221841335296631, "learning_rate": 0.00019918354409586178, "loss": 0.2445, "step": 6310 }, { "epoch": 0.08883266568276056, "grad_norm": 0.5126676559448242, "learning_rate": 0.00019917760576890315, "loss": 0.2354, "step": 6320 }, { "epoch": 0.08897322369808139, "grad_norm": 0.4682322144508362, "learning_rate": 0.00019917164601367904, "loss": 0.2286, "step": 6330 }, { "epoch": 0.0891137817134022, "grad_norm": 0.44143393635749817, "learning_rate": 0.00019916566483147715, "loss": 0.2326, "step": 6340 }, { "epoch": 0.08925433972872303, "grad_norm": 0.5489510893821716, "learning_rate": 0.00019915966222358976, "loss": 0.2772, "step": 6350 }, { "epoch": 0.08939489774404386, "grad_norm": 0.5929551124572754, "learning_rate": 0.00019915363819131374, "loss": 0.2331, "step": 6360 }, { "epoch": 0.08953545575936468, "grad_norm": 0.47340741753578186, "learning_rate": 0.0001991475927359507, "loss": 0.21, "step": 6370 }, { "epoch": 0.0896760137746855, "grad_norm": 0.6008301973342896, "learning_rate": 0.00019914152585880684, "loss": 0.2413, "step": 6380 }, { "epoch": 0.08981657179000632, "grad_norm": 0.5169734954833984, "learning_rate": 0.00019913543756119287, "loss": 0.239, "step": 6390 }, { "epoch": 0.08995712980532715, "grad_norm": 0.4037923216819763, "learning_rate": 0.0001991293278444243, "loss": 0.2459, "step": 6400 }, { "epoch": 0.09009768782064798, "grad_norm": 0.5685014724731445, "learning_rate": 0.00019912319670982117, "loss": 0.2245, "step": 6410 }, { "epoch": 0.0902382458359688, "grad_norm": 0.4846496880054474, "learning_rate": 0.0001991170441587082, "loss": 0.2427, "step": 6420 }, { "epoch": 0.09037880385128962, "grad_norm": 0.5893009305000305, "learning_rate": 0.00019911087019241466, "loss": 0.2562, "step": 6430 }, { "epoch": 0.09051936186661044, "grad_norm": 0.5451236963272095, "learning_rate": 0.00019910467481227452, "loss": 0.2269, "step": 6440 }, { "epoch": 0.09065991988193127, "grad_norm": 0.46445807814598083, "learning_rate": 0.00019909845801962636, "loss": 0.2583, "step": 6450 }, { "epoch": 0.0908004778972521, "grad_norm": 0.38349512219429016, "learning_rate": 0.00019909221981581334, "loss": 0.2493, "step": 6460 }, { "epoch": 0.09094103591257291, "grad_norm": 0.6188830137252808, "learning_rate": 0.00019908596020218333, "loss": 0.2664, "step": 6470 }, { "epoch": 0.09108159392789374, "grad_norm": 0.516822874546051, "learning_rate": 0.00019907967918008873, "loss": 0.2412, "step": 6480 }, { "epoch": 0.09122215194321456, "grad_norm": 0.42812368273735046, "learning_rate": 0.00019907337675088664, "loss": 0.2495, "step": 6490 }, { "epoch": 0.09136270995853539, "grad_norm": 0.5717242360115051, "learning_rate": 0.00019906705291593876, "loss": 0.249, "step": 6500 }, { "epoch": 0.0915032679738562, "grad_norm": 0.48675647377967834, "learning_rate": 0.00019906070767661144, "loss": 0.2476, "step": 6510 }, { "epoch": 0.09164382598917703, "grad_norm": 0.4916194975376129, "learning_rate": 0.0001990543410342756, "loss": 0.2774, "step": 6520 }, { "epoch": 0.09178438400449786, "grad_norm": 0.5784870982170105, "learning_rate": 0.0001990479529903068, "loss": 0.2368, "step": 6530 }, { "epoch": 0.09192494201981868, "grad_norm": 0.5581029653549194, "learning_rate": 0.00019904154354608526, "loss": 0.275, "step": 6540 }, { "epoch": 0.0920655000351395, "grad_norm": 0.4577856957912445, "learning_rate": 0.00019903511270299583, "loss": 0.2732, "step": 6550 }, { "epoch": 0.09220605805046032, "grad_norm": 0.6007733345031738, "learning_rate": 0.00019902866046242785, "loss": 0.2408, "step": 6560 }, { "epoch": 0.09234661606578115, "grad_norm": 0.4733324944972992, "learning_rate": 0.0001990221868257755, "loss": 0.2442, "step": 6570 }, { "epoch": 0.09248717408110198, "grad_norm": 0.5347083210945129, "learning_rate": 0.00019901569179443744, "loss": 0.2243, "step": 6580 }, { "epoch": 0.0926277320964228, "grad_norm": 0.5432078838348389, "learning_rate": 0.00019900917536981697, "loss": 0.2569, "step": 6590 }, { "epoch": 0.09276829011174362, "grad_norm": 0.47041982412338257, "learning_rate": 0.00019900263755332203, "loss": 0.2623, "step": 6600 }, { "epoch": 0.09290884812706444, "grad_norm": 0.709175705909729, "learning_rate": 0.00019899607834636518, "loss": 0.2454, "step": 6610 }, { "epoch": 0.09304940614238527, "grad_norm": 0.5433493256568909, "learning_rate": 0.00019898949775036355, "loss": 0.2594, "step": 6620 }, { "epoch": 0.0931899641577061, "grad_norm": 0.4944099485874176, "learning_rate": 0.00019898289576673905, "loss": 0.2749, "step": 6630 }, { "epoch": 0.09333052217302691, "grad_norm": 0.6038771867752075, "learning_rate": 0.000198976272396918, "loss": 0.2425, "step": 6640 }, { "epoch": 0.09347108018834774, "grad_norm": 0.6890851855278015, "learning_rate": 0.00019896962764233148, "loss": 0.2524, "step": 6650 }, { "epoch": 0.09361163820366857, "grad_norm": 0.6427812576293945, "learning_rate": 0.00019896296150441516, "loss": 0.2449, "step": 6660 }, { "epoch": 0.09375219621898939, "grad_norm": 0.5056873559951782, "learning_rate": 0.00019895627398460933, "loss": 0.2421, "step": 6670 }, { "epoch": 0.0938927542343102, "grad_norm": 0.7854136228561401, "learning_rate": 0.00019894956508435886, "loss": 0.2575, "step": 6680 }, { "epoch": 0.09403331224963103, "grad_norm": 0.5667088627815247, "learning_rate": 0.0001989428348051133, "loss": 0.2742, "step": 6690 }, { "epoch": 0.09417387026495186, "grad_norm": 0.6039605140686035, "learning_rate": 0.00019893608314832677, "loss": 0.2721, "step": 6700 }, { "epoch": 0.09431442828027269, "grad_norm": 0.49026021361351013, "learning_rate": 0.00019892931011545803, "loss": 0.2341, "step": 6710 }, { "epoch": 0.09445498629559351, "grad_norm": 0.4740043878555298, "learning_rate": 0.00019892251570797047, "loss": 0.2331, "step": 6720 }, { "epoch": 0.09459554431091433, "grad_norm": 0.4828124940395355, "learning_rate": 0.0001989156999273321, "loss": 0.2738, "step": 6730 }, { "epoch": 0.09473610232623515, "grad_norm": 0.5850508809089661, "learning_rate": 0.0001989088627750155, "loss": 0.2336, "step": 6740 }, { "epoch": 0.09487666034155598, "grad_norm": 0.48459509015083313, "learning_rate": 0.00019890200425249792, "loss": 0.252, "step": 6750 }, { "epoch": 0.0950172183568768, "grad_norm": 0.4098058342933655, "learning_rate": 0.0001988951243612612, "loss": 0.2502, "step": 6760 }, { "epoch": 0.09515777637219762, "grad_norm": 0.5001391172409058, "learning_rate": 0.00019888822310279184, "loss": 0.2418, "step": 6770 }, { "epoch": 0.09529833438751845, "grad_norm": 0.6043509244918823, "learning_rate": 0.0001988813004785809, "loss": 0.2552, "step": 6780 }, { "epoch": 0.09543889240283927, "grad_norm": 0.4252854287624359, "learning_rate": 0.00019887435649012408, "loss": 0.2252, "step": 6790 }, { "epoch": 0.0955794504181601, "grad_norm": 0.45443910360336304, "learning_rate": 0.00019886739113892166, "loss": 0.2399, "step": 6800 }, { "epoch": 0.09572000843348091, "grad_norm": 0.4590103328227997, "learning_rate": 0.0001988604044264786, "loss": 0.2294, "step": 6810 }, { "epoch": 0.09586056644880174, "grad_norm": 0.4579792320728302, "learning_rate": 0.0001988533963543045, "loss": 0.2328, "step": 6820 }, { "epoch": 0.09600112446412257, "grad_norm": 0.5196878910064697, "learning_rate": 0.00019884636692391343, "loss": 0.2263, "step": 6830 }, { "epoch": 0.0961416824794434, "grad_norm": 0.46159133315086365, "learning_rate": 0.0001988393161368242, "loss": 0.2534, "step": 6840 }, { "epoch": 0.09628224049476422, "grad_norm": 0.582403838634491, "learning_rate": 0.0001988322439945602, "loss": 0.2528, "step": 6850 }, { "epoch": 0.09642279851008503, "grad_norm": 0.5543406009674072, "learning_rate": 0.00019882515049864945, "loss": 0.2295, "step": 6860 }, { "epoch": 0.09656335652540586, "grad_norm": 0.42296671867370605, "learning_rate": 0.00019881803565062456, "loss": 0.2402, "step": 6870 }, { "epoch": 0.09670391454072669, "grad_norm": 0.5111995935440063, "learning_rate": 0.00019881089945202274, "loss": 0.2117, "step": 6880 }, { "epoch": 0.09684447255604751, "grad_norm": 0.40601876378059387, "learning_rate": 0.00019880374190438585, "loss": 0.2378, "step": 6890 }, { "epoch": 0.09698503057136833, "grad_norm": 0.5247124433517456, "learning_rate": 0.00019879656300926036, "loss": 0.2824, "step": 6900 }, { "epoch": 0.09712558858668915, "grad_norm": 0.49566319584846497, "learning_rate": 0.0001987893627681973, "loss": 0.2467, "step": 6910 }, { "epoch": 0.09726614660200998, "grad_norm": 0.582575798034668, "learning_rate": 0.0001987821411827524, "loss": 0.2451, "step": 6920 }, { "epoch": 0.09740670461733081, "grad_norm": 0.6084159016609192, "learning_rate": 0.00019877489825448588, "loss": 0.239, "step": 6930 }, { "epoch": 0.09754726263265162, "grad_norm": 0.5342608094215393, "learning_rate": 0.00019876763398496273, "loss": 0.2594, "step": 6940 }, { "epoch": 0.09768782064797245, "grad_norm": 0.5405324101448059, "learning_rate": 0.0001987603483757524, "loss": 0.2271, "step": 6950 }, { "epoch": 0.09782837866329328, "grad_norm": 0.505929708480835, "learning_rate": 0.00019875304142842905, "loss": 0.252, "step": 6960 }, { "epoch": 0.0979689366786141, "grad_norm": 0.5125372409820557, "learning_rate": 0.0001987457131445714, "loss": 0.2668, "step": 6970 }, { "epoch": 0.09810949469393491, "grad_norm": 0.5413892865180969, "learning_rate": 0.0001987383635257628, "loss": 0.2326, "step": 6980 }, { "epoch": 0.09825005270925574, "grad_norm": 0.6026713848114014, "learning_rate": 0.00019873099257359123, "loss": 0.2719, "step": 6990 }, { "epoch": 0.09839061072457657, "grad_norm": 0.5687562227249146, "learning_rate": 0.0001987236002896492, "loss": 0.2375, "step": 7000 }, { "epoch": 0.09839061072457657, "eval_chrf": 79.72353346065984, "eval_loss": 0.4819222390651703, "eval_runtime": 196.8304, "eval_samples_per_second": 0.508, "eval_steps_per_second": 0.02, "step": 7000 }, { "epoch": 0.0985311687398974, "grad_norm": 0.6272669434547424, "learning_rate": 0.0001987161866755339, "loss": 0.2573, "step": 7010 }, { "epoch": 0.09867172675521822, "grad_norm": 0.43905067443847656, "learning_rate": 0.00019870875173284716, "loss": 0.2667, "step": 7020 }, { "epoch": 0.09881228477053904, "grad_norm": 0.5508715510368347, "learning_rate": 0.00019870129546319533, "loss": 0.2484, "step": 7030 }, { "epoch": 0.09895284278585986, "grad_norm": 0.5342567563056946, "learning_rate": 0.00019869381786818938, "loss": 0.2521, "step": 7040 }, { "epoch": 0.09909340080118069, "grad_norm": 0.5154755711555481, "learning_rate": 0.00019868631894944498, "loss": 0.2399, "step": 7050 }, { "epoch": 0.09923395881650152, "grad_norm": 0.40197110176086426, "learning_rate": 0.0001986787987085823, "loss": 0.2216, "step": 7060 }, { "epoch": 0.09937451683182233, "grad_norm": 0.4556470811367035, "learning_rate": 0.00019867125714722618, "loss": 0.2395, "step": 7070 }, { "epoch": 0.09951507484714316, "grad_norm": 0.46725577116012573, "learning_rate": 0.00019866369426700602, "loss": 0.2501, "step": 7080 }, { "epoch": 0.09965563286246398, "grad_norm": 0.6671276688575745, "learning_rate": 0.0001986561100695559, "loss": 0.2496, "step": 7090 }, { "epoch": 0.09979619087778481, "grad_norm": 0.4392211437225342, "learning_rate": 0.00019864850455651438, "loss": 0.2292, "step": 7100 }, { "epoch": 0.09993674889310562, "grad_norm": 0.5096685886383057, "learning_rate": 0.00019864087772952479, "loss": 0.2348, "step": 7110 }, { "epoch": 0.10007730690842645, "grad_norm": 0.6072500944137573, "learning_rate": 0.00019863322959023493, "loss": 0.2564, "step": 7120 }, { "epoch": 0.10021786492374728, "grad_norm": 0.46237069368362427, "learning_rate": 0.00019862556014029725, "loss": 0.2228, "step": 7130 }, { "epoch": 0.1003584229390681, "grad_norm": 0.527052640914917, "learning_rate": 0.00019861786938136882, "loss": 0.2264, "step": 7140 }, { "epoch": 0.10049898095438893, "grad_norm": 0.5248671770095825, "learning_rate": 0.0001986101573151113, "loss": 0.2181, "step": 7150 }, { "epoch": 0.10063953896970974, "grad_norm": 0.5087975263595581, "learning_rate": 0.00019860242394319097, "loss": 0.2399, "step": 7160 }, { "epoch": 0.10078009698503057, "grad_norm": 0.6083720922470093, "learning_rate": 0.0001985946692672787, "loss": 0.25, "step": 7170 }, { "epoch": 0.1009206550003514, "grad_norm": 0.427260160446167, "learning_rate": 0.0001985868932890499, "loss": 0.2368, "step": 7180 }, { "epoch": 0.10106121301567222, "grad_norm": 0.5379019975662231, "learning_rate": 0.00019857909601018472, "loss": 0.2441, "step": 7190 }, { "epoch": 0.10120177103099304, "grad_norm": 0.4616182744503021, "learning_rate": 0.0001985712774323678, "loss": 0.2326, "step": 7200 }, { "epoch": 0.10134232904631386, "grad_norm": 0.509182870388031, "learning_rate": 0.00019856343755728846, "loss": 0.2416, "step": 7210 }, { "epoch": 0.10148288706163469, "grad_norm": 0.4154062569141388, "learning_rate": 0.0001985555763866405, "loss": 0.2091, "step": 7220 }, { "epoch": 0.10162344507695552, "grad_norm": 0.4568565785884857, "learning_rate": 0.00019854769392212246, "loss": 0.2101, "step": 7230 }, { "epoch": 0.10176400309227633, "grad_norm": 0.5902854800224304, "learning_rate": 0.0001985397901654374, "loss": 0.2583, "step": 7240 }, { "epoch": 0.10190456110759716, "grad_norm": 0.4994632303714752, "learning_rate": 0.00019853186511829303, "loss": 0.2377, "step": 7250 }, { "epoch": 0.10204511912291798, "grad_norm": 0.4629485011100769, "learning_rate": 0.0001985239187824016, "loss": 0.2637, "step": 7260 }, { "epoch": 0.10218567713823881, "grad_norm": 0.5010542273521423, "learning_rate": 0.00019851595115948, "loss": 0.2284, "step": 7270 }, { "epoch": 0.10232623515355962, "grad_norm": 0.5815369486808777, "learning_rate": 0.0001985079622512497, "loss": 0.2589, "step": 7280 }, { "epoch": 0.10246679316888045, "grad_norm": 0.5223342776298523, "learning_rate": 0.00019849995205943683, "loss": 0.1986, "step": 7290 }, { "epoch": 0.10260735118420128, "grad_norm": 0.47997236251831055, "learning_rate": 0.000198491920585772, "loss": 0.2295, "step": 7300 }, { "epoch": 0.1027479091995221, "grad_norm": 0.4088563621044159, "learning_rate": 0.00019848386783199055, "loss": 0.2579, "step": 7310 }, { "epoch": 0.10288846721484293, "grad_norm": 0.47599363327026367, "learning_rate": 0.00019847579379983228, "loss": 0.2112, "step": 7320 }, { "epoch": 0.10302902523016375, "grad_norm": 0.4933672249317169, "learning_rate": 0.00019846769849104172, "loss": 0.2191, "step": 7330 }, { "epoch": 0.10316958324548457, "grad_norm": 0.5240706205368042, "learning_rate": 0.00019845958190736792, "loss": 0.2438, "step": 7340 }, { "epoch": 0.1033101412608054, "grad_norm": 0.49246230721473694, "learning_rate": 0.00019845144405056456, "loss": 0.2165, "step": 7350 }, { "epoch": 0.10345069927612623, "grad_norm": 0.4593331813812256, "learning_rate": 0.00019844328492238985, "loss": 0.2268, "step": 7360 }, { "epoch": 0.10359125729144704, "grad_norm": 0.46987101435661316, "learning_rate": 0.00019843510452460672, "loss": 0.2433, "step": 7370 }, { "epoch": 0.10373181530676787, "grad_norm": 0.39112797379493713, "learning_rate": 0.00019842690285898256, "loss": 0.239, "step": 7380 }, { "epoch": 0.10387237332208869, "grad_norm": 0.43101245164871216, "learning_rate": 0.00019841867992728948, "loss": 0.2328, "step": 7390 }, { "epoch": 0.10401293133740952, "grad_norm": 0.4474431872367859, "learning_rate": 0.00019841043573130409, "loss": 0.2493, "step": 7400 }, { "epoch": 0.10415348935273033, "grad_norm": 0.6698712110519409, "learning_rate": 0.0001984021702728076, "loss": 0.2216, "step": 7410 }, { "epoch": 0.10429404736805116, "grad_norm": 0.6470811367034912, "learning_rate": 0.00019839388355358587, "loss": 0.2573, "step": 7420 }, { "epoch": 0.10443460538337199, "grad_norm": 0.5258649587631226, "learning_rate": 0.0001983855755754293, "loss": 0.2524, "step": 7430 }, { "epoch": 0.10457516339869281, "grad_norm": 0.4496793746948242, "learning_rate": 0.00019837724634013299, "loss": 0.2452, "step": 7440 }, { "epoch": 0.10471572141401364, "grad_norm": 0.4539211094379425, "learning_rate": 0.00019836889584949643, "loss": 0.2345, "step": 7450 }, { "epoch": 0.10485627942933445, "grad_norm": 0.44238775968551636, "learning_rate": 0.0001983605241053239, "loss": 0.2284, "step": 7460 }, { "epoch": 0.10499683744465528, "grad_norm": 0.7367810010910034, "learning_rate": 0.0001983521311094242, "loss": 0.2167, "step": 7470 }, { "epoch": 0.10513739545997611, "grad_norm": 0.6223915815353394, "learning_rate": 0.00019834371686361068, "loss": 0.2394, "step": 7480 }, { "epoch": 0.10527795347529693, "grad_norm": 0.5297920107841492, "learning_rate": 0.00019833528136970133, "loss": 0.2371, "step": 7490 }, { "epoch": 0.10541851149061775, "grad_norm": 0.6213352084159851, "learning_rate": 0.00019832682462951872, "loss": 0.2323, "step": 7500 }, { "epoch": 0.10555906950593857, "grad_norm": 0.6057639122009277, "learning_rate": 0.00019831834664489, "loss": 0.2644, "step": 7510 }, { "epoch": 0.1056996275212594, "grad_norm": 0.43835896253585815, "learning_rate": 0.00019830984741764693, "loss": 0.248, "step": 7520 }, { "epoch": 0.10584018553658023, "grad_norm": 0.40223389863967896, "learning_rate": 0.00019830132694962586, "loss": 0.2213, "step": 7530 }, { "epoch": 0.10598074355190104, "grad_norm": 0.545213520526886, "learning_rate": 0.0001982927852426677, "loss": 0.2464, "step": 7540 }, { "epoch": 0.10612130156722187, "grad_norm": 0.6962087750434875, "learning_rate": 0.00019828422229861798, "loss": 0.2557, "step": 7550 }, { "epoch": 0.1062618595825427, "grad_norm": 0.4490336775779724, "learning_rate": 0.00019827563811932678, "loss": 0.2205, "step": 7560 }, { "epoch": 0.10640241759786352, "grad_norm": 0.668609082698822, "learning_rate": 0.00019826703270664885, "loss": 0.2255, "step": 7570 }, { "epoch": 0.10654297561318435, "grad_norm": 0.577055811882019, "learning_rate": 0.00019825840606244342, "loss": 0.2412, "step": 7580 }, { "epoch": 0.10668353362850516, "grad_norm": 0.4610646665096283, "learning_rate": 0.00019824975818857435, "loss": 0.2206, "step": 7590 }, { "epoch": 0.10682409164382599, "grad_norm": 0.5945837497711182, "learning_rate": 0.0001982410890869101, "loss": 0.2382, "step": 7600 }, { "epoch": 0.10696464965914682, "grad_norm": 0.48687273263931274, "learning_rate": 0.00019823239875932377, "loss": 0.2252, "step": 7610 }, { "epoch": 0.10710520767446764, "grad_norm": 0.4183899462223053, "learning_rate": 0.00019822368720769293, "loss": 0.2288, "step": 7620 }, { "epoch": 0.10724576568978846, "grad_norm": 0.39779701828956604, "learning_rate": 0.0001982149544338998, "loss": 0.2239, "step": 7630 }, { "epoch": 0.10738632370510928, "grad_norm": 0.4888986349105835, "learning_rate": 0.00019820620043983124, "loss": 0.2267, "step": 7640 }, { "epoch": 0.10752688172043011, "grad_norm": 0.43206343054771423, "learning_rate": 0.00019819742522737854, "loss": 0.263, "step": 7650 }, { "epoch": 0.10766743973575094, "grad_norm": 0.49468815326690674, "learning_rate": 0.0001981886287984377, "loss": 0.2393, "step": 7660 }, { "epoch": 0.10780799775107175, "grad_norm": 0.4747503697872162, "learning_rate": 0.00019817981115490932, "loss": 0.2164, "step": 7670 }, { "epoch": 0.10794855576639258, "grad_norm": 0.5016836524009705, "learning_rate": 0.00019817097229869849, "loss": 0.2225, "step": 7680 }, { "epoch": 0.1080891137817134, "grad_norm": 0.47500646114349365, "learning_rate": 0.00019816211223171493, "loss": 0.2545, "step": 7690 }, { "epoch": 0.10822967179703423, "grad_norm": 0.4425160884857178, "learning_rate": 0.00019815323095587295, "loss": 0.2328, "step": 7700 }, { "epoch": 0.10837022981235504, "grad_norm": 0.4525761008262634, "learning_rate": 0.00019814432847309145, "loss": 0.215, "step": 7710 }, { "epoch": 0.10851078782767587, "grad_norm": 0.5745055079460144, "learning_rate": 0.00019813540478529385, "loss": 0.2292, "step": 7720 }, { "epoch": 0.1086513458429967, "grad_norm": 0.5081249475479126, "learning_rate": 0.00019812645989440827, "loss": 0.2418, "step": 7730 }, { "epoch": 0.10879190385831752, "grad_norm": 0.4926249384880066, "learning_rate": 0.00019811749380236728, "loss": 0.2463, "step": 7740 }, { "epoch": 0.10893246187363835, "grad_norm": 0.5166073441505432, "learning_rate": 0.00019810850651110807, "loss": 0.2275, "step": 7750 }, { "epoch": 0.10907301988895916, "grad_norm": 0.35917648673057556, "learning_rate": 0.0001980994980225725, "loss": 0.2274, "step": 7760 }, { "epoch": 0.10921357790427999, "grad_norm": 0.39934268593788147, "learning_rate": 0.00019809046833870692, "loss": 0.2674, "step": 7770 }, { "epoch": 0.10935413591960082, "grad_norm": 0.42677411437034607, "learning_rate": 0.00019808141746146225, "loss": 0.2292, "step": 7780 }, { "epoch": 0.10949469393492164, "grad_norm": 0.5277503132820129, "learning_rate": 0.00019807234539279405, "loss": 0.2441, "step": 7790 }, { "epoch": 0.10963525195024246, "grad_norm": 0.49637770652770996, "learning_rate": 0.00019806325213466238, "loss": 0.2547, "step": 7800 }, { "epoch": 0.10977580996556328, "grad_norm": 0.43198898434638977, "learning_rate": 0.00019805413768903197, "loss": 0.2427, "step": 7810 }, { "epoch": 0.10991636798088411, "grad_norm": 0.6287575364112854, "learning_rate": 0.00019804500205787207, "loss": 0.2561, "step": 7820 }, { "epoch": 0.11005692599620494, "grad_norm": 0.5387474894523621, "learning_rate": 0.0001980358452431565, "loss": 0.2219, "step": 7830 }, { "epoch": 0.11019748401152575, "grad_norm": 0.5207616686820984, "learning_rate": 0.0001980266672468637, "loss": 0.2252, "step": 7840 }, { "epoch": 0.11033804202684658, "grad_norm": 0.6341081261634827, "learning_rate": 0.00019801746807097668, "loss": 0.2415, "step": 7850 }, { "epoch": 0.1104786000421674, "grad_norm": 0.49170592427253723, "learning_rate": 0.00019800824771748298, "loss": 0.2483, "step": 7860 }, { "epoch": 0.11061915805748823, "grad_norm": 0.4697312116622925, "learning_rate": 0.00019799900618837474, "loss": 0.2623, "step": 7870 }, { "epoch": 0.11075971607280906, "grad_norm": 0.5614626407623291, "learning_rate": 0.0001979897434856487, "loss": 0.2268, "step": 7880 }, { "epoch": 0.11090027408812987, "grad_norm": 0.5126927495002747, "learning_rate": 0.00019798045961130615, "loss": 0.2269, "step": 7890 }, { "epoch": 0.1110408321034507, "grad_norm": 0.45797285437583923, "learning_rate": 0.00019797115456735297, "loss": 0.2573, "step": 7900 }, { "epoch": 0.11118139011877153, "grad_norm": 0.4749772250652313, "learning_rate": 0.00019796182835579956, "loss": 0.2135, "step": 7910 }, { "epoch": 0.11132194813409235, "grad_norm": 0.4598739743232727, "learning_rate": 0.000197952480978661, "loss": 0.2278, "step": 7920 }, { "epoch": 0.11146250614941317, "grad_norm": 0.40140441060066223, "learning_rate": 0.00019794311243795685, "loss": 0.232, "step": 7930 }, { "epoch": 0.11160306416473399, "grad_norm": 0.5466649532318115, "learning_rate": 0.00019793372273571125, "loss": 0.2229, "step": 7940 }, { "epoch": 0.11174362218005482, "grad_norm": 0.4565970301628113, "learning_rate": 0.00019792431187395294, "loss": 0.2664, "step": 7950 }, { "epoch": 0.11188418019537565, "grad_norm": 0.4595685303211212, "learning_rate": 0.00019791487985471525, "loss": 0.2316, "step": 7960 }, { "epoch": 0.11202473821069646, "grad_norm": 0.455619752407074, "learning_rate": 0.00019790542668003607, "loss": 0.2581, "step": 7970 }, { "epoch": 0.11216529622601729, "grad_norm": 0.4443734884262085, "learning_rate": 0.0001978959523519578, "loss": 0.2496, "step": 7980 }, { "epoch": 0.11230585424133811, "grad_norm": 0.45769697427749634, "learning_rate": 0.0001978864568725275, "loss": 0.2285, "step": 7990 }, { "epoch": 0.11244641225665894, "grad_norm": 0.4537741541862488, "learning_rate": 0.00019787694024379676, "loss": 0.2621, "step": 8000 }, { "epoch": 0.11244641225665894, "eval_chrf": 82.43581377835828, "eval_loss": 0.5067694783210754, "eval_runtime": 205.7966, "eval_samples_per_second": 0.486, "eval_steps_per_second": 0.019, "step": 8000 }, { "epoch": 0.11258697027197975, "grad_norm": 0.5132802128791809, "learning_rate": 0.0001978674024678217, "loss": 0.2356, "step": 8010 }, { "epoch": 0.11272752828730058, "grad_norm": 0.47775766253471375, "learning_rate": 0.00019785784354666306, "loss": 0.2392, "step": 8020 }, { "epoch": 0.1128680863026214, "grad_norm": 0.48678943514823914, "learning_rate": 0.00019784826348238617, "loss": 0.2202, "step": 8030 }, { "epoch": 0.11300864431794223, "grad_norm": 0.5569357872009277, "learning_rate": 0.00019783866227706084, "loss": 0.2233, "step": 8040 }, { "epoch": 0.11314920233326306, "grad_norm": 0.46687600016593933, "learning_rate": 0.00019782903993276156, "loss": 0.2487, "step": 8050 }, { "epoch": 0.11328976034858387, "grad_norm": 0.4049544632434845, "learning_rate": 0.00019781939645156728, "loss": 0.2368, "step": 8060 }, { "epoch": 0.1134303183639047, "grad_norm": 0.5318085551261902, "learning_rate": 0.00019780973183556162, "loss": 0.2557, "step": 8070 }, { "epoch": 0.11357087637922553, "grad_norm": 0.46007734537124634, "learning_rate": 0.00019780004608683266, "loss": 0.2357, "step": 8080 }, { "epoch": 0.11371143439454635, "grad_norm": 0.5977789759635925, "learning_rate": 0.0001977903392074731, "loss": 0.2584, "step": 8090 }, { "epoch": 0.11385199240986717, "grad_norm": 0.5007039904594421, "learning_rate": 0.00019778061119958023, "loss": 0.2112, "step": 8100 }, { "epoch": 0.113992550425188, "grad_norm": 0.5760759711265564, "learning_rate": 0.00019777086206525585, "loss": 0.2459, "step": 8110 }, { "epoch": 0.11413310844050882, "grad_norm": 0.43324658274650574, "learning_rate": 0.0001977610918066064, "loss": 0.2449, "step": 8120 }, { "epoch": 0.11427366645582965, "grad_norm": 0.5646788477897644, "learning_rate": 0.0001977513004257428, "loss": 0.2107, "step": 8130 }, { "epoch": 0.11441422447115046, "grad_norm": 0.3953395187854767, "learning_rate": 0.00019774148792478058, "loss": 0.2319, "step": 8140 }, { "epoch": 0.11455478248647129, "grad_norm": 0.3455359637737274, "learning_rate": 0.00019773165430583982, "loss": 0.2154, "step": 8150 }, { "epoch": 0.11469534050179211, "grad_norm": 0.5823870897293091, "learning_rate": 0.00019772179957104518, "loss": 0.2302, "step": 8160 }, { "epoch": 0.11483589851711294, "grad_norm": 0.5114933252334595, "learning_rate": 0.00019771192372252584, "loss": 0.2567, "step": 8170 }, { "epoch": 0.11497645653243377, "grad_norm": 0.48762500286102295, "learning_rate": 0.00019770202676241562, "loss": 0.2375, "step": 8180 }, { "epoch": 0.11511701454775458, "grad_norm": 0.8657709360122681, "learning_rate": 0.0001976921086928528, "loss": 0.2359, "step": 8190 }, { "epoch": 0.11525757256307541, "grad_norm": 0.48863881826400757, "learning_rate": 0.00019768216951598032, "loss": 0.2253, "step": 8200 }, { "epoch": 0.11539813057839624, "grad_norm": 0.4692245125770569, "learning_rate": 0.0001976722092339456, "loss": 0.239, "step": 8210 }, { "epoch": 0.11553868859371706, "grad_norm": 0.5036208033561707, "learning_rate": 0.00019766222784890067, "loss": 0.2216, "step": 8220 }, { "epoch": 0.11567924660903788, "grad_norm": 0.5226303935050964, "learning_rate": 0.0001976522253630021, "loss": 0.2641, "step": 8230 }, { "epoch": 0.1158198046243587, "grad_norm": 0.5416714549064636, "learning_rate": 0.00019764220177841103, "loss": 0.255, "step": 8240 }, { "epoch": 0.11596036263967953, "grad_norm": 0.39930713176727295, "learning_rate": 0.0001976321570972931, "loss": 0.2399, "step": 8250 }, { "epoch": 0.11610092065500036, "grad_norm": 0.4201917350292206, "learning_rate": 0.00019762209132181862, "loss": 0.2565, "step": 8260 }, { "epoch": 0.11624147867032117, "grad_norm": 0.6495237946510315, "learning_rate": 0.00019761200445416239, "loss": 0.2286, "step": 8270 }, { "epoch": 0.116382036685642, "grad_norm": 0.4430442452430725, "learning_rate": 0.00019760189649650376, "loss": 0.2358, "step": 8280 }, { "epoch": 0.11652259470096282, "grad_norm": 0.498177707195282, "learning_rate": 0.00019759176745102664, "loss": 0.2272, "step": 8290 }, { "epoch": 0.11666315271628365, "grad_norm": 0.4552452862262726, "learning_rate": 0.0001975816173199195, "loss": 0.233, "step": 8300 }, { "epoch": 0.11680371073160448, "grad_norm": 0.5571867227554321, "learning_rate": 0.00019757144610537544, "loss": 0.217, "step": 8310 }, { "epoch": 0.11694426874692529, "grad_norm": 0.5300667881965637, "learning_rate": 0.00019756125380959193, "loss": 0.2378, "step": 8320 }, { "epoch": 0.11708482676224612, "grad_norm": 0.5119692087173462, "learning_rate": 0.00019755104043477125, "loss": 0.2262, "step": 8330 }, { "epoch": 0.11722538477756694, "grad_norm": 0.4793112874031067, "learning_rate": 0.00019754080598312, "loss": 0.2116, "step": 8340 }, { "epoch": 0.11736594279288777, "grad_norm": 0.5343843698501587, "learning_rate": 0.00019753055045684944, "loss": 0.2373, "step": 8350 }, { "epoch": 0.11750650080820858, "grad_norm": 0.4279910922050476, "learning_rate": 0.0001975202738581754, "loss": 0.2469, "step": 8360 }, { "epoch": 0.11764705882352941, "grad_norm": 0.49253949522972107, "learning_rate": 0.00019750997618931825, "loss": 0.245, "step": 8370 }, { "epoch": 0.11778761683885024, "grad_norm": 0.4585903584957123, "learning_rate": 0.00019749965745250285, "loss": 0.2319, "step": 8380 }, { "epoch": 0.11792817485417106, "grad_norm": 0.6014941930770874, "learning_rate": 0.00019748931764995868, "loss": 0.2493, "step": 8390 }, { "epoch": 0.11806873286949188, "grad_norm": 0.6054887175559998, "learning_rate": 0.0001974789567839198, "loss": 0.2592, "step": 8400 }, { "epoch": 0.1182092908848127, "grad_norm": 0.6954628229141235, "learning_rate": 0.00019746857485662475, "loss": 0.2638, "step": 8410 }, { "epoch": 0.11834984890013353, "grad_norm": 0.719398021697998, "learning_rate": 0.00019745817187031655, "loss": 0.2348, "step": 8420 }, { "epoch": 0.11849040691545436, "grad_norm": 0.38335222005844116, "learning_rate": 0.000197447747827243, "loss": 0.2278, "step": 8430 }, { "epoch": 0.11863096493077517, "grad_norm": 0.36306431889533997, "learning_rate": 0.00019743730272965627, "loss": 0.212, "step": 8440 }, { "epoch": 0.118771522946096, "grad_norm": 0.5828087329864502, "learning_rate": 0.00019742683657981308, "loss": 0.2816, "step": 8450 }, { "epoch": 0.11891208096141682, "grad_norm": 0.535893440246582, "learning_rate": 0.00019741634937997478, "loss": 0.236, "step": 8460 }, { "epoch": 0.11905263897673765, "grad_norm": 0.4915892779827118, "learning_rate": 0.0001974058411324072, "loss": 0.2408, "step": 8470 }, { "epoch": 0.11919319699205848, "grad_norm": 0.4109126925468445, "learning_rate": 0.0001973953118393808, "loss": 0.2082, "step": 8480 }, { "epoch": 0.11933375500737929, "grad_norm": 0.49847477674484253, "learning_rate": 0.0001973847615031705, "loss": 0.2236, "step": 8490 }, { "epoch": 0.11947431302270012, "grad_norm": 0.5509682297706604, "learning_rate": 0.0001973741901260558, "loss": 0.245, "step": 8500 }, { "epoch": 0.11961487103802095, "grad_norm": 0.5117948651313782, "learning_rate": 0.00019736359771032076, "loss": 0.2585, "step": 8510 }, { "epoch": 0.11975542905334177, "grad_norm": 0.4626653790473938, "learning_rate": 0.00019735298425825392, "loss": 0.2572, "step": 8520 }, { "epoch": 0.11989598706866259, "grad_norm": 0.7884001135826111, "learning_rate": 0.00019734234977214848, "loss": 0.2232, "step": 8530 }, { "epoch": 0.12003654508398341, "grad_norm": 0.43860548734664917, "learning_rate": 0.0001973316942543021, "loss": 0.1902, "step": 8540 }, { "epoch": 0.12017710309930424, "grad_norm": 0.6320831775665283, "learning_rate": 0.000197321017707017, "loss": 0.2194, "step": 8550 }, { "epoch": 0.12031766111462507, "grad_norm": 0.5021673440933228, "learning_rate": 0.0001973103201326, "loss": 0.2337, "step": 8560 }, { "epoch": 0.12045821912994588, "grad_norm": 0.5387911200523376, "learning_rate": 0.0001972996015333623, "loss": 0.2245, "step": 8570 }, { "epoch": 0.1205987771452667, "grad_norm": 0.5952897071838379, "learning_rate": 0.00019728886191161987, "loss": 0.2344, "step": 8580 }, { "epoch": 0.12073933516058753, "grad_norm": 0.3942081034183502, "learning_rate": 0.00019727810126969305, "loss": 0.2262, "step": 8590 }, { "epoch": 0.12087989317590836, "grad_norm": 0.5513126850128174, "learning_rate": 0.0001972673196099068, "loss": 0.2509, "step": 8600 }, { "epoch": 0.12102045119122919, "grad_norm": 0.5135902762413025, "learning_rate": 0.00019725651693459058, "loss": 0.2407, "step": 8610 }, { "epoch": 0.12116100920655, "grad_norm": 0.46190938353538513, "learning_rate": 0.00019724569324607843, "loss": 0.2418, "step": 8620 }, { "epoch": 0.12130156722187083, "grad_norm": 0.520503044128418, "learning_rate": 0.0001972348485467089, "loss": 0.2665, "step": 8630 }, { "epoch": 0.12144212523719165, "grad_norm": 0.5142205953598022, "learning_rate": 0.0001972239828388251, "loss": 0.2331, "step": 8640 }, { "epoch": 0.12158268325251248, "grad_norm": 0.496604859828949, "learning_rate": 0.00019721309612477463, "loss": 0.2185, "step": 8650 }, { "epoch": 0.1217232412678333, "grad_norm": 0.8093092441558838, "learning_rate": 0.00019720218840690973, "loss": 0.2721, "step": 8660 }, { "epoch": 0.12186379928315412, "grad_norm": 0.4930914044380188, "learning_rate": 0.00019719125968758707, "loss": 0.241, "step": 8670 }, { "epoch": 0.12200435729847495, "grad_norm": 0.44180360436439514, "learning_rate": 0.00019718030996916793, "loss": 0.2084, "step": 8680 }, { "epoch": 0.12214491531379577, "grad_norm": 0.47540080547332764, "learning_rate": 0.0001971693392540181, "loss": 0.2316, "step": 8690 }, { "epoch": 0.12228547332911659, "grad_norm": 0.49588194489479065, "learning_rate": 0.00019715834754450791, "loss": 0.2561, "step": 8700 }, { "epoch": 0.12242603134443741, "grad_norm": 0.33857402205467224, "learning_rate": 0.00019714733484301218, "loss": 0.2137, "step": 8710 }, { "epoch": 0.12256658935975824, "grad_norm": 0.5175547003746033, "learning_rate": 0.00019713630115191037, "loss": 0.2084, "step": 8720 }, { "epoch": 0.12270714737507907, "grad_norm": 0.5434607863426208, "learning_rate": 0.00019712524647358637, "loss": 0.2329, "step": 8730 }, { "epoch": 0.1228477053903999, "grad_norm": 0.5493789315223694, "learning_rate": 0.00019711417081042868, "loss": 0.2165, "step": 8740 }, { "epoch": 0.12298826340572071, "grad_norm": 0.48063433170318604, "learning_rate": 0.00019710307416483024, "loss": 0.2262, "step": 8750 }, { "epoch": 0.12312882142104153, "grad_norm": 0.5307974815368652, "learning_rate": 0.0001970919565391887, "loss": 0.2445, "step": 8760 }, { "epoch": 0.12326937943636236, "grad_norm": 0.8536155223846436, "learning_rate": 0.000197080817935906, "loss": 0.2391, "step": 8770 }, { "epoch": 0.12340993745168319, "grad_norm": 0.49438464641571045, "learning_rate": 0.0001970696583573888, "loss": 0.222, "step": 8780 }, { "epoch": 0.123550495467004, "grad_norm": 0.4848876893520355, "learning_rate": 0.0001970584778060483, "loss": 0.2287, "step": 8790 }, { "epoch": 0.12369105348232483, "grad_norm": 0.408035546541214, "learning_rate": 0.00019704727628430004, "loss": 0.2271, "step": 8800 }, { "epoch": 0.12383161149764566, "grad_norm": 0.5347570776939392, "learning_rate": 0.0001970360537945643, "loss": 0.2477, "step": 8810 }, { "epoch": 0.12397216951296648, "grad_norm": 0.5373921990394592, "learning_rate": 0.00019702481033926578, "loss": 0.2469, "step": 8820 }, { "epoch": 0.1241127275282873, "grad_norm": 0.5754639506340027, "learning_rate": 0.00019701354592083377, "loss": 0.2607, "step": 8830 }, { "epoch": 0.12425328554360812, "grad_norm": 0.5097301006317139, "learning_rate": 0.00019700226054170198, "loss": 0.2157, "step": 8840 }, { "epoch": 0.12439384355892895, "grad_norm": 0.42645829916000366, "learning_rate": 0.00019699095420430877, "loss": 0.2224, "step": 8850 }, { "epoch": 0.12453440157424978, "grad_norm": 0.5002944469451904, "learning_rate": 0.00019697962691109697, "loss": 0.2162, "step": 8860 }, { "epoch": 0.12467495958957059, "grad_norm": 0.42151981592178345, "learning_rate": 0.00019696827866451394, "loss": 0.2208, "step": 8870 }, { "epoch": 0.12481551760489142, "grad_norm": 0.5172638893127441, "learning_rate": 0.00019695690946701163, "loss": 0.2177, "step": 8880 }, { "epoch": 0.12495607562021224, "grad_norm": 0.48275619745254517, "learning_rate": 0.00019694551932104642, "loss": 0.2216, "step": 8890 }, { "epoch": 0.12509663363553306, "grad_norm": 0.4467068314552307, "learning_rate": 0.00019693410822907924, "loss": 0.2391, "step": 8900 }, { "epoch": 0.1252371916508539, "grad_norm": 0.4540744125843048, "learning_rate": 0.00019692267619357562, "loss": 0.2436, "step": 8910 }, { "epoch": 0.1253777496661747, "grad_norm": 0.4542349576950073, "learning_rate": 0.00019691122321700553, "loss": 0.2116, "step": 8920 }, { "epoch": 0.12551830768149555, "grad_norm": 0.48326826095581055, "learning_rate": 0.0001968997493018435, "loss": 0.2603, "step": 8930 }, { "epoch": 0.12565886569681636, "grad_norm": 0.45143458247184753, "learning_rate": 0.00019688825445056854, "loss": 0.223, "step": 8940 }, { "epoch": 0.12579942371213718, "grad_norm": 0.49860867857933044, "learning_rate": 0.00019687673866566427, "loss": 0.2305, "step": 8950 }, { "epoch": 0.12593998172745802, "grad_norm": 0.5772536396980286, "learning_rate": 0.0001968652019496188, "loss": 0.2452, "step": 8960 }, { "epoch": 0.12608053974277883, "grad_norm": 0.5652404427528381, "learning_rate": 0.0001968536443049247, "loss": 0.2551, "step": 8970 }, { "epoch": 0.12622109775809964, "grad_norm": 0.44158807396888733, "learning_rate": 0.0001968420657340791, "loss": 0.2139, "step": 8980 }, { "epoch": 0.12636165577342048, "grad_norm": 0.5304787755012512, "learning_rate": 0.0001968304662395837, "loss": 0.2375, "step": 8990 }, { "epoch": 0.1265022137887413, "grad_norm": 0.4007686376571655, "learning_rate": 0.00019681884582394468, "loss": 0.2681, "step": 9000 }, { "epoch": 0.1265022137887413, "eval_chrf": 83.5463488314016, "eval_loss": 0.4594023525714874, "eval_runtime": 205.5947, "eval_samples_per_second": 0.486, "eval_steps_per_second": 0.019, "step": 9000 }, { "epoch": 0.12664277180406214, "grad_norm": 0.5894201993942261, "learning_rate": 0.00019680720448967271, "loss": 0.2327, "step": 9010 }, { "epoch": 0.12678332981938295, "grad_norm": 0.5372081398963928, "learning_rate": 0.00019679554223928306, "loss": 0.2178, "step": 9020 }, { "epoch": 0.12692388783470376, "grad_norm": 0.4564507305622101, "learning_rate": 0.00019678385907529543, "loss": 0.2213, "step": 9030 }, { "epoch": 0.1270644458500246, "grad_norm": 0.3771321475505829, "learning_rate": 0.00019677215500023406, "loss": 0.2248, "step": 9040 }, { "epoch": 0.12720500386534542, "grad_norm": 0.5481129884719849, "learning_rate": 0.0001967604300166278, "loss": 0.2589, "step": 9050 }, { "epoch": 0.12734556188066626, "grad_norm": 0.5340000987052917, "learning_rate": 0.00019674868412700988, "loss": 0.2493, "step": 9060 }, { "epoch": 0.12748611989598707, "grad_norm": 0.6218507289886475, "learning_rate": 0.00019673691733391814, "loss": 0.2186, "step": 9070 }, { "epoch": 0.12762667791130788, "grad_norm": 0.4179977476596832, "learning_rate": 0.0001967251296398949, "loss": 0.2448, "step": 9080 }, { "epoch": 0.12776723592662872, "grad_norm": 0.626200795173645, "learning_rate": 0.00019671332104748701, "loss": 0.2592, "step": 9090 }, { "epoch": 0.12790779394194954, "grad_norm": 0.5540094375610352, "learning_rate": 0.0001967014915592458, "loss": 0.2281, "step": 9100 }, { "epoch": 0.12804835195727035, "grad_norm": 0.5310568809509277, "learning_rate": 0.00019668964117772718, "loss": 0.2474, "step": 9110 }, { "epoch": 0.1281889099725912, "grad_norm": 0.44051870703697205, "learning_rate": 0.00019667776990549155, "loss": 0.2523, "step": 9120 }, { "epoch": 0.128329467987912, "grad_norm": 0.39723333716392517, "learning_rate": 0.00019666587774510373, "loss": 0.2224, "step": 9130 }, { "epoch": 0.12847002600323285, "grad_norm": 0.40893715620040894, "learning_rate": 0.00019665396469913321, "loss": 0.2333, "step": 9140 }, { "epoch": 0.12861058401855366, "grad_norm": 0.4723608195781708, "learning_rate": 0.00019664203077015389, "loss": 0.2293, "step": 9150 }, { "epoch": 0.12875114203387447, "grad_norm": 0.4790593087673187, "learning_rate": 0.00019663007596074422, "loss": 0.2409, "step": 9160 }, { "epoch": 0.1288917000491953, "grad_norm": 0.5768985152244568, "learning_rate": 0.00019661810027348718, "loss": 0.2289, "step": 9170 }, { "epoch": 0.12903225806451613, "grad_norm": 0.45519647002220154, "learning_rate": 0.00019660610371097014, "loss": 0.2169, "step": 9180 }, { "epoch": 0.12917281607983697, "grad_norm": 0.5375517010688782, "learning_rate": 0.00019659408627578518, "loss": 0.2313, "step": 9190 }, { "epoch": 0.12931337409515778, "grad_norm": 0.5258593559265137, "learning_rate": 0.0001965820479705287, "loss": 0.2525, "step": 9200 }, { "epoch": 0.1294539321104786, "grad_norm": 0.4912596642971039, "learning_rate": 0.0001965699887978017, "loss": 0.2692, "step": 9210 }, { "epoch": 0.12959449012579943, "grad_norm": 0.5644695162773132, "learning_rate": 0.00019655790876020972, "loss": 0.2123, "step": 9220 }, { "epoch": 0.12973504814112025, "grad_norm": 0.5708533525466919, "learning_rate": 0.00019654580786036276, "loss": 0.2336, "step": 9230 }, { "epoch": 0.12987560615644106, "grad_norm": 0.4405006766319275, "learning_rate": 0.00019653368610087532, "loss": 0.2438, "step": 9240 }, { "epoch": 0.1300161641717619, "grad_norm": 0.6312296986579895, "learning_rate": 0.00019652154348436638, "loss": 0.2254, "step": 9250 }, { "epoch": 0.1301567221870827, "grad_norm": 0.5054198503494263, "learning_rate": 0.00019650938001345954, "loss": 0.238, "step": 9260 }, { "epoch": 0.13029728020240355, "grad_norm": 0.4843730330467224, "learning_rate": 0.0001964971956907828, "loss": 0.2237, "step": 9270 }, { "epoch": 0.13043783821772437, "grad_norm": 0.5578977465629578, "learning_rate": 0.00019648499051896868, "loss": 0.2551, "step": 9280 }, { "epoch": 0.13057839623304518, "grad_norm": 0.6799400448799133, "learning_rate": 0.0001964727645006543, "loss": 0.2286, "step": 9290 }, { "epoch": 0.13071895424836602, "grad_norm": 0.4192991554737091, "learning_rate": 0.0001964605176384811, "loss": 0.262, "step": 9300 }, { "epoch": 0.13085951226368683, "grad_norm": 0.4753544330596924, "learning_rate": 0.00019644824993509522, "loss": 0.2607, "step": 9310 }, { "epoch": 0.13100007027900765, "grad_norm": 0.4694324731826782, "learning_rate": 0.00019643596139314715, "loss": 0.2221, "step": 9320 }, { "epoch": 0.1311406282943285, "grad_norm": 0.43425655364990234, "learning_rate": 0.000196423652015292, "loss": 0.2448, "step": 9330 }, { "epoch": 0.1312811863096493, "grad_norm": 0.30343201756477356, "learning_rate": 0.0001964113218041893, "loss": 0.2271, "step": 9340 }, { "epoch": 0.13142174432497014, "grad_norm": 0.457977831363678, "learning_rate": 0.0001963989707625031, "loss": 0.2114, "step": 9350 }, { "epoch": 0.13156230234029095, "grad_norm": 0.45157763361930847, "learning_rate": 0.00019638659889290193, "loss": 0.2095, "step": 9360 }, { "epoch": 0.13170286035561177, "grad_norm": 0.44693228602409363, "learning_rate": 0.00019637420619805895, "loss": 0.2295, "step": 9370 }, { "epoch": 0.1318434183709326, "grad_norm": 0.5506470203399658, "learning_rate": 0.00019636179268065168, "loss": 0.2344, "step": 9380 }, { "epoch": 0.13198397638625342, "grad_norm": 0.3795108497142792, "learning_rate": 0.0001963493583433621, "loss": 0.2339, "step": 9390 }, { "epoch": 0.13212453440157426, "grad_norm": 0.42435768246650696, "learning_rate": 0.00019633690318887688, "loss": 0.2245, "step": 9400 }, { "epoch": 0.13226509241689507, "grad_norm": 0.48172616958618164, "learning_rate": 0.00019632442721988696, "loss": 0.2214, "step": 9410 }, { "epoch": 0.1324056504322159, "grad_norm": 0.4663597643375397, "learning_rate": 0.000196311930439088, "loss": 0.238, "step": 9420 }, { "epoch": 0.13254620844753673, "grad_norm": 0.43239930272102356, "learning_rate": 0.00019629941284918, "loss": 0.2342, "step": 9430 }, { "epoch": 0.13268676646285754, "grad_norm": 0.6404114961624146, "learning_rate": 0.0001962868744528675, "loss": 0.2242, "step": 9440 }, { "epoch": 0.13282732447817835, "grad_norm": 0.42753544449806213, "learning_rate": 0.00019627431525285954, "loss": 0.2237, "step": 9450 }, { "epoch": 0.1329678824934992, "grad_norm": 0.3860357999801636, "learning_rate": 0.00019626173525186968, "loss": 0.2327, "step": 9460 }, { "epoch": 0.13310844050882, "grad_norm": 0.5065526366233826, "learning_rate": 0.00019624913445261594, "loss": 0.213, "step": 9470 }, { "epoch": 0.13324899852414085, "grad_norm": 0.5617696642875671, "learning_rate": 0.0001962365128578208, "loss": 0.2382, "step": 9480 }, { "epoch": 0.13338955653946166, "grad_norm": 0.4992463290691376, "learning_rate": 0.0001962238704702113, "loss": 0.2364, "step": 9490 }, { "epoch": 0.13353011455478248, "grad_norm": 0.637082040309906, "learning_rate": 0.000196211207292519, "loss": 0.2254, "step": 9500 }, { "epoch": 0.13367067257010332, "grad_norm": 0.43641334772109985, "learning_rate": 0.00019619852332747984, "loss": 0.2171, "step": 9510 }, { "epoch": 0.13381123058542413, "grad_norm": 0.42612147331237793, "learning_rate": 0.00019618581857783432, "loss": 0.2484, "step": 9520 }, { "epoch": 0.13395178860074497, "grad_norm": 0.4482842981815338, "learning_rate": 0.00019617309304632743, "loss": 0.2294, "step": 9530 }, { "epoch": 0.13409234661606578, "grad_norm": 0.5693816542625427, "learning_rate": 0.00019616034673570863, "loss": 0.2484, "step": 9540 }, { "epoch": 0.1342329046313866, "grad_norm": 0.506377100944519, "learning_rate": 0.0001961475796487319, "loss": 0.1952, "step": 9550 }, { "epoch": 0.13437346264670744, "grad_norm": 0.5523819923400879, "learning_rate": 0.0001961347917881557, "loss": 0.2189, "step": 9560 }, { "epoch": 0.13451402066202825, "grad_norm": 0.4156286120414734, "learning_rate": 0.00019612198315674295, "loss": 0.2076, "step": 9570 }, { "epoch": 0.13465457867734906, "grad_norm": 0.38789063692092896, "learning_rate": 0.00019610915375726108, "loss": 0.2092, "step": 9580 }, { "epoch": 0.1347951366926699, "grad_norm": 0.5569458603858948, "learning_rate": 0.000196096303592482, "loss": 0.2292, "step": 9590 }, { "epoch": 0.13493569470799072, "grad_norm": 0.4448249638080597, "learning_rate": 0.0001960834326651821, "loss": 0.2044, "step": 9600 }, { "epoch": 0.13507625272331156, "grad_norm": 0.42959892749786377, "learning_rate": 0.0001960705409781423, "loss": 0.2375, "step": 9610 }, { "epoch": 0.13521681073863237, "grad_norm": 0.4909825325012207, "learning_rate": 0.00019605762853414796, "loss": 0.2007, "step": 9620 }, { "epoch": 0.13535736875395318, "grad_norm": 1.044018268585205, "learning_rate": 0.0001960446953359889, "loss": 0.2375, "step": 9630 }, { "epoch": 0.13549792676927402, "grad_norm": 0.43878230452537537, "learning_rate": 0.00019603174138645952, "loss": 0.2492, "step": 9640 }, { "epoch": 0.13563848478459484, "grad_norm": 0.4571862518787384, "learning_rate": 0.0001960187666883586, "loss": 0.2401, "step": 9650 }, { "epoch": 0.13577904279991568, "grad_norm": 0.510308027267456, "learning_rate": 0.00019600577124448946, "loss": 0.2349, "step": 9660 }, { "epoch": 0.1359196008152365, "grad_norm": 0.3984282910823822, "learning_rate": 0.0001959927550576599, "loss": 0.2202, "step": 9670 }, { "epoch": 0.1360601588305573, "grad_norm": 0.4630577862262726, "learning_rate": 0.00019597971813068216, "loss": 0.2337, "step": 9680 }, { "epoch": 0.13620071684587814, "grad_norm": 0.5899356007575989, "learning_rate": 0.00019596666046637303, "loss": 0.2292, "step": 9690 }, { "epoch": 0.13634127486119896, "grad_norm": 0.5899912714958191, "learning_rate": 0.00019595358206755373, "loss": 0.241, "step": 9700 }, { "epoch": 0.13648183287651977, "grad_norm": 0.5677922368049622, "learning_rate": 0.00019594048293704994, "loss": 0.2375, "step": 9710 }, { "epoch": 0.1366223908918406, "grad_norm": 0.4866439700126648, "learning_rate": 0.0001959273630776919, "loss": 0.2449, "step": 9720 }, { "epoch": 0.13676294890716142, "grad_norm": 0.4721696674823761, "learning_rate": 0.00019591422249231425, "loss": 0.2277, "step": 9730 }, { "epoch": 0.13690350692248227, "grad_norm": 0.3854721188545227, "learning_rate": 0.00019590106118375618, "loss": 0.2213, "step": 9740 }, { "epoch": 0.13704406493780308, "grad_norm": 0.4399317800998688, "learning_rate": 0.00019588787915486125, "loss": 0.2259, "step": 9750 }, { "epoch": 0.1371846229531239, "grad_norm": 0.49207353591918945, "learning_rate": 0.0001958746764084776, "loss": 0.2135, "step": 9760 }, { "epoch": 0.13732518096844473, "grad_norm": 0.47379225492477417, "learning_rate": 0.0001958614529474578, "loss": 0.2362, "step": 9770 }, { "epoch": 0.13746573898376555, "grad_norm": 0.5586134195327759, "learning_rate": 0.00019584820877465892, "loss": 0.2275, "step": 9780 }, { "epoch": 0.13760629699908639, "grad_norm": 0.49579349160194397, "learning_rate": 0.0001958349438929425, "loss": 0.2083, "step": 9790 }, { "epoch": 0.1377468550144072, "grad_norm": 0.38048985600471497, "learning_rate": 0.0001958216583051745, "loss": 0.2013, "step": 9800 }, { "epoch": 0.137887413029728, "grad_norm": 0.48474690318107605, "learning_rate": 0.00019580835201422545, "loss": 0.2571, "step": 9810 }, { "epoch": 0.13802797104504885, "grad_norm": 0.5224998593330383, "learning_rate": 0.00019579502502297026, "loss": 0.2331, "step": 9820 }, { "epoch": 0.13816852906036967, "grad_norm": 0.4451829493045807, "learning_rate": 0.00019578167733428834, "loss": 0.2139, "step": 9830 }, { "epoch": 0.13830908707569048, "grad_norm": 0.47491031885147095, "learning_rate": 0.00019576830895106365, "loss": 0.2188, "step": 9840 }, { "epoch": 0.13844964509101132, "grad_norm": 0.5613846778869629, "learning_rate": 0.0001957549198761845, "loss": 0.23, "step": 9850 }, { "epoch": 0.13859020310633213, "grad_norm": 0.5138101577758789, "learning_rate": 0.00019574151011254374, "loss": 0.2407, "step": 9860 }, { "epoch": 0.13873076112165297, "grad_norm": 0.6644909381866455, "learning_rate": 0.0001957280796630387, "loss": 0.2388, "step": 9870 }, { "epoch": 0.1388713191369738, "grad_norm": 0.4899885058403015, "learning_rate": 0.00019571462853057109, "loss": 0.2562, "step": 9880 }, { "epoch": 0.1390118771522946, "grad_norm": 0.5329784154891968, "learning_rate": 0.00019570115671804726, "loss": 0.2092, "step": 9890 }, { "epoch": 0.13915243516761544, "grad_norm": 0.4139322340488434, "learning_rate": 0.00019568766422837787, "loss": 0.23, "step": 9900 }, { "epoch": 0.13929299318293625, "grad_norm": 0.5224969983100891, "learning_rate": 0.00019567415106447805, "loss": 0.213, "step": 9910 }, { "epoch": 0.1394335511982571, "grad_norm": 0.387287974357605, "learning_rate": 0.00019566061722926751, "loss": 0.2257, "step": 9920 }, { "epoch": 0.1395741092135779, "grad_norm": 0.5436925292015076, "learning_rate": 0.00019564706272567035, "loss": 0.2511, "step": 9930 }, { "epoch": 0.13971466722889872, "grad_norm": 0.4864850342273712, "learning_rate": 0.00019563348755661516, "loss": 0.2112, "step": 9940 }, { "epoch": 0.13985522524421956, "grad_norm": 0.4781823456287384, "learning_rate": 0.00019561989172503497, "loss": 0.226, "step": 9950 }, { "epoch": 0.13999578325954037, "grad_norm": 0.5050092339515686, "learning_rate": 0.0001956062752338673, "loss": 0.2352, "step": 9960 }, { "epoch": 0.1401363412748612, "grad_norm": 0.4395652413368225, "learning_rate": 0.0001955926380860541, "loss": 0.2332, "step": 9970 }, { "epoch": 0.14027689929018203, "grad_norm": 0.5985183119773865, "learning_rate": 0.00019557898028454182, "loss": 0.239, "step": 9980 }, { "epoch": 0.14041745730550284, "grad_norm": 0.4194292724132538, "learning_rate": 0.00019556530183228137, "loss": 0.2056, "step": 9990 }, { "epoch": 0.14055801532082368, "grad_norm": 0.47930511832237244, "learning_rate": 0.00019555160273222809, "loss": 0.2028, "step": 10000 }, { "epoch": 0.14055801532082368, "eval_chrf": 83.31965993456161, "eval_loss": 0.46625107526779175, "eval_runtime": 204.6953, "eval_samples_per_second": 0.489, "eval_steps_per_second": 0.02, "step": 10000 }, { "epoch": 0.1406985733361445, "grad_norm": 0.7410067915916443, "learning_rate": 0.00019553788298734182, "loss": 0.2348, "step": 10010 }, { "epoch": 0.1408391313514653, "grad_norm": 0.5216307640075684, "learning_rate": 0.0001955241426005868, "loss": 0.2371, "step": 10020 }, { "epoch": 0.14097968936678615, "grad_norm": 0.5811477899551392, "learning_rate": 0.0001955103815749318, "loss": 0.2139, "step": 10030 }, { "epoch": 0.14112024738210696, "grad_norm": 0.49069467186927795, "learning_rate": 0.00019549659991335006, "loss": 0.2291, "step": 10040 }, { "epoch": 0.1412608053974278, "grad_norm": 0.46020469069480896, "learning_rate": 0.0001954827976188192, "loss": 0.1943, "step": 10050 }, { "epoch": 0.14140136341274862, "grad_norm": 0.48496192693710327, "learning_rate": 0.00019546897469432129, "loss": 0.2219, "step": 10060 }, { "epoch": 0.14154192142806943, "grad_norm": 0.5898727774620056, "learning_rate": 0.00019545513114284298, "loss": 0.2242, "step": 10070 }, { "epoch": 0.14168247944339027, "grad_norm": 0.4671427607536316, "learning_rate": 0.0001954412669673753, "loss": 0.2234, "step": 10080 }, { "epoch": 0.14182303745871108, "grad_norm": 0.6892075538635254, "learning_rate": 0.00019542738217091365, "loss": 0.2087, "step": 10090 }, { "epoch": 0.1419635954740319, "grad_norm": 0.5817259550094604, "learning_rate": 0.00019541347675645806, "loss": 0.2352, "step": 10100 }, { "epoch": 0.14210415348935274, "grad_norm": 0.4034591019153595, "learning_rate": 0.00019539955072701295, "loss": 0.2207, "step": 10110 }, { "epoch": 0.14224471150467355, "grad_norm": 0.47377240657806396, "learning_rate": 0.00019538560408558707, "loss": 0.2231, "step": 10120 }, { "epoch": 0.1423852695199944, "grad_norm": 0.4781285524368286, "learning_rate": 0.00019537163683519377, "loss": 0.22, "step": 10130 }, { "epoch": 0.1425258275353152, "grad_norm": 0.6363995671272278, "learning_rate": 0.00019535764897885084, "loss": 0.2312, "step": 10140 }, { "epoch": 0.14266638555063602, "grad_norm": 0.46255913376808167, "learning_rate": 0.00019534364051958044, "loss": 0.2199, "step": 10150 }, { "epoch": 0.14280694356595686, "grad_norm": 0.5095379948616028, "learning_rate": 0.00019532961146040927, "loss": 0.2301, "step": 10160 }, { "epoch": 0.14294750158127767, "grad_norm": 0.5038661360740662, "learning_rate": 0.0001953155618043684, "loss": 0.2323, "step": 10170 }, { "epoch": 0.14308805959659848, "grad_norm": 0.42738714814186096, "learning_rate": 0.00019530149155449344, "loss": 0.2105, "step": 10180 }, { "epoch": 0.14322861761191932, "grad_norm": 0.42202505469322205, "learning_rate": 0.0001952874007138244, "loss": 0.222, "step": 10190 }, { "epoch": 0.14336917562724014, "grad_norm": 0.562140941619873, "learning_rate": 0.00019527328928540567, "loss": 0.2117, "step": 10200 }, { "epoch": 0.14350973364256098, "grad_norm": 0.5138777494430542, "learning_rate": 0.00019525915727228624, "loss": 0.2098, "step": 10210 }, { "epoch": 0.1436502916578818, "grad_norm": 0.5792656540870667, "learning_rate": 0.00019524500467751942, "loss": 0.2492, "step": 10220 }, { "epoch": 0.1437908496732026, "grad_norm": 0.3801478147506714, "learning_rate": 0.000195230831504163, "loss": 0.2312, "step": 10230 }, { "epoch": 0.14393140768852344, "grad_norm": 0.4319283962249756, "learning_rate": 0.00019521663775527931, "loss": 0.2111, "step": 10240 }, { "epoch": 0.14407196570384426, "grad_norm": 0.594716489315033, "learning_rate": 0.00019520242343393495, "loss": 0.2148, "step": 10250 }, { "epoch": 0.1442125237191651, "grad_norm": 0.516743004322052, "learning_rate": 0.0001951881885432011, "loss": 0.2086, "step": 10260 }, { "epoch": 0.1443530817344859, "grad_norm": 0.5203841924667358, "learning_rate": 0.00019517393308615338, "loss": 0.2184, "step": 10270 }, { "epoch": 0.14449363974980672, "grad_norm": 0.5061612129211426, "learning_rate": 0.00019515965706587174, "loss": 0.2359, "step": 10280 }, { "epoch": 0.14463419776512756, "grad_norm": 0.49285292625427246, "learning_rate": 0.0001951453604854407, "loss": 0.2366, "step": 10290 }, { "epoch": 0.14477475578044838, "grad_norm": 0.4080356955528259, "learning_rate": 0.00019513104334794917, "loss": 0.2116, "step": 10300 }, { "epoch": 0.1449153137957692, "grad_norm": 0.460966020822525, "learning_rate": 0.00019511670565649047, "loss": 0.2254, "step": 10310 }, { "epoch": 0.14505587181109003, "grad_norm": 0.5037501454353333, "learning_rate": 0.00019510234741416244, "loss": 0.2269, "step": 10320 }, { "epoch": 0.14519642982641084, "grad_norm": 0.7021297812461853, "learning_rate": 0.00019508796862406728, "loss": 0.2282, "step": 10330 }, { "epoch": 0.14533698784173169, "grad_norm": 0.5365563035011292, "learning_rate": 0.00019507356928931172, "loss": 0.2094, "step": 10340 }, { "epoch": 0.1454775458570525, "grad_norm": 0.45180317759513855, "learning_rate": 0.0001950591494130068, "loss": 0.2235, "step": 10350 }, { "epoch": 0.1456181038723733, "grad_norm": 0.46922942996025085, "learning_rate": 0.0001950447089982681, "loss": 0.2483, "step": 10360 }, { "epoch": 0.14575866188769415, "grad_norm": 0.5505566000938416, "learning_rate": 0.0001950302480482156, "loss": 0.2225, "step": 10370 }, { "epoch": 0.14589921990301497, "grad_norm": 0.44335654377937317, "learning_rate": 0.00019501576656597377, "loss": 0.2624, "step": 10380 }, { "epoch": 0.1460397779183358, "grad_norm": 0.4967087507247925, "learning_rate": 0.00019500126455467145, "loss": 0.2072, "step": 10390 }, { "epoch": 0.14618033593365662, "grad_norm": 0.43531334400177, "learning_rate": 0.0001949867420174419, "loss": 0.215, "step": 10400 }, { "epoch": 0.14632089394897743, "grad_norm": 0.7007616758346558, "learning_rate": 0.00019497219895742287, "loss": 0.2255, "step": 10410 }, { "epoch": 0.14646145196429827, "grad_norm": 0.5119047164916992, "learning_rate": 0.00019495763537775656, "loss": 0.2443, "step": 10420 }, { "epoch": 0.14660200997961909, "grad_norm": 0.8673428297042847, "learning_rate": 0.00019494305128158956, "loss": 0.2403, "step": 10430 }, { "epoch": 0.1467425679949399, "grad_norm": 0.5624560117721558, "learning_rate": 0.0001949284466720729, "loss": 0.2311, "step": 10440 }, { "epoch": 0.14688312601026074, "grad_norm": 0.5158213973045349, "learning_rate": 0.00019491382155236198, "loss": 0.2141, "step": 10450 }, { "epoch": 0.14702368402558155, "grad_norm": 0.4443780183792114, "learning_rate": 0.00019489917592561677, "loss": 0.2156, "step": 10460 }, { "epoch": 0.1471642420409024, "grad_norm": 0.5963322520256042, "learning_rate": 0.0001948845097950016, "loss": 0.2358, "step": 10470 }, { "epoch": 0.1473048000562232, "grad_norm": 0.4588531255722046, "learning_rate": 0.00019486982316368519, "loss": 0.2029, "step": 10480 }, { "epoch": 0.14744535807154402, "grad_norm": 0.43157896399497986, "learning_rate": 0.00019485511603484074, "loss": 0.2093, "step": 10490 }, { "epoch": 0.14758591608686486, "grad_norm": 0.4570079445838928, "learning_rate": 0.00019484038841164586, "loss": 0.2317, "step": 10500 }, { "epoch": 0.14772647410218567, "grad_norm": 0.44846659898757935, "learning_rate": 0.00019482564029728262, "loss": 0.221, "step": 10510 }, { "epoch": 0.1478670321175065, "grad_norm": 0.48597201704978943, "learning_rate": 0.0001948108716949375, "loss": 0.2547, "step": 10520 }, { "epoch": 0.14800759013282733, "grad_norm": 0.5305898785591125, "learning_rate": 0.0001947960826078013, "loss": 0.2329, "step": 10530 }, { "epoch": 0.14814814814814814, "grad_norm": 0.4303765296936035, "learning_rate": 0.00019478127303906945, "loss": 0.2363, "step": 10540 }, { "epoch": 0.14828870616346898, "grad_norm": 0.6144363284111023, "learning_rate": 0.00019476644299194166, "loss": 0.2386, "step": 10550 }, { "epoch": 0.1484292641787898, "grad_norm": 0.5180664658546448, "learning_rate": 0.00019475159246962206, "loss": 0.2009, "step": 10560 }, { "epoch": 0.1485698221941106, "grad_norm": 0.8704946637153625, "learning_rate": 0.00019473672147531933, "loss": 0.2402, "step": 10570 }, { "epoch": 0.14871038020943145, "grad_norm": 0.5063996315002441, "learning_rate": 0.00019472183001224645, "loss": 0.2416, "step": 10580 }, { "epoch": 0.14885093822475226, "grad_norm": 0.4910792410373688, "learning_rate": 0.00019470691808362085, "loss": 0.2412, "step": 10590 }, { "epoch": 0.1489914962400731, "grad_norm": 0.43074488639831543, "learning_rate": 0.0001946919856926644, "loss": 0.2145, "step": 10600 }, { "epoch": 0.14913205425539391, "grad_norm": 0.5350972414016724, "learning_rate": 0.0001946770328426034, "loss": 0.2121, "step": 10610 }, { "epoch": 0.14927261227071473, "grad_norm": 0.5227901935577393, "learning_rate": 0.00019466205953666852, "loss": 0.2107, "step": 10620 }, { "epoch": 0.14941317028603557, "grad_norm": 0.4980103075504303, "learning_rate": 0.00019464706577809496, "loss": 0.2462, "step": 10630 }, { "epoch": 0.14955372830135638, "grad_norm": 0.5568163394927979, "learning_rate": 0.00019463205157012217, "loss": 0.2313, "step": 10640 }, { "epoch": 0.14969428631667722, "grad_norm": 0.4182775318622589, "learning_rate": 0.00019461701691599417, "loss": 0.2151, "step": 10650 }, { "epoch": 0.14983484433199803, "grad_norm": 0.5256010293960571, "learning_rate": 0.0001946019618189593, "loss": 0.2374, "step": 10660 }, { "epoch": 0.14997540234731885, "grad_norm": 0.4888829290866852, "learning_rate": 0.00019458688628227043, "loss": 0.2414, "step": 10670 }, { "epoch": 0.1501159603626397, "grad_norm": 0.35894840955734253, "learning_rate": 0.0001945717903091847, "loss": 0.2301, "step": 10680 }, { "epoch": 0.1502565183779605, "grad_norm": 0.6234340667724609, "learning_rate": 0.00019455667390296379, "loss": 0.2293, "step": 10690 }, { "epoch": 0.15039707639328131, "grad_norm": 0.5959022045135498, "learning_rate": 0.0001945415370668737, "loss": 0.2221, "step": 10700 }, { "epoch": 0.15053763440860216, "grad_norm": 0.5411731004714966, "learning_rate": 0.00019452637980418492, "loss": 0.2451, "step": 10710 }, { "epoch": 0.15067819242392297, "grad_norm": 0.41397252678871155, "learning_rate": 0.0001945112021181723, "loss": 0.226, "step": 10720 }, { "epoch": 0.1508187504392438, "grad_norm": 0.4831259846687317, "learning_rate": 0.0001944960040121151, "loss": 0.1949, "step": 10730 }, { "epoch": 0.15095930845456462, "grad_norm": 0.4825355112552643, "learning_rate": 0.00019448078548929708, "loss": 0.2511, "step": 10740 }, { "epoch": 0.15109986646988544, "grad_norm": 0.42742496728897095, "learning_rate": 0.0001944655465530063, "loss": 0.1975, "step": 10750 }, { "epoch": 0.15124042448520628, "grad_norm": 0.5690857768058777, "learning_rate": 0.00019445028720653534, "loss": 0.2182, "step": 10760 }, { "epoch": 0.1513809825005271, "grad_norm": 0.4305635392665863, "learning_rate": 0.00019443500745318102, "loss": 0.2287, "step": 10770 }, { "epoch": 0.15152154051584793, "grad_norm": 0.3888097107410431, "learning_rate": 0.00019441970729624475, "loss": 0.2393, "step": 10780 }, { "epoch": 0.15166209853116874, "grad_norm": 0.42094510793685913, "learning_rate": 0.00019440438673903226, "loss": 0.2327, "step": 10790 }, { "epoch": 0.15180265654648956, "grad_norm": 0.438627690076828, "learning_rate": 0.00019438904578485372, "loss": 0.2304, "step": 10800 }, { "epoch": 0.1519432145618104, "grad_norm": 0.507936954498291, "learning_rate": 0.00019437368443702362, "loss": 0.2064, "step": 10810 }, { "epoch": 0.1520837725771312, "grad_norm": 0.4277139902114868, "learning_rate": 0.00019435830269886098, "loss": 0.2124, "step": 10820 }, { "epoch": 0.15222433059245202, "grad_norm": 0.43021994829177856, "learning_rate": 0.0001943429005736892, "loss": 0.2263, "step": 10830 }, { "epoch": 0.15236488860777286, "grad_norm": 0.43604740500450134, "learning_rate": 0.00019432747806483603, "loss": 0.2275, "step": 10840 }, { "epoch": 0.15250544662309368, "grad_norm": 0.4549795985221863, "learning_rate": 0.0001943120351756336, "loss": 0.2049, "step": 10850 }, { "epoch": 0.15264600463841452, "grad_norm": 0.45931580662727356, "learning_rate": 0.00019429657190941856, "loss": 0.2235, "step": 10860 }, { "epoch": 0.15278656265373533, "grad_norm": 0.5587042570114136, "learning_rate": 0.00019428108826953188, "loss": 0.2682, "step": 10870 }, { "epoch": 0.15292712066905614, "grad_norm": 0.4498588442802429, "learning_rate": 0.00019426558425931893, "loss": 0.2162, "step": 10880 }, { "epoch": 0.15306767868437698, "grad_norm": 0.36715206503868103, "learning_rate": 0.00019425005988212948, "loss": 0.2055, "step": 10890 }, { "epoch": 0.1532082366996978, "grad_norm": 0.40799346566200256, "learning_rate": 0.00019423451514131776, "loss": 0.1994, "step": 10900 }, { "epoch": 0.1533487947150186, "grad_norm": 0.6820157170295715, "learning_rate": 0.00019421895004024235, "loss": 0.2721, "step": 10910 }, { "epoch": 0.15348935273033945, "grad_norm": 0.5497896075248718, "learning_rate": 0.00019420336458226626, "loss": 0.2473, "step": 10920 }, { "epoch": 0.15362991074566026, "grad_norm": 0.4838441014289856, "learning_rate": 0.0001941877587707568, "loss": 0.2316, "step": 10930 }, { "epoch": 0.1537704687609811, "grad_norm": 0.5930109024047852, "learning_rate": 0.00019417213260908585, "loss": 0.2177, "step": 10940 }, { "epoch": 0.15391102677630192, "grad_norm": 0.5457140207290649, "learning_rate": 0.00019415648610062952, "loss": 0.214, "step": 10950 }, { "epoch": 0.15405158479162273, "grad_norm": 0.49875709414482117, "learning_rate": 0.00019414081924876841, "loss": 0.2238, "step": 10960 }, { "epoch": 0.15419214280694357, "grad_norm": 0.5106085538864136, "learning_rate": 0.0001941251320568875, "loss": 0.2032, "step": 10970 }, { "epoch": 0.15433270082226438, "grad_norm": 0.5855301022529602, "learning_rate": 0.00019410942452837614, "loss": 0.2275, "step": 10980 }, { "epoch": 0.15447325883758523, "grad_norm": 0.45460623502731323, "learning_rate": 0.00019409369666662814, "loss": 0.1917, "step": 10990 }, { "epoch": 0.15461381685290604, "grad_norm": 0.46648094058036804, "learning_rate": 0.00019407794847504158, "loss": 0.2199, "step": 11000 }, { "epoch": 0.15461381685290604, "eval_chrf": 83.42666111421941, "eval_loss": 0.44805753231048584, "eval_runtime": 141.7912, "eval_samples_per_second": 0.705, "eval_steps_per_second": 0.028, "step": 11000 }, { "epoch": 0.15475437486822685, "grad_norm": 0.5129318237304688, "learning_rate": 0.00019406217995701904, "loss": 0.2431, "step": 11010 }, { "epoch": 0.1548949328835477, "grad_norm": 0.5577740669250488, "learning_rate": 0.00019404639111596749, "loss": 0.2244, "step": 11020 }, { "epoch": 0.1550354908988685, "grad_norm": 0.5699359774589539, "learning_rate": 0.00019403058195529823, "loss": 0.232, "step": 11030 }, { "epoch": 0.15517604891418932, "grad_norm": 0.37373116612434387, "learning_rate": 0.00019401475247842696, "loss": 0.229, "step": 11040 }, { "epoch": 0.15531660692951016, "grad_norm": 0.5445089340209961, "learning_rate": 0.00019399890268877382, "loss": 0.2208, "step": 11050 }, { "epoch": 0.15545716494483097, "grad_norm": 0.452178955078125, "learning_rate": 0.0001939830325897633, "loss": 0.2649, "step": 11060 }, { "epoch": 0.1555977229601518, "grad_norm": 0.5334861278533936, "learning_rate": 0.0001939671421848243, "loss": 0.2138, "step": 11070 }, { "epoch": 0.15573828097547263, "grad_norm": 0.45646482706069946, "learning_rate": 0.00019395123147739003, "loss": 0.2432, "step": 11080 }, { "epoch": 0.15587883899079344, "grad_norm": 0.541918158531189, "learning_rate": 0.00019393530047089822, "loss": 0.2224, "step": 11090 }, { "epoch": 0.15601939700611428, "grad_norm": 0.5018472671508789, "learning_rate": 0.0001939193491687909, "loss": 0.2117, "step": 11100 }, { "epoch": 0.1561599550214351, "grad_norm": 0.4296761453151703, "learning_rate": 0.00019390337757451448, "loss": 0.2125, "step": 11110 }, { "epoch": 0.15630051303675593, "grad_norm": 0.4516717791557312, "learning_rate": 0.0001938873856915198, "loss": 0.2294, "step": 11120 }, { "epoch": 0.15644107105207675, "grad_norm": 0.532499372959137, "learning_rate": 0.00019387137352326204, "loss": 0.2269, "step": 11130 }, { "epoch": 0.15658162906739756, "grad_norm": 0.5420687794685364, "learning_rate": 0.0001938553410732008, "loss": 0.2162, "step": 11140 }, { "epoch": 0.1567221870827184, "grad_norm": 0.5829330682754517, "learning_rate": 0.0001938392883448, "loss": 0.2294, "step": 11150 }, { "epoch": 0.1568627450980392, "grad_norm": 0.47940340638160706, "learning_rate": 0.00019382321534152803, "loss": 0.238, "step": 11160 }, { "epoch": 0.15700330311336003, "grad_norm": 0.4618227183818817, "learning_rate": 0.00019380712206685762, "loss": 0.2472, "step": 11170 }, { "epoch": 0.15714386112868087, "grad_norm": 0.451911985874176, "learning_rate": 0.00019379100852426584, "loss": 0.224, "step": 11180 }, { "epoch": 0.15728441914400168, "grad_norm": 0.5306965708732605, "learning_rate": 0.00019377487471723418, "loss": 0.2518, "step": 11190 }, { "epoch": 0.15742497715932252, "grad_norm": 0.6336318850517273, "learning_rate": 0.00019375872064924854, "loss": 0.2168, "step": 11200 }, { "epoch": 0.15756553517464333, "grad_norm": 0.6012146472930908, "learning_rate": 0.0001937425463237991, "loss": 0.239, "step": 11210 }, { "epoch": 0.15770609318996415, "grad_norm": 0.72569340467453, "learning_rate": 0.00019372635174438055, "loss": 0.219, "step": 11220 }, { "epoch": 0.157846651205285, "grad_norm": 0.41902977228164673, "learning_rate": 0.0001937101369144918, "loss": 0.2157, "step": 11230 }, { "epoch": 0.1579872092206058, "grad_norm": 0.6844558119773865, "learning_rate": 0.0001936939018376363, "loss": 0.2345, "step": 11240 }, { "epoch": 0.15812776723592664, "grad_norm": 0.5705519318580627, "learning_rate": 0.00019367764651732173, "loss": 0.2267, "step": 11250 }, { "epoch": 0.15826832525124745, "grad_norm": 0.4265342950820923, "learning_rate": 0.00019366137095706024, "loss": 0.2178, "step": 11260 }, { "epoch": 0.15840888326656827, "grad_norm": 0.49786338210105896, "learning_rate": 0.0001936450751603683, "loss": 0.2194, "step": 11270 }, { "epoch": 0.1585494412818891, "grad_norm": 0.5172784328460693, "learning_rate": 0.0001936287591307668, "loss": 0.2397, "step": 11280 }, { "epoch": 0.15868999929720992, "grad_norm": 0.5547787547111511, "learning_rate": 0.00019361242287178093, "loss": 0.2122, "step": 11290 }, { "epoch": 0.15883055731253073, "grad_norm": 0.46685513854026794, "learning_rate": 0.00019359606638694033, "loss": 0.235, "step": 11300 }, { "epoch": 0.15897111532785158, "grad_norm": 0.5981021523475647, "learning_rate": 0.00019357968967977898, "loss": 0.243, "step": 11310 }, { "epoch": 0.1591116733431724, "grad_norm": 0.4840559661388397, "learning_rate": 0.0001935632927538352, "loss": 0.2093, "step": 11320 }, { "epoch": 0.15925223135849323, "grad_norm": 0.4729189872741699, "learning_rate": 0.00019354687561265167, "loss": 0.2144, "step": 11330 }, { "epoch": 0.15939278937381404, "grad_norm": 0.5277968645095825, "learning_rate": 0.00019353043825977558, "loss": 0.2071, "step": 11340 }, { "epoch": 0.15953334738913486, "grad_norm": 0.48004767298698425, "learning_rate": 0.00019351398069875826, "loss": 0.2289, "step": 11350 }, { "epoch": 0.1596739054044557, "grad_norm": 0.5471176505088806, "learning_rate": 0.00019349750293315557, "loss": 0.24, "step": 11360 }, { "epoch": 0.1598144634197765, "grad_norm": 0.49257561564445496, "learning_rate": 0.0001934810049665277, "loss": 0.2292, "step": 11370 }, { "epoch": 0.15995502143509735, "grad_norm": 0.394435316324234, "learning_rate": 0.00019346448680243917, "loss": 0.2066, "step": 11380 }, { "epoch": 0.16009557945041816, "grad_norm": 0.4983547031879425, "learning_rate": 0.0001934479484444589, "loss": 0.2114, "step": 11390 }, { "epoch": 0.16023613746573898, "grad_norm": 0.5153059363365173, "learning_rate": 0.00019343138989616013, "loss": 0.2369, "step": 11400 }, { "epoch": 0.16037669548105982, "grad_norm": 0.6187337636947632, "learning_rate": 0.00019341481116112052, "loss": 0.2314, "step": 11410 }, { "epoch": 0.16051725349638063, "grad_norm": 0.3402414619922638, "learning_rate": 0.00019339821224292208, "loss": 0.2134, "step": 11420 }, { "epoch": 0.16065781151170144, "grad_norm": 0.4472907483577728, "learning_rate": 0.00019338159314515117, "loss": 0.1956, "step": 11430 }, { "epoch": 0.16079836952702228, "grad_norm": 0.5496037006378174, "learning_rate": 0.00019336495387139844, "loss": 0.2259, "step": 11440 }, { "epoch": 0.1609389275423431, "grad_norm": 0.531448245048523, "learning_rate": 0.000193348294425259, "loss": 0.2356, "step": 11450 }, { "epoch": 0.16107948555766394, "grad_norm": 0.4533897936344147, "learning_rate": 0.00019333161481033232, "loss": 0.2438, "step": 11460 }, { "epoch": 0.16122004357298475, "grad_norm": 0.4858724772930145, "learning_rate": 0.00019331491503022212, "loss": 0.2291, "step": 11470 }, { "epoch": 0.16136060158830556, "grad_norm": 0.48073771595954895, "learning_rate": 0.00019329819508853658, "loss": 0.2346, "step": 11480 }, { "epoch": 0.1615011596036264, "grad_norm": 0.4850899875164032, "learning_rate": 0.0001932814549888882, "loss": 0.2124, "step": 11490 }, { "epoch": 0.16164171761894722, "grad_norm": 0.49765729904174805, "learning_rate": 0.00019326469473489387, "loss": 0.2031, "step": 11500 }, { "epoch": 0.16178227563426806, "grad_norm": 0.434810996055603, "learning_rate": 0.00019324791433017477, "loss": 0.2285, "step": 11510 }, { "epoch": 0.16192283364958887, "grad_norm": 0.4233197867870331, "learning_rate": 0.00019323111377835645, "loss": 0.1947, "step": 11520 }, { "epoch": 0.16206339166490968, "grad_norm": 0.4610973000526428, "learning_rate": 0.00019321429308306884, "loss": 0.2174, "step": 11530 }, { "epoch": 0.16220394968023052, "grad_norm": 0.4681873619556427, "learning_rate": 0.00019319745224794622, "loss": 0.2333, "step": 11540 }, { "epoch": 0.16234450769555134, "grad_norm": 0.3164941668510437, "learning_rate": 0.00019318059127662723, "loss": 0.2105, "step": 11550 }, { "epoch": 0.16248506571087215, "grad_norm": 0.4490436315536499, "learning_rate": 0.00019316371017275483, "loss": 0.2346, "step": 11560 }, { "epoch": 0.162625623726193, "grad_norm": 0.39336520433425903, "learning_rate": 0.0001931468089399763, "loss": 0.225, "step": 11570 }, { "epoch": 0.1627661817415138, "grad_norm": 0.48553505539894104, "learning_rate": 0.00019312988758194338, "loss": 0.2094, "step": 11580 }, { "epoch": 0.16290673975683465, "grad_norm": 0.5376838445663452, "learning_rate": 0.00019311294610231208, "loss": 0.2215, "step": 11590 }, { "epoch": 0.16304729777215546, "grad_norm": 0.3806610703468323, "learning_rate": 0.00019309598450474274, "loss": 0.24, "step": 11600 }, { "epoch": 0.16318785578747627, "grad_norm": 0.32200586795806885, "learning_rate": 0.00019307900279290005, "loss": 0.2218, "step": 11610 }, { "epoch": 0.1633284138027971, "grad_norm": 0.34597766399383545, "learning_rate": 0.00019306200097045313, "loss": 0.2129, "step": 11620 }, { "epoch": 0.16346897181811793, "grad_norm": 0.41980305314064026, "learning_rate": 0.00019304497904107533, "loss": 0.2381, "step": 11630 }, { "epoch": 0.16360952983343874, "grad_norm": 0.5225925445556641, "learning_rate": 0.00019302793700844446, "loss": 0.2235, "step": 11640 }, { "epoch": 0.16375008784875958, "grad_norm": 0.6067329049110413, "learning_rate": 0.00019301087487624257, "loss": 0.2363, "step": 11650 }, { "epoch": 0.1638906458640804, "grad_norm": 0.5046607255935669, "learning_rate": 0.00019299379264815612, "loss": 0.2086, "step": 11660 }, { "epoch": 0.16403120387940123, "grad_norm": 0.3685866892337799, "learning_rate": 0.00019297669032787587, "loss": 0.2232, "step": 11670 }, { "epoch": 0.16417176189472205, "grad_norm": 0.46026644110679626, "learning_rate": 0.00019295956791909693, "loss": 0.2348, "step": 11680 }, { "epoch": 0.16431231991004286, "grad_norm": 0.40158572793006897, "learning_rate": 0.0001929424254255188, "loss": 0.2264, "step": 11690 }, { "epoch": 0.1644528779253637, "grad_norm": 0.5387255549430847, "learning_rate": 0.00019292526285084524, "loss": 0.2055, "step": 11700 }, { "epoch": 0.1645934359406845, "grad_norm": 0.5109559893608093, "learning_rate": 0.0001929080801987844, "loss": 0.2088, "step": 11710 }, { "epoch": 0.16473399395600535, "grad_norm": 0.4983414113521576, "learning_rate": 0.00019289087747304873, "loss": 0.2203, "step": 11720 }, { "epoch": 0.16487455197132617, "grad_norm": 0.5244500041007996, "learning_rate": 0.00019287365467735511, "loss": 0.2393, "step": 11730 }, { "epoch": 0.16501510998664698, "grad_norm": 0.7381274104118347, "learning_rate": 0.00019285641181542465, "loss": 0.2293, "step": 11740 }, { "epoch": 0.16515566800196782, "grad_norm": 0.401611328125, "learning_rate": 0.0001928391488909828, "loss": 0.2418, "step": 11750 }, { "epoch": 0.16529622601728863, "grad_norm": 0.35754626989364624, "learning_rate": 0.00019282186590775942, "loss": 0.2519, "step": 11760 }, { "epoch": 0.16543678403260945, "grad_norm": 0.48142239451408386, "learning_rate": 0.00019280456286948868, "loss": 0.2001, "step": 11770 }, { "epoch": 0.1655773420479303, "grad_norm": 0.6075026392936707, "learning_rate": 0.00019278723977990902, "loss": 0.1858, "step": 11780 }, { "epoch": 0.1657179000632511, "grad_norm": 0.4034958779811859, "learning_rate": 0.0001927698966427633, "loss": 0.2119, "step": 11790 }, { "epoch": 0.16585845807857194, "grad_norm": 0.42134714126586914, "learning_rate": 0.0001927525334617986, "loss": 0.2428, "step": 11800 }, { "epoch": 0.16599901609389275, "grad_norm": 0.5965345501899719, "learning_rate": 0.0001927351502407665, "loss": 0.2184, "step": 11810 }, { "epoch": 0.16613957410921357, "grad_norm": 0.5037842392921448, "learning_rate": 0.00019271774698342273, "loss": 0.2136, "step": 11820 }, { "epoch": 0.1662801321245344, "grad_norm": 0.684178352355957, "learning_rate": 0.00019270032369352746, "loss": 0.2337, "step": 11830 }, { "epoch": 0.16642069013985522, "grad_norm": 0.45954614877700806, "learning_rate": 0.0001926828803748452, "loss": 0.2235, "step": 11840 }, { "epoch": 0.16656124815517606, "grad_norm": 0.4757162928581238, "learning_rate": 0.00019266541703114466, "loss": 0.214, "step": 11850 }, { "epoch": 0.16670180617049687, "grad_norm": 0.4088406264781952, "learning_rate": 0.000192647933666199, "loss": 0.2377, "step": 11860 }, { "epoch": 0.1668423641858177, "grad_norm": 0.5777791738510132, "learning_rate": 0.00019263043028378566, "loss": 0.2662, "step": 11870 }, { "epoch": 0.16698292220113853, "grad_norm": 0.4320667088031769, "learning_rate": 0.00019261290688768644, "loss": 0.2339, "step": 11880 }, { "epoch": 0.16712348021645934, "grad_norm": 0.4025791585445404, "learning_rate": 0.00019259536348168742, "loss": 0.2062, "step": 11890 }, { "epoch": 0.16726403823178015, "grad_norm": 0.5231763124465942, "learning_rate": 0.00019257780006957904, "loss": 0.2287, "step": 11900 }, { "epoch": 0.167404596247101, "grad_norm": 0.4425447881221771, "learning_rate": 0.00019256021665515601, "loss": 0.2174, "step": 11910 }, { "epoch": 0.1675451542624218, "grad_norm": 0.5530385971069336, "learning_rate": 0.0001925426132422174, "loss": 0.2384, "step": 11920 }, { "epoch": 0.16768571227774265, "grad_norm": 0.4237968921661377, "learning_rate": 0.0001925249898345666, "loss": 0.2378, "step": 11930 }, { "epoch": 0.16782627029306346, "grad_norm": 0.4966355264186859, "learning_rate": 0.0001925073464360113, "loss": 0.2284, "step": 11940 }, { "epoch": 0.16796682830838428, "grad_norm": 0.47906044125556946, "learning_rate": 0.00019248968305036354, "loss": 0.24, "step": 11950 }, { "epoch": 0.16810738632370512, "grad_norm": 0.3971858322620392, "learning_rate": 0.00019247199968143967, "loss": 0.2029, "step": 11960 }, { "epoch": 0.16824794433902593, "grad_norm": 0.40358278155326843, "learning_rate": 0.00019245429633306036, "loss": 0.2155, "step": 11970 }, { "epoch": 0.16838850235434677, "grad_norm": 0.4425409734249115, "learning_rate": 0.00019243657300905054, "loss": 0.2003, "step": 11980 }, { "epoch": 0.16852906036966758, "grad_norm": 0.5036493539810181, "learning_rate": 0.00019241882971323955, "loss": 0.2192, "step": 11990 }, { "epoch": 0.1686696183849884, "grad_norm": 0.5340859293937683, "learning_rate": 0.00019240106644946095, "loss": 0.2337, "step": 12000 }, { "epoch": 0.1686696183849884, "eval_chrf": 82.09448804661407, "eval_loss": 0.4382096529006958, "eval_runtime": 247.2295, "eval_samples_per_second": 0.404, "eval_steps_per_second": 0.016, "step": 12000 }, { "epoch": 0.16881017640030924, "grad_norm": 0.5135290026664734, "learning_rate": 0.00019238328322155273, "loss": 0.2238, "step": 12010 }, { "epoch": 0.16895073441563005, "grad_norm": 0.5166633725166321, "learning_rate": 0.0001923654800333571, "loss": 0.2511, "step": 12020 }, { "epoch": 0.16909129243095086, "grad_norm": 0.44228142499923706, "learning_rate": 0.00019234765688872055, "loss": 0.2041, "step": 12030 }, { "epoch": 0.1692318504462717, "grad_norm": 0.3702377378940582, "learning_rate": 0.000192329813791494, "loss": 0.2112, "step": 12040 }, { "epoch": 0.16937240846159252, "grad_norm": 0.4565224349498749, "learning_rate": 0.00019231195074553262, "loss": 0.2125, "step": 12050 }, { "epoch": 0.16951296647691336, "grad_norm": 0.4436790645122528, "learning_rate": 0.00019229406775469587, "loss": 0.1962, "step": 12060 }, { "epoch": 0.16965352449223417, "grad_norm": 0.5260986089706421, "learning_rate": 0.00019227616482284755, "loss": 0.2261, "step": 12070 }, { "epoch": 0.16979408250755498, "grad_norm": 0.3428475558757782, "learning_rate": 0.00019225824195385574, "loss": 0.231, "step": 12080 }, { "epoch": 0.16993464052287582, "grad_norm": 0.47546109557151794, "learning_rate": 0.00019224029915159286, "loss": 0.2129, "step": 12090 }, { "epoch": 0.17007519853819664, "grad_norm": 0.47432127594947815, "learning_rate": 0.00019222233641993567, "loss": 0.2029, "step": 12100 }, { "epoch": 0.17021575655351748, "grad_norm": 0.4969255030155182, "learning_rate": 0.00019220435376276509, "loss": 0.2369, "step": 12110 }, { "epoch": 0.1703563145688383, "grad_norm": 0.7607478499412537, "learning_rate": 0.00019218635118396652, "loss": 0.2149, "step": 12120 }, { "epoch": 0.1704968725841591, "grad_norm": 0.3294755220413208, "learning_rate": 0.0001921683286874296, "loss": 0.2078, "step": 12130 }, { "epoch": 0.17063743059947994, "grad_norm": 0.5928043723106384, "learning_rate": 0.0001921502862770482, "loss": 0.2304, "step": 12140 }, { "epoch": 0.17077798861480076, "grad_norm": 0.3646385371685028, "learning_rate": 0.00019213222395672057, "loss": 0.2069, "step": 12150 }, { "epoch": 0.17091854663012157, "grad_norm": 0.4349091053009033, "learning_rate": 0.00019211414173034928, "loss": 0.189, "step": 12160 }, { "epoch": 0.1710591046454424, "grad_norm": 0.41584905982017517, "learning_rate": 0.00019209603960184115, "loss": 0.2247, "step": 12170 }, { "epoch": 0.17119966266076322, "grad_norm": 0.42634835839271545, "learning_rate": 0.00019207791757510728, "loss": 0.2287, "step": 12180 }, { "epoch": 0.17134022067608407, "grad_norm": 0.6456392407417297, "learning_rate": 0.00019205977565406318, "loss": 0.2153, "step": 12190 }, { "epoch": 0.17148077869140488, "grad_norm": 0.44439736008644104, "learning_rate": 0.00019204161384262856, "loss": 0.203, "step": 12200 }, { "epoch": 0.1716213367067257, "grad_norm": 0.6120620965957642, "learning_rate": 0.0001920234321447274, "loss": 0.2369, "step": 12210 }, { "epoch": 0.17176189472204653, "grad_norm": 0.6957530379295349, "learning_rate": 0.0001920052305642881, "loss": 0.2361, "step": 12220 }, { "epoch": 0.17190245273736734, "grad_norm": 0.3580227494239807, "learning_rate": 0.00019198700910524321, "loss": 0.2263, "step": 12230 }, { "epoch": 0.17204301075268819, "grad_norm": 0.4982926547527313, "learning_rate": 0.0001919687677715297, "loss": 0.2188, "step": 12240 }, { "epoch": 0.172183568768009, "grad_norm": 0.4078795611858368, "learning_rate": 0.0001919505065670888, "loss": 0.2392, "step": 12250 }, { "epoch": 0.1723241267833298, "grad_norm": 0.4633350074291229, "learning_rate": 0.00019193222549586598, "loss": 0.221, "step": 12260 }, { "epoch": 0.17246468479865065, "grad_norm": 0.4645605683326721, "learning_rate": 0.00019191392456181105, "loss": 0.2296, "step": 12270 }, { "epoch": 0.17260524281397147, "grad_norm": 0.4471352994441986, "learning_rate": 0.0001918956037688781, "loss": 0.2, "step": 12280 }, { "epoch": 0.17274580082929228, "grad_norm": 0.4225039780139923, "learning_rate": 0.00019187726312102553, "loss": 0.1919, "step": 12290 }, { "epoch": 0.17288635884461312, "grad_norm": 0.44874119758605957, "learning_rate": 0.00019185890262221595, "loss": 0.2224, "step": 12300 }, { "epoch": 0.17302691685993393, "grad_norm": 0.4801863431930542, "learning_rate": 0.0001918405222764164, "loss": 0.2148, "step": 12310 }, { "epoch": 0.17316747487525477, "grad_norm": 0.43257445096969604, "learning_rate": 0.0001918221220875981, "loss": 0.2254, "step": 12320 }, { "epoch": 0.1733080328905756, "grad_norm": 0.4954662322998047, "learning_rate": 0.00019180370205973657, "loss": 0.2532, "step": 12330 }, { "epoch": 0.1734485909058964, "grad_norm": 0.47669610381126404, "learning_rate": 0.00019178526219681163, "loss": 0.1945, "step": 12340 }, { "epoch": 0.17358914892121724, "grad_norm": 0.3972938656806946, "learning_rate": 0.00019176680250280743, "loss": 0.2247, "step": 12350 }, { "epoch": 0.17372970693653805, "grad_norm": 0.5760671496391296, "learning_rate": 0.0001917483229817123, "loss": 0.2242, "step": 12360 }, { "epoch": 0.17387026495185887, "grad_norm": 0.4229896664619446, "learning_rate": 0.00019172982363751894, "loss": 0.2132, "step": 12370 }, { "epoch": 0.1740108229671797, "grad_norm": 0.5061476826667786, "learning_rate": 0.00019171130447422435, "loss": 0.2555, "step": 12380 }, { "epoch": 0.17415138098250052, "grad_norm": 0.5105674862861633, "learning_rate": 0.00019169276549582974, "loss": 0.2052, "step": 12390 }, { "epoch": 0.17429193899782136, "grad_norm": 0.4769287705421448, "learning_rate": 0.00019167420670634062, "loss": 0.1912, "step": 12400 }, { "epoch": 0.17443249701314217, "grad_norm": 0.430573433637619, "learning_rate": 0.00019165562810976683, "loss": 0.2602, "step": 12410 }, { "epoch": 0.174573055028463, "grad_norm": 0.4756665527820587, "learning_rate": 0.0001916370297101224, "loss": 0.2292, "step": 12420 }, { "epoch": 0.17471361304378383, "grad_norm": 0.4962463080883026, "learning_rate": 0.00019161841151142572, "loss": 0.1848, "step": 12430 }, { "epoch": 0.17485417105910464, "grad_norm": 0.4224216043949127, "learning_rate": 0.00019159977351769944, "loss": 0.2334, "step": 12440 }, { "epoch": 0.17499472907442548, "grad_norm": 0.45669710636138916, "learning_rate": 0.00019158111573297046, "loss": 0.2431, "step": 12450 }, { "epoch": 0.1751352870897463, "grad_norm": 0.4416327178478241, "learning_rate": 0.00019156243816126998, "loss": 0.2015, "step": 12460 }, { "epoch": 0.1752758451050671, "grad_norm": 0.558631956577301, "learning_rate": 0.00019154374080663345, "loss": 0.2086, "step": 12470 }, { "epoch": 0.17541640312038795, "grad_norm": 0.5783782601356506, "learning_rate": 0.00019152502367310062, "loss": 0.2295, "step": 12480 }, { "epoch": 0.17555696113570876, "grad_norm": 0.44455182552337646, "learning_rate": 0.00019150628676471555, "loss": 0.2517, "step": 12490 }, { "epoch": 0.17569751915102957, "grad_norm": 0.49356889724731445, "learning_rate": 0.00019148753008552645, "loss": 0.2256, "step": 12500 }, { "epoch": 0.17583807716635041, "grad_norm": 0.4006481468677521, "learning_rate": 0.0001914687536395859, "loss": 0.2079, "step": 12510 }, { "epoch": 0.17597863518167123, "grad_norm": 0.33360642194747925, "learning_rate": 0.00019144995743095077, "loss": 0.2366, "step": 12520 }, { "epoch": 0.17611919319699207, "grad_norm": 0.3714017868041992, "learning_rate": 0.00019143114146368215, "loss": 0.2267, "step": 12530 }, { "epoch": 0.17625975121231288, "grad_norm": 0.38289201259613037, "learning_rate": 0.00019141230574184538, "loss": 0.2202, "step": 12540 }, { "epoch": 0.1764003092276337, "grad_norm": 0.5035020112991333, "learning_rate": 0.00019139345026951012, "loss": 0.199, "step": 12550 }, { "epoch": 0.17654086724295454, "grad_norm": 0.38164594769477844, "learning_rate": 0.00019137457505075025, "loss": 0.2322, "step": 12560 }, { "epoch": 0.17668142525827535, "grad_norm": 0.5696097612380981, "learning_rate": 0.00019135568008964398, "loss": 0.1965, "step": 12570 }, { "epoch": 0.1768219832735962, "grad_norm": 0.5146777033805847, "learning_rate": 0.0001913367653902737, "loss": 0.221, "step": 12580 }, { "epoch": 0.176962541288917, "grad_norm": 0.4113754630088806, "learning_rate": 0.00019131783095672616, "loss": 0.2316, "step": 12590 }, { "epoch": 0.17710309930423782, "grad_norm": 0.38866838812828064, "learning_rate": 0.00019129887679309224, "loss": 0.2412, "step": 12600 }, { "epoch": 0.17724365731955866, "grad_norm": 0.5679992437362671, "learning_rate": 0.00019127990290346728, "loss": 0.2309, "step": 12610 }, { "epoch": 0.17738421533487947, "grad_norm": 0.3673727810382843, "learning_rate": 0.0001912609092919507, "loss": 0.2344, "step": 12620 }, { "epoch": 0.17752477335020028, "grad_norm": 0.48948225378990173, "learning_rate": 0.00019124189596264626, "loss": 0.2096, "step": 12630 }, { "epoch": 0.17766533136552112, "grad_norm": 0.4309072494506836, "learning_rate": 0.00019122286291966195, "loss": 0.2049, "step": 12640 }, { "epoch": 0.17780588938084194, "grad_norm": 0.4598619043827057, "learning_rate": 0.00019120381016711008, "loss": 0.1806, "step": 12650 }, { "epoch": 0.17794644739616278, "grad_norm": 0.405023455619812, "learning_rate": 0.0001911847377091072, "loss": 0.2084, "step": 12660 }, { "epoch": 0.1780870054114836, "grad_norm": 0.4332144260406494, "learning_rate": 0.00019116564554977404, "loss": 0.2318, "step": 12670 }, { "epoch": 0.1782275634268044, "grad_norm": 0.5438380837440491, "learning_rate": 0.00019114653369323566, "loss": 0.2146, "step": 12680 }, { "epoch": 0.17836812144212524, "grad_norm": 0.48414480686187744, "learning_rate": 0.00019112740214362136, "loss": 0.1936, "step": 12690 }, { "epoch": 0.17850867945744606, "grad_norm": 0.5146945118904114, "learning_rate": 0.00019110825090506468, "loss": 0.2198, "step": 12700 }, { "epoch": 0.1786492374727669, "grad_norm": 0.4110274314880371, "learning_rate": 0.00019108907998170344, "loss": 0.2242, "step": 12710 }, { "epoch": 0.1787897954880877, "grad_norm": 0.5148970484733582, "learning_rate": 0.00019106988937767972, "loss": 0.2179, "step": 12720 }, { "epoch": 0.17893035350340852, "grad_norm": 0.48713383078575134, "learning_rate": 0.0001910506790971398, "loss": 0.2264, "step": 12730 }, { "epoch": 0.17907091151872936, "grad_norm": 0.4454123377799988, "learning_rate": 0.0001910314491442343, "loss": 0.2162, "step": 12740 }, { "epoch": 0.17921146953405018, "grad_norm": 0.46519991755485535, "learning_rate": 0.00019101219952311795, "loss": 0.2241, "step": 12750 }, { "epoch": 0.179352027549371, "grad_norm": 0.48231005668640137, "learning_rate": 0.0001909929302379499, "loss": 0.2213, "step": 12760 }, { "epoch": 0.17949258556469183, "grad_norm": 0.49811065196990967, "learning_rate": 0.00019097364129289337, "loss": 0.2279, "step": 12770 }, { "epoch": 0.17963314358001264, "grad_norm": 0.3791274428367615, "learning_rate": 0.00019095433269211598, "loss": 0.2109, "step": 12780 }, { "epoch": 0.17977370159533348, "grad_norm": 0.4766414165496826, "learning_rate": 0.00019093500443978956, "loss": 0.2173, "step": 12790 }, { "epoch": 0.1799142596106543, "grad_norm": 0.49525031447410583, "learning_rate": 0.00019091565654009009, "loss": 0.2219, "step": 12800 }, { "epoch": 0.1800548176259751, "grad_norm": 0.529272735118866, "learning_rate": 0.00019089628899719791, "loss": 0.2211, "step": 12810 }, { "epoch": 0.18019537564129595, "grad_norm": 0.519967257976532, "learning_rate": 0.00019087690181529758, "loss": 0.2226, "step": 12820 }, { "epoch": 0.18033593365661676, "grad_norm": 0.4815217852592468, "learning_rate": 0.00019085749499857786, "loss": 0.2246, "step": 12830 }, { "epoch": 0.1804764916719376, "grad_norm": 0.4821476936340332, "learning_rate": 0.00019083806855123178, "loss": 0.2039, "step": 12840 }, { "epoch": 0.18061704968725842, "grad_norm": 0.6629970073699951, "learning_rate": 0.00019081862247745659, "loss": 0.2119, "step": 12850 }, { "epoch": 0.18075760770257923, "grad_norm": 0.39005452394485474, "learning_rate": 0.00019079915678145385, "loss": 0.214, "step": 12860 }, { "epoch": 0.18089816571790007, "grad_norm": 0.39662966132164, "learning_rate": 0.00019077967146742926, "loss": 0.2117, "step": 12870 }, { "epoch": 0.18103872373322089, "grad_norm": 0.48629820346832275, "learning_rate": 0.00019076016653959282, "loss": 0.1954, "step": 12880 }, { "epoch": 0.1811792817485417, "grad_norm": 0.5073099732398987, "learning_rate": 0.00019074064200215877, "loss": 0.2155, "step": 12890 }, { "epoch": 0.18131983976386254, "grad_norm": 0.46670103073120117, "learning_rate": 0.00019072109785934558, "loss": 0.2193, "step": 12900 }, { "epoch": 0.18146039777918335, "grad_norm": 0.5066492557525635, "learning_rate": 0.0001907015341153759, "loss": 0.2282, "step": 12910 }, { "epoch": 0.1816009557945042, "grad_norm": 0.39050865173339844, "learning_rate": 0.00019068195077447673, "loss": 0.1896, "step": 12920 }, { "epoch": 0.181741513809825, "grad_norm": 0.5603247880935669, "learning_rate": 0.0001906623478408792, "loss": 0.2103, "step": 12930 }, { "epoch": 0.18188207182514582, "grad_norm": 0.6061719655990601, "learning_rate": 0.0001906427253188187, "loss": 0.2125, "step": 12940 }, { "epoch": 0.18202262984046666, "grad_norm": 0.3703446090221405, "learning_rate": 0.00019062308321253485, "loss": 0.2129, "step": 12950 }, { "epoch": 0.18216318785578747, "grad_norm": 0.5037427544593811, "learning_rate": 0.0001906034215262716, "loss": 0.2336, "step": 12960 }, { "epoch": 0.1823037458711083, "grad_norm": 0.6292206048965454, "learning_rate": 0.0001905837402642769, "loss": 0.2211, "step": 12970 }, { "epoch": 0.18244430388642913, "grad_norm": 0.44565314054489136, "learning_rate": 0.00019056403943080321, "loss": 0.2081, "step": 12980 }, { "epoch": 0.18258486190174994, "grad_norm": 0.6111086010932922, "learning_rate": 0.000190544319030107, "loss": 0.2155, "step": 12990 }, { "epoch": 0.18272541991707078, "grad_norm": 0.45473793148994446, "learning_rate": 0.0001905245790664491, "loss": 0.245, "step": 13000 }, { "epoch": 0.18272541991707078, "eval_chrf": 81.71859402430363, "eval_loss": 0.4482495188713074, "eval_runtime": 258.9033, "eval_samples_per_second": 0.386, "eval_steps_per_second": 0.015, "step": 13000 }, { "epoch": 0.1828659779323916, "grad_norm": 0.46908169984817505, "learning_rate": 0.0001905048195440945, "loss": 0.1984, "step": 13010 }, { "epoch": 0.1830065359477124, "grad_norm": 0.4461404085159302, "learning_rate": 0.00019048504046731242, "loss": 0.1747, "step": 13020 }, { "epoch": 0.18314709396303325, "grad_norm": 0.5931270718574524, "learning_rate": 0.00019046524184037632, "loss": 0.1915, "step": 13030 }, { "epoch": 0.18328765197835406, "grad_norm": 0.4598511755466461, "learning_rate": 0.0001904454236675639, "loss": 0.2267, "step": 13040 }, { "epoch": 0.1834282099936749, "grad_norm": 0.451461523771286, "learning_rate": 0.00019042558595315707, "loss": 0.2103, "step": 13050 }, { "epoch": 0.18356876800899571, "grad_norm": 0.3634202778339386, "learning_rate": 0.00019040572870144192, "loss": 0.2564, "step": 13060 }, { "epoch": 0.18370932602431653, "grad_norm": 0.43997520208358765, "learning_rate": 0.0001903858519167088, "loss": 0.2137, "step": 13070 }, { "epoch": 0.18384988403963737, "grad_norm": 0.3918921947479248, "learning_rate": 0.00019036595560325236, "loss": 0.2077, "step": 13080 }, { "epoch": 0.18399044205495818, "grad_norm": 0.43988847732543945, "learning_rate": 0.00019034603976537126, "loss": 0.2317, "step": 13090 }, { "epoch": 0.184131000070279, "grad_norm": 0.39643704891204834, "learning_rate": 0.00019032610440736862, "loss": 0.2075, "step": 13100 }, { "epoch": 0.18427155808559983, "grad_norm": 0.38452738523483276, "learning_rate": 0.0001903061495335516, "loss": 0.2103, "step": 13110 }, { "epoch": 0.18441211610092065, "grad_norm": 0.37879735231399536, "learning_rate": 0.00019028617514823163, "loss": 0.2307, "step": 13120 }, { "epoch": 0.1845526741162415, "grad_norm": 0.469174861907959, "learning_rate": 0.0001902661812557244, "loss": 0.1975, "step": 13130 }, { "epoch": 0.1846932321315623, "grad_norm": 0.4302513301372528, "learning_rate": 0.0001902461678603498, "loss": 0.2394, "step": 13140 }, { "epoch": 0.18483379014688311, "grad_norm": 0.5000349879264832, "learning_rate": 0.00019022613496643187, "loss": 0.1926, "step": 13150 }, { "epoch": 0.18497434816220396, "grad_norm": 0.4314963221549988, "learning_rate": 0.0001902060825782989, "loss": 0.2047, "step": 13160 }, { "epoch": 0.18511490617752477, "grad_norm": 0.33490025997161865, "learning_rate": 0.00019018601070028344, "loss": 0.2165, "step": 13170 }, { "epoch": 0.1852554641928456, "grad_norm": 0.5817905068397522, "learning_rate": 0.00019016591933672223, "loss": 0.2041, "step": 13180 }, { "epoch": 0.18539602220816642, "grad_norm": 0.47698774933815, "learning_rate": 0.0001901458084919561, "loss": 0.203, "step": 13190 }, { "epoch": 0.18553658022348724, "grad_norm": 0.4562699794769287, "learning_rate": 0.0001901256781703303, "loss": 0.2205, "step": 13200 }, { "epoch": 0.18567713823880808, "grad_norm": 0.4335058629512787, "learning_rate": 0.00019010552837619413, "loss": 0.2257, "step": 13210 }, { "epoch": 0.1858176962541289, "grad_norm": 0.6168200373649597, "learning_rate": 0.00019008535911390113, "loss": 0.1967, "step": 13220 }, { "epoch": 0.1859582542694497, "grad_norm": 0.5537468194961548, "learning_rate": 0.0001900651703878091, "loss": 0.2206, "step": 13230 }, { "epoch": 0.18609881228477054, "grad_norm": 0.37052398920059204, "learning_rate": 0.00019004496220227996, "loss": 0.2306, "step": 13240 }, { "epoch": 0.18623937030009136, "grad_norm": 0.4688161313533783, "learning_rate": 0.0001900247345616799, "loss": 0.2218, "step": 13250 }, { "epoch": 0.1863799283154122, "grad_norm": 0.41777825355529785, "learning_rate": 0.0001900044874703793, "loss": 0.2035, "step": 13260 }, { "epoch": 0.186520486330733, "grad_norm": 0.3905136287212372, "learning_rate": 0.00018998422093275275, "loss": 0.2016, "step": 13270 }, { "epoch": 0.18666104434605382, "grad_norm": 0.43584224581718445, "learning_rate": 0.000189963934953179, "loss": 0.21, "step": 13280 }, { "epoch": 0.18680160236137466, "grad_norm": 0.40059593319892883, "learning_rate": 0.00018994362953604106, "loss": 0.2297, "step": 13290 }, { "epoch": 0.18694216037669548, "grad_norm": 0.5512275099754333, "learning_rate": 0.00018992330468572605, "loss": 0.2083, "step": 13300 }, { "epoch": 0.18708271839201632, "grad_norm": 0.49532201886177063, "learning_rate": 0.00018990296040662544, "loss": 0.2327, "step": 13310 }, { "epoch": 0.18722327640733713, "grad_norm": 0.4382390081882477, "learning_rate": 0.0001898825967031347, "loss": 0.2257, "step": 13320 }, { "epoch": 0.18736383442265794, "grad_norm": 0.6387760043144226, "learning_rate": 0.0001898622135796537, "loss": 0.1984, "step": 13330 }, { "epoch": 0.18750439243797878, "grad_norm": 0.35923030972480774, "learning_rate": 0.00018984181104058636, "loss": 0.2124, "step": 13340 }, { "epoch": 0.1876449504532996, "grad_norm": 0.4097476303577423, "learning_rate": 0.00018982138909034082, "loss": 0.2065, "step": 13350 }, { "epoch": 0.1877855084686204, "grad_norm": 0.4327114224433899, "learning_rate": 0.00018980094773332948, "loss": 0.2198, "step": 13360 }, { "epoch": 0.18792606648394125, "grad_norm": 0.41230443120002747, "learning_rate": 0.00018978048697396887, "loss": 0.2367, "step": 13370 }, { "epoch": 0.18806662449926206, "grad_norm": 0.4218708276748657, "learning_rate": 0.00018976000681667973, "loss": 0.1985, "step": 13380 }, { "epoch": 0.1882071825145829, "grad_norm": 0.6109094619750977, "learning_rate": 0.00018973950726588698, "loss": 0.2591, "step": 13390 }, { "epoch": 0.18834774052990372, "grad_norm": 0.43765881657600403, "learning_rate": 0.0001897189883260198, "loss": 0.2243, "step": 13400 }, { "epoch": 0.18848829854522453, "grad_norm": 0.44101229310035706, "learning_rate": 0.0001896984500015114, "loss": 0.2173, "step": 13410 }, { "epoch": 0.18862885656054537, "grad_norm": 0.6721788048744202, "learning_rate": 0.0001896778922967994, "loss": 0.1979, "step": 13420 }, { "epoch": 0.18876941457586618, "grad_norm": 0.5215440392494202, "learning_rate": 0.0001896573152163254, "loss": 0.2283, "step": 13430 }, { "epoch": 0.18890997259118703, "grad_norm": 0.5103262662887573, "learning_rate": 0.00018963671876453533, "loss": 0.2215, "step": 13440 }, { "epoch": 0.18905053060650784, "grad_norm": 0.5467560887336731, "learning_rate": 0.0001896161029458792, "loss": 0.2221, "step": 13450 }, { "epoch": 0.18919108862182865, "grad_norm": 0.4276997745037079, "learning_rate": 0.0001895954677648113, "loss": 0.213, "step": 13460 }, { "epoch": 0.1893316466371495, "grad_norm": 0.41185396909713745, "learning_rate": 0.00018957481322579, "loss": 0.2052, "step": 13470 }, { "epoch": 0.1894722046524703, "grad_norm": 0.4776594936847687, "learning_rate": 0.00018955413933327797, "loss": 0.1955, "step": 13480 }, { "epoch": 0.18961276266779112, "grad_norm": 0.4993971288204193, "learning_rate": 0.00018953344609174197, "loss": 0.2146, "step": 13490 }, { "epoch": 0.18975332068311196, "grad_norm": 0.4730333685874939, "learning_rate": 0.00018951273350565297, "loss": 0.2338, "step": 13500 }, { "epoch": 0.18989387869843277, "grad_norm": 0.44033026695251465, "learning_rate": 0.00018949200157948614, "loss": 0.2186, "step": 13510 }, { "epoch": 0.1900344367137536, "grad_norm": 0.35666805505752563, "learning_rate": 0.00018947125031772076, "loss": 0.1963, "step": 13520 }, { "epoch": 0.19017499472907443, "grad_norm": 0.5002583861351013, "learning_rate": 0.00018945047972484042, "loss": 0.2188, "step": 13530 }, { "epoch": 0.19031555274439524, "grad_norm": 0.5445455312728882, "learning_rate": 0.00018942968980533275, "loss": 0.2194, "step": 13540 }, { "epoch": 0.19045611075971608, "grad_norm": 0.47202199697494507, "learning_rate": 0.00018940888056368963, "loss": 0.232, "step": 13550 }, { "epoch": 0.1905966687750369, "grad_norm": 0.430093914270401, "learning_rate": 0.00018938805200440707, "loss": 0.1965, "step": 13560 }, { "epoch": 0.19073722679035773, "grad_norm": 0.42162808775901794, "learning_rate": 0.0001893672041319853, "loss": 0.2359, "step": 13570 }, { "epoch": 0.19087778480567855, "grad_norm": 0.37718167901039124, "learning_rate": 0.0001893463369509287, "loss": 0.2112, "step": 13580 }, { "epoch": 0.19101834282099936, "grad_norm": 0.5031275153160095, "learning_rate": 0.0001893254504657458, "loss": 0.2085, "step": 13590 }, { "epoch": 0.1911589008363202, "grad_norm": 0.5971906781196594, "learning_rate": 0.00018930454468094933, "loss": 0.2166, "step": 13600 }, { "epoch": 0.191299458851641, "grad_norm": 0.39828595519065857, "learning_rate": 0.00018928361960105622, "loss": 0.2174, "step": 13610 }, { "epoch": 0.19144001686696183, "grad_norm": 0.4507116377353668, "learning_rate": 0.0001892626752305875, "loss": 0.2281, "step": 13620 }, { "epoch": 0.19158057488228267, "grad_norm": 0.5327116847038269, "learning_rate": 0.00018924171157406843, "loss": 0.1867, "step": 13630 }, { "epoch": 0.19172113289760348, "grad_norm": 0.4895416498184204, "learning_rate": 0.00018922072863602837, "loss": 0.2094, "step": 13640 }, { "epoch": 0.19186169091292432, "grad_norm": 0.5132678747177124, "learning_rate": 0.00018919972642100086, "loss": 0.1946, "step": 13650 }, { "epoch": 0.19200224892824513, "grad_norm": 0.4152938723564148, "learning_rate": 0.00018917870493352371, "loss": 0.2102, "step": 13660 }, { "epoch": 0.19214280694356595, "grad_norm": 0.33532798290252686, "learning_rate": 0.00018915766417813875, "loss": 0.2144, "step": 13670 }, { "epoch": 0.1922833649588868, "grad_norm": 0.5051287412643433, "learning_rate": 0.00018913660415939207, "loss": 0.2253, "step": 13680 }, { "epoch": 0.1924239229742076, "grad_norm": 0.481604665517807, "learning_rate": 0.0001891155248818339, "loss": 0.2151, "step": 13690 }, { "epoch": 0.19256448098952844, "grad_norm": 0.528761088848114, "learning_rate": 0.00018909442635001855, "loss": 0.2135, "step": 13700 }, { "epoch": 0.19270503900484925, "grad_norm": 0.5373625159263611, "learning_rate": 0.0001890733085685046, "loss": 0.2001, "step": 13710 }, { "epoch": 0.19284559702017007, "grad_norm": 0.4210106432437897, "learning_rate": 0.00018905217154185474, "loss": 0.2519, "step": 13720 }, { "epoch": 0.1929861550354909, "grad_norm": 0.43053504824638367, "learning_rate": 0.00018903101527463587, "loss": 0.2049, "step": 13730 }, { "epoch": 0.19312671305081172, "grad_norm": 0.6001156568527222, "learning_rate": 0.00018900983977141893, "loss": 0.2079, "step": 13740 }, { "epoch": 0.19326727106613253, "grad_norm": 0.4350476562976837, "learning_rate": 0.00018898864503677913, "loss": 0.211, "step": 13750 }, { "epoch": 0.19340782908145338, "grad_norm": 0.43925341963768005, "learning_rate": 0.00018896743107529583, "loss": 0.2195, "step": 13760 }, { "epoch": 0.1935483870967742, "grad_norm": 0.5062753558158875, "learning_rate": 0.0001889461978915524, "loss": 0.2326, "step": 13770 }, { "epoch": 0.19368894511209503, "grad_norm": 0.47204822301864624, "learning_rate": 0.0001889249454901366, "loss": 0.2176, "step": 13780 }, { "epoch": 0.19382950312741584, "grad_norm": 0.41110143065452576, "learning_rate": 0.00018890367387564013, "loss": 0.1917, "step": 13790 }, { "epoch": 0.19397006114273666, "grad_norm": 0.4885755777359009, "learning_rate": 0.00018888238305265893, "loss": 0.2354, "step": 13800 }, { "epoch": 0.1941106191580575, "grad_norm": 0.392553448677063, "learning_rate": 0.00018886107302579314, "loss": 0.2213, "step": 13810 }, { "epoch": 0.1942511771733783, "grad_norm": 0.40750473737716675, "learning_rate": 0.00018883974379964693, "loss": 0.2022, "step": 13820 }, { "epoch": 0.19439173518869912, "grad_norm": 0.37037041783332825, "learning_rate": 0.0001888183953788287, "loss": 0.2058, "step": 13830 }, { "epoch": 0.19453229320401996, "grad_norm": 0.5609288215637207, "learning_rate": 0.00018879702776795099, "loss": 0.22, "step": 13840 }, { "epoch": 0.19467285121934078, "grad_norm": 0.6449435949325562, "learning_rate": 0.0001887756409716305, "loss": 0.2034, "step": 13850 }, { "epoch": 0.19481340923466162, "grad_norm": 0.4815952777862549, "learning_rate": 0.00018875423499448802, "loss": 0.2233, "step": 13860 }, { "epoch": 0.19495396724998243, "grad_norm": 0.5087131857872009, "learning_rate": 0.00018873280984114848, "loss": 0.1971, "step": 13870 }, { "epoch": 0.19509452526530324, "grad_norm": 0.5035146474838257, "learning_rate": 0.00018871136551624104, "loss": 0.2373, "step": 13880 }, { "epoch": 0.19523508328062408, "grad_norm": 0.357378214597702, "learning_rate": 0.000188689902024399, "loss": 0.2291, "step": 13890 }, { "epoch": 0.1953756412959449, "grad_norm": 0.40452587604522705, "learning_rate": 0.00018866841937025962, "loss": 0.2147, "step": 13900 }, { "epoch": 0.19551619931126574, "grad_norm": 0.4429807662963867, "learning_rate": 0.0001886469175584645, "loss": 0.1939, "step": 13910 }, { "epoch": 0.19565675732658655, "grad_norm": 0.5234241485595703, "learning_rate": 0.00018862539659365935, "loss": 0.201, "step": 13920 }, { "epoch": 0.19579731534190736, "grad_norm": 0.3796229958534241, "learning_rate": 0.00018860385648049392, "loss": 0.2417, "step": 13930 }, { "epoch": 0.1959378733572282, "grad_norm": 0.3806133568286896, "learning_rate": 0.0001885822972236222, "loss": 0.2303, "step": 13940 }, { "epoch": 0.19607843137254902, "grad_norm": 0.49205514788627625, "learning_rate": 0.00018856071882770222, "loss": 0.2181, "step": 13950 }, { "epoch": 0.19621898938786983, "grad_norm": 0.4757485091686249, "learning_rate": 0.00018853912129739625, "loss": 0.2091, "step": 13960 }, { "epoch": 0.19635954740319067, "grad_norm": 0.5125443935394287, "learning_rate": 0.00018851750463737063, "loss": 0.2024, "step": 13970 }, { "epoch": 0.19650010541851148, "grad_norm": 0.49263033270835876, "learning_rate": 0.0001884958688522958, "loss": 0.2257, "step": 13980 }, { "epoch": 0.19664066343383232, "grad_norm": 0.5992262959480286, "learning_rate": 0.00018847421394684643, "loss": 0.2042, "step": 13990 }, { "epoch": 0.19678122144915314, "grad_norm": 0.4547590911388397, "learning_rate": 0.00018845253992570124, "loss": 0.24, "step": 14000 }, { "epoch": 0.19678122144915314, "eval_chrf": 82.73228602148815, "eval_loss": 0.4420166313648224, "eval_runtime": 190.7174, "eval_samples_per_second": 0.524, "eval_steps_per_second": 0.021, "step": 14000 }, { "epoch": 0.19692177946447395, "grad_norm": 0.287767618894577, "learning_rate": 0.00018843084679354315, "loss": 0.2096, "step": 14010 }, { "epoch": 0.1970623374797948, "grad_norm": 0.4257965087890625, "learning_rate": 0.0001884091345550591, "loss": 0.2235, "step": 14020 }, { "epoch": 0.1972028954951156, "grad_norm": 0.5454546213150024, "learning_rate": 0.0001883874032149403, "loss": 0.1984, "step": 14030 }, { "epoch": 0.19734345351043645, "grad_norm": 0.4185550808906555, "learning_rate": 0.00018836565277788195, "loss": 0.246, "step": 14040 }, { "epoch": 0.19748401152575726, "grad_norm": 0.41897377371788025, "learning_rate": 0.00018834388324858348, "loss": 0.2287, "step": 14050 }, { "epoch": 0.19762456954107807, "grad_norm": 0.42247578501701355, "learning_rate": 0.00018832209463174836, "loss": 0.186, "step": 14060 }, { "epoch": 0.1977651275563989, "grad_norm": 0.4365829527378082, "learning_rate": 0.0001883002869320843, "loss": 0.2034, "step": 14070 }, { "epoch": 0.19790568557171972, "grad_norm": 0.7284541130065918, "learning_rate": 0.00018827846015430298, "loss": 0.1982, "step": 14080 }, { "epoch": 0.19804624358704054, "grad_norm": 0.3755764365196228, "learning_rate": 0.00018825661430312037, "loss": 0.2262, "step": 14090 }, { "epoch": 0.19818680160236138, "grad_norm": 0.4915720224380493, "learning_rate": 0.0001882347493832564, "loss": 0.1925, "step": 14100 }, { "epoch": 0.1983273596176822, "grad_norm": 0.41482189297676086, "learning_rate": 0.00018821286539943518, "loss": 0.2127, "step": 14110 }, { "epoch": 0.19846791763300303, "grad_norm": 0.44070643186569214, "learning_rate": 0.00018819096235638504, "loss": 0.2281, "step": 14120 }, { "epoch": 0.19860847564832385, "grad_norm": 0.5023027658462524, "learning_rate": 0.00018816904025883825, "loss": 0.2305, "step": 14130 }, { "epoch": 0.19874903366364466, "grad_norm": 0.5836760997772217, "learning_rate": 0.00018814709911153137, "loss": 0.2253, "step": 14140 }, { "epoch": 0.1988895916789655, "grad_norm": 0.40505480766296387, "learning_rate": 0.00018812513891920495, "loss": 0.2162, "step": 14150 }, { "epoch": 0.1990301496942863, "grad_norm": 0.5243901014328003, "learning_rate": 0.0001881031596866037, "loss": 0.2209, "step": 14160 }, { "epoch": 0.19917070770960715, "grad_norm": 0.36334285140037537, "learning_rate": 0.00018808116141847642, "loss": 0.2311, "step": 14170 }, { "epoch": 0.19931126572492797, "grad_norm": 0.4597943425178528, "learning_rate": 0.00018805914411957608, "loss": 0.1936, "step": 14180 }, { "epoch": 0.19945182374024878, "grad_norm": 0.44531357288360596, "learning_rate": 0.00018803710779465973, "loss": 0.1955, "step": 14190 }, { "epoch": 0.19959238175556962, "grad_norm": 0.47967594861984253, "learning_rate": 0.0001880150524484885, "loss": 0.235, "step": 14200 }, { "epoch": 0.19973293977089043, "grad_norm": 0.39149224758148193, "learning_rate": 0.0001879929780858277, "loss": 0.2078, "step": 14210 }, { "epoch": 0.19987349778621125, "grad_norm": 0.5905307531356812, "learning_rate": 0.00018797088471144666, "loss": 0.1982, "step": 14220 }, { "epoch": 0.2000140558015321, "grad_norm": 0.3699857294559479, "learning_rate": 0.00018794877233011887, "loss": 0.2067, "step": 14230 }, { "epoch": 0.2001546138168529, "grad_norm": 0.3734479546546936, "learning_rate": 0.00018792664094662197, "loss": 0.2056, "step": 14240 }, { "epoch": 0.20029517183217374, "grad_norm": 0.40720364451408386, "learning_rate": 0.00018790449056573758, "loss": 0.2302, "step": 14250 }, { "epoch": 0.20043572984749455, "grad_norm": 0.5361537933349609, "learning_rate": 0.00018788232119225153, "loss": 0.2193, "step": 14260 }, { "epoch": 0.20057628786281537, "grad_norm": 0.46045634150505066, "learning_rate": 0.00018786013283095376, "loss": 0.185, "step": 14270 }, { "epoch": 0.2007168458781362, "grad_norm": 0.569119393825531, "learning_rate": 0.00018783792548663825, "loss": 0.1936, "step": 14280 }, { "epoch": 0.20085740389345702, "grad_norm": 0.4964165985584259, "learning_rate": 0.00018781569916410308, "loss": 0.2168, "step": 14290 }, { "epoch": 0.20099796190877786, "grad_norm": 0.6262520551681519, "learning_rate": 0.00018779345386815054, "loss": 0.2686, "step": 14300 }, { "epoch": 0.20113851992409867, "grad_norm": 0.8814814686775208, "learning_rate": 0.00018777118960358683, "loss": 0.2103, "step": 14310 }, { "epoch": 0.2012790779394195, "grad_norm": 0.376753568649292, "learning_rate": 0.0001877489063752224, "loss": 0.2288, "step": 14320 }, { "epoch": 0.20141963595474033, "grad_norm": 0.38907644152641296, "learning_rate": 0.00018772660418787184, "loss": 0.1988, "step": 14330 }, { "epoch": 0.20156019397006114, "grad_norm": 0.4345405399799347, "learning_rate": 0.00018770428304635363, "loss": 0.2709, "step": 14340 }, { "epoch": 0.20170075198538195, "grad_norm": 0.4775392413139343, "learning_rate": 0.0001876819429554905, "loss": 0.2192, "step": 14350 }, { "epoch": 0.2018413100007028, "grad_norm": 0.5034828186035156, "learning_rate": 0.00018765958392010925, "loss": 0.2587, "step": 14360 }, { "epoch": 0.2019818680160236, "grad_norm": 0.4657890498638153, "learning_rate": 0.00018763720594504077, "loss": 0.2169, "step": 14370 }, { "epoch": 0.20212242603134445, "grad_norm": 0.4603046774864197, "learning_rate": 0.00018761480903512002, "loss": 0.1639, "step": 14380 }, { "epoch": 0.20226298404666526, "grad_norm": 0.4143376648426056, "learning_rate": 0.00018759239319518603, "loss": 0.2072, "step": 14390 }, { "epoch": 0.20240354206198607, "grad_norm": 0.45824283361434937, "learning_rate": 0.00018756995843008208, "loss": 0.2259, "step": 14400 }, { "epoch": 0.20254410007730692, "grad_norm": 0.4499847888946533, "learning_rate": 0.0001875475047446553, "loss": 0.1921, "step": 14410 }, { "epoch": 0.20268465809262773, "grad_norm": 0.46914368867874146, "learning_rate": 0.000187525032143757, "loss": 0.1884, "step": 14420 }, { "epoch": 0.20282521610794857, "grad_norm": 0.48389866948127747, "learning_rate": 0.00018750254063224268, "loss": 0.1902, "step": 14430 }, { "epoch": 0.20296577412326938, "grad_norm": 0.4360426962375641, "learning_rate": 0.00018748003021497184, "loss": 0.2002, "step": 14440 }, { "epoch": 0.2031063321385902, "grad_norm": 0.4696538746356964, "learning_rate": 0.00018745750089680803, "loss": 0.21, "step": 14450 }, { "epoch": 0.20324689015391104, "grad_norm": 0.7128496766090393, "learning_rate": 0.00018743495268261896, "loss": 0.2014, "step": 14460 }, { "epoch": 0.20338744816923185, "grad_norm": 0.5214452147483826, "learning_rate": 0.00018741238557727635, "loss": 0.243, "step": 14470 }, { "epoch": 0.20352800618455266, "grad_norm": 0.5012695789337158, "learning_rate": 0.00018738979958565603, "loss": 0.2158, "step": 14480 }, { "epoch": 0.2036685641998735, "grad_norm": 0.5507377982139587, "learning_rate": 0.000187367194712638, "loss": 0.2138, "step": 14490 }, { "epoch": 0.20380912221519432, "grad_norm": 0.3723544180393219, "learning_rate": 0.00018734457096310613, "loss": 0.2079, "step": 14500 }, { "epoch": 0.20394968023051516, "grad_norm": 0.40772634744644165, "learning_rate": 0.00018732192834194862, "loss": 0.2123, "step": 14510 }, { "epoch": 0.20409023824583597, "grad_norm": 0.5124629735946655, "learning_rate": 0.00018729926685405755, "loss": 0.21, "step": 14520 }, { "epoch": 0.20423079626115678, "grad_norm": 0.45871278643608093, "learning_rate": 0.00018727658650432918, "loss": 0.2142, "step": 14530 }, { "epoch": 0.20437135427647762, "grad_norm": 0.41411224007606506, "learning_rate": 0.0001872538872976638, "loss": 0.2149, "step": 14540 }, { "epoch": 0.20451191229179844, "grad_norm": 0.554733157157898, "learning_rate": 0.0001872311692389658, "loss": 0.2243, "step": 14550 }, { "epoch": 0.20465247030711925, "grad_norm": 0.46813228726387024, "learning_rate": 0.00018720843233314364, "loss": 0.2046, "step": 14560 }, { "epoch": 0.2047930283224401, "grad_norm": 0.43941813707351685, "learning_rate": 0.0001871856765851098, "loss": 0.2083, "step": 14570 }, { "epoch": 0.2049335863377609, "grad_norm": 0.47301948070526123, "learning_rate": 0.00018716290199978093, "loss": 0.206, "step": 14580 }, { "epoch": 0.20507414435308174, "grad_norm": 0.39562416076660156, "learning_rate": 0.00018714010858207768, "loss": 0.2112, "step": 14590 }, { "epoch": 0.20521470236840256, "grad_norm": 0.4848250150680542, "learning_rate": 0.0001871172963369248, "loss": 0.2064, "step": 14600 }, { "epoch": 0.20535526038372337, "grad_norm": 0.3889874517917633, "learning_rate": 0.00018709446526925103, "loss": 0.2082, "step": 14610 }, { "epoch": 0.2054958183990442, "grad_norm": 0.3820645213127136, "learning_rate": 0.0001870716153839893, "loss": 0.2239, "step": 14620 }, { "epoch": 0.20563637641436502, "grad_norm": 0.4439220428466797, "learning_rate": 0.00018704874668607654, "loss": 0.2037, "step": 14630 }, { "epoch": 0.20577693442968586, "grad_norm": 0.47588831186294556, "learning_rate": 0.00018702585918045373, "loss": 0.2323, "step": 14640 }, { "epoch": 0.20591749244500668, "grad_norm": 0.5247348546981812, "learning_rate": 0.00018700295287206592, "loss": 0.1991, "step": 14650 }, { "epoch": 0.2060580504603275, "grad_norm": 0.4903160333633423, "learning_rate": 0.00018698002776586226, "loss": 0.2093, "step": 14660 }, { "epoch": 0.20619860847564833, "grad_norm": 0.43988537788391113, "learning_rate": 0.00018695708386679596, "loss": 0.2057, "step": 14670 }, { "epoch": 0.20633916649096914, "grad_norm": 0.5525961518287659, "learning_rate": 0.00018693412117982421, "loss": 0.2137, "step": 14680 }, { "epoch": 0.20647972450628996, "grad_norm": 0.4375472664833069, "learning_rate": 0.00018691113970990836, "loss": 0.1852, "step": 14690 }, { "epoch": 0.2066202825216108, "grad_norm": 0.3917044699192047, "learning_rate": 0.00018688813946201376, "loss": 0.2279, "step": 14700 }, { "epoch": 0.2067608405369316, "grad_norm": 0.40974360704421997, "learning_rate": 0.00018686512044110987, "loss": 0.2278, "step": 14710 }, { "epoch": 0.20690139855225245, "grad_norm": 0.352774441242218, "learning_rate": 0.0001868420826521701, "loss": 0.1905, "step": 14720 }, { "epoch": 0.20704195656757327, "grad_norm": 0.42764684557914734, "learning_rate": 0.000186819026100172, "loss": 0.1961, "step": 14730 }, { "epoch": 0.20718251458289408, "grad_norm": 0.39628422260284424, "learning_rate": 0.00018679595079009722, "loss": 0.1657, "step": 14740 }, { "epoch": 0.20732307259821492, "grad_norm": 0.44212454557418823, "learning_rate": 0.00018677285672693135, "loss": 0.2166, "step": 14750 }, { "epoch": 0.20746363061353573, "grad_norm": 0.391118586063385, "learning_rate": 0.00018674974391566407, "loss": 0.1821, "step": 14760 }, { "epoch": 0.20760418862885657, "grad_norm": 0.513839840888977, "learning_rate": 0.00018672661236128913, "loss": 0.2207, "step": 14770 }, { "epoch": 0.20774474664417739, "grad_norm": 0.3673436939716339, "learning_rate": 0.00018670346206880437, "loss": 0.2181, "step": 14780 }, { "epoch": 0.2078853046594982, "grad_norm": 0.5244107842445374, "learning_rate": 0.00018668029304321157, "loss": 0.2183, "step": 14790 }, { "epoch": 0.20802586267481904, "grad_norm": 0.41967636346817017, "learning_rate": 0.00018665710528951662, "loss": 0.2092, "step": 14800 }, { "epoch": 0.20816642069013985, "grad_norm": 0.4290749132633209, "learning_rate": 0.0001866338988127295, "loss": 0.1935, "step": 14810 }, { "epoch": 0.20830697870546067, "grad_norm": 0.4170161485671997, "learning_rate": 0.00018661067361786418, "loss": 0.1976, "step": 14820 }, { "epoch": 0.2084475367207815, "grad_norm": 0.574755847454071, "learning_rate": 0.0001865874297099387, "loss": 0.217, "step": 14830 }, { "epoch": 0.20858809473610232, "grad_norm": 0.4211540222167969, "learning_rate": 0.000186564167093975, "loss": 0.1967, "step": 14840 }, { "epoch": 0.20872865275142316, "grad_norm": 0.43254420161247253, "learning_rate": 0.00018654088577499937, "loss": 0.2107, "step": 14850 }, { "epoch": 0.20886921076674397, "grad_norm": 0.4957050383090973, "learning_rate": 0.00018651758575804187, "loss": 0.1828, "step": 14860 }, { "epoch": 0.2090097687820648, "grad_norm": 0.34140756726264954, "learning_rate": 0.00018649426704813672, "loss": 0.2137, "step": 14870 }, { "epoch": 0.20915032679738563, "grad_norm": 0.5176404118537903, "learning_rate": 0.00018647092965032208, "loss": 0.1977, "step": 14880 }, { "epoch": 0.20929088481270644, "grad_norm": 0.4792444109916687, "learning_rate": 0.00018644757356964025, "loss": 0.2029, "step": 14890 }, { "epoch": 0.20943144282802728, "grad_norm": 0.3671039640903473, "learning_rate": 0.0001864241988111376, "loss": 0.2126, "step": 14900 }, { "epoch": 0.2095720008433481, "grad_norm": 0.393860399723053, "learning_rate": 0.00018640080537986442, "loss": 0.198, "step": 14910 }, { "epoch": 0.2097125588586689, "grad_norm": 0.4475869834423065, "learning_rate": 0.00018637739328087506, "loss": 0.1871, "step": 14920 }, { "epoch": 0.20985311687398975, "grad_norm": 0.45307090878486633, "learning_rate": 0.00018635396251922797, "loss": 0.2012, "step": 14930 }, { "epoch": 0.20999367488931056, "grad_norm": 0.41060614585876465, "learning_rate": 0.00018633051309998555, "loss": 0.2149, "step": 14940 }, { "epoch": 0.21013423290463137, "grad_norm": 0.8010419607162476, "learning_rate": 0.0001863070450282143, "loss": 0.2274, "step": 14950 }, { "epoch": 0.21027479091995221, "grad_norm": 0.37445002794265747, "learning_rate": 0.00018628355830898466, "loss": 0.2408, "step": 14960 }, { "epoch": 0.21041534893527303, "grad_norm": 0.3989226520061493, "learning_rate": 0.00018626005294737125, "loss": 0.2151, "step": 14970 }, { "epoch": 0.21055590695059387, "grad_norm": 0.37011614441871643, "learning_rate": 0.00018623652894845258, "loss": 0.2236, "step": 14980 }, { "epoch": 0.21069646496591468, "grad_norm": 0.44250568747520447, "learning_rate": 0.00018621298631731124, "loss": 0.2028, "step": 14990 }, { "epoch": 0.2108370229812355, "grad_norm": 0.4633479416370392, "learning_rate": 0.00018618942505903383, "loss": 0.2012, "step": 15000 }, { "epoch": 0.2108370229812355, "eval_chrf": 82.82051205341394, "eval_loss": 0.4326775372028351, "eval_runtime": 257.9815, "eval_samples_per_second": 0.388, "eval_steps_per_second": 0.016, "step": 15000 }, { "epoch": 0.21097758099655634, "grad_norm": 0.4391336441040039, "learning_rate": 0.000186165845178711, "loss": 0.2285, "step": 15010 }, { "epoch": 0.21111813901187715, "grad_norm": 0.4783604145050049, "learning_rate": 0.0001861422466814374, "loss": 0.2139, "step": 15020 }, { "epoch": 0.211258697027198, "grad_norm": 0.3801127076148987, "learning_rate": 0.0001861186295723117, "loss": 0.2095, "step": 15030 }, { "epoch": 0.2113992550425188, "grad_norm": 0.43613746762275696, "learning_rate": 0.0001860949938564366, "loss": 0.2007, "step": 15040 }, { "epoch": 0.21153981305783962, "grad_norm": 0.39664286375045776, "learning_rate": 0.00018607133953891887, "loss": 0.2161, "step": 15050 }, { "epoch": 0.21168037107316046, "grad_norm": 0.47274744510650635, "learning_rate": 0.00018604766662486916, "loss": 0.1867, "step": 15060 }, { "epoch": 0.21182092908848127, "grad_norm": 0.45525887608528137, "learning_rate": 0.0001860239751194023, "loss": 0.227, "step": 15070 }, { "epoch": 0.21196148710380208, "grad_norm": 0.42642149329185486, "learning_rate": 0.00018600026502763707, "loss": 0.2166, "step": 15080 }, { "epoch": 0.21210204511912292, "grad_norm": 0.41591188311576843, "learning_rate": 0.00018597653635469622, "loss": 0.2226, "step": 15090 }, { "epoch": 0.21224260313444374, "grad_norm": 0.44854825735092163, "learning_rate": 0.00018595278910570658, "loss": 0.2025, "step": 15100 }, { "epoch": 0.21238316114976458, "grad_norm": 0.4680345058441162, "learning_rate": 0.00018592902328579895, "loss": 0.2058, "step": 15110 }, { "epoch": 0.2125237191650854, "grad_norm": 0.34579533338546753, "learning_rate": 0.00018590523890010818, "loss": 0.2152, "step": 15120 }, { "epoch": 0.2126642771804062, "grad_norm": 0.36790943145751953, "learning_rate": 0.0001858814359537731, "loss": 0.2128, "step": 15130 }, { "epoch": 0.21280483519572704, "grad_norm": 0.4481298327445984, "learning_rate": 0.00018585761445193656, "loss": 0.2156, "step": 15140 }, { "epoch": 0.21294539321104786, "grad_norm": 0.562809944152832, "learning_rate": 0.00018583377439974544, "loss": 0.2246, "step": 15150 }, { "epoch": 0.2130859512263687, "grad_norm": 0.5986753702163696, "learning_rate": 0.0001858099158023506, "loss": 0.2006, "step": 15160 }, { "epoch": 0.2132265092416895, "grad_norm": 0.4528370201587677, "learning_rate": 0.0001857860386649069, "loss": 0.1915, "step": 15170 }, { "epoch": 0.21336706725701032, "grad_norm": 0.5109283328056335, "learning_rate": 0.00018576214299257325, "loss": 0.2116, "step": 15180 }, { "epoch": 0.21350762527233116, "grad_norm": 0.36227118968963623, "learning_rate": 0.00018573822879051254, "loss": 0.1796, "step": 15190 }, { "epoch": 0.21364818328765198, "grad_norm": 0.5447039008140564, "learning_rate": 0.00018571429606389166, "loss": 0.1986, "step": 15200 }, { "epoch": 0.2137887413029728, "grad_norm": 0.5264976024627686, "learning_rate": 0.00018569034481788146, "loss": 0.2082, "step": 15210 }, { "epoch": 0.21392929931829363, "grad_norm": 0.48652032017707825, "learning_rate": 0.00018566637505765688, "loss": 0.199, "step": 15220 }, { "epoch": 0.21406985733361444, "grad_norm": 0.6124891638755798, "learning_rate": 0.0001856423867883968, "loss": 0.2012, "step": 15230 }, { "epoch": 0.21421041534893528, "grad_norm": 0.5273460745811462, "learning_rate": 0.0001856183800152841, "loss": 0.2263, "step": 15240 }, { "epoch": 0.2143509733642561, "grad_norm": 0.4930509030818939, "learning_rate": 0.00018559435474350573, "loss": 0.207, "step": 15250 }, { "epoch": 0.2144915313795769, "grad_norm": 0.46386903524398804, "learning_rate": 0.0001855703109782525, "loss": 0.2058, "step": 15260 }, { "epoch": 0.21463208939489775, "grad_norm": 0.45673784613609314, "learning_rate": 0.00018554624872471934, "loss": 0.1887, "step": 15270 }, { "epoch": 0.21477264741021856, "grad_norm": 0.46073588728904724, "learning_rate": 0.00018552216798810515, "loss": 0.2107, "step": 15280 }, { "epoch": 0.21491320542553938, "grad_norm": 0.4474230110645294, "learning_rate": 0.0001854980687736127, "loss": 0.2315, "step": 15290 }, { "epoch": 0.21505376344086022, "grad_norm": 0.38105547428131104, "learning_rate": 0.00018547395108644898, "loss": 0.2084, "step": 15300 }, { "epoch": 0.21519432145618103, "grad_norm": 0.4035305380821228, "learning_rate": 0.00018544981493182481, "loss": 0.2395, "step": 15310 }, { "epoch": 0.21533487947150187, "grad_norm": 0.532655656337738, "learning_rate": 0.00018542566031495497, "loss": 0.2164, "step": 15320 }, { "epoch": 0.21547543748682269, "grad_norm": 0.4325355589389801, "learning_rate": 0.00018540148724105834, "loss": 0.222, "step": 15330 }, { "epoch": 0.2156159955021435, "grad_norm": 0.5806542038917542, "learning_rate": 0.00018537729571535777, "loss": 0.1897, "step": 15340 }, { "epoch": 0.21575655351746434, "grad_norm": 0.437415212392807, "learning_rate": 0.00018535308574308003, "loss": 0.2089, "step": 15350 }, { "epoch": 0.21589711153278515, "grad_norm": 0.460148423910141, "learning_rate": 0.00018532885732945595, "loss": 0.2275, "step": 15360 }, { "epoch": 0.216037669548106, "grad_norm": 0.47994258999824524, "learning_rate": 0.00018530461047972026, "loss": 0.2063, "step": 15370 }, { "epoch": 0.2161782275634268, "grad_norm": 0.4585625231266022, "learning_rate": 0.00018528034519911177, "loss": 0.2147, "step": 15380 }, { "epoch": 0.21631878557874762, "grad_norm": 0.42683348059654236, "learning_rate": 0.0001852560614928732, "loss": 0.2218, "step": 15390 }, { "epoch": 0.21645934359406846, "grad_norm": 0.3197411000728607, "learning_rate": 0.00018523175936625128, "loss": 0.231, "step": 15400 }, { "epoch": 0.21659990160938927, "grad_norm": 0.4453927278518677, "learning_rate": 0.0001852074388244967, "loss": 0.2244, "step": 15410 }, { "epoch": 0.21674045962471009, "grad_norm": 0.47477197647094727, "learning_rate": 0.0001851830998728642, "loss": 0.2029, "step": 15420 }, { "epoch": 0.21688101764003093, "grad_norm": 0.3015683591365814, "learning_rate": 0.0001851587425166124, "loss": 0.1895, "step": 15430 }, { "epoch": 0.21702157565535174, "grad_norm": 0.3970557451248169, "learning_rate": 0.0001851343667610039, "loss": 0.2238, "step": 15440 }, { "epoch": 0.21716213367067258, "grad_norm": 0.48441988229751587, "learning_rate": 0.00018510997261130538, "loss": 0.2008, "step": 15450 }, { "epoch": 0.2173026916859934, "grad_norm": 0.6000593304634094, "learning_rate": 0.00018508556007278744, "loss": 0.1975, "step": 15460 }, { "epoch": 0.2174432497013142, "grad_norm": 0.320560097694397, "learning_rate": 0.00018506112915072455, "loss": 0.1964, "step": 15470 }, { "epoch": 0.21758380771663505, "grad_norm": 0.5494293570518494, "learning_rate": 0.00018503667985039532, "loss": 0.2143, "step": 15480 }, { "epoch": 0.21772436573195586, "grad_norm": 0.40263715386390686, "learning_rate": 0.00018501221217708224, "loss": 0.2493, "step": 15490 }, { "epoch": 0.2178649237472767, "grad_norm": 0.5201196670532227, "learning_rate": 0.00018498772613607177, "loss": 0.2009, "step": 15500 }, { "epoch": 0.2180054817625975, "grad_norm": 0.5006442070007324, "learning_rate": 0.00018496322173265436, "loss": 0.2501, "step": 15510 }, { "epoch": 0.21814603977791833, "grad_norm": 0.46168252825737, "learning_rate": 0.00018493869897212443, "loss": 0.2442, "step": 15520 }, { "epoch": 0.21828659779323917, "grad_norm": 0.43364977836608887, "learning_rate": 0.0001849141578597803, "loss": 0.2114, "step": 15530 }, { "epoch": 0.21842715580855998, "grad_norm": 0.4662631154060364, "learning_rate": 0.00018488959840092442, "loss": 0.2014, "step": 15540 }, { "epoch": 0.2185677138238808, "grad_norm": 0.44242143630981445, "learning_rate": 0.000184865020600863, "loss": 0.2131, "step": 15550 }, { "epoch": 0.21870827183920163, "grad_norm": 0.6606244444847107, "learning_rate": 0.00018484042446490634, "loss": 0.2399, "step": 15560 }, { "epoch": 0.21884882985452245, "grad_norm": 0.5681745409965515, "learning_rate": 0.00018481580999836866, "loss": 0.2157, "step": 15570 }, { "epoch": 0.2189893878698433, "grad_norm": 0.45000481605529785, "learning_rate": 0.00018479117720656814, "loss": 0.2016, "step": 15580 }, { "epoch": 0.2191299458851641, "grad_norm": 0.4646105170249939, "learning_rate": 0.00018476652609482694, "loss": 0.2096, "step": 15590 }, { "epoch": 0.21927050390048491, "grad_norm": 0.38280490040779114, "learning_rate": 0.00018474185666847123, "loss": 0.197, "step": 15600 }, { "epoch": 0.21941106191580576, "grad_norm": 0.35731178522109985, "learning_rate": 0.00018471716893283095, "loss": 0.1794, "step": 15610 }, { "epoch": 0.21955161993112657, "grad_norm": 0.4666403830051422, "learning_rate": 0.00018469246289324023, "loss": 0.2042, "step": 15620 }, { "epoch": 0.2196921779464474, "grad_norm": 0.41200706362724304, "learning_rate": 0.00018466773855503702, "loss": 0.1793, "step": 15630 }, { "epoch": 0.21983273596176822, "grad_norm": 0.41489627957344055, "learning_rate": 0.00018464299592356318, "loss": 0.2233, "step": 15640 }, { "epoch": 0.21997329397708903, "grad_norm": 0.46858951449394226, "learning_rate": 0.00018461823500416467, "loss": 0.2311, "step": 15650 }, { "epoch": 0.22011385199240988, "grad_norm": 0.3232119381427765, "learning_rate": 0.00018459345580219128, "loss": 0.1869, "step": 15660 }, { "epoch": 0.2202544100077307, "grad_norm": 0.48415064811706543, "learning_rate": 0.0001845686583229968, "loss": 0.224, "step": 15670 }, { "epoch": 0.2203949680230515, "grad_norm": 0.35237863659858704, "learning_rate": 0.00018454384257193896, "loss": 0.2156, "step": 15680 }, { "epoch": 0.22053552603837234, "grad_norm": 0.4336957335472107, "learning_rate": 0.0001845190085543795, "loss": 0.2382, "step": 15690 }, { "epoch": 0.22067608405369316, "grad_norm": 0.6433353424072266, "learning_rate": 0.00018449415627568393, "loss": 0.2264, "step": 15700 }, { "epoch": 0.220816642069014, "grad_norm": 0.5027011036872864, "learning_rate": 0.00018446928574122193, "loss": 0.2008, "step": 15710 }, { "epoch": 0.2209572000843348, "grad_norm": 0.3565092384815216, "learning_rate": 0.00018444439695636694, "loss": 0.2275, "step": 15720 }, { "epoch": 0.22109775809965562, "grad_norm": 0.4112890362739563, "learning_rate": 0.00018441948992649646, "loss": 0.2175, "step": 15730 }, { "epoch": 0.22123831611497646, "grad_norm": 0.48462048172950745, "learning_rate": 0.00018439456465699187, "loss": 0.2027, "step": 15740 }, { "epoch": 0.22137887413029728, "grad_norm": 0.49463990330696106, "learning_rate": 0.00018436962115323855, "loss": 0.2039, "step": 15750 }, { "epoch": 0.22151943214561812, "grad_norm": 0.33819833397865295, "learning_rate": 0.00018434465942062574, "loss": 0.21, "step": 15760 }, { "epoch": 0.22165999016093893, "grad_norm": 0.3969721794128418, "learning_rate": 0.00018431967946454665, "loss": 0.2409, "step": 15770 }, { "epoch": 0.22180054817625974, "grad_norm": 0.37428468465805054, "learning_rate": 0.00018429468129039847, "loss": 0.2053, "step": 15780 }, { "epoch": 0.22194110619158058, "grad_norm": 0.48097750544548035, "learning_rate": 0.0001842696649035823, "loss": 0.2133, "step": 15790 }, { "epoch": 0.2220816642069014, "grad_norm": 0.4189276695251465, "learning_rate": 0.00018424463030950312, "loss": 0.2064, "step": 15800 }, { "epoch": 0.2222222222222222, "grad_norm": 0.5002239942550659, "learning_rate": 0.00018421957751356993, "loss": 0.2125, "step": 15810 }, { "epoch": 0.22236278023754305, "grad_norm": 0.4362868070602417, "learning_rate": 0.00018419450652119564, "loss": 0.2095, "step": 15820 }, { "epoch": 0.22250333825286386, "grad_norm": 0.5985012054443359, "learning_rate": 0.00018416941733779703, "loss": 0.2297, "step": 15830 }, { "epoch": 0.2226438962681847, "grad_norm": 0.4958445727825165, "learning_rate": 0.00018414430996879487, "loss": 0.1994, "step": 15840 }, { "epoch": 0.22278445428350552, "grad_norm": 0.5681561231613159, "learning_rate": 0.0001841191844196139, "loss": 0.2125, "step": 15850 }, { "epoch": 0.22292501229882633, "grad_norm": 0.4579131305217743, "learning_rate": 0.00018409404069568265, "loss": 0.1977, "step": 15860 }, { "epoch": 0.22306557031414717, "grad_norm": 0.46691954135894775, "learning_rate": 0.00018406887880243372, "loss": 0.1974, "step": 15870 }, { "epoch": 0.22320612832946798, "grad_norm": 0.5419637560844421, "learning_rate": 0.0001840436987453036, "loss": 0.2065, "step": 15880 }, { "epoch": 0.22334668634478883, "grad_norm": 0.4614500105381012, "learning_rate": 0.00018401850052973258, "loss": 0.2051, "step": 15890 }, { "epoch": 0.22348724436010964, "grad_norm": 0.44594377279281616, "learning_rate": 0.00018399328416116503, "loss": 0.2126, "step": 15900 }, { "epoch": 0.22362780237543045, "grad_norm": 0.47176501154899597, "learning_rate": 0.00018396804964504923, "loss": 0.2117, "step": 15910 }, { "epoch": 0.2237683603907513, "grad_norm": 0.4225786030292511, "learning_rate": 0.00018394279698683727, "loss": 0.1908, "step": 15920 }, { "epoch": 0.2239089184060721, "grad_norm": 0.38729235529899597, "learning_rate": 0.00018391752619198527, "loss": 0.2097, "step": 15930 }, { "epoch": 0.22404947642139292, "grad_norm": 0.5475749373435974, "learning_rate": 0.0001838922372659532, "loss": 0.2019, "step": 15940 }, { "epoch": 0.22419003443671376, "grad_norm": 0.5189427137374878, "learning_rate": 0.000183866930214205, "loss": 0.1956, "step": 15950 }, { "epoch": 0.22433059245203457, "grad_norm": 0.4810304045677185, "learning_rate": 0.00018384160504220846, "loss": 0.1958, "step": 15960 }, { "epoch": 0.2244711504673554, "grad_norm": 0.5092229843139648, "learning_rate": 0.00018381626175543536, "loss": 0.1996, "step": 15970 }, { "epoch": 0.22461170848267623, "grad_norm": 0.36356088519096375, "learning_rate": 0.00018379090035936134, "loss": 0.1989, "step": 15980 }, { "epoch": 0.22475226649799704, "grad_norm": 0.34988951683044434, "learning_rate": 0.00018376552085946598, "loss": 0.1811, "step": 15990 }, { "epoch": 0.22489282451331788, "grad_norm": 0.611853301525116, "learning_rate": 0.00018374012326123276, "loss": 0.2104, "step": 16000 }, { "epoch": 0.22489282451331788, "eval_chrf": 82.88572916646845, "eval_loss": 0.4360054135322571, "eval_runtime": 240.7154, "eval_samples_per_second": 0.415, "eval_steps_per_second": 0.017, "step": 16000 }, { "epoch": 0.2250333825286387, "grad_norm": 0.4638526737689972, "learning_rate": 0.0001837147075701491, "loss": 0.2037, "step": 16010 }, { "epoch": 0.2251739405439595, "grad_norm": 0.5316300392150879, "learning_rate": 0.00018368927379170622, "loss": 0.1983, "step": 16020 }, { "epoch": 0.22531449855928035, "grad_norm": 0.4769590198993683, "learning_rate": 0.00018366382193139939, "loss": 0.2131, "step": 16030 }, { "epoch": 0.22545505657460116, "grad_norm": 0.39321017265319824, "learning_rate": 0.00018363835199472774, "loss": 0.2069, "step": 16040 }, { "epoch": 0.225595614589922, "grad_norm": 0.4696575999259949, "learning_rate": 0.00018361286398719428, "loss": 0.2193, "step": 16050 }, { "epoch": 0.2257361726052428, "grad_norm": 0.49544069170951843, "learning_rate": 0.00018358735791430592, "loss": 0.2088, "step": 16060 }, { "epoch": 0.22587673062056363, "grad_norm": 0.47814834117889404, "learning_rate": 0.0001835618337815735, "loss": 0.2276, "step": 16070 }, { "epoch": 0.22601728863588447, "grad_norm": 0.4744305908679962, "learning_rate": 0.00018353629159451176, "loss": 0.2162, "step": 16080 }, { "epoch": 0.22615784665120528, "grad_norm": 0.455851286649704, "learning_rate": 0.00018351073135863927, "loss": 0.2195, "step": 16090 }, { "epoch": 0.22629840466652612, "grad_norm": 0.64469313621521, "learning_rate": 0.00018348515307947867, "loss": 0.2198, "step": 16100 }, { "epoch": 0.22643896268184693, "grad_norm": 0.43515893816947937, "learning_rate": 0.00018345955676255633, "loss": 0.2141, "step": 16110 }, { "epoch": 0.22657952069716775, "grad_norm": 0.45102620124816895, "learning_rate": 0.00018343394241340257, "loss": 0.21, "step": 16120 }, { "epoch": 0.2267200787124886, "grad_norm": 0.5091341733932495, "learning_rate": 0.00018340831003755162, "loss": 0.2125, "step": 16130 }, { "epoch": 0.2268606367278094, "grad_norm": 0.3054172992706299, "learning_rate": 0.00018338265964054163, "loss": 0.1855, "step": 16140 }, { "epoch": 0.2270011947431302, "grad_norm": 0.4756179451942444, "learning_rate": 0.00018335699122791455, "loss": 0.2135, "step": 16150 }, { "epoch": 0.22714175275845105, "grad_norm": 0.40579459071159363, "learning_rate": 0.00018333130480521637, "loss": 0.2245, "step": 16160 }, { "epoch": 0.22728231077377187, "grad_norm": 0.46695980429649353, "learning_rate": 0.0001833056003779968, "loss": 0.2166, "step": 16170 }, { "epoch": 0.2274228687890927, "grad_norm": 0.32243990898132324, "learning_rate": 0.0001832798779518096, "loss": 0.1676, "step": 16180 }, { "epoch": 0.22756342680441352, "grad_norm": 0.45454373955726624, "learning_rate": 0.00018325413753221228, "loss": 0.2095, "step": 16190 }, { "epoch": 0.22770398481973433, "grad_norm": 0.43410179018974304, "learning_rate": 0.00018322837912476635, "loss": 0.1961, "step": 16200 }, { "epoch": 0.22784454283505517, "grad_norm": 0.44975170493125916, "learning_rate": 0.00018320260273503713, "loss": 0.2127, "step": 16210 }, { "epoch": 0.227985100850376, "grad_norm": 0.49282968044281006, "learning_rate": 0.00018317680836859386, "loss": 0.2208, "step": 16220 }, { "epoch": 0.22812565886569683, "grad_norm": 0.46582046151161194, "learning_rate": 0.00018315099603100968, "loss": 0.227, "step": 16230 }, { "epoch": 0.22826621688101764, "grad_norm": 0.42106932401657104, "learning_rate": 0.00018312516572786158, "loss": 0.2068, "step": 16240 }, { "epoch": 0.22840677489633845, "grad_norm": 0.43135741353034973, "learning_rate": 0.0001830993174647304, "loss": 0.2116, "step": 16250 }, { "epoch": 0.2285473329116593, "grad_norm": 0.603646993637085, "learning_rate": 0.000183073451247201, "loss": 0.2234, "step": 16260 }, { "epoch": 0.2286878909269801, "grad_norm": 0.40896886587142944, "learning_rate": 0.00018304756708086193, "loss": 0.1962, "step": 16270 }, { "epoch": 0.22882844894230092, "grad_norm": 0.5486946702003479, "learning_rate": 0.00018302166497130576, "loss": 0.2184, "step": 16280 }, { "epoch": 0.22896900695762176, "grad_norm": 0.4316113591194153, "learning_rate": 0.00018299574492412884, "loss": 0.1846, "step": 16290 }, { "epoch": 0.22910956497294258, "grad_norm": 0.6302161812782288, "learning_rate": 0.0001829698069449315, "loss": 0.2188, "step": 16300 }, { "epoch": 0.22925012298826342, "grad_norm": 0.4198704957962036, "learning_rate": 0.00018294385103931785, "loss": 0.2137, "step": 16310 }, { "epoch": 0.22939068100358423, "grad_norm": 0.41124218702316284, "learning_rate": 0.00018291787721289596, "loss": 0.2481, "step": 16320 }, { "epoch": 0.22953123901890504, "grad_norm": 0.4199219048023224, "learning_rate": 0.00018289188547127765, "loss": 0.2094, "step": 16330 }, { "epoch": 0.22967179703422588, "grad_norm": 0.48190534114837646, "learning_rate": 0.0001828658758200787, "loss": 0.2291, "step": 16340 }, { "epoch": 0.2298123550495467, "grad_norm": 0.424686998128891, "learning_rate": 0.00018283984826491877, "loss": 0.2012, "step": 16350 }, { "epoch": 0.22995291306486754, "grad_norm": 0.40577951073646545, "learning_rate": 0.00018281380281142139, "loss": 0.2089, "step": 16360 }, { "epoch": 0.23009347108018835, "grad_norm": 0.5267784595489502, "learning_rate": 0.00018278773946521387, "loss": 0.2284, "step": 16370 }, { "epoch": 0.23023402909550916, "grad_norm": 0.5176638960838318, "learning_rate": 0.00018276165823192744, "loss": 0.171, "step": 16380 }, { "epoch": 0.23037458711083, "grad_norm": 0.378432959318161, "learning_rate": 0.00018273555911719723, "loss": 0.192, "step": 16390 }, { "epoch": 0.23051514512615082, "grad_norm": 0.5014709234237671, "learning_rate": 0.00018270944212666221, "loss": 0.2117, "step": 16400 }, { "epoch": 0.23065570314147163, "grad_norm": 0.4741896390914917, "learning_rate": 0.0001826833072659652, "loss": 0.2264, "step": 16410 }, { "epoch": 0.23079626115679247, "grad_norm": 0.45438021421432495, "learning_rate": 0.00018265715454075283, "loss": 0.2268, "step": 16420 }, { "epoch": 0.23093681917211328, "grad_norm": 0.49692875146865845, "learning_rate": 0.00018263098395667572, "loss": 0.2201, "step": 16430 }, { "epoch": 0.23107737718743412, "grad_norm": 0.5172484517097473, "learning_rate": 0.00018260479551938824, "loss": 0.2068, "step": 16440 }, { "epoch": 0.23121793520275494, "grad_norm": 0.4847676455974579, "learning_rate": 0.00018257858923454863, "loss": 0.194, "step": 16450 }, { "epoch": 0.23135849321807575, "grad_norm": 0.4178107976913452, "learning_rate": 0.00018255236510781906, "loss": 0.2018, "step": 16460 }, { "epoch": 0.2314990512333966, "grad_norm": 0.43576788902282715, "learning_rate": 0.00018252612314486545, "loss": 0.175, "step": 16470 }, { "epoch": 0.2316396092487174, "grad_norm": 0.43119123578071594, "learning_rate": 0.00018249986335135767, "loss": 0.1909, "step": 16480 }, { "epoch": 0.23178016726403824, "grad_norm": 0.5131032466888428, "learning_rate": 0.00018247358573296934, "loss": 0.2193, "step": 16490 }, { "epoch": 0.23192072527935906, "grad_norm": 0.411156564950943, "learning_rate": 0.0001824472902953781, "loss": 0.2003, "step": 16500 }, { "epoch": 0.23206128329467987, "grad_norm": 0.45583435893058777, "learning_rate": 0.00018242097704426516, "loss": 0.2122, "step": 16510 }, { "epoch": 0.2322018413100007, "grad_norm": 0.3981030285358429, "learning_rate": 0.00018239464598531591, "loss": 0.2043, "step": 16520 }, { "epoch": 0.23234239932532152, "grad_norm": 0.6014482975006104, "learning_rate": 0.00018236829712421933, "loss": 0.208, "step": 16530 }, { "epoch": 0.23248295734064234, "grad_norm": 0.45501312613487244, "learning_rate": 0.00018234193046666838, "loss": 0.211, "step": 16540 }, { "epoch": 0.23262351535596318, "grad_norm": 0.4550839066505432, "learning_rate": 0.00018231554601835982, "loss": 0.2037, "step": 16550 }, { "epoch": 0.232764073371284, "grad_norm": 0.44017985463142395, "learning_rate": 0.00018228914378499428, "loss": 0.201, "step": 16560 }, { "epoch": 0.23290463138660483, "grad_norm": 0.5057845115661621, "learning_rate": 0.00018226272377227617, "loss": 0.1819, "step": 16570 }, { "epoch": 0.23304518940192565, "grad_norm": 0.3775175213813782, "learning_rate": 0.00018223628598591383, "loss": 0.2052, "step": 16580 }, { "epoch": 0.23318574741724646, "grad_norm": 0.37587618827819824, "learning_rate": 0.00018220983043161937, "loss": 0.201, "step": 16590 }, { "epoch": 0.2333263054325673, "grad_norm": 0.4216788411140442, "learning_rate": 0.00018218335711510874, "loss": 0.2156, "step": 16600 }, { "epoch": 0.2334668634478881, "grad_norm": 0.4030705392360687, "learning_rate": 0.0001821568660421018, "loss": 0.2056, "step": 16610 }, { "epoch": 0.23360742146320895, "grad_norm": 0.346451073884964, "learning_rate": 0.0001821303572183222, "loss": 0.2017, "step": 16620 }, { "epoch": 0.23374797947852977, "grad_norm": 0.49614450335502625, "learning_rate": 0.00018210383064949737, "loss": 0.2038, "step": 16630 }, { "epoch": 0.23388853749385058, "grad_norm": 0.45596131682395935, "learning_rate": 0.0001820772863413587, "loss": 0.219, "step": 16640 }, { "epoch": 0.23402909550917142, "grad_norm": 0.49397969245910645, "learning_rate": 0.00018205072429964124, "loss": 0.2031, "step": 16650 }, { "epoch": 0.23416965352449223, "grad_norm": 0.43771892786026, "learning_rate": 0.00018202414453008409, "loss": 0.1984, "step": 16660 }, { "epoch": 0.23431021153981305, "grad_norm": 0.39425256848335266, "learning_rate": 0.00018199754703843, "loss": 0.2244, "step": 16670 }, { "epoch": 0.2344507695551339, "grad_norm": 0.5792267322540283, "learning_rate": 0.0001819709318304256, "loss": 0.2039, "step": 16680 }, { "epoch": 0.2345913275704547, "grad_norm": 0.417339026927948, "learning_rate": 0.00018194429891182137, "loss": 0.182, "step": 16690 }, { "epoch": 0.23473188558577554, "grad_norm": 0.5354083776473999, "learning_rate": 0.00018191764828837158, "loss": 0.2253, "step": 16700 }, { "epoch": 0.23487244360109635, "grad_norm": 0.4503895044326782, "learning_rate": 0.00018189097996583443, "loss": 0.2231, "step": 16710 }, { "epoch": 0.23501300161641717, "grad_norm": 0.5055163502693176, "learning_rate": 0.00018186429394997182, "loss": 0.2395, "step": 16720 }, { "epoch": 0.235153559631738, "grad_norm": 0.4840635359287262, "learning_rate": 0.00018183759024654949, "loss": 0.2135, "step": 16730 }, { "epoch": 0.23529411764705882, "grad_norm": 0.3905552625656128, "learning_rate": 0.00018181086886133704, "loss": 0.2024, "step": 16740 }, { "epoch": 0.23543467566237966, "grad_norm": 0.36550042033195496, "learning_rate": 0.00018178412980010794, "loss": 0.2198, "step": 16750 }, { "epoch": 0.23557523367770047, "grad_norm": 0.43158572912216187, "learning_rate": 0.00018175737306863932, "loss": 0.1993, "step": 16760 }, { "epoch": 0.2357157916930213, "grad_norm": 0.3679793179035187, "learning_rate": 0.00018173059867271233, "loss": 0.2072, "step": 16770 }, { "epoch": 0.23585634970834213, "grad_norm": 0.5061398148536682, "learning_rate": 0.00018170380661811173, "loss": 0.202, "step": 16780 }, { "epoch": 0.23599690772366294, "grad_norm": 0.5475767850875854, "learning_rate": 0.00018167699691062626, "loss": 0.2039, "step": 16790 }, { "epoch": 0.23613746573898375, "grad_norm": 0.6202900409698486, "learning_rate": 0.00018165016955604843, "loss": 0.2007, "step": 16800 }, { "epoch": 0.2362780237543046, "grad_norm": 0.4951488971710205, "learning_rate": 0.0001816233245601745, "loss": 0.2619, "step": 16810 }, { "epoch": 0.2364185817696254, "grad_norm": 0.4565191864967346, "learning_rate": 0.0001815964619288046, "loss": 0.2022, "step": 16820 }, { "epoch": 0.23655913978494625, "grad_norm": 0.3810023069381714, "learning_rate": 0.00018156958166774267, "loss": 0.1996, "step": 16830 }, { "epoch": 0.23669969780026706, "grad_norm": 0.5086340308189392, "learning_rate": 0.00018154268378279645, "loss": 0.2292, "step": 16840 }, { "epoch": 0.23684025581558787, "grad_norm": 0.4447498321533203, "learning_rate": 0.00018151576827977743, "loss": 0.2026, "step": 16850 }, { "epoch": 0.23698081383090872, "grad_norm": 0.44191133975982666, "learning_rate": 0.00018148883516450103, "loss": 0.2195, "step": 16860 }, { "epoch": 0.23712137184622953, "grad_norm": 0.5270133018493652, "learning_rate": 0.00018146188444278635, "loss": 0.216, "step": 16870 }, { "epoch": 0.23726192986155034, "grad_norm": 0.5399002432823181, "learning_rate": 0.0001814349161204564, "loss": 0.1924, "step": 16880 }, { "epoch": 0.23740248787687118, "grad_norm": 0.46844422817230225, "learning_rate": 0.00018140793020333793, "loss": 0.2233, "step": 16890 }, { "epoch": 0.237543045892192, "grad_norm": 0.4430544376373291, "learning_rate": 0.0001813809266972615, "loss": 0.2369, "step": 16900 }, { "epoch": 0.23768360390751284, "grad_norm": 0.4523754119873047, "learning_rate": 0.00018135390560806142, "loss": 0.1982, "step": 16910 }, { "epoch": 0.23782416192283365, "grad_norm": 0.5143480896949768, "learning_rate": 0.00018132686694157592, "loss": 0.2017, "step": 16920 }, { "epoch": 0.23796471993815446, "grad_norm": 0.4497261643409729, "learning_rate": 0.00018129981070364698, "loss": 0.208, "step": 16930 }, { "epoch": 0.2381052779534753, "grad_norm": 0.5222679972648621, "learning_rate": 0.00018127273690012028, "loss": 0.1978, "step": 16940 }, { "epoch": 0.23824583596879612, "grad_norm": 0.5843683481216431, "learning_rate": 0.00018124564553684543, "loss": 0.2123, "step": 16950 }, { "epoch": 0.23838639398411696, "grad_norm": 0.5215798616409302, "learning_rate": 0.00018121853661967578, "loss": 0.2211, "step": 16960 }, { "epoch": 0.23852695199943777, "grad_norm": 0.3762476444244385, "learning_rate": 0.00018119141015446842, "loss": 0.2345, "step": 16970 }, { "epoch": 0.23866751001475858, "grad_norm": 0.4703845977783203, "learning_rate": 0.0001811642661470843, "loss": 0.1882, "step": 16980 }, { "epoch": 0.23880806803007942, "grad_norm": 0.3904546797275543, "learning_rate": 0.0001811371046033882, "loss": 0.2094, "step": 16990 }, { "epoch": 0.23894862604540024, "grad_norm": 0.3003694713115692, "learning_rate": 0.00018110992552924855, "loss": 0.1755, "step": 17000 }, { "epoch": 0.23894862604540024, "eval_chrf": 83.42682500619117, "eval_loss": 0.433625727891922, "eval_runtime": 194.2793, "eval_samples_per_second": 0.515, "eval_steps_per_second": 0.021, "step": 17000 }, { "epoch": 0.23908918406072105, "grad_norm": 0.4176693856716156, "learning_rate": 0.00018108272893053767, "loss": 0.2057, "step": 17010 }, { "epoch": 0.2392297420760419, "grad_norm": 0.4226667881011963, "learning_rate": 0.00018105551481313167, "loss": 0.1982, "step": 17020 }, { "epoch": 0.2393703000913627, "grad_norm": 0.5105968713760376, "learning_rate": 0.00018102828318291038, "loss": 0.2114, "step": 17030 }, { "epoch": 0.23951085810668354, "grad_norm": 0.3624613583087921, "learning_rate": 0.0001810010340457575, "loss": 0.2005, "step": 17040 }, { "epoch": 0.23965141612200436, "grad_norm": 0.4732239544391632, "learning_rate": 0.00018097376740756045, "loss": 0.1839, "step": 17050 }, { "epoch": 0.23979197413732517, "grad_norm": 0.4396938383579254, "learning_rate": 0.0001809464832742104, "loss": 0.2279, "step": 17060 }, { "epoch": 0.239932532152646, "grad_norm": 0.4598935842514038, "learning_rate": 0.0001809191816516024, "loss": 0.2389, "step": 17070 }, { "epoch": 0.24007309016796682, "grad_norm": 0.43191003799438477, "learning_rate": 0.00018089186254563523, "loss": 0.1953, "step": 17080 }, { "epoch": 0.24021364818328766, "grad_norm": 0.29088646173477173, "learning_rate": 0.0001808645259622114, "loss": 0.1809, "step": 17090 }, { "epoch": 0.24035420619860848, "grad_norm": 0.5946388244628906, "learning_rate": 0.00018083717190723727, "loss": 0.1966, "step": 17100 }, { "epoch": 0.2404947642139293, "grad_norm": 0.4110700488090515, "learning_rate": 0.00018080980038662293, "loss": 0.1832, "step": 17110 }, { "epoch": 0.24063532222925013, "grad_norm": 0.44298121333122253, "learning_rate": 0.00018078241140628224, "loss": 0.2172, "step": 17120 }, { "epoch": 0.24077588024457094, "grad_norm": 0.5646606683731079, "learning_rate": 0.00018075500497213295, "loss": 0.2133, "step": 17130 }, { "epoch": 0.24091643825989176, "grad_norm": 0.40852683782577515, "learning_rate": 0.0001807275810900963, "loss": 0.2284, "step": 17140 }, { "epoch": 0.2410569962752126, "grad_norm": 0.38603800535202026, "learning_rate": 0.00018070013976609765, "loss": 0.2188, "step": 17150 }, { "epoch": 0.2411975542905334, "grad_norm": 0.4328840374946594, "learning_rate": 0.0001806726810060659, "loss": 0.2019, "step": 17160 }, { "epoch": 0.24133811230585425, "grad_norm": 0.44296500086784363, "learning_rate": 0.00018064520481593372, "loss": 0.1915, "step": 17170 }, { "epoch": 0.24147867032117507, "grad_norm": 0.3564743101596832, "learning_rate": 0.00018061771120163767, "loss": 0.222, "step": 17180 }, { "epoch": 0.24161922833649588, "grad_norm": 0.367158442735672, "learning_rate": 0.00018059020016911802, "loss": 0.1837, "step": 17190 }, { "epoch": 0.24175978635181672, "grad_norm": 0.40433263778686523, "learning_rate": 0.00018056267172431874, "loss": 0.2111, "step": 17200 }, { "epoch": 0.24190034436713753, "grad_norm": 0.4994758367538452, "learning_rate": 0.00018053512587318758, "loss": 0.2068, "step": 17210 }, { "epoch": 0.24204090238245837, "grad_norm": 0.5511106848716736, "learning_rate": 0.00018050756262167622, "loss": 0.2285, "step": 17220 }, { "epoch": 0.24218146039777919, "grad_norm": 0.4459092319011688, "learning_rate": 0.0001804799819757398, "loss": 0.221, "step": 17230 }, { "epoch": 0.2423220184131, "grad_norm": 0.38065147399902344, "learning_rate": 0.00018045238394133752, "loss": 0.2076, "step": 17240 }, { "epoch": 0.24246257642842084, "grad_norm": 0.4815782606601715, "learning_rate": 0.0001804247685244321, "loss": 0.2036, "step": 17250 }, { "epoch": 0.24260313444374165, "grad_norm": 0.4660325348377228, "learning_rate": 0.00018039713573099017, "loss": 0.2204, "step": 17260 }, { "epoch": 0.24274369245906247, "grad_norm": 0.4304184913635254, "learning_rate": 0.00018036948556698204, "loss": 0.214, "step": 17270 }, { "epoch": 0.2428842504743833, "grad_norm": 0.49734094738960266, "learning_rate": 0.00018034181803838178, "loss": 0.212, "step": 17280 }, { "epoch": 0.24302480848970412, "grad_norm": 0.48818838596343994, "learning_rate": 0.00018031413315116722, "loss": 0.2311, "step": 17290 }, { "epoch": 0.24316536650502496, "grad_norm": 0.43453922867774963, "learning_rate": 0.00018028643091132, "loss": 0.2106, "step": 17300 }, { "epoch": 0.24330592452034577, "grad_norm": 0.3406917452812195, "learning_rate": 0.00018025871132482536, "loss": 0.2251, "step": 17310 }, { "epoch": 0.2434464825356666, "grad_norm": 0.43452584743499756, "learning_rate": 0.00018023097439767244, "loss": 0.2188, "step": 17320 }, { "epoch": 0.24358704055098743, "grad_norm": 0.5142020583152771, "learning_rate": 0.00018020322013585408, "loss": 0.2188, "step": 17330 }, { "epoch": 0.24372759856630824, "grad_norm": 0.5353051424026489, "learning_rate": 0.00018017544854536679, "loss": 0.1949, "step": 17340 }, { "epoch": 0.24386815658162908, "grad_norm": 0.49185070395469666, "learning_rate": 0.0001801476596322109, "loss": 0.2284, "step": 17350 }, { "epoch": 0.2440087145969499, "grad_norm": 0.40892067551612854, "learning_rate": 0.00018011985340239054, "loss": 0.2017, "step": 17360 }, { "epoch": 0.2441492726122707, "grad_norm": 0.4842357933521271, "learning_rate": 0.00018009202986191348, "loss": 0.2098, "step": 17370 }, { "epoch": 0.24428983062759155, "grad_norm": 0.5735198855400085, "learning_rate": 0.0001800641890167912, "loss": 0.2444, "step": 17380 }, { "epoch": 0.24443038864291236, "grad_norm": 0.39232829213142395, "learning_rate": 0.00018003633087303907, "loss": 0.1893, "step": 17390 }, { "epoch": 0.24457094665823317, "grad_norm": 0.4202956557273865, "learning_rate": 0.000180008455436676, "loss": 0.209, "step": 17400 }, { "epoch": 0.24471150467355401, "grad_norm": 0.41963180899620056, "learning_rate": 0.00017998056271372484, "loss": 0.2169, "step": 17410 }, { "epoch": 0.24485206268887483, "grad_norm": 0.44267401099205017, "learning_rate": 0.00017995265271021207, "loss": 0.1852, "step": 17420 }, { "epoch": 0.24499262070419567, "grad_norm": 0.4419970214366913, "learning_rate": 0.00017992472543216785, "loss": 0.1821, "step": 17430 }, { "epoch": 0.24513317871951648, "grad_norm": 0.4276140630245209, "learning_rate": 0.00017989678088562618, "loss": 0.2028, "step": 17440 }, { "epoch": 0.2452737367348373, "grad_norm": 0.5623214840888977, "learning_rate": 0.00017986881907662471, "loss": 0.1916, "step": 17450 }, { "epoch": 0.24541429475015814, "grad_norm": 0.36114174127578735, "learning_rate": 0.00017984084001120492, "loss": 0.2202, "step": 17460 }, { "epoch": 0.24555485276547895, "grad_norm": 0.5440938472747803, "learning_rate": 0.00017981284369541188, "loss": 0.221, "step": 17470 }, { "epoch": 0.2456954107807998, "grad_norm": 0.40802478790283203, "learning_rate": 0.00017978483013529452, "loss": 0.2104, "step": 17480 }, { "epoch": 0.2458359687961206, "grad_norm": 0.5030210614204407, "learning_rate": 0.00017975679933690536, "loss": 0.2122, "step": 17490 }, { "epoch": 0.24597652681144141, "grad_norm": 0.34485700726509094, "learning_rate": 0.0001797287513063008, "loss": 0.2088, "step": 17500 }, { "epoch": 0.24611708482676226, "grad_norm": 0.42414161562919617, "learning_rate": 0.00017970068604954086, "loss": 0.2085, "step": 17510 }, { "epoch": 0.24625764284208307, "grad_norm": 0.41926831007003784, "learning_rate": 0.00017967260357268928, "loss": 0.1908, "step": 17520 }, { "epoch": 0.24639820085740388, "grad_norm": 0.4200794994831085, "learning_rate": 0.00017964450388181357, "loss": 0.1899, "step": 17530 }, { "epoch": 0.24653875887272472, "grad_norm": 0.4162304997444153, "learning_rate": 0.00017961638698298492, "loss": 0.2034, "step": 17540 }, { "epoch": 0.24667931688804554, "grad_norm": 0.48807257413864136, "learning_rate": 0.00017958825288227827, "loss": 0.2171, "step": 17550 }, { "epoch": 0.24681987490336638, "grad_norm": 0.47989019751548767, "learning_rate": 0.0001795601015857723, "loss": 0.2188, "step": 17560 }, { "epoch": 0.2469604329186872, "grad_norm": 0.3965669870376587, "learning_rate": 0.00017953193309954926, "loss": 0.2018, "step": 17570 }, { "epoch": 0.247100990934008, "grad_norm": 0.49545612931251526, "learning_rate": 0.0001795037474296953, "loss": 0.2242, "step": 17580 }, { "epoch": 0.24724154894932884, "grad_norm": 0.35966184735298157, "learning_rate": 0.0001794755445823002, "loss": 0.213, "step": 17590 }, { "epoch": 0.24738210696464966, "grad_norm": 0.4222733676433563, "learning_rate": 0.00017944732456345741, "loss": 0.1917, "step": 17600 }, { "epoch": 0.24752266497997047, "grad_norm": 0.4538722038269043, "learning_rate": 0.0001794190873792642, "loss": 0.1971, "step": 17610 }, { "epoch": 0.2476632229952913, "grad_norm": 0.4126892387866974, "learning_rate": 0.00017939083303582137, "loss": 0.2106, "step": 17620 }, { "epoch": 0.24780378101061212, "grad_norm": 0.5503072142601013, "learning_rate": 0.00017936256153923367, "loss": 0.1967, "step": 17630 }, { "epoch": 0.24794433902593296, "grad_norm": 0.44349250197410583, "learning_rate": 0.00017933427289560934, "loss": 0.225, "step": 17640 }, { "epoch": 0.24808489704125378, "grad_norm": 0.5387433767318726, "learning_rate": 0.00017930596711106044, "loss": 0.2118, "step": 17650 }, { "epoch": 0.2482254550565746, "grad_norm": 0.4662763774394989, "learning_rate": 0.0001792776441917027, "loss": 0.192, "step": 17660 }, { "epoch": 0.24836601307189543, "grad_norm": 0.640113890171051, "learning_rate": 0.00017924930414365558, "loss": 0.1827, "step": 17670 }, { "epoch": 0.24850657108721624, "grad_norm": 0.4936950206756592, "learning_rate": 0.00017922094697304217, "loss": 0.2297, "step": 17680 }, { "epoch": 0.24864712910253708, "grad_norm": 0.41726192831993103, "learning_rate": 0.00017919257268598932, "loss": 0.2046, "step": 17690 }, { "epoch": 0.2487876871178579, "grad_norm": 0.42578253149986267, "learning_rate": 0.0001791641812886276, "loss": 0.1857, "step": 17700 }, { "epoch": 0.2489282451331787, "grad_norm": 0.39796364307403564, "learning_rate": 0.0001791357727870912, "loss": 0.2083, "step": 17710 }, { "epoch": 0.24906880314849955, "grad_norm": 0.5429249405860901, "learning_rate": 0.0001791073471875181, "loss": 0.2113, "step": 17720 }, { "epoch": 0.24920936116382036, "grad_norm": 0.42298436164855957, "learning_rate": 0.00017907890449604986, "loss": 0.2002, "step": 17730 }, { "epoch": 0.24934991917914118, "grad_norm": 0.4462754428386688, "learning_rate": 0.00017905044471883184, "loss": 0.2039, "step": 17740 }, { "epoch": 0.24949047719446202, "grad_norm": 0.4810706377029419, "learning_rate": 0.000179021967862013, "loss": 0.1958, "step": 17750 }, { "epoch": 0.24963103520978283, "grad_norm": 0.45998165011405945, "learning_rate": 0.00017899347393174605, "loss": 0.1908, "step": 17760 }, { "epoch": 0.24977159322510367, "grad_norm": 0.44851624965667725, "learning_rate": 0.0001789649629341874, "loss": 0.201, "step": 17770 }, { "epoch": 0.24991215124042448, "grad_norm": 0.4333913028240204, "learning_rate": 0.0001789364348754971, "loss": 0.194, "step": 17780 }, { "epoch": 0.2500527092557453, "grad_norm": 0.40873411297798157, "learning_rate": 0.00017890788976183895, "loss": 0.1963, "step": 17790 }, { "epoch": 0.2501932672710661, "grad_norm": 0.5309141874313354, "learning_rate": 0.00017887932759938036, "loss": 0.2043, "step": 17800 }, { "epoch": 0.250333825286387, "grad_norm": 0.4489333927631378, "learning_rate": 0.00017885074839429246, "loss": 0.2397, "step": 17810 }, { "epoch": 0.2504743833017078, "grad_norm": 0.700109601020813, "learning_rate": 0.00017882215215275, "loss": 0.2205, "step": 17820 }, { "epoch": 0.2506149413170286, "grad_norm": 0.4139418303966522, "learning_rate": 0.00017879353888093158, "loss": 0.2263, "step": 17830 }, { "epoch": 0.2507554993323494, "grad_norm": 0.411004900932312, "learning_rate": 0.0001787649085850193, "loss": 0.1978, "step": 17840 }, { "epoch": 0.25089605734767023, "grad_norm": 0.5109737515449524, "learning_rate": 0.00017873626127119902, "loss": 0.2025, "step": 17850 }, { "epoch": 0.2510366153629911, "grad_norm": 0.6595651507377625, "learning_rate": 0.00017870759694566028, "loss": 0.2063, "step": 17860 }, { "epoch": 0.2511771733783119, "grad_norm": 0.5950677990913391, "learning_rate": 0.00017867891561459625, "loss": 0.2064, "step": 17870 }, { "epoch": 0.2513177313936327, "grad_norm": 0.4480992555618286, "learning_rate": 0.00017865021728420385, "loss": 0.221, "step": 17880 }, { "epoch": 0.25145828940895354, "grad_norm": 0.44384530186653137, "learning_rate": 0.00017862150196068359, "loss": 0.211, "step": 17890 }, { "epoch": 0.25159884742427435, "grad_norm": 0.46428948640823364, "learning_rate": 0.0001785927696502397, "loss": 0.1983, "step": 17900 }, { "epoch": 0.25173940543959517, "grad_norm": 0.3470834791660309, "learning_rate": 0.00017856402035908007, "loss": 0.1977, "step": 17910 }, { "epoch": 0.25187996345491603, "grad_norm": 0.48576465249061584, "learning_rate": 0.00017853525409341624, "loss": 0.2215, "step": 17920 }, { "epoch": 0.25202052147023685, "grad_norm": 0.44239556789398193, "learning_rate": 0.00017850647085946347, "loss": 0.2219, "step": 17930 }, { "epoch": 0.25216107948555766, "grad_norm": 0.3892723619937897, "learning_rate": 0.00017847767066344065, "loss": 0.1916, "step": 17940 }, { "epoch": 0.2523016375008785, "grad_norm": 0.4856000542640686, "learning_rate": 0.0001784488535115703, "loss": 0.2231, "step": 17950 }, { "epoch": 0.2524421955161993, "grad_norm": 0.49842894077301025, "learning_rate": 0.00017842001941007868, "loss": 0.2161, "step": 17960 }, { "epoch": 0.25258275353152015, "grad_norm": 0.5050910711288452, "learning_rate": 0.00017839116836519564, "loss": 0.2183, "step": 17970 }, { "epoch": 0.25272331154684097, "grad_norm": 0.42533183097839355, "learning_rate": 0.0001783623003831547, "loss": 0.2169, "step": 17980 }, { "epoch": 0.2528638695621618, "grad_norm": 0.42304936051368713, "learning_rate": 0.00017833341547019316, "loss": 0.195, "step": 17990 }, { "epoch": 0.2530044275774826, "grad_norm": 0.4381530284881592, "learning_rate": 0.00017830451363255178, "loss": 0.2159, "step": 18000 }, { "epoch": 0.2530044275774826, "eval_chrf": 83.43002044720006, "eval_loss": 0.4113892614841461, "eval_runtime": 138.5753, "eval_samples_per_second": 0.722, "eval_steps_per_second": 0.029, "step": 18000 }, { "epoch": 0.2531449855928034, "grad_norm": 0.40219688415527344, "learning_rate": 0.00017827559487647513, "loss": 0.2029, "step": 18010 }, { "epoch": 0.2532855436081243, "grad_norm": 0.38504263758659363, "learning_rate": 0.00017824665920821136, "loss": 0.1915, "step": 18020 }, { "epoch": 0.2534261016234451, "grad_norm": 0.5165150761604309, "learning_rate": 0.0001782177066340123, "loss": 0.1825, "step": 18030 }, { "epoch": 0.2535666596387659, "grad_norm": 0.3051179349422455, "learning_rate": 0.00017818873716013344, "loss": 0.1767, "step": 18040 }, { "epoch": 0.2537072176540867, "grad_norm": 0.3943491578102112, "learning_rate": 0.0001781597507928339, "loss": 0.2013, "step": 18050 }, { "epoch": 0.2538477756694075, "grad_norm": 0.5130552649497986, "learning_rate": 0.00017813074753837647, "loss": 0.1949, "step": 18060 }, { "epoch": 0.2539883336847284, "grad_norm": 0.34325119853019714, "learning_rate": 0.00017810172740302758, "loss": 0.1749, "step": 18070 }, { "epoch": 0.2541288917000492, "grad_norm": 0.44568413496017456, "learning_rate": 0.00017807269039305727, "loss": 0.193, "step": 18080 }, { "epoch": 0.25426944971537, "grad_norm": 0.3938472867012024, "learning_rate": 0.00017804363651473932, "loss": 0.1897, "step": 18090 }, { "epoch": 0.25441000773069083, "grad_norm": 0.3924921154975891, "learning_rate": 0.00017801456577435105, "loss": 0.2201, "step": 18100 }, { "epoch": 0.25455056574601165, "grad_norm": 0.3777751922607422, "learning_rate": 0.0001779854781781735, "loss": 0.2211, "step": 18110 }, { "epoch": 0.2546911237613325, "grad_norm": 0.46796715259552, "learning_rate": 0.0001779563737324913, "loss": 0.1866, "step": 18120 }, { "epoch": 0.25483168177665333, "grad_norm": 0.3461954891681671, "learning_rate": 0.00017792725244359276, "loss": 0.2112, "step": 18130 }, { "epoch": 0.25497223979197414, "grad_norm": 0.40790891647338867, "learning_rate": 0.0001778981143177698, "loss": 0.1958, "step": 18140 }, { "epoch": 0.25511279780729496, "grad_norm": 0.3641470968723297, "learning_rate": 0.000177868959361318, "loss": 0.1938, "step": 18150 }, { "epoch": 0.25525335582261577, "grad_norm": 0.43044883012771606, "learning_rate": 0.00017783978758053657, "loss": 0.1978, "step": 18160 }, { "epoch": 0.2553939138379366, "grad_norm": 0.39675578474998474, "learning_rate": 0.00017781059898172835, "loss": 0.2101, "step": 18170 }, { "epoch": 0.25553447185325745, "grad_norm": 0.4233925938606262, "learning_rate": 0.00017778139357119976, "loss": 0.2083, "step": 18180 }, { "epoch": 0.25567502986857826, "grad_norm": 0.38321974873542786, "learning_rate": 0.00017775217135526097, "loss": 0.2095, "step": 18190 }, { "epoch": 0.2558155878838991, "grad_norm": 0.4945431351661682, "learning_rate": 0.00017772293234022572, "loss": 0.1863, "step": 18200 }, { "epoch": 0.2559561458992199, "grad_norm": 0.518644392490387, "learning_rate": 0.00017769367653241134, "loss": 0.2165, "step": 18210 }, { "epoch": 0.2560967039145407, "grad_norm": 0.36446699500083923, "learning_rate": 0.00017766440393813884, "loss": 0.1928, "step": 18220 }, { "epoch": 0.25623726192986157, "grad_norm": 0.3861456513404846, "learning_rate": 0.00017763511456373284, "loss": 0.2266, "step": 18230 }, { "epoch": 0.2563778199451824, "grad_norm": 0.38828468322753906, "learning_rate": 0.00017760580841552162, "loss": 0.2068, "step": 18240 }, { "epoch": 0.2565183779605032, "grad_norm": 0.3888741135597229, "learning_rate": 0.000177576485499837, "loss": 0.2199, "step": 18250 }, { "epoch": 0.256658935975824, "grad_norm": 0.5249620676040649, "learning_rate": 0.0001775471458230145, "loss": 0.2339, "step": 18260 }, { "epoch": 0.2567994939911448, "grad_norm": 0.33323395252227783, "learning_rate": 0.00017751778939139326, "loss": 0.204, "step": 18270 }, { "epoch": 0.2569400520064657, "grad_norm": 0.548209547996521, "learning_rate": 0.000177488416211316, "loss": 0.2021, "step": 18280 }, { "epoch": 0.2570806100217865, "grad_norm": 0.4949204623699188, "learning_rate": 0.00017745902628912903, "loss": 0.2204, "step": 18290 }, { "epoch": 0.2572211680371073, "grad_norm": 0.3896910548210144, "learning_rate": 0.00017742961963118238, "loss": 0.1932, "step": 18300 }, { "epoch": 0.25736172605242813, "grad_norm": 0.4428505599498749, "learning_rate": 0.00017740019624382962, "loss": 0.1915, "step": 18310 }, { "epoch": 0.25750228406774894, "grad_norm": 0.5322151184082031, "learning_rate": 0.00017737075613342794, "loss": 0.2097, "step": 18320 }, { "epoch": 0.2576428420830698, "grad_norm": 0.3616546094417572, "learning_rate": 0.00017734129930633817, "loss": 0.1985, "step": 18330 }, { "epoch": 0.2577834000983906, "grad_norm": 0.6213672161102295, "learning_rate": 0.00017731182576892477, "loss": 0.2052, "step": 18340 }, { "epoch": 0.25792395811371144, "grad_norm": 0.48710718750953674, "learning_rate": 0.0001772823355275557, "loss": 0.188, "step": 18350 }, { "epoch": 0.25806451612903225, "grad_norm": 0.45230305194854736, "learning_rate": 0.00017725282858860268, "loss": 0.205, "step": 18360 }, { "epoch": 0.25820507414435306, "grad_norm": 0.46916815638542175, "learning_rate": 0.00017722330495844094, "loss": 0.2174, "step": 18370 }, { "epoch": 0.25834563215967393, "grad_norm": 0.7887526750564575, "learning_rate": 0.0001771937646434493, "loss": 0.2108, "step": 18380 }, { "epoch": 0.25848619017499475, "grad_norm": 0.38442105054855347, "learning_rate": 0.0001771642076500103, "loss": 0.1842, "step": 18390 }, { "epoch": 0.25862674819031556, "grad_norm": 0.5363273024559021, "learning_rate": 0.00017713463398450995, "loss": 0.2144, "step": 18400 }, { "epoch": 0.25876730620563637, "grad_norm": 0.48678267002105713, "learning_rate": 0.00017710504365333796, "loss": 0.2134, "step": 18410 }, { "epoch": 0.2589078642209572, "grad_norm": 0.5642648935317993, "learning_rate": 0.0001770754366628876, "loss": 0.2127, "step": 18420 }, { "epoch": 0.259048422236278, "grad_norm": 0.4107931852340698, "learning_rate": 0.0001770458130195557, "loss": 0.2001, "step": 18430 }, { "epoch": 0.25918898025159887, "grad_norm": 0.4847404658794403, "learning_rate": 0.00017701617272974275, "loss": 0.1919, "step": 18440 }, { "epoch": 0.2593295382669197, "grad_norm": 0.44709765911102295, "learning_rate": 0.00017698651579985285, "loss": 0.2095, "step": 18450 }, { "epoch": 0.2594700962822405, "grad_norm": 0.4158313572406769, "learning_rate": 0.00017695684223629365, "loss": 0.1873, "step": 18460 }, { "epoch": 0.2596106542975613, "grad_norm": 0.4474424719810486, "learning_rate": 0.0001769271520454764, "loss": 0.2023, "step": 18470 }, { "epoch": 0.2597512123128821, "grad_norm": 0.5589454174041748, "learning_rate": 0.0001768974452338159, "loss": 0.2115, "step": 18480 }, { "epoch": 0.259891770328203, "grad_norm": 0.3891916573047638, "learning_rate": 0.00017686772180773065, "loss": 0.2203, "step": 18490 }, { "epoch": 0.2600323283435238, "grad_norm": 0.43455711007118225, "learning_rate": 0.00017683798177364266, "loss": 0.2092, "step": 18500 }, { "epoch": 0.2601728863588446, "grad_norm": 0.4177962839603424, "learning_rate": 0.00017680822513797757, "loss": 0.2134, "step": 18510 }, { "epoch": 0.2603134443741654, "grad_norm": 0.4519466757774353, "learning_rate": 0.00017677845190716454, "loss": 0.196, "step": 18520 }, { "epoch": 0.26045400238948624, "grad_norm": 0.47306278347969055, "learning_rate": 0.00017674866208763638, "loss": 0.2026, "step": 18530 }, { "epoch": 0.2605945604048071, "grad_norm": 0.4423769414424896, "learning_rate": 0.00017671885568582946, "loss": 0.2158, "step": 18540 }, { "epoch": 0.2607351184201279, "grad_norm": 0.4966670274734497, "learning_rate": 0.00017668903270818377, "loss": 0.2113, "step": 18550 }, { "epoch": 0.26087567643544873, "grad_norm": 0.40313148498535156, "learning_rate": 0.0001766591931611428, "loss": 0.2062, "step": 18560 }, { "epoch": 0.26101623445076955, "grad_norm": 0.37123391032218933, "learning_rate": 0.0001766293370511537, "loss": 0.2014, "step": 18570 }, { "epoch": 0.26115679246609036, "grad_norm": 0.5386476516723633, "learning_rate": 0.00017659946438466714, "loss": 0.202, "step": 18580 }, { "epoch": 0.26129735048141123, "grad_norm": 0.5218870639801025, "learning_rate": 0.00017656957516813744, "loss": 0.211, "step": 18590 }, { "epoch": 0.26143790849673204, "grad_norm": 0.4854200482368469, "learning_rate": 0.0001765396694080224, "loss": 0.1978, "step": 18600 }, { "epoch": 0.26157846651205285, "grad_norm": 0.4300956428050995, "learning_rate": 0.00017650974711078346, "loss": 0.1931, "step": 18610 }, { "epoch": 0.26171902452737367, "grad_norm": 0.5700703859329224, "learning_rate": 0.0001764798082828856, "loss": 0.2024, "step": 18620 }, { "epoch": 0.2618595825426945, "grad_norm": 0.4762032926082611, "learning_rate": 0.00017644985293079746, "loss": 0.2197, "step": 18630 }, { "epoch": 0.2620001405580153, "grad_norm": 0.35937386751174927, "learning_rate": 0.00017641988106099107, "loss": 0.212, "step": 18640 }, { "epoch": 0.26214069857333616, "grad_norm": 0.3749754726886749, "learning_rate": 0.00017638989267994225, "loss": 0.1959, "step": 18650 }, { "epoch": 0.262281256588657, "grad_norm": 0.3686469793319702, "learning_rate": 0.00017635988779413019, "loss": 0.1866, "step": 18660 }, { "epoch": 0.2624218146039778, "grad_norm": 0.5276985764503479, "learning_rate": 0.00017632986641003776, "loss": 0.229, "step": 18670 }, { "epoch": 0.2625623726192986, "grad_norm": 0.3779345452785492, "learning_rate": 0.00017629982853415138, "loss": 0.1868, "step": 18680 }, { "epoch": 0.2627029306346194, "grad_norm": 0.5236541032791138, "learning_rate": 0.000176269774172961, "loss": 0.2268, "step": 18690 }, { "epoch": 0.2628434886499403, "grad_norm": 0.4989383816719055, "learning_rate": 0.00017623970333296017, "loss": 0.2125, "step": 18700 }, { "epoch": 0.2629840466652611, "grad_norm": 0.47551271319389343, "learning_rate": 0.00017620961602064593, "loss": 0.2148, "step": 18710 }, { "epoch": 0.2631246046805819, "grad_norm": 0.4368431568145752, "learning_rate": 0.000176179512242519, "loss": 0.2271, "step": 18720 }, { "epoch": 0.2632651626959027, "grad_norm": 0.6284667253494263, "learning_rate": 0.00017614939200508357, "loss": 0.214, "step": 18730 }, { "epoch": 0.26340572071122353, "grad_norm": 0.42767229676246643, "learning_rate": 0.00017611925531484736, "loss": 0.2123, "step": 18740 }, { "epoch": 0.2635462787265444, "grad_norm": 0.6016656160354614, "learning_rate": 0.0001760891021783217, "loss": 0.2052, "step": 18750 }, { "epoch": 0.2636868367418652, "grad_norm": 0.43791624903678894, "learning_rate": 0.00017605893260202153, "loss": 0.2119, "step": 18760 }, { "epoch": 0.26382739475718603, "grad_norm": 0.3640478849411011, "learning_rate": 0.0001760287465924652, "loss": 0.2138, "step": 18770 }, { "epoch": 0.26396795277250684, "grad_norm": 0.38699495792388916, "learning_rate": 0.00017599854415617473, "loss": 0.1983, "step": 18780 }, { "epoch": 0.26410851078782765, "grad_norm": 0.4709777235984802, "learning_rate": 0.0001759683252996756, "loss": 0.2169, "step": 18790 }, { "epoch": 0.2642490688031485, "grad_norm": 0.42550647258758545, "learning_rate": 0.0001759380900294969, "loss": 0.2114, "step": 18800 }, { "epoch": 0.26438962681846934, "grad_norm": 0.5002481341362, "learning_rate": 0.00017590783835217126, "loss": 0.2095, "step": 18810 }, { "epoch": 0.26453018483379015, "grad_norm": 0.4592345654964447, "learning_rate": 0.00017587757027423483, "loss": 0.1928, "step": 18820 }, { "epoch": 0.26467074284911096, "grad_norm": 0.3958444595336914, "learning_rate": 0.00017584728580222734, "loss": 0.228, "step": 18830 }, { "epoch": 0.2648113008644318, "grad_norm": 0.3654407858848572, "learning_rate": 0.00017581698494269202, "loss": 0.2025, "step": 18840 }, { "epoch": 0.26495185887975264, "grad_norm": 0.4766884446144104, "learning_rate": 0.00017578666770217563, "loss": 0.1977, "step": 18850 }, { "epoch": 0.26509241689507346, "grad_norm": 0.4469415545463562, "learning_rate": 0.00017575633408722853, "loss": 0.1858, "step": 18860 }, { "epoch": 0.26523297491039427, "grad_norm": 0.47276103496551514, "learning_rate": 0.0001757259841044046, "loss": 0.2349, "step": 18870 }, { "epoch": 0.2653735329257151, "grad_norm": 0.4530576765537262, "learning_rate": 0.00017569561776026123, "loss": 0.2035, "step": 18880 }, { "epoch": 0.2655140909410359, "grad_norm": 0.4598478376865387, "learning_rate": 0.00017566523506135935, "loss": 0.189, "step": 18890 }, { "epoch": 0.2656546489563567, "grad_norm": 0.5444883704185486, "learning_rate": 0.0001756348360142634, "loss": 0.1917, "step": 18900 }, { "epoch": 0.2657952069716776, "grad_norm": 0.5340624451637268, "learning_rate": 0.00017560442062554147, "loss": 0.2025, "step": 18910 }, { "epoch": 0.2659357649869984, "grad_norm": 0.5737981200218201, "learning_rate": 0.00017557398890176502, "loss": 0.1819, "step": 18920 }, { "epoch": 0.2660763230023192, "grad_norm": 0.5385230779647827, "learning_rate": 0.00017554354084950915, "loss": 0.2375, "step": 18930 }, { "epoch": 0.26621688101764, "grad_norm": 0.49425017833709717, "learning_rate": 0.0001755130764753524, "loss": 0.216, "step": 18940 }, { "epoch": 0.26635743903296083, "grad_norm": 0.44835686683654785, "learning_rate": 0.00017548259578587696, "loss": 0.216, "step": 18950 }, { "epoch": 0.2664979970482817, "grad_norm": 0.4047083258628845, "learning_rate": 0.0001754520987876684, "loss": 0.1913, "step": 18960 }, { "epoch": 0.2666385550636025, "grad_norm": 0.4027661383152008, "learning_rate": 0.00017542158548731598, "loss": 0.1955, "step": 18970 }, { "epoch": 0.2667791130789233, "grad_norm": 0.4459643065929413, "learning_rate": 0.0001753910558914123, "loss": 0.2197, "step": 18980 }, { "epoch": 0.26691967109424414, "grad_norm": 0.3401888906955719, "learning_rate": 0.0001753605100065536, "loss": 0.2196, "step": 18990 }, { "epoch": 0.26706022910956495, "grad_norm": 0.46558499336242676, "learning_rate": 0.00017532994783933962, "loss": 0.2281, "step": 19000 }, { "epoch": 0.26706022910956495, "eval_chrf": 76.47169301348917, "eval_loss": 0.4275648593902588, "eval_runtime": 326.6959, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.012, "step": 19000 }, { "epoch": 0.2672007871248858, "grad_norm": 0.4833649694919586, "learning_rate": 0.0001752993693963736, "loss": 0.182, "step": 19010 }, { "epoch": 0.26734134514020663, "grad_norm": 0.5820573568344116, "learning_rate": 0.0001752687746842623, "loss": 0.2045, "step": 19020 }, { "epoch": 0.26748190315552745, "grad_norm": 0.5024533271789551, "learning_rate": 0.00017523816370961602, "loss": 0.1888, "step": 19030 }, { "epoch": 0.26762246117084826, "grad_norm": 0.4784148037433624, "learning_rate": 0.0001752075364790485, "loss": 0.2106, "step": 19040 }, { "epoch": 0.26776301918616907, "grad_norm": 0.5217591524124146, "learning_rate": 0.0001751768929991771, "loss": 0.2359, "step": 19050 }, { "epoch": 0.26790357720148994, "grad_norm": 0.43953946232795715, "learning_rate": 0.0001751462332766226, "loss": 0.2068, "step": 19060 }, { "epoch": 0.26804413521681075, "grad_norm": 0.5105519890785217, "learning_rate": 0.00017511555731800936, "loss": 0.2008, "step": 19070 }, { "epoch": 0.26818469323213157, "grad_norm": 0.4403964877128601, "learning_rate": 0.00017508486512996516, "loss": 0.2134, "step": 19080 }, { "epoch": 0.2683252512474524, "grad_norm": 0.5443793535232544, "learning_rate": 0.00017505415671912139, "loss": 0.2092, "step": 19090 }, { "epoch": 0.2684658092627732, "grad_norm": 0.3778785765171051, "learning_rate": 0.00017502343209211288, "loss": 0.2164, "step": 19100 }, { "epoch": 0.26860636727809406, "grad_norm": 0.5645463466644287, "learning_rate": 0.000174992691255578, "loss": 0.201, "step": 19110 }, { "epoch": 0.2687469252934149, "grad_norm": 0.444047749042511, "learning_rate": 0.00017496193421615851, "loss": 0.1811, "step": 19120 }, { "epoch": 0.2688874833087357, "grad_norm": 0.4804798662662506, "learning_rate": 0.00017493116098049987, "loss": 0.2073, "step": 19130 }, { "epoch": 0.2690280413240565, "grad_norm": 0.417547345161438, "learning_rate": 0.00017490037155525086, "loss": 0.1938, "step": 19140 }, { "epoch": 0.2691685993393773, "grad_norm": 0.5173671841621399, "learning_rate": 0.00017486956594706387, "loss": 0.2288, "step": 19150 }, { "epoch": 0.2693091573546981, "grad_norm": 0.4034634232521057, "learning_rate": 0.00017483874416259471, "loss": 0.1969, "step": 19160 }, { "epoch": 0.269449715370019, "grad_norm": 0.3967057168483734, "learning_rate": 0.00017480790620850276, "loss": 0.2383, "step": 19170 }, { "epoch": 0.2695902733853398, "grad_norm": 0.4266200661659241, "learning_rate": 0.00017477705209145082, "loss": 0.2376, "step": 19180 }, { "epoch": 0.2697308314006606, "grad_norm": 0.3088657557964325, "learning_rate": 0.00017474618181810523, "loss": 0.2151, "step": 19190 }, { "epoch": 0.26987138941598143, "grad_norm": 0.5541336536407471, "learning_rate": 0.0001747152953951358, "loss": 0.2106, "step": 19200 }, { "epoch": 0.27001194743130225, "grad_norm": 0.4477527439594269, "learning_rate": 0.00017468439282921584, "loss": 0.2028, "step": 19210 }, { "epoch": 0.2701525054466231, "grad_norm": 0.4126959443092346, "learning_rate": 0.00017465347412702213, "loss": 0.1946, "step": 19220 }, { "epoch": 0.2702930634619439, "grad_norm": 0.3782075047492981, "learning_rate": 0.00017462253929523497, "loss": 0.1879, "step": 19230 }, { "epoch": 0.27043362147726474, "grad_norm": 0.3895922899246216, "learning_rate": 0.00017459158834053814, "loss": 0.2412, "step": 19240 }, { "epoch": 0.27057417949258555, "grad_norm": 0.45436742901802063, "learning_rate": 0.00017456062126961882, "loss": 0.2113, "step": 19250 }, { "epoch": 0.27071473750790637, "grad_norm": 0.41081178188323975, "learning_rate": 0.00017452963808916786, "loss": 0.1954, "step": 19260 }, { "epoch": 0.27085529552322724, "grad_norm": 0.46563029289245605, "learning_rate": 0.00017449863880587932, "loss": 0.1945, "step": 19270 }, { "epoch": 0.27099585353854805, "grad_norm": 0.44589686393737793, "learning_rate": 0.000174467623426451, "loss": 0.1926, "step": 19280 }, { "epoch": 0.27113641155386886, "grad_norm": 0.481802761554718, "learning_rate": 0.000174436591957584, "loss": 0.2063, "step": 19290 }, { "epoch": 0.2712769695691897, "grad_norm": 0.5556591749191284, "learning_rate": 0.00017440554440598306, "loss": 0.2105, "step": 19300 }, { "epoch": 0.2714175275845105, "grad_norm": 0.6346491575241089, "learning_rate": 0.00017437448077835618, "loss": 0.2069, "step": 19310 }, { "epoch": 0.27155808559983136, "grad_norm": 0.32252898812294006, "learning_rate": 0.00017434340108141505, "loss": 0.1964, "step": 19320 }, { "epoch": 0.27169864361515217, "grad_norm": 0.5045115947723389, "learning_rate": 0.00017431230532187467, "loss": 0.2325, "step": 19330 }, { "epoch": 0.271839201630473, "grad_norm": 0.8819959163665771, "learning_rate": 0.00017428119350645363, "loss": 0.2, "step": 19340 }, { "epoch": 0.2719797596457938, "grad_norm": 0.4297947883605957, "learning_rate": 0.00017425006564187386, "loss": 0.1934, "step": 19350 }, { "epoch": 0.2721203176611146, "grad_norm": 0.4750787913799286, "learning_rate": 0.0001742189217348609, "loss": 0.205, "step": 19360 }, { "epoch": 0.2722608756764354, "grad_norm": 0.3775157034397125, "learning_rate": 0.00017418776179214365, "loss": 0.1828, "step": 19370 }, { "epoch": 0.2724014336917563, "grad_norm": 0.5501800179481506, "learning_rate": 0.00017415658582045453, "loss": 0.1994, "step": 19380 }, { "epoch": 0.2725419917070771, "grad_norm": 0.6898669004440308, "learning_rate": 0.0001741253938265294, "loss": 0.1744, "step": 19390 }, { "epoch": 0.2726825497223979, "grad_norm": 0.49606406688690186, "learning_rate": 0.00017409418581710757, "loss": 0.1686, "step": 19400 }, { "epoch": 0.27282310773771873, "grad_norm": 0.44804930686950684, "learning_rate": 0.00017406296179893187, "loss": 0.2089, "step": 19410 }, { "epoch": 0.27296366575303954, "grad_norm": 0.38459184765815735, "learning_rate": 0.00017403172177874848, "loss": 0.1954, "step": 19420 }, { "epoch": 0.2731042237683604, "grad_norm": 0.48969027400016785, "learning_rate": 0.00017400046576330718, "loss": 0.2108, "step": 19430 }, { "epoch": 0.2732447817836812, "grad_norm": 0.42338326573371887, "learning_rate": 0.00017396919375936107, "loss": 0.2352, "step": 19440 }, { "epoch": 0.27338533979900204, "grad_norm": 0.45828261971473694, "learning_rate": 0.0001739379057736668, "loss": 0.1963, "step": 19450 }, { "epoch": 0.27352589781432285, "grad_norm": 0.42012134194374084, "learning_rate": 0.00017390660181298441, "loss": 0.1928, "step": 19460 }, { "epoch": 0.27366645582964366, "grad_norm": 0.548736572265625, "learning_rate": 0.00017387528188407745, "loss": 0.2258, "step": 19470 }, { "epoch": 0.27380701384496453, "grad_norm": 0.41380250453948975, "learning_rate": 0.00017384394599371286, "loss": 0.1968, "step": 19480 }, { "epoch": 0.27394757186028534, "grad_norm": 0.5325862765312195, "learning_rate": 0.00017381259414866107, "loss": 0.207, "step": 19490 }, { "epoch": 0.27408812987560616, "grad_norm": 0.39338448643684387, "learning_rate": 0.00017378122635569593, "loss": 0.1751, "step": 19500 }, { "epoch": 0.27422868789092697, "grad_norm": 0.4523838460445404, "learning_rate": 0.0001737498426215948, "loss": 0.1822, "step": 19510 }, { "epoch": 0.2743692459062478, "grad_norm": 0.6740010976791382, "learning_rate": 0.00017371844295313837, "loss": 0.206, "step": 19520 }, { "epoch": 0.27450980392156865, "grad_norm": 0.4458725154399872, "learning_rate": 0.00017368702735711086, "loss": 0.1859, "step": 19530 }, { "epoch": 0.27465036193688946, "grad_norm": 0.41209864616394043, "learning_rate": 0.00017365559584029994, "loss": 0.2018, "step": 19540 }, { "epoch": 0.2747909199522103, "grad_norm": 0.38490617275238037, "learning_rate": 0.00017362414840949662, "loss": 0.1948, "step": 19550 }, { "epoch": 0.2749314779675311, "grad_norm": 0.3892601728439331, "learning_rate": 0.00017359268507149552, "loss": 0.2101, "step": 19560 }, { "epoch": 0.2750720359828519, "grad_norm": 0.4824336767196655, "learning_rate": 0.00017356120583309447, "loss": 0.2029, "step": 19570 }, { "epoch": 0.27521259399817277, "grad_norm": 0.3767576813697815, "learning_rate": 0.00017352971070109494, "loss": 0.1963, "step": 19580 }, { "epoch": 0.2753531520134936, "grad_norm": 0.4037289321422577, "learning_rate": 0.00017349819968230174, "loss": 0.1774, "step": 19590 }, { "epoch": 0.2754937100288144, "grad_norm": 0.4326764643192291, "learning_rate": 0.00017346667278352312, "loss": 0.2109, "step": 19600 }, { "epoch": 0.2756342680441352, "grad_norm": 0.39291003346443176, "learning_rate": 0.00017343513001157073, "loss": 0.1853, "step": 19610 }, { "epoch": 0.275774826059456, "grad_norm": 0.4207342267036438, "learning_rate": 0.00017340357137325975, "loss": 0.2188, "step": 19620 }, { "epoch": 0.27591538407477684, "grad_norm": 0.5787175297737122, "learning_rate": 0.0001733719968754087, "loss": 0.2112, "step": 19630 }, { "epoch": 0.2760559420900977, "grad_norm": 0.35075294971466064, "learning_rate": 0.00017334040652483958, "loss": 0.1989, "step": 19640 }, { "epoch": 0.2761965001054185, "grad_norm": 0.5642573833465576, "learning_rate": 0.00017330880032837768, "loss": 0.2036, "step": 19650 }, { "epoch": 0.27633705812073933, "grad_norm": 0.5270206332206726, "learning_rate": 0.00017327717829285195, "loss": 0.2034, "step": 19660 }, { "epoch": 0.27647761613606014, "grad_norm": 0.5013365745544434, "learning_rate": 0.00017324554042509455, "loss": 0.1999, "step": 19670 }, { "epoch": 0.27661817415138096, "grad_norm": 0.41403472423553467, "learning_rate": 0.00017321388673194115, "loss": 0.2034, "step": 19680 }, { "epoch": 0.2767587321667018, "grad_norm": 0.4933346211910248, "learning_rate": 0.00017318221722023087, "loss": 0.2119, "step": 19690 }, { "epoch": 0.27689929018202264, "grad_norm": 0.4709338843822479, "learning_rate": 0.00017315053189680618, "loss": 0.1887, "step": 19700 }, { "epoch": 0.27703984819734345, "grad_norm": 0.5111008882522583, "learning_rate": 0.000173118830768513, "loss": 0.2051, "step": 19710 }, { "epoch": 0.27718040621266427, "grad_norm": 0.5342327952384949, "learning_rate": 0.00017308711384220066, "loss": 0.2095, "step": 19720 }, { "epoch": 0.2773209642279851, "grad_norm": 0.4055479168891907, "learning_rate": 0.00017305538112472193, "loss": 0.2163, "step": 19730 }, { "epoch": 0.27746152224330595, "grad_norm": 0.4838210344314575, "learning_rate": 0.00017302363262293292, "loss": 0.1924, "step": 19740 }, { "epoch": 0.27760208025862676, "grad_norm": 0.4406696557998657, "learning_rate": 0.0001729918683436932, "loss": 0.2102, "step": 19750 }, { "epoch": 0.2777426382739476, "grad_norm": 0.4246480464935303, "learning_rate": 0.0001729600882938658, "loss": 0.1846, "step": 19760 }, { "epoch": 0.2778831962892684, "grad_norm": 0.35406747460365295, "learning_rate": 0.00017292829248031705, "loss": 0.1803, "step": 19770 }, { "epoch": 0.2780237543045892, "grad_norm": 0.4278491735458374, "learning_rate": 0.00017289648090991673, "loss": 0.2352, "step": 19780 }, { "epoch": 0.27816431231991007, "grad_norm": 0.3947303295135498, "learning_rate": 0.0001728646535895381, "loss": 0.2102, "step": 19790 }, { "epoch": 0.2783048703352309, "grad_norm": 0.4705360233783722, "learning_rate": 0.00017283281052605766, "loss": 0.1874, "step": 19800 }, { "epoch": 0.2784454283505517, "grad_norm": 0.48299962282180786, "learning_rate": 0.0001728009517263555, "loss": 0.19, "step": 19810 }, { "epoch": 0.2785859863658725, "grad_norm": 0.5594393014907837, "learning_rate": 0.00017276907719731495, "loss": 0.2374, "step": 19820 }, { "epoch": 0.2787265443811933, "grad_norm": 0.351291686296463, "learning_rate": 0.00017273718694582285, "loss": 0.2106, "step": 19830 }, { "epoch": 0.2788671023965142, "grad_norm": 0.44059497117996216, "learning_rate": 0.00017270528097876935, "loss": 0.2039, "step": 19840 }, { "epoch": 0.279007660411835, "grad_norm": 0.35108813643455505, "learning_rate": 0.00017267335930304807, "loss": 0.179, "step": 19850 }, { "epoch": 0.2791482184271558, "grad_norm": 0.3934147357940674, "learning_rate": 0.00017264142192555598, "loss": 0.2089, "step": 19860 }, { "epoch": 0.2792887764424766, "grad_norm": 0.4623779356479645, "learning_rate": 0.00017260946885319344, "loss": 0.2022, "step": 19870 }, { "epoch": 0.27942933445779744, "grad_norm": 0.9234285354614258, "learning_rate": 0.00017257750009286427, "loss": 0.1883, "step": 19880 }, { "epoch": 0.27956989247311825, "grad_norm": 0.47531360387802124, "learning_rate": 0.00017254551565147556, "loss": 0.2205, "step": 19890 }, { "epoch": 0.2797104504884391, "grad_norm": 0.3452872335910797, "learning_rate": 0.0001725135155359379, "loss": 0.1775, "step": 19900 }, { "epoch": 0.27985100850375993, "grad_norm": 0.4100421369075775, "learning_rate": 0.00017248149975316517, "loss": 0.2196, "step": 19910 }, { "epoch": 0.27999156651908075, "grad_norm": 0.38270118832588196, "learning_rate": 0.00017244946831007472, "loss": 0.2085, "step": 19920 }, { "epoch": 0.28013212453440156, "grad_norm": 0.46484166383743286, "learning_rate": 0.00017241742121358727, "loss": 0.1854, "step": 19930 }, { "epoch": 0.2802726825497224, "grad_norm": 0.3831994831562042, "learning_rate": 0.00017238535847062682, "loss": 0.2175, "step": 19940 }, { "epoch": 0.28041324056504324, "grad_norm": 0.5021612644195557, "learning_rate": 0.00017235328008812092, "loss": 0.2161, "step": 19950 }, { "epoch": 0.28055379858036406, "grad_norm": 0.5603237748146057, "learning_rate": 0.00017232118607300037, "loss": 0.2288, "step": 19960 }, { "epoch": 0.28069435659568487, "grad_norm": 0.3286333680152893, "learning_rate": 0.00017228907643219939, "loss": 0.1912, "step": 19970 }, { "epoch": 0.2808349146110057, "grad_norm": 0.36596253514289856, "learning_rate": 0.00017225695117265556, "loss": 0.1874, "step": 19980 }, { "epoch": 0.2809754726263265, "grad_norm": 0.42818084359169006, "learning_rate": 0.00017222481030130987, "loss": 0.1769, "step": 19990 }, { "epoch": 0.28111603064164736, "grad_norm": 0.6397153735160828, "learning_rate": 0.00017219265382510663, "loss": 0.2251, "step": 20000 }, { "epoch": 0.28111603064164736, "eval_chrf": 74.02033345888455, "eval_loss": 0.42471030354499817, "eval_runtime": 286.3507, "eval_samples_per_second": 0.349, "eval_steps_per_second": 0.014, "step": 20000 }, { "epoch": 0.2812565886569682, "grad_norm": 0.41157883405685425, "learning_rate": 0.00017216048175099357, "loss": 0.2134, "step": 20010 }, { "epoch": 0.281397146672289, "grad_norm": 0.5360423922538757, "learning_rate": 0.0001721282940859218, "loss": 0.2334, "step": 20020 }, { "epoch": 0.2815377046876098, "grad_norm": 0.45648783445358276, "learning_rate": 0.00017209609083684575, "loss": 0.1913, "step": 20030 }, { "epoch": 0.2816782627029306, "grad_norm": 0.32651346921920776, "learning_rate": 0.00017206387201072323, "loss": 0.1697, "step": 20040 }, { "epoch": 0.2818188207182515, "grad_norm": 0.4762553870677948, "learning_rate": 0.00017203163761451545, "loss": 0.2135, "step": 20050 }, { "epoch": 0.2819593787335723, "grad_norm": 0.5247132182121277, "learning_rate": 0.0001719993876551869, "loss": 0.2065, "step": 20060 }, { "epoch": 0.2820999367488931, "grad_norm": 0.5137999057769775, "learning_rate": 0.00017196712213970555, "loss": 0.1927, "step": 20070 }, { "epoch": 0.2822404947642139, "grad_norm": 0.5455237030982971, "learning_rate": 0.00017193484107504267, "loss": 0.2025, "step": 20080 }, { "epoch": 0.28238105277953474, "grad_norm": 0.37664520740509033, "learning_rate": 0.00017190254446817284, "loss": 0.2149, "step": 20090 }, { "epoch": 0.2825216107948556, "grad_norm": 0.35842660069465637, "learning_rate": 0.0001718702323260741, "loss": 0.2263, "step": 20100 }, { "epoch": 0.2826621688101764, "grad_norm": 0.42714473605155945, "learning_rate": 0.00017183790465572779, "loss": 0.1862, "step": 20110 }, { "epoch": 0.28280272682549723, "grad_norm": 0.5020290613174438, "learning_rate": 0.00017180556146411858, "loss": 0.1904, "step": 20120 }, { "epoch": 0.28294328484081804, "grad_norm": 0.6206112504005432, "learning_rate": 0.00017177320275823453, "loss": 0.1985, "step": 20130 }, { "epoch": 0.28308384285613886, "grad_norm": 0.4329967200756073, "learning_rate": 0.00017174082854506706, "loss": 0.1947, "step": 20140 }, { "epoch": 0.28322440087145967, "grad_norm": 0.3217017650604248, "learning_rate": 0.00017170843883161093, "loss": 0.1752, "step": 20150 }, { "epoch": 0.28336495888678054, "grad_norm": 0.5323647856712341, "learning_rate": 0.00017167603362486426, "loss": 0.1975, "step": 20160 }, { "epoch": 0.28350551690210135, "grad_norm": 0.39277899265289307, "learning_rate": 0.0001716436129318285, "loss": 0.1985, "step": 20170 }, { "epoch": 0.28364607491742216, "grad_norm": 0.3518845736980438, "learning_rate": 0.00017161117675950838, "loss": 0.2053, "step": 20180 }, { "epoch": 0.283786632932743, "grad_norm": 0.3981213867664337, "learning_rate": 0.00017157872511491213, "loss": 0.197, "step": 20190 }, { "epoch": 0.2839271909480638, "grad_norm": 0.41575250029563904, "learning_rate": 0.0001715462580050512, "loss": 0.2127, "step": 20200 }, { "epoch": 0.28406774896338466, "grad_norm": 0.5877156257629395, "learning_rate": 0.00017151377543694042, "loss": 0.212, "step": 20210 }, { "epoch": 0.28420830697870547, "grad_norm": 0.6077449917793274, "learning_rate": 0.00017148127741759797, "loss": 0.1971, "step": 20220 }, { "epoch": 0.2843488649940263, "grad_norm": 0.406627893447876, "learning_rate": 0.00017144876395404533, "loss": 0.1944, "step": 20230 }, { "epoch": 0.2844894230093471, "grad_norm": 0.4879082143306732, "learning_rate": 0.00017141623505330738, "loss": 0.2024, "step": 20240 }, { "epoch": 0.2846299810246679, "grad_norm": 0.3847009539604187, "learning_rate": 0.00017138369072241226, "loss": 0.2069, "step": 20250 }, { "epoch": 0.2847705390399888, "grad_norm": 0.597709596157074, "learning_rate": 0.0001713511309683915, "loss": 0.1977, "step": 20260 }, { "epoch": 0.2849110970553096, "grad_norm": 0.5190564393997192, "learning_rate": 0.00017131855579827993, "loss": 0.1811, "step": 20270 }, { "epoch": 0.2850516550706304, "grad_norm": 0.43286004662513733, "learning_rate": 0.00017128596521911579, "loss": 0.2033, "step": 20280 }, { "epoch": 0.2851922130859512, "grad_norm": 0.380369633436203, "learning_rate": 0.00017125335923794048, "loss": 0.2236, "step": 20290 }, { "epoch": 0.28533277110127203, "grad_norm": 0.46421048045158386, "learning_rate": 0.00017122073786179893, "loss": 0.2127, "step": 20300 }, { "epoch": 0.2854733291165929, "grad_norm": 0.38199180364608765, "learning_rate": 0.00017118810109773924, "loss": 0.2146, "step": 20310 }, { "epoch": 0.2856138871319137, "grad_norm": 0.434123694896698, "learning_rate": 0.00017115544895281292, "loss": 0.2098, "step": 20320 }, { "epoch": 0.2857544451472345, "grad_norm": 0.6215119957923889, "learning_rate": 0.00017112278143407473, "loss": 0.2239, "step": 20330 }, { "epoch": 0.28589500316255534, "grad_norm": 0.3718737065792084, "learning_rate": 0.00017109009854858286, "loss": 0.1964, "step": 20340 }, { "epoch": 0.28603556117787615, "grad_norm": 0.4760577976703644, "learning_rate": 0.00017105740030339876, "loss": 0.1954, "step": 20350 }, { "epoch": 0.28617611919319697, "grad_norm": 0.48852115869522095, "learning_rate": 0.00017102468670558712, "loss": 0.2071, "step": 20360 }, { "epoch": 0.28631667720851783, "grad_norm": 0.31590837240219116, "learning_rate": 0.00017099195776221614, "loss": 0.1875, "step": 20370 }, { "epoch": 0.28645723522383865, "grad_norm": 0.4684780538082123, "learning_rate": 0.00017095921348035708, "loss": 0.1818, "step": 20380 }, { "epoch": 0.28659779323915946, "grad_norm": 0.4114950895309448, "learning_rate": 0.00017092645386708477, "loss": 0.1767, "step": 20390 }, { "epoch": 0.2867383512544803, "grad_norm": 0.5756374001502991, "learning_rate": 0.00017089367892947718, "loss": 0.2108, "step": 20400 }, { "epoch": 0.2868789092698011, "grad_norm": 0.45275378227233887, "learning_rate": 0.0001708608886746157, "loss": 0.2035, "step": 20410 }, { "epoch": 0.28701946728512195, "grad_norm": 0.6170960664749146, "learning_rate": 0.0001708280831095849, "loss": 0.2236, "step": 20420 }, { "epoch": 0.28716002530044277, "grad_norm": 0.5411795377731323, "learning_rate": 0.00017079526224147277, "loss": 0.2219, "step": 20430 }, { "epoch": 0.2873005833157636, "grad_norm": 0.5155619382858276, "learning_rate": 0.00017076242607737057, "loss": 0.2233, "step": 20440 }, { "epoch": 0.2874411413310844, "grad_norm": 0.4209650754928589, "learning_rate": 0.0001707295746243729, "loss": 0.1939, "step": 20450 }, { "epoch": 0.2875816993464052, "grad_norm": 0.5280047655105591, "learning_rate": 0.00017069670788957757, "loss": 0.2113, "step": 20460 }, { "epoch": 0.2877222573617261, "grad_norm": 0.43174266815185547, "learning_rate": 0.00017066382588008577, "loss": 0.2134, "step": 20470 }, { "epoch": 0.2878628153770469, "grad_norm": 0.3827521502971649, "learning_rate": 0.000170630928603002, "loss": 0.2027, "step": 20480 }, { "epoch": 0.2880033733923677, "grad_norm": 0.41761866211891174, "learning_rate": 0.000170598016065434, "loss": 0.2044, "step": 20490 }, { "epoch": 0.2881439314076885, "grad_norm": 0.5618655681610107, "learning_rate": 0.00017056508827449288, "loss": 0.1894, "step": 20500 }, { "epoch": 0.2882844894230093, "grad_norm": 0.564956784248352, "learning_rate": 0.0001705321452372929, "loss": 0.2218, "step": 20510 }, { "epoch": 0.2884250474383302, "grad_norm": 0.5125460624694824, "learning_rate": 0.00017049918696095183, "loss": 0.1988, "step": 20520 }, { "epoch": 0.288565605453651, "grad_norm": 0.40226978063583374, "learning_rate": 0.00017046621345259057, "loss": 0.2005, "step": 20530 }, { "epoch": 0.2887061634689718, "grad_norm": 0.46868157386779785, "learning_rate": 0.0001704332247193334, "loss": 0.1879, "step": 20540 }, { "epoch": 0.28884672148429263, "grad_norm": 0.42704060673713684, "learning_rate": 0.00017040022076830774, "loss": 0.2064, "step": 20550 }, { "epoch": 0.28898727949961345, "grad_norm": 0.4820135235786438, "learning_rate": 0.0001703672016066445, "loss": 0.1926, "step": 20560 }, { "epoch": 0.2891278375149343, "grad_norm": 0.3230167627334595, "learning_rate": 0.0001703341672414778, "loss": 0.1874, "step": 20570 }, { "epoch": 0.28926839553025513, "grad_norm": 0.49147534370422363, "learning_rate": 0.00017030111767994498, "loss": 0.2336, "step": 20580 }, { "epoch": 0.28940895354557594, "grad_norm": 0.5110415816307068, "learning_rate": 0.00017026805292918675, "loss": 0.1897, "step": 20590 }, { "epoch": 0.28954951156089676, "grad_norm": 0.36359795928001404, "learning_rate": 0.000170234972996347, "loss": 0.1973, "step": 20600 }, { "epoch": 0.28969006957621757, "grad_norm": 0.38895362615585327, "learning_rate": 0.00017020187788857304, "loss": 0.1973, "step": 20610 }, { "epoch": 0.2898306275915384, "grad_norm": 0.5448920726776123, "learning_rate": 0.00017016876761301533, "loss": 0.2058, "step": 20620 }, { "epoch": 0.28997118560685925, "grad_norm": 0.39889147877693176, "learning_rate": 0.0001701356421768277, "loss": 0.1964, "step": 20630 }, { "epoch": 0.29011174362218006, "grad_norm": 0.3628638684749603, "learning_rate": 0.00017010250158716717, "loss": 0.1901, "step": 20640 }, { "epoch": 0.2902523016375009, "grad_norm": 0.3719097375869751, "learning_rate": 0.0001700693458511941, "loss": 0.2037, "step": 20650 }, { "epoch": 0.2903928596528217, "grad_norm": 0.39121580123901367, "learning_rate": 0.00017003617497607216, "loss": 0.2085, "step": 20660 }, { "epoch": 0.2905334176681425, "grad_norm": 0.3391832411289215, "learning_rate": 0.00017000298896896812, "loss": 0.1819, "step": 20670 }, { "epoch": 0.29067397568346337, "grad_norm": 0.3420446813106537, "learning_rate": 0.00016996978783705223, "loss": 0.167, "step": 20680 }, { "epoch": 0.2908145336987842, "grad_norm": 0.6026040315628052, "learning_rate": 0.00016993657158749783, "loss": 0.2142, "step": 20690 }, { "epoch": 0.290955091714105, "grad_norm": 0.5469983816146851, "learning_rate": 0.0001699033402274817, "loss": 0.2045, "step": 20700 }, { "epoch": 0.2910956497294258, "grad_norm": 0.41889193654060364, "learning_rate": 0.00016987009376418373, "loss": 0.2001, "step": 20710 }, { "epoch": 0.2912362077447466, "grad_norm": 0.45113030076026917, "learning_rate": 0.0001698368322047871, "loss": 0.1858, "step": 20720 }, { "epoch": 0.2913767657600675, "grad_norm": 0.32660970091819763, "learning_rate": 0.0001698035555564784, "loss": 0.1993, "step": 20730 }, { "epoch": 0.2915173237753883, "grad_norm": 0.4603225588798523, "learning_rate": 0.00016977026382644726, "loss": 0.2065, "step": 20740 }, { "epoch": 0.2916578817907091, "grad_norm": 0.43970394134521484, "learning_rate": 0.0001697369570218867, "loss": 0.22, "step": 20750 }, { "epoch": 0.29179843980602993, "grad_norm": 0.36348602175712585, "learning_rate": 0.000169703635149993, "loss": 0.1755, "step": 20760 }, { "epoch": 0.29193899782135074, "grad_norm": 0.4576697051525116, "learning_rate": 0.00016967029821796567, "loss": 0.198, "step": 20770 }, { "epoch": 0.2920795558366716, "grad_norm": 0.4353881776332855, "learning_rate": 0.00016963694623300745, "loss": 0.1958, "step": 20780 }, { "epoch": 0.2922201138519924, "grad_norm": 0.5406979322433472, "learning_rate": 0.00016960357920232434, "loss": 0.1949, "step": 20790 }, { "epoch": 0.29236067186731324, "grad_norm": 0.4171971082687378, "learning_rate": 0.00016957019713312565, "loss": 0.1827, "step": 20800 }, { "epoch": 0.29250122988263405, "grad_norm": 0.3972976803779602, "learning_rate": 0.00016953680003262383, "loss": 0.1887, "step": 20810 }, { "epoch": 0.29264178789795486, "grad_norm": 0.34832844138145447, "learning_rate": 0.00016950338790803473, "loss": 0.209, "step": 20820 }, { "epoch": 0.29278234591327573, "grad_norm": 0.46556970477104187, "learning_rate": 0.00016946996076657727, "loss": 0.2036, "step": 20830 }, { "epoch": 0.29292290392859655, "grad_norm": 0.3853689432144165, "learning_rate": 0.00016943651861547375, "loss": 0.1986, "step": 20840 }, { "epoch": 0.29306346194391736, "grad_norm": 0.371705561876297, "learning_rate": 0.00016940306146194964, "loss": 0.1914, "step": 20850 }, { "epoch": 0.29320401995923817, "grad_norm": 0.3775120973587036, "learning_rate": 0.0001693695893132337, "loss": 0.2176, "step": 20860 }, { "epoch": 0.293344577974559, "grad_norm": 0.5206791162490845, "learning_rate": 0.0001693361021765579, "loss": 0.2141, "step": 20870 }, { "epoch": 0.2934851359898798, "grad_norm": 0.38626107573509216, "learning_rate": 0.00016930260005915742, "loss": 0.1928, "step": 20880 }, { "epoch": 0.29362569400520067, "grad_norm": 0.45110735297203064, "learning_rate": 0.00016926908296827078, "loss": 0.2122, "step": 20890 }, { "epoch": 0.2937662520205215, "grad_norm": 0.36143726110458374, "learning_rate": 0.00016923555091113957, "loss": 0.1837, "step": 20900 }, { "epoch": 0.2939068100358423, "grad_norm": 0.39780905842781067, "learning_rate": 0.00016920200389500885, "loss": 0.2166, "step": 20910 }, { "epoch": 0.2940473680511631, "grad_norm": 0.38996177911758423, "learning_rate": 0.00016916844192712662, "loss": 0.1919, "step": 20920 }, { "epoch": 0.2941879260664839, "grad_norm": 0.4880051910877228, "learning_rate": 0.00016913486501474436, "loss": 0.1985, "step": 20930 }, { "epoch": 0.2943284840818048, "grad_norm": 0.4125623106956482, "learning_rate": 0.00016910127316511664, "loss": 0.1913, "step": 20940 }, { "epoch": 0.2944690420971256, "grad_norm": 0.4753319025039673, "learning_rate": 0.00016906766638550134, "loss": 0.1957, "step": 20950 }, { "epoch": 0.2946096001124464, "grad_norm": 0.5684584975242615, "learning_rate": 0.0001690340446831595, "loss": 0.2181, "step": 20960 }, { "epoch": 0.2947501581277672, "grad_norm": 0.4074452221393585, "learning_rate": 0.00016900040806535537, "loss": 0.199, "step": 20970 }, { "epoch": 0.29489071614308804, "grad_norm": 0.42722028493881226, "learning_rate": 0.00016896675653935652, "loss": 0.2169, "step": 20980 }, { "epoch": 0.2950312741584089, "grad_norm": 0.5126949548721313, "learning_rate": 0.00016893309011243365, "loss": 0.1987, "step": 20990 }, { "epoch": 0.2951718321737297, "grad_norm": 0.6361300945281982, "learning_rate": 0.00016889940879186074, "loss": 0.2296, "step": 21000 }, { "epoch": 0.2951718321737297, "eval_chrf": 81.46533911299353, "eval_loss": 0.42531388998031616, "eval_runtime": 210.6293, "eval_samples_per_second": 0.475, "eval_steps_per_second": 0.019, "step": 21000 }, { "epoch": 0.29531239018905053, "grad_norm": 0.32635369896888733, "learning_rate": 0.0001688657125849149, "loss": 0.1957, "step": 21010 }, { "epoch": 0.29545294820437135, "grad_norm": 0.4398081600666046, "learning_rate": 0.0001688320014988766, "loss": 0.2096, "step": 21020 }, { "epoch": 0.29559350621969216, "grad_norm": 0.3880372941493988, "learning_rate": 0.00016879827554102937, "loss": 0.2124, "step": 21030 }, { "epoch": 0.295734064235013, "grad_norm": 0.3721957802772522, "learning_rate": 0.00016876453471866007, "loss": 0.2062, "step": 21040 }, { "epoch": 0.29587462225033384, "grad_norm": 0.4164586365222931, "learning_rate": 0.0001687307790390587, "loss": 0.1824, "step": 21050 }, { "epoch": 0.29601518026565465, "grad_norm": 0.4159533977508545, "learning_rate": 0.00016869700850951852, "loss": 0.1961, "step": 21060 }, { "epoch": 0.29615573828097547, "grad_norm": 0.368132084608078, "learning_rate": 0.00016866322313733594, "loss": 0.2022, "step": 21070 }, { "epoch": 0.2962962962962963, "grad_norm": 0.44499659538269043, "learning_rate": 0.0001686294229298106, "loss": 0.2088, "step": 21080 }, { "epoch": 0.2964368543116171, "grad_norm": 0.392949640750885, "learning_rate": 0.00016859560789424545, "loss": 0.1717, "step": 21090 }, { "epoch": 0.29657741232693796, "grad_norm": 0.33532652258872986, "learning_rate": 0.00016856177803794645, "loss": 0.2105, "step": 21100 }, { "epoch": 0.2967179703422588, "grad_norm": 0.425544798374176, "learning_rate": 0.0001685279333682229, "loss": 0.1839, "step": 21110 }, { "epoch": 0.2968585283575796, "grad_norm": 0.4223870038986206, "learning_rate": 0.00016849407389238726, "loss": 0.2251, "step": 21120 }, { "epoch": 0.2969990863729004, "grad_norm": 0.38955533504486084, "learning_rate": 0.00016846019961775518, "loss": 0.2222, "step": 21130 }, { "epoch": 0.2971396443882212, "grad_norm": 0.39352136850357056, "learning_rate": 0.00016842631055164557, "loss": 0.2088, "step": 21140 }, { "epoch": 0.2972802024035421, "grad_norm": 0.3265032172203064, "learning_rate": 0.0001683924067013804, "loss": 0.2031, "step": 21150 }, { "epoch": 0.2974207604188629, "grad_norm": 0.5114708542823792, "learning_rate": 0.000168358488074285, "loss": 0.2096, "step": 21160 }, { "epoch": 0.2975613184341837, "grad_norm": 0.3312976658344269, "learning_rate": 0.00016832455467768778, "loss": 0.1852, "step": 21170 }, { "epoch": 0.2977018764495045, "grad_norm": 0.3760843276977539, "learning_rate": 0.00016829060651892035, "loss": 0.2057, "step": 21180 }, { "epoch": 0.29784243446482533, "grad_norm": 0.4861770570278168, "learning_rate": 0.0001682566436053176, "loss": 0.1934, "step": 21190 }, { "epoch": 0.2979829924801462, "grad_norm": 0.44579216837882996, "learning_rate": 0.00016822266594421746, "loss": 0.2367, "step": 21200 }, { "epoch": 0.298123550495467, "grad_norm": 0.4577611982822418, "learning_rate": 0.00016818867354296115, "loss": 0.2103, "step": 21210 }, { "epoch": 0.29826410851078783, "grad_norm": 0.4669945538043976, "learning_rate": 0.00016815466640889308, "loss": 0.2096, "step": 21220 }, { "epoch": 0.29840466652610864, "grad_norm": 0.4052143096923828, "learning_rate": 0.0001681206445493608, "loss": 0.207, "step": 21230 }, { "epoch": 0.29854522454142945, "grad_norm": 0.4139108955860138, "learning_rate": 0.00016808660797171506, "loss": 0.1877, "step": 21240 }, { "epoch": 0.2986857825567503, "grad_norm": 0.441862016916275, "learning_rate": 0.00016805255668330977, "loss": 0.212, "step": 21250 }, { "epoch": 0.29882634057207114, "grad_norm": 0.5393145084381104, "learning_rate": 0.00016801849069150206, "loss": 0.2141, "step": 21260 }, { "epoch": 0.29896689858739195, "grad_norm": 0.7432650923728943, "learning_rate": 0.0001679844100036522, "loss": 0.2141, "step": 21270 }, { "epoch": 0.29910745660271276, "grad_norm": 0.38837116956710815, "learning_rate": 0.00016795031462712366, "loss": 0.1961, "step": 21280 }, { "epoch": 0.2992480146180336, "grad_norm": 0.4575008451938629, "learning_rate": 0.00016791620456928303, "loss": 0.2002, "step": 21290 }, { "epoch": 0.29938857263335444, "grad_norm": 0.5123000741004944, "learning_rate": 0.00016788207983750014, "loss": 0.2003, "step": 21300 }, { "epoch": 0.29952913064867526, "grad_norm": 0.4672884941101074, "learning_rate": 0.00016784794043914796, "loss": 0.217, "step": 21310 }, { "epoch": 0.29966968866399607, "grad_norm": 0.3814888894557953, "learning_rate": 0.00016781378638160266, "loss": 0.1916, "step": 21320 }, { "epoch": 0.2998102466793169, "grad_norm": 0.3043074905872345, "learning_rate": 0.0001677796176722435, "loss": 0.1816, "step": 21330 }, { "epoch": 0.2999508046946377, "grad_norm": 0.5070516467094421, "learning_rate": 0.000167745434318453, "loss": 0.2197, "step": 21340 }, { "epoch": 0.3000913627099585, "grad_norm": 0.39147183299064636, "learning_rate": 0.00016771123632761676, "loss": 0.1823, "step": 21350 }, { "epoch": 0.3002319207252794, "grad_norm": 0.3833473026752472, "learning_rate": 0.00016767702370712359, "loss": 0.206, "step": 21360 }, { "epoch": 0.3003724787406002, "grad_norm": 0.5475829243659973, "learning_rate": 0.00016764279646436548, "loss": 0.2121, "step": 21370 }, { "epoch": 0.300513036755921, "grad_norm": 0.6117001175880432, "learning_rate": 0.0001676085546067375, "loss": 0.195, "step": 21380 }, { "epoch": 0.3006535947712418, "grad_norm": 0.45678287744522095, "learning_rate": 0.00016757429814163798, "loss": 0.1876, "step": 21390 }, { "epoch": 0.30079415278656263, "grad_norm": 0.4021288752555847, "learning_rate": 0.00016754002707646839, "loss": 0.2076, "step": 21400 }, { "epoch": 0.3009347108018835, "grad_norm": 0.267534464597702, "learning_rate": 0.0001675057414186332, "loss": 0.211, "step": 21410 }, { "epoch": 0.3010752688172043, "grad_norm": 0.33864277601242065, "learning_rate": 0.00016747144117554023, "loss": 0.2011, "step": 21420 }, { "epoch": 0.3012158268325251, "grad_norm": 0.4673684239387512, "learning_rate": 0.00016743712635460036, "loss": 0.1935, "step": 21430 }, { "epoch": 0.30135638484784594, "grad_norm": 0.5295349359512329, "learning_rate": 0.00016740279696322765, "loss": 0.1794, "step": 21440 }, { "epoch": 0.30149694286316675, "grad_norm": 0.4514896273612976, "learning_rate": 0.00016736845300883925, "loss": 0.1848, "step": 21450 }, { "epoch": 0.3016375008784876, "grad_norm": 0.44729170203208923, "learning_rate": 0.00016733409449885557, "loss": 0.1871, "step": 21460 }, { "epoch": 0.30177805889380843, "grad_norm": 0.46859967708587646, "learning_rate": 0.00016729972144070003, "loss": 0.2038, "step": 21470 }, { "epoch": 0.30191861690912924, "grad_norm": 0.5687006711959839, "learning_rate": 0.00016726533384179925, "loss": 0.1793, "step": 21480 }, { "epoch": 0.30205917492445006, "grad_norm": 0.4455181062221527, "learning_rate": 0.00016723093170958303, "loss": 0.1949, "step": 21490 }, { "epoch": 0.30219973293977087, "grad_norm": 0.4142560362815857, "learning_rate": 0.0001671965150514843, "loss": 0.1794, "step": 21500 }, { "epoch": 0.30234029095509174, "grad_norm": 0.5188212990760803, "learning_rate": 0.00016716208387493908, "loss": 0.1994, "step": 21510 }, { "epoch": 0.30248084897041255, "grad_norm": 0.44499433040618896, "learning_rate": 0.00016712763818738652, "loss": 0.2142, "step": 21520 }, { "epoch": 0.30262140698573337, "grad_norm": 0.34323349595069885, "learning_rate": 0.00016709317799626898, "loss": 0.2045, "step": 21530 }, { "epoch": 0.3027619650010542, "grad_norm": 0.43302005529403687, "learning_rate": 0.0001670587033090319, "loss": 0.175, "step": 21540 }, { "epoch": 0.302902523016375, "grad_norm": 0.45775189995765686, "learning_rate": 0.00016702421413312388, "loss": 0.2127, "step": 21550 }, { "epoch": 0.30304308103169586, "grad_norm": 0.5039337873458862, "learning_rate": 0.00016698971047599662, "loss": 0.1861, "step": 21560 }, { "epoch": 0.3031836390470167, "grad_norm": 0.45097997784614563, "learning_rate": 0.00016695519234510492, "loss": 0.2329, "step": 21570 }, { "epoch": 0.3033241970623375, "grad_norm": 0.41815900802612305, "learning_rate": 0.00016692065974790684, "loss": 0.1934, "step": 21580 }, { "epoch": 0.3034647550776583, "grad_norm": 0.42974695563316345, "learning_rate": 0.00016688611269186342, "loss": 0.1921, "step": 21590 }, { "epoch": 0.3036053130929791, "grad_norm": 0.4625963866710663, "learning_rate": 0.0001668515511844389, "loss": 0.2079, "step": 21600 }, { "epoch": 0.3037458711082999, "grad_norm": 0.3194527328014374, "learning_rate": 0.0001668169752331006, "loss": 0.2104, "step": 21610 }, { "epoch": 0.3038864291236208, "grad_norm": 0.4360635280609131, "learning_rate": 0.000166782384845319, "loss": 0.1933, "step": 21620 }, { "epoch": 0.3040269871389416, "grad_norm": 0.2982389032840729, "learning_rate": 0.0001667477800285677, "loss": 0.1846, "step": 21630 }, { "epoch": 0.3041675451542624, "grad_norm": 0.43925002217292786, "learning_rate": 0.00016671316079032336, "loss": 0.2029, "step": 21640 }, { "epoch": 0.30430810316958323, "grad_norm": 0.41043299436569214, "learning_rate": 0.0001666785271380658, "loss": 0.1902, "step": 21650 }, { "epoch": 0.30444866118490405, "grad_norm": 0.4733089506626129, "learning_rate": 0.000166643879079278, "loss": 0.1974, "step": 21660 }, { "epoch": 0.3045892192002249, "grad_norm": 0.4703904092311859, "learning_rate": 0.00016660921662144595, "loss": 0.2195, "step": 21670 }, { "epoch": 0.3047297772155457, "grad_norm": 0.4680962860584259, "learning_rate": 0.0001665745397720588, "loss": 0.2057, "step": 21680 }, { "epoch": 0.30487033523086654, "grad_norm": 0.37620243430137634, "learning_rate": 0.00016653984853860887, "loss": 0.2152, "step": 21690 }, { "epoch": 0.30501089324618735, "grad_norm": 0.5280198454856873, "learning_rate": 0.00016650514292859147, "loss": 0.1821, "step": 21700 }, { "epoch": 0.30515145126150817, "grad_norm": 0.5200461745262146, "learning_rate": 0.00016647042294950511, "loss": 0.1995, "step": 21710 }, { "epoch": 0.30529200927682903, "grad_norm": 0.5603528618812561, "learning_rate": 0.00016643568860885138, "loss": 0.2014, "step": 21720 }, { "epoch": 0.30543256729214985, "grad_norm": 0.4702509045600891, "learning_rate": 0.00016640093991413495, "loss": 0.2164, "step": 21730 }, { "epoch": 0.30557312530747066, "grad_norm": 0.4287772476673126, "learning_rate": 0.0001663661768728636, "loss": 0.2112, "step": 21740 }, { "epoch": 0.3057136833227915, "grad_norm": 0.3885875642299652, "learning_rate": 0.0001663313994925482, "loss": 0.2374, "step": 21750 }, { "epoch": 0.3058542413381123, "grad_norm": 0.5625565648078918, "learning_rate": 0.0001662966077807028, "loss": 0.2035, "step": 21760 }, { "epoch": 0.30599479935343316, "grad_norm": 0.5335406064987183, "learning_rate": 0.00016626180174484444, "loss": 0.2292, "step": 21770 }, { "epoch": 0.30613535736875397, "grad_norm": 0.5790080428123474, "learning_rate": 0.00016622698139249324, "loss": 0.189, "step": 21780 }, { "epoch": 0.3062759153840748, "grad_norm": 0.4857686758041382, "learning_rate": 0.00016619214673117252, "loss": 0.1889, "step": 21790 }, { "epoch": 0.3064164733993956, "grad_norm": 0.34772664308547974, "learning_rate": 0.0001661572977684087, "loss": 0.1984, "step": 21800 }, { "epoch": 0.3065570314147164, "grad_norm": 0.493201345205307, "learning_rate": 0.00016612243451173114, "loss": 0.1846, "step": 21810 }, { "epoch": 0.3066975894300372, "grad_norm": 0.5512494444847107, "learning_rate": 0.0001660875569686724, "loss": 0.1978, "step": 21820 }, { "epoch": 0.3068381474453581, "grad_norm": 0.5157384276390076, "learning_rate": 0.00016605266514676812, "loss": 0.1975, "step": 21830 }, { "epoch": 0.3069787054606789, "grad_norm": 0.4055367708206177, "learning_rate": 0.00016601775905355698, "loss": 0.1948, "step": 21840 }, { "epoch": 0.3071192634759997, "grad_norm": 0.43613380193710327, "learning_rate": 0.00016598283869658084, "loss": 0.2258, "step": 21850 }, { "epoch": 0.30725982149132053, "grad_norm": 0.4905271828174591, "learning_rate": 0.00016594790408338452, "loss": 0.1999, "step": 21860 }, { "epoch": 0.30740037950664134, "grad_norm": 0.5527341365814209, "learning_rate": 0.00016591295522151597, "loss": 0.1974, "step": 21870 }, { "epoch": 0.3075409375219622, "grad_norm": 0.47549405694007874, "learning_rate": 0.0001658779921185263, "loss": 0.2088, "step": 21880 }, { "epoch": 0.307681495537283, "grad_norm": 0.46096235513687134, "learning_rate": 0.00016584301478196948, "loss": 0.2046, "step": 21890 }, { "epoch": 0.30782205355260384, "grad_norm": 0.44452401995658875, "learning_rate": 0.00016580802321940284, "loss": 0.1835, "step": 21900 }, { "epoch": 0.30796261156792465, "grad_norm": 0.3749406039714813, "learning_rate": 0.00016577301743838656, "loss": 0.2006, "step": 21910 }, { "epoch": 0.30810316958324546, "grad_norm": 0.45477259159088135, "learning_rate": 0.00016573799744648397, "loss": 0.2271, "step": 21920 }, { "epoch": 0.30824372759856633, "grad_norm": 0.34558719396591187, "learning_rate": 0.00016570296325126154, "loss": 0.1862, "step": 21930 }, { "epoch": 0.30838428561388714, "grad_norm": 0.4179009795188904, "learning_rate": 0.00016566791486028867, "loss": 0.2325, "step": 21940 }, { "epoch": 0.30852484362920796, "grad_norm": 0.38069191575050354, "learning_rate": 0.00016563285228113792, "loss": 0.1955, "step": 21950 }, { "epoch": 0.30866540164452877, "grad_norm": 0.4004470705986023, "learning_rate": 0.0001655977755213849, "loss": 0.2304, "step": 21960 }, { "epoch": 0.3088059596598496, "grad_norm": 0.3554079532623291, "learning_rate": 0.00016556268458860828, "loss": 0.1909, "step": 21970 }, { "epoch": 0.30894651767517045, "grad_norm": 0.5444430708885193, "learning_rate": 0.00016552757949038979, "loss": 0.2086, "step": 21980 }, { "epoch": 0.30908707569049126, "grad_norm": 0.3903445899486542, "learning_rate": 0.00016549246023431422, "loss": 0.18, "step": 21990 }, { "epoch": 0.3092276337058121, "grad_norm": 0.5591972470283508, "learning_rate": 0.0001654573268279694, "loss": 0.196, "step": 22000 }, { "epoch": 0.3092276337058121, "eval_chrf": 76.0635875128844, "eval_loss": 0.42267727851867676, "eval_runtime": 326.6993, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.012, "step": 22000 }, { "epoch": 0.3093681917211329, "grad_norm": 0.46631136536598206, "learning_rate": 0.00016542217927894626, "loss": 0.2193, "step": 22010 }, { "epoch": 0.3095087497364537, "grad_norm": 0.5263903737068176, "learning_rate": 0.0001653870175948388, "loss": 0.1821, "step": 22020 }, { "epoch": 0.30964930775177457, "grad_norm": 0.28334668278694153, "learning_rate": 0.00016535184178324394, "loss": 0.2041, "step": 22030 }, { "epoch": 0.3097898657670954, "grad_norm": 0.3982601463794708, "learning_rate": 0.0001653166518517618, "loss": 0.1877, "step": 22040 }, { "epoch": 0.3099304237824162, "grad_norm": 0.5098924040794373, "learning_rate": 0.0001652814478079955, "loss": 0.1948, "step": 22050 }, { "epoch": 0.310070981797737, "grad_norm": 0.47173330187797546, "learning_rate": 0.00016524622965955122, "loss": 0.1939, "step": 22060 }, { "epoch": 0.3102115398130578, "grad_norm": 0.41165921092033386, "learning_rate": 0.00016521099741403822, "loss": 0.1911, "step": 22070 }, { "epoch": 0.31035209782837864, "grad_norm": 0.43187540769577026, "learning_rate": 0.0001651757510790686, "loss": 0.1961, "step": 22080 }, { "epoch": 0.3104926558436995, "grad_norm": 0.4152243435382843, "learning_rate": 0.00016514049066225785, "loss": 0.187, "step": 22090 }, { "epoch": 0.3106332138590203, "grad_norm": 0.616346001625061, "learning_rate": 0.0001651052161712242, "loss": 0.21, "step": 22100 }, { "epoch": 0.31077377187434113, "grad_norm": 0.4170876145362854, "learning_rate": 0.0001650699276135891, "loss": 0.1979, "step": 22110 }, { "epoch": 0.31091432988966194, "grad_norm": 0.39913642406463623, "learning_rate": 0.00016503462499697696, "loss": 0.1885, "step": 22120 }, { "epoch": 0.31105488790498276, "grad_norm": 0.43161845207214355, "learning_rate": 0.00016499930832901524, "loss": 0.1961, "step": 22130 }, { "epoch": 0.3111954459203036, "grad_norm": 0.5345866084098816, "learning_rate": 0.00016496397761733443, "loss": 0.2122, "step": 22140 }, { "epoch": 0.31133600393562444, "grad_norm": 0.45740818977355957, "learning_rate": 0.00016492863286956812, "loss": 0.2093, "step": 22150 }, { "epoch": 0.31147656195094525, "grad_norm": 0.4239158630371094, "learning_rate": 0.00016489327409335283, "loss": 0.1962, "step": 22160 }, { "epoch": 0.31161711996626607, "grad_norm": 0.4945349097251892, "learning_rate": 0.00016485790129632818, "loss": 0.1856, "step": 22170 }, { "epoch": 0.3117576779815869, "grad_norm": 0.33270493149757385, "learning_rate": 0.0001648225144861368, "loss": 0.2224, "step": 22180 }, { "epoch": 0.31189823599690775, "grad_norm": 0.5921101570129395, "learning_rate": 0.0001647871136704243, "loss": 0.2017, "step": 22190 }, { "epoch": 0.31203879401222856, "grad_norm": 0.6349021196365356, "learning_rate": 0.00016475169885683943, "loss": 0.1913, "step": 22200 }, { "epoch": 0.3121793520275494, "grad_norm": 0.294806569814682, "learning_rate": 0.00016471627005303386, "loss": 0.1838, "step": 22210 }, { "epoch": 0.3123199100428702, "grad_norm": 0.4655342996120453, "learning_rate": 0.00016468082726666235, "loss": 0.202, "step": 22220 }, { "epoch": 0.312460468058191, "grad_norm": 0.40321406722068787, "learning_rate": 0.0001646453705053826, "loss": 0.183, "step": 22230 }, { "epoch": 0.31260102607351187, "grad_norm": 0.421733558177948, "learning_rate": 0.00016460989977685545, "loss": 0.2133, "step": 22240 }, { "epoch": 0.3127415840888327, "grad_norm": 0.4640405774116516, "learning_rate": 0.00016457441508874464, "loss": 0.2006, "step": 22250 }, { "epoch": 0.3128821421041535, "grad_norm": 0.4528983235359192, "learning_rate": 0.00016453891644871695, "loss": 0.1965, "step": 22260 }, { "epoch": 0.3130227001194743, "grad_norm": 0.39258962869644165, "learning_rate": 0.00016450340386444228, "loss": 0.1737, "step": 22270 }, { "epoch": 0.3131632581347951, "grad_norm": 0.3652934432029724, "learning_rate": 0.0001644678773435934, "loss": 0.1846, "step": 22280 }, { "epoch": 0.313303816150116, "grad_norm": 0.5782024264335632, "learning_rate": 0.00016443233689384615, "loss": 0.1864, "step": 22290 }, { "epoch": 0.3134443741654368, "grad_norm": 0.3768521249294281, "learning_rate": 0.00016439678252287942, "loss": 0.2079, "step": 22300 }, { "epoch": 0.3135849321807576, "grad_norm": 0.4075048863887787, "learning_rate": 0.00016436121423837505, "loss": 0.2033, "step": 22310 }, { "epoch": 0.3137254901960784, "grad_norm": 0.38486969470977783, "learning_rate": 0.0001643256320480179, "loss": 0.1918, "step": 22320 }, { "epoch": 0.31386604821139924, "grad_norm": 0.43265506625175476, "learning_rate": 0.0001642900359594959, "loss": 0.1837, "step": 22330 }, { "epoch": 0.31400660622672005, "grad_norm": 0.5715003609657288, "learning_rate": 0.0001642544259804998, "loss": 0.1797, "step": 22340 }, { "epoch": 0.3141471642420409, "grad_norm": 0.49120792746543884, "learning_rate": 0.00016421880211872357, "loss": 0.2218, "step": 22350 }, { "epoch": 0.31428772225736173, "grad_norm": 0.4198523461818695, "learning_rate": 0.00016418316438186406, "loss": 0.1843, "step": 22360 }, { "epoch": 0.31442828027268255, "grad_norm": 0.4386689066886902, "learning_rate": 0.00016414751277762116, "loss": 0.1927, "step": 22370 }, { "epoch": 0.31456883828800336, "grad_norm": 0.4620615839958191, "learning_rate": 0.0001641118473136977, "loss": 0.1942, "step": 22380 }, { "epoch": 0.3147093963033242, "grad_norm": 0.2823697626590729, "learning_rate": 0.0001640761679977996, "loss": 0.1989, "step": 22390 }, { "epoch": 0.31484995431864504, "grad_norm": 0.510749876499176, "learning_rate": 0.00016404047483763564, "loss": 0.2058, "step": 22400 }, { "epoch": 0.31499051233396586, "grad_norm": 0.4054532051086426, "learning_rate": 0.00016400476784091773, "loss": 0.1961, "step": 22410 }, { "epoch": 0.31513107034928667, "grad_norm": 0.36367928981781006, "learning_rate": 0.0001639690470153607, "loss": 0.196, "step": 22420 }, { "epoch": 0.3152716283646075, "grad_norm": 0.41342687606811523, "learning_rate": 0.00016393331236868233, "loss": 0.1961, "step": 22430 }, { "epoch": 0.3154121863799283, "grad_norm": 0.4696900546550751, "learning_rate": 0.00016389756390860347, "loss": 0.2123, "step": 22440 }, { "epoch": 0.31555274439524916, "grad_norm": 0.5743334293365479, "learning_rate": 0.0001638618016428479, "loss": 0.212, "step": 22450 }, { "epoch": 0.31569330241057, "grad_norm": 0.3811054527759552, "learning_rate": 0.0001638260255791424, "loss": 0.1841, "step": 22460 }, { "epoch": 0.3158338604258908, "grad_norm": 0.3848494589328766, "learning_rate": 0.00016379023572521675, "loss": 0.2067, "step": 22470 }, { "epoch": 0.3159744184412116, "grad_norm": 0.4317486882209778, "learning_rate": 0.00016375443208880362, "loss": 0.2067, "step": 22480 }, { "epoch": 0.3161149764565324, "grad_norm": 0.40094897150993347, "learning_rate": 0.00016371861467763882, "loss": 0.2024, "step": 22490 }, { "epoch": 0.3162555344718533, "grad_norm": 0.4053865671157837, "learning_rate": 0.00016368278349946098, "loss": 0.19, "step": 22500 }, { "epoch": 0.3163960924871741, "grad_norm": 0.37913885712623596, "learning_rate": 0.0001636469385620118, "loss": 0.1952, "step": 22510 }, { "epoch": 0.3165366505024949, "grad_norm": 0.48451685905456543, "learning_rate": 0.00016361107987303587, "loss": 0.2111, "step": 22520 }, { "epoch": 0.3166772085178157, "grad_norm": 0.3487234115600586, "learning_rate": 0.0001635752074402809, "loss": 0.1847, "step": 22530 }, { "epoch": 0.31681776653313654, "grad_norm": 0.5989207625389099, "learning_rate": 0.00016353932127149734, "loss": 0.2311, "step": 22540 }, { "epoch": 0.31695832454845735, "grad_norm": 0.3807673752307892, "learning_rate": 0.00016350342137443882, "loss": 0.1984, "step": 22550 }, { "epoch": 0.3170988825637782, "grad_norm": 0.3877153694629669, "learning_rate": 0.00016346750775686185, "loss": 0.1846, "step": 22560 }, { "epoch": 0.31723944057909903, "grad_norm": 0.42989611625671387, "learning_rate": 0.00016343158042652592, "loss": 0.214, "step": 22570 }, { "epoch": 0.31737999859441984, "grad_norm": 0.3693827986717224, "learning_rate": 0.0001633956393911934, "loss": 0.23, "step": 22580 }, { "epoch": 0.31752055660974066, "grad_norm": 0.6093205213546753, "learning_rate": 0.00016335968465862978, "loss": 0.2042, "step": 22590 }, { "epoch": 0.31766111462506147, "grad_norm": 0.3517461121082306, "learning_rate": 0.0001633237162366034, "loss": 0.2055, "step": 22600 }, { "epoch": 0.31780167264038234, "grad_norm": 0.3896740972995758, "learning_rate": 0.00016328773413288553, "loss": 0.1931, "step": 22610 }, { "epoch": 0.31794223065570315, "grad_norm": 0.4707591235637665, "learning_rate": 0.00016325173835525045, "loss": 0.1996, "step": 22620 }, { "epoch": 0.31808278867102396, "grad_norm": 0.44279807806015015, "learning_rate": 0.00016321572891147548, "loss": 0.191, "step": 22630 }, { "epoch": 0.3182233466863448, "grad_norm": 0.5250387787818909, "learning_rate": 0.0001631797058093407, "loss": 0.2373, "step": 22640 }, { "epoch": 0.3183639047016656, "grad_norm": 0.5072187185287476, "learning_rate": 0.0001631436690566293, "loss": 0.199, "step": 22650 }, { "epoch": 0.31850446271698646, "grad_norm": 0.5126092433929443, "learning_rate": 0.00016310761866112733, "loss": 0.1883, "step": 22660 }, { "epoch": 0.31864502073230727, "grad_norm": 0.5698773264884949, "learning_rate": 0.00016307155463062388, "loss": 0.1965, "step": 22670 }, { "epoch": 0.3187855787476281, "grad_norm": 0.4435856342315674, "learning_rate": 0.0001630354769729109, "loss": 0.2086, "step": 22680 }, { "epoch": 0.3189261367629489, "grad_norm": 0.3472186326980591, "learning_rate": 0.00016299938569578327, "loss": 0.1974, "step": 22690 }, { "epoch": 0.3190666947782697, "grad_norm": 0.3711005449295044, "learning_rate": 0.00016296328080703885, "loss": 0.1728, "step": 22700 }, { "epoch": 0.3192072527935906, "grad_norm": 0.37461093068122864, "learning_rate": 0.00016292716231447853, "loss": 0.212, "step": 22710 }, { "epoch": 0.3193478108089114, "grad_norm": 0.47322550415992737, "learning_rate": 0.000162891030225906, "loss": 0.205, "step": 22720 }, { "epoch": 0.3194883688242322, "grad_norm": 0.3658617436885834, "learning_rate": 0.0001628548845491279, "loss": 0.1726, "step": 22730 }, { "epoch": 0.319628926839553, "grad_norm": 0.44577306509017944, "learning_rate": 0.00016281872529195392, "loss": 0.2265, "step": 22740 }, { "epoch": 0.31976948485487383, "grad_norm": 0.4455224871635437, "learning_rate": 0.0001627825524621966, "loss": 0.2256, "step": 22750 }, { "epoch": 0.3199100428701947, "grad_norm": 0.5204285979270935, "learning_rate": 0.0001627463660676714, "loss": 0.1902, "step": 22760 }, { "epoch": 0.3200506008855155, "grad_norm": 0.387105256319046, "learning_rate": 0.00016271016611619672, "loss": 0.1894, "step": 22770 }, { "epoch": 0.3201911589008363, "grad_norm": 0.4587792158126831, "learning_rate": 0.00016267395261559394, "loss": 0.1799, "step": 22780 }, { "epoch": 0.32033171691615714, "grad_norm": 0.400437593460083, "learning_rate": 0.00016263772557368732, "loss": 0.1872, "step": 22790 }, { "epoch": 0.32047227493147795, "grad_norm": 0.501432478427887, "learning_rate": 0.00016260148499830403, "loss": 0.1977, "step": 22800 }, { "epoch": 0.32061283294679876, "grad_norm": 0.5811870694160461, "learning_rate": 0.00016256523089727421, "loss": 0.2002, "step": 22810 }, { "epoch": 0.32075339096211963, "grad_norm": 0.46564650535583496, "learning_rate": 0.00016252896327843095, "loss": 0.1865, "step": 22820 }, { "epoch": 0.32089394897744045, "grad_norm": 0.4727911949157715, "learning_rate": 0.00016249268214961018, "loss": 0.1926, "step": 22830 }, { "epoch": 0.32103450699276126, "grad_norm": 0.3758562505245209, "learning_rate": 0.00016245638751865075, "loss": 0.2167, "step": 22840 }, { "epoch": 0.3211750650080821, "grad_norm": 0.4082559049129486, "learning_rate": 0.00016242007939339445, "loss": 0.195, "step": 22850 }, { "epoch": 0.3213156230234029, "grad_norm": 0.466960608959198, "learning_rate": 0.00016238375778168605, "loss": 0.2011, "step": 22860 }, { "epoch": 0.32145618103872375, "grad_norm": 0.4250938296318054, "learning_rate": 0.00016234742269137322, "loss": 0.1948, "step": 22870 }, { "epoch": 0.32159673905404457, "grad_norm": 0.4879426658153534, "learning_rate": 0.00016231107413030638, "loss": 0.1942, "step": 22880 }, { "epoch": 0.3217372970693654, "grad_norm": 0.3468765914440155, "learning_rate": 0.00016227471210633908, "loss": 0.1897, "step": 22890 }, { "epoch": 0.3218778550846862, "grad_norm": 0.40255075693130493, "learning_rate": 0.00016223833662732764, "loss": 0.2037, "step": 22900 }, { "epoch": 0.322018413100007, "grad_norm": 0.4981006979942322, "learning_rate": 0.00016220194770113133, "loss": 0.1837, "step": 22910 }, { "epoch": 0.3221589711153279, "grad_norm": 0.4926389753818512, "learning_rate": 0.0001621655453356123, "loss": 0.2107, "step": 22920 }, { "epoch": 0.3222995291306487, "grad_norm": 0.45715492963790894, "learning_rate": 0.0001621291295386357, "loss": 0.1927, "step": 22930 }, { "epoch": 0.3224400871459695, "grad_norm": 0.5912772417068481, "learning_rate": 0.00016209270031806948, "loss": 0.1956, "step": 22940 }, { "epoch": 0.3225806451612903, "grad_norm": 0.3169713020324707, "learning_rate": 0.00016205625768178446, "loss": 0.17, "step": 22950 }, { "epoch": 0.3227212031766111, "grad_norm": 0.5209740400314331, "learning_rate": 0.0001620198016376545, "loss": 0.1947, "step": 22960 }, { "epoch": 0.322861761191932, "grad_norm": 0.5017517805099487, "learning_rate": 0.00016198333219355626, "loss": 0.1828, "step": 22970 }, { "epoch": 0.3230023192072528, "grad_norm": 0.4529948830604553, "learning_rate": 0.00016194684935736927, "loss": 0.1842, "step": 22980 }, { "epoch": 0.3231428772225736, "grad_norm": 0.526214063167572, "learning_rate": 0.000161910353136976, "loss": 0.2203, "step": 22990 }, { "epoch": 0.32328343523789443, "grad_norm": 0.42212504148483276, "learning_rate": 0.0001618738435402618, "loss": 0.2013, "step": 23000 }, { "epoch": 0.32328343523789443, "eval_chrf": 82.48321766359608, "eval_loss": 0.3963998854160309, "eval_runtime": 196.6304, "eval_samples_per_second": 0.509, "eval_steps_per_second": 0.02, "step": 23000 }, { "epoch": 0.32342399325321525, "grad_norm": 0.5202959179878235, "learning_rate": 0.00016183732057511497, "loss": 0.1968, "step": 23010 }, { "epoch": 0.3235645512685361, "grad_norm": 0.5392760634422302, "learning_rate": 0.0001618007842494266, "loss": 0.2024, "step": 23020 }, { "epoch": 0.32370510928385693, "grad_norm": 0.4830007553100586, "learning_rate": 0.00016176423457109072, "loss": 0.1902, "step": 23030 }, { "epoch": 0.32384566729917774, "grad_norm": 0.5701043009757996, "learning_rate": 0.00016172767154800424, "loss": 0.1675, "step": 23040 }, { "epoch": 0.32398622531449855, "grad_norm": 0.48150068521499634, "learning_rate": 0.00016169109518806692, "loss": 0.2048, "step": 23050 }, { "epoch": 0.32412678332981937, "grad_norm": 0.47341838479042053, "learning_rate": 0.0001616545054991815, "loss": 0.2221, "step": 23060 }, { "epoch": 0.3242673413451402, "grad_norm": 0.5987292528152466, "learning_rate": 0.0001616179024892535, "loss": 0.2002, "step": 23070 }, { "epoch": 0.32440789936046105, "grad_norm": 0.46618038415908813, "learning_rate": 0.0001615812861661913, "loss": 0.2115, "step": 23080 }, { "epoch": 0.32454845737578186, "grad_norm": 0.3018644452095032, "learning_rate": 0.00016154465653790625, "loss": 0.2034, "step": 23090 }, { "epoch": 0.3246890153911027, "grad_norm": 0.3897569477558136, "learning_rate": 0.00016150801361231252, "loss": 0.1973, "step": 23100 }, { "epoch": 0.3248295734064235, "grad_norm": 0.3934481739997864, "learning_rate": 0.00016147135739732715, "loss": 0.2161, "step": 23110 }, { "epoch": 0.3249701314217443, "grad_norm": 0.4347870349884033, "learning_rate": 0.0001614346879008701, "loss": 0.1901, "step": 23120 }, { "epoch": 0.32511068943706517, "grad_norm": 0.5074248909950256, "learning_rate": 0.00016139800513086412, "loss": 0.2003, "step": 23130 }, { "epoch": 0.325251247452386, "grad_norm": 0.4701947867870331, "learning_rate": 0.0001613613090952349, "loss": 0.1855, "step": 23140 }, { "epoch": 0.3253918054677068, "grad_norm": 0.38716921210289, "learning_rate": 0.00016132459980191098, "loss": 0.1892, "step": 23150 }, { "epoch": 0.3255323634830276, "grad_norm": 0.4173943102359772, "learning_rate": 0.0001612878772588237, "loss": 0.1923, "step": 23160 }, { "epoch": 0.3256729214983484, "grad_norm": 0.43605056405067444, "learning_rate": 0.00016125114147390733, "loss": 0.1913, "step": 23170 }, { "epoch": 0.3258134795136693, "grad_norm": 0.5023015141487122, "learning_rate": 0.00016121439245509906, "loss": 0.211, "step": 23180 }, { "epoch": 0.3259540375289901, "grad_norm": 0.4328177571296692, "learning_rate": 0.00016117763021033875, "loss": 0.2187, "step": 23190 }, { "epoch": 0.3260945955443109, "grad_norm": 0.3364429473876953, "learning_rate": 0.00016114085474756932, "loss": 0.1864, "step": 23200 }, { "epoch": 0.32623515355963173, "grad_norm": 0.5123487114906311, "learning_rate": 0.00016110406607473644, "loss": 0.2139, "step": 23210 }, { "epoch": 0.32637571157495254, "grad_norm": 0.40395841002464294, "learning_rate": 0.00016106726419978866, "loss": 0.2131, "step": 23220 }, { "epoch": 0.3265162695902734, "grad_norm": 0.7314327359199524, "learning_rate": 0.00016103044913067733, "loss": 0.1898, "step": 23230 }, { "epoch": 0.3266568276055942, "grad_norm": 0.45684218406677246, "learning_rate": 0.00016099362087535672, "loss": 0.1983, "step": 23240 }, { "epoch": 0.32679738562091504, "grad_norm": 0.43260887265205383, "learning_rate": 0.00016095677944178392, "loss": 0.1891, "step": 23250 }, { "epoch": 0.32693794363623585, "grad_norm": 0.5483604073524475, "learning_rate": 0.0001609199248379189, "loss": 0.218, "step": 23260 }, { "epoch": 0.32707850165155666, "grad_norm": 0.34057939052581787, "learning_rate": 0.00016088305707172442, "loss": 0.1746, "step": 23270 }, { "epoch": 0.3272190596668775, "grad_norm": 0.3196142911911011, "learning_rate": 0.00016084617615116613, "loss": 0.1825, "step": 23280 }, { "epoch": 0.32735961768219835, "grad_norm": 0.40307173132896423, "learning_rate": 0.00016080928208421247, "loss": 0.2045, "step": 23290 }, { "epoch": 0.32750017569751916, "grad_norm": 0.470195472240448, "learning_rate": 0.00016077237487883481, "loss": 0.1935, "step": 23300 }, { "epoch": 0.32764073371283997, "grad_norm": 0.34664785861968994, "learning_rate": 0.00016073545454300728, "loss": 0.1741, "step": 23310 }, { "epoch": 0.3277812917281608, "grad_norm": 0.3692355155944824, "learning_rate": 0.00016069852108470682, "loss": 0.2, "step": 23320 }, { "epoch": 0.3279218497434816, "grad_norm": 0.4574620723724365, "learning_rate": 0.00016066157451191332, "loss": 0.1738, "step": 23330 }, { "epoch": 0.32806240775880247, "grad_norm": 0.44873225688934326, "learning_rate": 0.0001606246148326094, "loss": 0.1918, "step": 23340 }, { "epoch": 0.3282029657741233, "grad_norm": 0.4090384244918823, "learning_rate": 0.00016058764205478058, "loss": 0.1681, "step": 23350 }, { "epoch": 0.3283435237894441, "grad_norm": 0.4433750510215759, "learning_rate": 0.00016055065618641515, "loss": 0.1936, "step": 23360 }, { "epoch": 0.3284840818047649, "grad_norm": 0.29744261503219604, "learning_rate": 0.0001605136572355043, "loss": 0.1776, "step": 23370 }, { "epoch": 0.3286246398200857, "grad_norm": 0.4242916703224182, "learning_rate": 0.00016047664521004194, "loss": 0.1839, "step": 23380 }, { "epoch": 0.3287651978354066, "grad_norm": 0.5000902414321899, "learning_rate": 0.00016043962011802495, "loss": 0.2317, "step": 23390 }, { "epoch": 0.3289057558507274, "grad_norm": 0.4689231216907501, "learning_rate": 0.00016040258196745293, "loss": 0.2173, "step": 23400 }, { "epoch": 0.3290463138660482, "grad_norm": 0.5616615414619446, "learning_rate": 0.00016036553076632828, "loss": 0.2117, "step": 23410 }, { "epoch": 0.329186871881369, "grad_norm": 0.43278253078460693, "learning_rate": 0.00016032846652265628, "loss": 0.23, "step": 23420 }, { "epoch": 0.32932742989668984, "grad_norm": 0.46418872475624084, "learning_rate": 0.00016029138924444504, "loss": 0.1797, "step": 23430 }, { "epoch": 0.3294679879120107, "grad_norm": 0.5142697691917419, "learning_rate": 0.00016025429893970548, "loss": 0.2135, "step": 23440 }, { "epoch": 0.3296085459273315, "grad_norm": 0.456232488155365, "learning_rate": 0.00016021719561645122, "loss": 0.2199, "step": 23450 }, { "epoch": 0.32974910394265233, "grad_norm": 0.4834028482437134, "learning_rate": 0.00016018007928269887, "loss": 0.2194, "step": 23460 }, { "epoch": 0.32988966195797315, "grad_norm": 0.343547523021698, "learning_rate": 0.00016014294994646773, "loss": 0.2129, "step": 23470 }, { "epoch": 0.33003021997329396, "grad_norm": 0.5281736254692078, "learning_rate": 0.00016010580761577998, "loss": 0.2313, "step": 23480 }, { "epoch": 0.3301707779886148, "grad_norm": 0.4694022834300995, "learning_rate": 0.00016006865229866053, "loss": 0.1975, "step": 23490 }, { "epoch": 0.33031133600393564, "grad_norm": 0.449082612991333, "learning_rate": 0.00016003148400313717, "loss": 0.1962, "step": 23500 }, { "epoch": 0.33045189401925645, "grad_norm": 0.5389946699142456, "learning_rate": 0.00015999430273724044, "loss": 0.1958, "step": 23510 }, { "epoch": 0.33059245203457727, "grad_norm": 0.38287481665611267, "learning_rate": 0.00015995710850900374, "loss": 0.1983, "step": 23520 }, { "epoch": 0.3307330100498981, "grad_norm": 0.42514464259147644, "learning_rate": 0.0001599199013264632, "loss": 0.1976, "step": 23530 }, { "epoch": 0.3308735680652189, "grad_norm": 0.3916068375110626, "learning_rate": 0.00015988268119765782, "loss": 0.1936, "step": 23540 }, { "epoch": 0.33101412608053976, "grad_norm": 0.5027921795845032, "learning_rate": 0.00015984544813062935, "loss": 0.2461, "step": 23550 }, { "epoch": 0.3311546840958606, "grad_norm": 0.47344645857810974, "learning_rate": 0.00015980820213342236, "loss": 0.194, "step": 23560 }, { "epoch": 0.3312952421111814, "grad_norm": 0.5741089582443237, "learning_rate": 0.00015977094321408414, "loss": 0.1883, "step": 23570 }, { "epoch": 0.3314358001265022, "grad_norm": 0.526816189289093, "learning_rate": 0.00015973367138066494, "loss": 0.1959, "step": 23580 }, { "epoch": 0.331576358141823, "grad_norm": 0.37461012601852417, "learning_rate": 0.0001596963866412176, "loss": 0.1948, "step": 23590 }, { "epoch": 0.3317169161571439, "grad_norm": 0.4073886573314667, "learning_rate": 0.00015965908900379789, "loss": 0.1879, "step": 23600 }, { "epoch": 0.3318574741724647, "grad_norm": 0.4429916441440582, "learning_rate": 0.0001596217784764643, "loss": 0.2059, "step": 23610 }, { "epoch": 0.3319980321877855, "grad_norm": 0.4292476773262024, "learning_rate": 0.00015958445506727813, "loss": 0.1914, "step": 23620 }, { "epoch": 0.3321385902031063, "grad_norm": 0.5539444088935852, "learning_rate": 0.00015954711878430347, "loss": 0.1895, "step": 23630 }, { "epoch": 0.33227914821842713, "grad_norm": 0.39998406171798706, "learning_rate": 0.00015950976963560718, "loss": 0.2033, "step": 23640 }, { "epoch": 0.332419706233748, "grad_norm": 0.3286818563938141, "learning_rate": 0.00015947240762925885, "loss": 0.217, "step": 23650 }, { "epoch": 0.3325602642490688, "grad_norm": 0.34060147404670715, "learning_rate": 0.00015943503277333099, "loss": 0.1989, "step": 23660 }, { "epoch": 0.33270082226438963, "grad_norm": 0.36114442348480225, "learning_rate": 0.0001593976450758987, "loss": 0.1967, "step": 23670 }, { "epoch": 0.33284138027971044, "grad_norm": 0.37886273860931396, "learning_rate": 0.00015936024454504, "loss": 0.2272, "step": 23680 }, { "epoch": 0.33298193829503125, "grad_norm": 0.3490985333919525, "learning_rate": 0.00015932283118883563, "loss": 0.1915, "step": 23690 }, { "epoch": 0.3331224963103521, "grad_norm": 0.4528276026248932, "learning_rate": 0.00015928540501536907, "loss": 0.1972, "step": 23700 }, { "epoch": 0.33326305432567294, "grad_norm": 0.3115861415863037, "learning_rate": 0.00015924796603272663, "loss": 0.1785, "step": 23710 }, { "epoch": 0.33340361234099375, "grad_norm": 0.42265215516090393, "learning_rate": 0.00015921051424899732, "loss": 0.2127, "step": 23720 }, { "epoch": 0.33354417035631456, "grad_norm": 0.4673636853694916, "learning_rate": 0.00015917304967227302, "loss": 0.2088, "step": 23730 }, { "epoch": 0.3336847283716354, "grad_norm": 0.5135847926139832, "learning_rate": 0.00015913557231064823, "loss": 0.22, "step": 23740 }, { "epoch": 0.33382528638695624, "grad_norm": 0.37671685218811035, "learning_rate": 0.0001590980821722204, "loss": 0.1973, "step": 23750 }, { "epoch": 0.33396584440227706, "grad_norm": 0.33548229932785034, "learning_rate": 0.00015906057926508952, "loss": 0.1891, "step": 23760 }, { "epoch": 0.33410640241759787, "grad_norm": 0.37380048632621765, "learning_rate": 0.0001590230635973585, "loss": 0.2028, "step": 23770 }, { "epoch": 0.3342469604329187, "grad_norm": 0.5409539937973022, "learning_rate": 0.00015898553517713294, "loss": 0.1939, "step": 23780 }, { "epoch": 0.3343875184482395, "grad_norm": 0.32086920738220215, "learning_rate": 0.00015894799401252124, "loss": 0.187, "step": 23790 }, { "epoch": 0.3345280764635603, "grad_norm": 0.4053663909435272, "learning_rate": 0.00015891044011163448, "loss": 0.1905, "step": 23800 }, { "epoch": 0.3346686344788812, "grad_norm": 0.5001649856567383, "learning_rate": 0.00015887287348258656, "loss": 0.1967, "step": 23810 }, { "epoch": 0.334809192494202, "grad_norm": 0.3653586804866791, "learning_rate": 0.00015883529413349417, "loss": 0.2088, "step": 23820 }, { "epoch": 0.3349497505095228, "grad_norm": 0.4791796803474426, "learning_rate": 0.00015879770207247656, "loss": 0.1996, "step": 23830 }, { "epoch": 0.3350903085248436, "grad_norm": 0.3865216374397278, "learning_rate": 0.00015876009730765594, "loss": 0.1929, "step": 23840 }, { "epoch": 0.33523086654016443, "grad_norm": 0.4039939045906067, "learning_rate": 0.0001587224798471572, "loss": 0.1838, "step": 23850 }, { "epoch": 0.3353714245554853, "grad_norm": 0.5174484848976135, "learning_rate": 0.0001586848496991079, "loss": 0.1938, "step": 23860 }, { "epoch": 0.3355119825708061, "grad_norm": 0.48277246952056885, "learning_rate": 0.00015864720687163835, "loss": 0.1937, "step": 23870 }, { "epoch": 0.3356525405861269, "grad_norm": 0.4065185785293579, "learning_rate": 0.00015860955137288172, "loss": 0.2079, "step": 23880 }, { "epoch": 0.33579309860144774, "grad_norm": 0.5326710343360901, "learning_rate": 0.00015857188321097382, "loss": 0.1969, "step": 23890 }, { "epoch": 0.33593365661676855, "grad_norm": 0.4714176058769226, "learning_rate": 0.0001585342023940532, "loss": 0.1886, "step": 23900 }, { "epoch": 0.3360742146320894, "grad_norm": 0.4024302661418915, "learning_rate": 0.00015849650893026116, "loss": 0.193, "step": 23910 }, { "epoch": 0.33621477264741023, "grad_norm": 0.521590530872345, "learning_rate": 0.00015845880282774175, "loss": 0.2082, "step": 23920 }, { "epoch": 0.33635533066273104, "grad_norm": 0.3961153030395508, "learning_rate": 0.0001584210840946417, "loss": 0.1984, "step": 23930 }, { "epoch": 0.33649588867805186, "grad_norm": 0.4426018297672272, "learning_rate": 0.00015838335273911052, "loss": 0.2076, "step": 23940 }, { "epoch": 0.33663644669337267, "grad_norm": 0.38262760639190674, "learning_rate": 0.00015834560876930043, "loss": 0.1729, "step": 23950 }, { "epoch": 0.33677700470869354, "grad_norm": 0.4940842092037201, "learning_rate": 0.00015830785219336638, "loss": 0.206, "step": 23960 }, { "epoch": 0.33691756272401435, "grad_norm": 0.4233616888523102, "learning_rate": 0.00015827008301946602, "loss": 0.1832, "step": 23970 }, { "epoch": 0.33705812073933517, "grad_norm": 0.4347827434539795, "learning_rate": 0.00015823230125575977, "loss": 0.2158, "step": 23980 }, { "epoch": 0.337198678754656, "grad_norm": 0.39352595806121826, "learning_rate": 0.0001581945069104107, "loss": 0.2111, "step": 23990 }, { "epoch": 0.3373392367699768, "grad_norm": 0.3998149633407593, "learning_rate": 0.00015815669999158467, "loss": 0.2143, "step": 24000 }, { "epoch": 0.3373392367699768, "eval_chrf": 80.06792337784833, "eval_loss": 0.4090176224708557, "eval_runtime": 326.915, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.012, "step": 24000 }, { "epoch": 0.3374797947852976, "grad_norm": 0.39046767354011536, "learning_rate": 0.0001581188805074502, "loss": 0.1933, "step": 24010 }, { "epoch": 0.3376203528006185, "grad_norm": 0.3566581904888153, "learning_rate": 0.00015808104846617855, "loss": 0.1996, "step": 24020 }, { "epoch": 0.3377609108159393, "grad_norm": 0.3693186938762665, "learning_rate": 0.0001580432038759437, "loss": 0.2069, "step": 24030 }, { "epoch": 0.3379014688312601, "grad_norm": 0.4311753213405609, "learning_rate": 0.00015800534674492237, "loss": 0.2165, "step": 24040 }, { "epoch": 0.3380420268465809, "grad_norm": 0.42389604449272156, "learning_rate": 0.00015796747708129386, "loss": 0.2079, "step": 24050 }, { "epoch": 0.3381825848619017, "grad_norm": 0.42120248079299927, "learning_rate": 0.0001579295948932404, "loss": 0.1859, "step": 24060 }, { "epoch": 0.3383231428772226, "grad_norm": 0.35434579849243164, "learning_rate": 0.0001578917001889467, "loss": 0.1832, "step": 24070 }, { "epoch": 0.3384637008925434, "grad_norm": 0.45823779702186584, "learning_rate": 0.00015785379297660027, "loss": 0.1804, "step": 24080 }, { "epoch": 0.3386042589078642, "grad_norm": 0.41358405351638794, "learning_rate": 0.0001578158732643914, "loss": 0.2048, "step": 24090 }, { "epoch": 0.33874481692318503, "grad_norm": 0.45774754881858826, "learning_rate": 0.00015777794106051297, "loss": 0.1947, "step": 24100 }, { "epoch": 0.33888537493850585, "grad_norm": 0.593467652797699, "learning_rate": 0.00015773999637316052, "loss": 0.212, "step": 24110 }, { "epoch": 0.3390259329538267, "grad_norm": 0.5359540581703186, "learning_rate": 0.00015770203921053246, "loss": 0.1842, "step": 24120 }, { "epoch": 0.3391664909691475, "grad_norm": 0.32489413022994995, "learning_rate": 0.00015766406958082975, "loss": 0.2051, "step": 24130 }, { "epoch": 0.33930704898446834, "grad_norm": 0.45257705450057983, "learning_rate": 0.0001576260874922561, "loss": 0.1957, "step": 24140 }, { "epoch": 0.33944760699978915, "grad_norm": 0.34853655099868774, "learning_rate": 0.00015758809295301788, "loss": 0.1677, "step": 24150 }, { "epoch": 0.33958816501510997, "grad_norm": 0.49031469225883484, "learning_rate": 0.00015755008597132422, "loss": 0.2104, "step": 24160 }, { "epoch": 0.33972872303043083, "grad_norm": 0.47045207023620605, "learning_rate": 0.00015751206655538686, "loss": 0.1913, "step": 24170 }, { "epoch": 0.33986928104575165, "grad_norm": 0.5129997730255127, "learning_rate": 0.00015747403471342027, "loss": 0.1976, "step": 24180 }, { "epoch": 0.34000983906107246, "grad_norm": 0.4752309322357178, "learning_rate": 0.00015743599045364153, "loss": 0.2212, "step": 24190 }, { "epoch": 0.3401503970763933, "grad_norm": 0.41055867075920105, "learning_rate": 0.00015739793378427057, "loss": 0.1898, "step": 24200 }, { "epoch": 0.3402909550917141, "grad_norm": 0.3874673545360565, "learning_rate": 0.00015735986471352982, "loss": 0.1825, "step": 24210 }, { "epoch": 0.34043151310703496, "grad_norm": 0.335651695728302, "learning_rate": 0.00015732178324964448, "loss": 0.2047, "step": 24220 }, { "epoch": 0.34057207112235577, "grad_norm": 0.4309629201889038, "learning_rate": 0.0001572836894008424, "loss": 0.1568, "step": 24230 }, { "epoch": 0.3407126291376766, "grad_norm": 0.4961472749710083, "learning_rate": 0.00015724558317535417, "loss": 0.207, "step": 24240 }, { "epoch": 0.3408531871529974, "grad_norm": 0.3872990310192108, "learning_rate": 0.00015720746458141295, "loss": 0.1961, "step": 24250 }, { "epoch": 0.3409937451683182, "grad_norm": 0.46306172013282776, "learning_rate": 0.00015716933362725465, "loss": 0.2034, "step": 24260 }, { "epoch": 0.341134303183639, "grad_norm": 0.4858575463294983, "learning_rate": 0.0001571311903211178, "loss": 0.2087, "step": 24270 }, { "epoch": 0.3412748611989599, "grad_norm": 0.30993708968162537, "learning_rate": 0.00015709303467124368, "loss": 0.2131, "step": 24280 }, { "epoch": 0.3414154192142807, "grad_norm": 0.3692980706691742, "learning_rate": 0.00015705486668587612, "loss": 0.1943, "step": 24290 }, { "epoch": 0.3415559772296015, "grad_norm": 0.42944425344467163, "learning_rate": 0.0001570166863732617, "loss": 0.2096, "step": 24300 }, { "epoch": 0.34169653524492233, "grad_norm": 0.6496607661247253, "learning_rate": 0.00015697849374164964, "loss": 0.2066, "step": 24310 }, { "epoch": 0.34183709326024314, "grad_norm": 0.4161585569381714, "learning_rate": 0.00015694028879929185, "loss": 0.1847, "step": 24320 }, { "epoch": 0.341977651275564, "grad_norm": 0.3806317448616028, "learning_rate": 0.00015690207155444284, "loss": 0.1714, "step": 24330 }, { "epoch": 0.3421182092908848, "grad_norm": 0.4975564777851105, "learning_rate": 0.00015686384201535979, "loss": 0.201, "step": 24340 }, { "epoch": 0.34225876730620564, "grad_norm": 0.4811922013759613, "learning_rate": 0.00015682560019030257, "loss": 0.1861, "step": 24350 }, { "epoch": 0.34239932532152645, "grad_norm": 0.3213375508785248, "learning_rate": 0.00015678734608753371, "loss": 0.1893, "step": 24360 }, { "epoch": 0.34253988333684726, "grad_norm": 0.4221675992012024, "learning_rate": 0.00015674907971531838, "loss": 0.2132, "step": 24370 }, { "epoch": 0.34268044135216813, "grad_norm": 0.48913589119911194, "learning_rate": 0.00015671080108192436, "loss": 0.1798, "step": 24380 }, { "epoch": 0.34282099936748894, "grad_norm": 0.3994086980819702, "learning_rate": 0.0001566725101956221, "loss": 0.1863, "step": 24390 }, { "epoch": 0.34296155738280976, "grad_norm": 0.40696194767951965, "learning_rate": 0.00015663420706468477, "loss": 0.206, "step": 24400 }, { "epoch": 0.34310211539813057, "grad_norm": 0.44838500022888184, "learning_rate": 0.00015659589169738812, "loss": 0.1901, "step": 24410 }, { "epoch": 0.3432426734134514, "grad_norm": 0.4780958294868469, "learning_rate": 0.0001565575641020105, "loss": 0.2107, "step": 24420 }, { "epoch": 0.34338323142877225, "grad_norm": 0.4724385440349579, "learning_rate": 0.00015651922428683296, "loss": 0.1899, "step": 24430 }, { "epoch": 0.34352378944409306, "grad_norm": 0.5628231763839722, "learning_rate": 0.00015648087226013925, "loss": 0.1875, "step": 24440 }, { "epoch": 0.3436643474594139, "grad_norm": 0.31662383675575256, "learning_rate": 0.0001564425080302156, "loss": 0.1964, "step": 24450 }, { "epoch": 0.3438049054747347, "grad_norm": 0.4724717438220978, "learning_rate": 0.00015640413160535098, "loss": 0.2064, "step": 24460 }, { "epoch": 0.3439454634900555, "grad_norm": 0.43273380398750305, "learning_rate": 0.00015636574299383703, "loss": 0.1795, "step": 24470 }, { "epoch": 0.34408602150537637, "grad_norm": 0.4508928060531616, "learning_rate": 0.00015632734220396796, "loss": 0.2112, "step": 24480 }, { "epoch": 0.3442265795206972, "grad_norm": 0.4900144934654236, "learning_rate": 0.00015628892924404063, "loss": 0.2006, "step": 24490 }, { "epoch": 0.344367137536018, "grad_norm": 0.440208375453949, "learning_rate": 0.0001562505041223545, "loss": 0.1994, "step": 24500 }, { "epoch": 0.3445076955513388, "grad_norm": 0.46662935614585876, "learning_rate": 0.0001562120668472117, "loss": 0.1965, "step": 24510 }, { "epoch": 0.3446482535666596, "grad_norm": 0.5307490229606628, "learning_rate": 0.00015617361742691695, "loss": 0.2195, "step": 24520 }, { "epoch": 0.34478881158198044, "grad_norm": 0.6571158170700073, "learning_rate": 0.00015613515586977762, "loss": 0.1994, "step": 24530 }, { "epoch": 0.3449293695973013, "grad_norm": 0.2975288927555084, "learning_rate": 0.00015609668218410374, "loss": 0.1942, "step": 24540 }, { "epoch": 0.3450699276126221, "grad_norm": 0.4509091079235077, "learning_rate": 0.00015605819637820787, "loss": 0.1865, "step": 24550 }, { "epoch": 0.34521048562794293, "grad_norm": 0.342122346162796, "learning_rate": 0.00015601969846040523, "loss": 0.1913, "step": 24560 }, { "epoch": 0.34535104364326374, "grad_norm": 0.3601060211658478, "learning_rate": 0.00015598118843901368, "loss": 0.2014, "step": 24570 }, { "epoch": 0.34549160165858456, "grad_norm": 0.41186758875846863, "learning_rate": 0.0001559426663223537, "loss": 0.1966, "step": 24580 }, { "epoch": 0.3456321596739054, "grad_norm": 0.4192986488342285, "learning_rate": 0.0001559041321187483, "loss": 0.1782, "step": 24590 }, { "epoch": 0.34577271768922624, "grad_norm": 0.4782000184059143, "learning_rate": 0.00015586558583652319, "loss": 0.1998, "step": 24600 }, { "epoch": 0.34591327570454705, "grad_norm": 0.42880335450172424, "learning_rate": 0.0001558270274840067, "loss": 0.2009, "step": 24610 }, { "epoch": 0.34605383371986786, "grad_norm": 0.3918968141078949, "learning_rate": 0.00015578845706952965, "loss": 0.2082, "step": 24620 }, { "epoch": 0.3461943917351887, "grad_norm": 0.3660811483860016, "learning_rate": 0.00015574987460142563, "loss": 0.1895, "step": 24630 }, { "epoch": 0.34633494975050955, "grad_norm": 0.6555930376052856, "learning_rate": 0.00015571128008803068, "loss": 0.2044, "step": 24640 }, { "epoch": 0.34647550776583036, "grad_norm": 0.34414178133010864, "learning_rate": 0.0001556726735376836, "loss": 0.1901, "step": 24650 }, { "epoch": 0.3466160657811512, "grad_norm": 0.5629868507385254, "learning_rate": 0.00015563405495872557, "loss": 0.2279, "step": 24660 }, { "epoch": 0.346756623796472, "grad_norm": 0.4871220886707306, "learning_rate": 0.00015559542435950057, "loss": 0.2243, "step": 24670 }, { "epoch": 0.3468971818117928, "grad_norm": 0.3737644553184509, "learning_rate": 0.00015555678174835515, "loss": 0.1799, "step": 24680 }, { "epoch": 0.34703773982711367, "grad_norm": 0.3960535228252411, "learning_rate": 0.00015551812713363839, "loss": 0.2316, "step": 24690 }, { "epoch": 0.3471782978424345, "grad_norm": 0.32365134358406067, "learning_rate": 0.00015547946052370194, "loss": 0.168, "step": 24700 }, { "epoch": 0.3473188558577553, "grad_norm": 0.44706541299819946, "learning_rate": 0.00015544078192690014, "loss": 0.1731, "step": 24710 }, { "epoch": 0.3474594138730761, "grad_norm": 0.34758463501930237, "learning_rate": 0.00015540209135158985, "loss": 0.1784, "step": 24720 }, { "epoch": 0.3475999718883969, "grad_norm": 0.4847332239151001, "learning_rate": 0.00015536338880613054, "loss": 0.1908, "step": 24730 }, { "epoch": 0.34774052990371773, "grad_norm": 0.3961634635925293, "learning_rate": 0.00015532467429888426, "loss": 0.2029, "step": 24740 }, { "epoch": 0.3478810879190386, "grad_norm": 0.48429301381111145, "learning_rate": 0.00015528594783821564, "loss": 0.1909, "step": 24750 }, { "epoch": 0.3480216459343594, "grad_norm": 0.3571798503398895, "learning_rate": 0.00015524720943249192, "loss": 0.1983, "step": 24760 }, { "epoch": 0.3481622039496802, "grad_norm": 0.42743244767189026, "learning_rate": 0.0001552084590900829, "loss": 0.2104, "step": 24770 }, { "epoch": 0.34830276196500104, "grad_norm": 0.4831909239292145, "learning_rate": 0.00015516969681936095, "loss": 0.2138, "step": 24780 }, { "epoch": 0.34844331998032185, "grad_norm": 0.4320811927318573, "learning_rate": 0.00015513092262870105, "loss": 0.1998, "step": 24790 }, { "epoch": 0.3485838779956427, "grad_norm": 0.3804016709327698, "learning_rate": 0.00015509213652648067, "loss": 0.1848, "step": 24800 }, { "epoch": 0.34872443601096353, "grad_norm": 0.33573248982429504, "learning_rate": 0.00015505333852107998, "loss": 0.2102, "step": 24810 }, { "epoch": 0.34886499402628435, "grad_norm": 0.4067336916923523, "learning_rate": 0.00015501452862088164, "loss": 0.1986, "step": 24820 }, { "epoch": 0.34900555204160516, "grad_norm": 0.609912097454071, "learning_rate": 0.00015497570683427087, "loss": 0.1976, "step": 24830 }, { "epoch": 0.349146110056926, "grad_norm": 0.39099013805389404, "learning_rate": 0.00015493687316963555, "loss": 0.2116, "step": 24840 }, { "epoch": 0.34928666807224684, "grad_norm": 0.38108426332473755, "learning_rate": 0.000154898027635366, "loss": 0.2155, "step": 24850 }, { "epoch": 0.34942722608756766, "grad_norm": 0.382459431886673, "learning_rate": 0.0001548591702398552, "loss": 0.1883, "step": 24860 }, { "epoch": 0.34956778410288847, "grad_norm": 0.40003153681755066, "learning_rate": 0.00015482030099149864, "loss": 0.2062, "step": 24870 }, { "epoch": 0.3497083421182093, "grad_norm": 0.48427441716194153, "learning_rate": 0.00015478141989869437, "loss": 0.2063, "step": 24880 }, { "epoch": 0.3498489001335301, "grad_norm": 0.43394699692726135, "learning_rate": 0.0001547425269698431, "loss": 0.1831, "step": 24890 }, { "epoch": 0.34998945814885096, "grad_norm": 0.3585866391658783, "learning_rate": 0.00015470362221334794, "loss": 0.2122, "step": 24900 }, { "epoch": 0.3501300161641718, "grad_norm": 0.655471920967102, "learning_rate": 0.00015466470563761471, "loss": 0.1693, "step": 24910 }, { "epoch": 0.3502705741794926, "grad_norm": 0.4609263837337494, "learning_rate": 0.00015462577725105164, "loss": 0.1979, "step": 24920 }, { "epoch": 0.3504111321948134, "grad_norm": 0.43604934215545654, "learning_rate": 0.00015458683706206957, "loss": 0.1821, "step": 24930 }, { "epoch": 0.3505516902101342, "grad_norm": 0.3810408413410187, "learning_rate": 0.00015454788507908196, "loss": 0.1994, "step": 24940 }, { "epoch": 0.3506922482254551, "grad_norm": 0.39820948243141174, "learning_rate": 0.0001545089213105047, "loss": 0.2092, "step": 24950 }, { "epoch": 0.3508328062407759, "grad_norm": 0.2860207259654999, "learning_rate": 0.00015446994576475632, "loss": 0.1709, "step": 24960 }, { "epoch": 0.3509733642560967, "grad_norm": 0.36918723583221436, "learning_rate": 0.00015443095845025782, "loss": 0.2007, "step": 24970 }, { "epoch": 0.3511139222714175, "grad_norm": 0.46982356905937195, "learning_rate": 0.00015439195937543284, "loss": 0.2095, "step": 24980 }, { "epoch": 0.35125448028673834, "grad_norm": 0.35570284724235535, "learning_rate": 0.00015435294854870742, "loss": 0.1673, "step": 24990 }, { "epoch": 0.35139503830205915, "grad_norm": 0.4363248348236084, "learning_rate": 0.00015431392597851028, "loss": 0.1936, "step": 25000 }, { "epoch": 0.35139503830205915, "eval_chrf": 81.15394096300686, "eval_loss": 0.4088630974292755, "eval_runtime": 262.4779, "eval_samples_per_second": 0.381, "eval_steps_per_second": 0.015, "step": 25000 }, { "epoch": 0.35153559631738, "grad_norm": 0.4828396141529083, "learning_rate": 0.00015427489167327263, "loss": 0.2159, "step": 25010 }, { "epoch": 0.35167615433270083, "grad_norm": 0.30296817421913147, "learning_rate": 0.00015423584564142816, "loss": 0.1785, "step": 25020 }, { "epoch": 0.35181671234802164, "grad_norm": 0.4212421476840973, "learning_rate": 0.00015419678789141314, "loss": 0.1845, "step": 25030 }, { "epoch": 0.35195727036334246, "grad_norm": 0.3856677711009979, "learning_rate": 0.0001541577184316664, "loss": 0.202, "step": 25040 }, { "epoch": 0.35209782837866327, "grad_norm": 0.45877838134765625, "learning_rate": 0.00015411863727062924, "loss": 0.1786, "step": 25050 }, { "epoch": 0.35223838639398414, "grad_norm": 0.4523278474807739, "learning_rate": 0.00015407954441674552, "loss": 0.1806, "step": 25060 }, { "epoch": 0.35237894440930495, "grad_norm": 0.4404580593109131, "learning_rate": 0.00015404043987846163, "loss": 0.2065, "step": 25070 }, { "epoch": 0.35251950242462576, "grad_norm": 0.45593929290771484, "learning_rate": 0.00015400132366422652, "loss": 0.1979, "step": 25080 }, { "epoch": 0.3526600604399466, "grad_norm": 0.3615800440311432, "learning_rate": 0.0001539621957824915, "loss": 0.1534, "step": 25090 }, { "epoch": 0.3528006184552674, "grad_norm": 0.37038901448249817, "learning_rate": 0.00015392305624171065, "loss": 0.1878, "step": 25100 }, { "epoch": 0.35294117647058826, "grad_norm": 0.3618924915790558, "learning_rate": 0.0001538839050503404, "loss": 0.1869, "step": 25110 }, { "epoch": 0.35308173448590907, "grad_norm": 0.45690590143203735, "learning_rate": 0.00015384474221683968, "loss": 0.1916, "step": 25120 }, { "epoch": 0.3532222925012299, "grad_norm": 1.2872811555862427, "learning_rate": 0.00015380556774967008, "loss": 0.1642, "step": 25130 }, { "epoch": 0.3533628505165507, "grad_norm": 0.4172723591327667, "learning_rate": 0.00015376638165729553, "loss": 0.1918, "step": 25140 }, { "epoch": 0.3535034085318715, "grad_norm": 0.4361129105091095, "learning_rate": 0.00015372718394818263, "loss": 0.2063, "step": 25150 }, { "epoch": 0.3536439665471924, "grad_norm": 0.45274701714515686, "learning_rate": 0.0001536879746308004, "loss": 0.2131, "step": 25160 }, { "epoch": 0.3537845245625132, "grad_norm": 0.4044634997844696, "learning_rate": 0.00015364875371362037, "loss": 0.209, "step": 25170 }, { "epoch": 0.353925082577834, "grad_norm": 0.4239368438720703, "learning_rate": 0.0001536095212051166, "loss": 0.2017, "step": 25180 }, { "epoch": 0.3540656405931548, "grad_norm": 0.3916068375110626, "learning_rate": 0.00015357027711376567, "loss": 0.1812, "step": 25190 }, { "epoch": 0.35420619860847563, "grad_norm": 0.47555193305015564, "learning_rate": 0.00015353102144804656, "loss": 0.1949, "step": 25200 }, { "epoch": 0.3543467566237965, "grad_norm": 0.4671885371208191, "learning_rate": 0.0001534917542164409, "loss": 0.2321, "step": 25210 }, { "epoch": 0.3544873146391173, "grad_norm": 0.3739650249481201, "learning_rate": 0.00015345247542743277, "loss": 0.2, "step": 25220 }, { "epoch": 0.3546278726544381, "grad_norm": 0.3284544348716736, "learning_rate": 0.0001534131850895087, "loss": 0.1709, "step": 25230 }, { "epoch": 0.35476843066975894, "grad_norm": 0.42038440704345703, "learning_rate": 0.00015337388321115766, "loss": 0.219, "step": 25240 }, { "epoch": 0.35490898868507975, "grad_norm": 0.5038439035415649, "learning_rate": 0.00015333456980087134, "loss": 0.2001, "step": 25250 }, { "epoch": 0.35504954670040056, "grad_norm": 0.40093642473220825, "learning_rate": 0.0001532952448671437, "loss": 0.2016, "step": 25260 }, { "epoch": 0.35519010471572143, "grad_norm": 0.4847545325756073, "learning_rate": 0.0001532559084184712, "loss": 0.1905, "step": 25270 }, { "epoch": 0.35533066273104225, "grad_norm": 0.4720471501350403, "learning_rate": 0.000153216560463353, "loss": 0.1891, "step": 25280 }, { "epoch": 0.35547122074636306, "grad_norm": 0.5366537570953369, "learning_rate": 0.0001531772010102905, "loss": 0.2147, "step": 25290 }, { "epoch": 0.35561177876168387, "grad_norm": 0.35445621609687805, "learning_rate": 0.0001531378300677877, "loss": 0.2055, "step": 25300 }, { "epoch": 0.3557523367770047, "grad_norm": 0.39145347476005554, "learning_rate": 0.00015309844764435106, "loss": 0.2144, "step": 25310 }, { "epoch": 0.35589289479232555, "grad_norm": 0.5245220065116882, "learning_rate": 0.00015305905374848956, "loss": 0.1929, "step": 25320 }, { "epoch": 0.35603345280764637, "grad_norm": 0.5469962358474731, "learning_rate": 0.0001530196483887146, "loss": 0.2135, "step": 25330 }, { "epoch": 0.3561740108229672, "grad_norm": 0.4332871735095978, "learning_rate": 0.00015298023157354012, "loss": 0.1983, "step": 25340 }, { "epoch": 0.356314568838288, "grad_norm": 0.40701428055763245, "learning_rate": 0.00015294080331148245, "loss": 0.2051, "step": 25350 }, { "epoch": 0.3564551268536088, "grad_norm": 0.5671848058700562, "learning_rate": 0.00015290136361106047, "loss": 0.2128, "step": 25360 }, { "epoch": 0.3565956848689297, "grad_norm": 0.39455220103263855, "learning_rate": 0.00015286191248079547, "loss": 0.2067, "step": 25370 }, { "epoch": 0.3567362428842505, "grad_norm": 0.3148670792579651, "learning_rate": 0.0001528224499292113, "loss": 0.1832, "step": 25380 }, { "epoch": 0.3568768008995713, "grad_norm": 0.4024641215801239, "learning_rate": 0.00015278297596483417, "loss": 0.1805, "step": 25390 }, { "epoch": 0.3570173589148921, "grad_norm": 0.5334517955780029, "learning_rate": 0.0001527434905961928, "loss": 0.208, "step": 25400 }, { "epoch": 0.3571579169302129, "grad_norm": 0.4131854176521301, "learning_rate": 0.00015270399383181843, "loss": 0.1718, "step": 25410 }, { "epoch": 0.3572984749455338, "grad_norm": 0.448175847530365, "learning_rate": 0.00015266448568024464, "loss": 0.1713, "step": 25420 }, { "epoch": 0.3574390329608546, "grad_norm": 0.402320921421051, "learning_rate": 0.00015262496615000762, "loss": 0.1946, "step": 25430 }, { "epoch": 0.3575795909761754, "grad_norm": 0.4061627686023712, "learning_rate": 0.0001525854352496459, "loss": 0.2063, "step": 25440 }, { "epoch": 0.35772014899149623, "grad_norm": 0.6434004902839661, "learning_rate": 0.00015254589298770052, "loss": 0.2101, "step": 25450 }, { "epoch": 0.35786070700681705, "grad_norm": 0.38769012689590454, "learning_rate": 0.00015250633937271494, "loss": 0.1921, "step": 25460 }, { "epoch": 0.35800126502213786, "grad_norm": 0.4754445552825928, "learning_rate": 0.00015246677441323512, "loss": 0.2021, "step": 25470 }, { "epoch": 0.35814182303745873, "grad_norm": 0.4753167927265167, "learning_rate": 0.0001524271981178094, "loss": 0.2043, "step": 25480 }, { "epoch": 0.35828238105277954, "grad_norm": 0.4295427203178406, "learning_rate": 0.0001523876104949887, "loss": 0.1838, "step": 25490 }, { "epoch": 0.35842293906810035, "grad_norm": 0.4452112913131714, "learning_rate": 0.00015234801155332623, "loss": 0.1948, "step": 25500 }, { "epoch": 0.35856349708342117, "grad_norm": 0.4044558107852936, "learning_rate": 0.00015230840130137777, "loss": 0.1915, "step": 25510 }, { "epoch": 0.358704055098742, "grad_norm": 0.46111053228378296, "learning_rate": 0.0001522687797477014, "loss": 0.1995, "step": 25520 }, { "epoch": 0.35884461311406285, "grad_norm": 0.33259961009025574, "learning_rate": 0.00015222914690085783, "loss": 0.2093, "step": 25530 }, { "epoch": 0.35898517112938366, "grad_norm": 0.3852463960647583, "learning_rate": 0.00015218950276941012, "loss": 0.1865, "step": 25540 }, { "epoch": 0.3591257291447045, "grad_norm": 0.437765896320343, "learning_rate": 0.00015214984736192365, "loss": 0.2064, "step": 25550 }, { "epoch": 0.3592662871600253, "grad_norm": 0.44627997279167175, "learning_rate": 0.00015211018068696648, "loss": 0.1807, "step": 25560 }, { "epoch": 0.3594068451753461, "grad_norm": 0.4240309000015259, "learning_rate": 0.00015207050275310886, "loss": 0.1795, "step": 25570 }, { "epoch": 0.35954740319066697, "grad_norm": 0.4682501256465912, "learning_rate": 0.00015203081356892368, "loss": 0.2183, "step": 25580 }, { "epoch": 0.3596879612059878, "grad_norm": 0.5094911456108093, "learning_rate": 0.0001519911131429861, "loss": 0.19, "step": 25590 }, { "epoch": 0.3598285192213086, "grad_norm": 0.467891663312912, "learning_rate": 0.0001519514014838738, "loss": 0.1903, "step": 25600 }, { "epoch": 0.3599690772366294, "grad_norm": 0.4371752440929413, "learning_rate": 0.00015191167860016688, "loss": 0.2062, "step": 25610 }, { "epoch": 0.3601096352519502, "grad_norm": 0.3752915561199188, "learning_rate": 0.0001518719445004478, "loss": 0.1883, "step": 25620 }, { "epoch": 0.3602501932672711, "grad_norm": 0.40677115321159363, "learning_rate": 0.00015183219919330155, "loss": 0.2387, "step": 25630 }, { "epoch": 0.3603907512825919, "grad_norm": 0.406812846660614, "learning_rate": 0.00015179244268731542, "loss": 0.1941, "step": 25640 }, { "epoch": 0.3605313092979127, "grad_norm": 0.4294555187225342, "learning_rate": 0.00015175267499107923, "loss": 0.1938, "step": 25650 }, { "epoch": 0.36067186731323353, "grad_norm": 0.5252370238304138, "learning_rate": 0.00015171289611318515, "loss": 0.2067, "step": 25660 }, { "epoch": 0.36081242532855434, "grad_norm": 0.36849528551101685, "learning_rate": 0.00015167310606222778, "loss": 0.1794, "step": 25670 }, { "epoch": 0.3609529833438752, "grad_norm": 0.3125917613506317, "learning_rate": 0.00015163330484680413, "loss": 0.225, "step": 25680 }, { "epoch": 0.361093541359196, "grad_norm": 0.4415869116783142, "learning_rate": 0.00015159349247551365, "loss": 0.1617, "step": 25690 }, { "epoch": 0.36123409937451684, "grad_norm": 0.5339673161506653, "learning_rate": 0.0001515536689569582, "loss": 0.2319, "step": 25700 }, { "epoch": 0.36137465738983765, "grad_norm": 0.4298225939273834, "learning_rate": 0.000151513834299742, "loss": 0.1851, "step": 25710 }, { "epoch": 0.36151521540515846, "grad_norm": 0.45103615522384644, "learning_rate": 0.00015147398851247173, "loss": 0.18, "step": 25720 }, { "epoch": 0.3616557734204793, "grad_norm": 0.35722455382347107, "learning_rate": 0.0001514341316037564, "loss": 0.2086, "step": 25730 }, { "epoch": 0.36179633143580014, "grad_norm": 0.45123592019081116, "learning_rate": 0.00015139426358220753, "loss": 0.1824, "step": 25740 }, { "epoch": 0.36193688945112096, "grad_norm": 0.35224390029907227, "learning_rate": 0.00015135438445643894, "loss": 0.1852, "step": 25750 }, { "epoch": 0.36207744746644177, "grad_norm": 0.41913264989852905, "learning_rate": 0.00015131449423506693, "loss": 0.1771, "step": 25760 }, { "epoch": 0.3622180054817626, "grad_norm": 0.390919953584671, "learning_rate": 0.0001512745929267102, "loss": 0.1847, "step": 25770 }, { "epoch": 0.3623585634970834, "grad_norm": 0.5339962840080261, "learning_rate": 0.00015123468053998971, "loss": 0.1906, "step": 25780 }, { "epoch": 0.36249912151240427, "grad_norm": 0.4494784474372864, "learning_rate": 0.00015119475708352898, "loss": 0.1651, "step": 25790 }, { "epoch": 0.3626396795277251, "grad_norm": 0.3535462021827698, "learning_rate": 0.00015115482256595382, "loss": 0.1948, "step": 25800 }, { "epoch": 0.3627802375430459, "grad_norm": 0.35224857926368713, "learning_rate": 0.00015111487699589248, "loss": 0.1704, "step": 25810 }, { "epoch": 0.3629207955583667, "grad_norm": 0.32310375571250916, "learning_rate": 0.0001510749203819756, "loss": 0.2059, "step": 25820 }, { "epoch": 0.3630613535736875, "grad_norm": 0.31112274527549744, "learning_rate": 0.00015103495273283612, "loss": 0.1818, "step": 25830 }, { "epoch": 0.3632019115890084, "grad_norm": 0.47570037841796875, "learning_rate": 0.0001509949740571095, "loss": 0.1964, "step": 25840 }, { "epoch": 0.3633424696043292, "grad_norm": 0.5536921620368958, "learning_rate": 0.00015095498436343348, "loss": 0.2223, "step": 25850 }, { "epoch": 0.36348302761965, "grad_norm": 0.47299832105636597, "learning_rate": 0.00015091498366044822, "loss": 0.1902, "step": 25860 }, { "epoch": 0.3636235856349708, "grad_norm": 0.36159366369247437, "learning_rate": 0.00015087497195679625, "loss": 0.1644, "step": 25870 }, { "epoch": 0.36376414365029164, "grad_norm": 0.44765231013298035, "learning_rate": 0.00015083494926112248, "loss": 0.2018, "step": 25880 }, { "epoch": 0.3639047016656125, "grad_norm": 0.3738773465156555, "learning_rate": 0.0001507949155820742, "loss": 0.2001, "step": 25890 }, { "epoch": 0.3640452596809333, "grad_norm": 0.3980732262134552, "learning_rate": 0.00015075487092830106, "loss": 0.188, "step": 25900 }, { "epoch": 0.36418581769625413, "grad_norm": 0.4188894033432007, "learning_rate": 0.00015071481530845511, "loss": 0.1737, "step": 25910 }, { "epoch": 0.36432637571157495, "grad_norm": 0.3625176250934601, "learning_rate": 0.0001506747487311907, "loss": 0.2304, "step": 25920 }, { "epoch": 0.36446693372689576, "grad_norm": 0.3531343638896942, "learning_rate": 0.00015063467120516469, "loss": 0.2039, "step": 25930 }, { "epoch": 0.3646074917422166, "grad_norm": 0.3657441735267639, "learning_rate": 0.00015059458273903608, "loss": 0.175, "step": 25940 }, { "epoch": 0.36474804975753744, "grad_norm": 0.3509301245212555, "learning_rate": 0.00015055448334146644, "loss": 0.1954, "step": 25950 }, { "epoch": 0.36488860777285825, "grad_norm": 0.33539968729019165, "learning_rate": 0.00015051437302111964, "loss": 0.1806, "step": 25960 }, { "epoch": 0.36502916578817907, "grad_norm": 0.6233363151550293, "learning_rate": 0.00015047425178666188, "loss": 0.1923, "step": 25970 }, { "epoch": 0.3651697238034999, "grad_norm": 0.3398738205432892, "learning_rate": 0.0001504341196467617, "loss": 0.1756, "step": 25980 }, { "epoch": 0.3653102818188207, "grad_norm": 0.47035306692123413, "learning_rate": 0.0001503939766100901, "loss": 0.1843, "step": 25990 }, { "epoch": 0.36545083983414156, "grad_norm": 0.3709024488925934, "learning_rate": 0.00015035382268532027, "loss": 0.2017, "step": 26000 }, { "epoch": 0.36545083983414156, "eval_chrf": 80.16979195018159, "eval_loss": 0.4062730669975281, "eval_runtime": 326.8393, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.012, "step": 26000 }, { "epoch": 0.3655913978494624, "grad_norm": 0.4761894643306732, "learning_rate": 0.00015031365788112795, "loss": 0.1971, "step": 26010 }, { "epoch": 0.3657319558647832, "grad_norm": 0.4762408435344696, "learning_rate": 0.00015027348220619105, "loss": 0.2146, "step": 26020 }, { "epoch": 0.365872513880104, "grad_norm": 0.421764999628067, "learning_rate": 0.00015023329566918995, "loss": 0.2101, "step": 26030 }, { "epoch": 0.3660130718954248, "grad_norm": 0.46174123883247375, "learning_rate": 0.0001501930982788073, "loss": 0.2025, "step": 26040 }, { "epoch": 0.3661536299107457, "grad_norm": 0.37564560770988464, "learning_rate": 0.00015015289004372817, "loss": 0.1912, "step": 26050 }, { "epoch": 0.3662941879260665, "grad_norm": 0.4434491991996765, "learning_rate": 0.00015011267097263993, "loss": 0.1862, "step": 26060 }, { "epoch": 0.3664347459413873, "grad_norm": 0.3763035535812378, "learning_rate": 0.00015007244107423224, "loss": 0.2299, "step": 26070 }, { "epoch": 0.3665753039567081, "grad_norm": 0.4968195855617523, "learning_rate": 0.00015003220035719717, "loss": 0.1949, "step": 26080 }, { "epoch": 0.36671586197202893, "grad_norm": 0.44107377529144287, "learning_rate": 0.00014999194883022918, "loss": 0.1881, "step": 26090 }, { "epoch": 0.3668564199873498, "grad_norm": 0.36147475242614746, "learning_rate": 0.0001499516865020249, "loss": 0.1979, "step": 26100 }, { "epoch": 0.3669969780026706, "grad_norm": 0.31467700004577637, "learning_rate": 0.00014991141338128346, "loss": 0.1858, "step": 26110 }, { "epoch": 0.36713753601799143, "grad_norm": 0.4247540533542633, "learning_rate": 0.0001498711294767062, "loss": 0.1927, "step": 26120 }, { "epoch": 0.36727809403331224, "grad_norm": 0.5584858059883118, "learning_rate": 0.00014983083479699688, "loss": 0.1917, "step": 26130 }, { "epoch": 0.36741865204863305, "grad_norm": 0.4436037242412567, "learning_rate": 0.00014979052935086153, "loss": 0.1789, "step": 26140 }, { "epoch": 0.3675592100639539, "grad_norm": 0.366242915391922, "learning_rate": 0.00014975021314700851, "loss": 0.1944, "step": 26150 }, { "epoch": 0.36769976807927474, "grad_norm": 0.3136445879936218, "learning_rate": 0.00014970988619414857, "loss": 0.1893, "step": 26160 }, { "epoch": 0.36784032609459555, "grad_norm": 0.46261394023895264, "learning_rate": 0.00014966954850099466, "loss": 0.1855, "step": 26170 }, { "epoch": 0.36798088410991636, "grad_norm": 0.4286184310913086, "learning_rate": 0.00014962920007626214, "loss": 0.193, "step": 26180 }, { "epoch": 0.3681214421252372, "grad_norm": 0.3819252550601959, "learning_rate": 0.00014958884092866877, "loss": 0.1972, "step": 26190 }, { "epoch": 0.368262000140558, "grad_norm": 0.32119134068489075, "learning_rate": 0.0001495484710669344, "loss": 0.2079, "step": 26200 }, { "epoch": 0.36840255815587886, "grad_norm": 0.6470380425453186, "learning_rate": 0.00014950809049978135, "loss": 0.2185, "step": 26210 }, { "epoch": 0.36854311617119967, "grad_norm": 0.5115725994110107, "learning_rate": 0.0001494676992359343, "loss": 0.1962, "step": 26220 }, { "epoch": 0.3686836741865205, "grad_norm": 0.4752556383609772, "learning_rate": 0.00014942729728412008, "loss": 0.1734, "step": 26230 }, { "epoch": 0.3688242322018413, "grad_norm": 0.3664419949054718, "learning_rate": 0.00014938688465306797, "loss": 0.2078, "step": 26240 }, { "epoch": 0.3689647902171621, "grad_norm": 0.3666921555995941, "learning_rate": 0.00014934646135150947, "loss": 0.1887, "step": 26250 }, { "epoch": 0.369105348232483, "grad_norm": 0.40116775035858154, "learning_rate": 0.00014930602738817843, "loss": 0.1955, "step": 26260 }, { "epoch": 0.3692459062478038, "grad_norm": 0.4006282389163971, "learning_rate": 0.00014926558277181102, "loss": 0.1845, "step": 26270 }, { "epoch": 0.3693864642631246, "grad_norm": 0.36226609349250793, "learning_rate": 0.00014922512751114564, "loss": 0.1893, "step": 26280 }, { "epoch": 0.3695270222784454, "grad_norm": 0.42465442419052124, "learning_rate": 0.00014918466161492307, "loss": 0.1977, "step": 26290 }, { "epoch": 0.36966758029376623, "grad_norm": 0.4116226136684418, "learning_rate": 0.0001491441850918863, "loss": 0.1887, "step": 26300 }, { "epoch": 0.3698081383090871, "grad_norm": 0.4674515426158905, "learning_rate": 0.00014910369795078073, "loss": 0.1741, "step": 26310 }, { "epoch": 0.3699486963244079, "grad_norm": 0.534906268119812, "learning_rate": 0.00014906320020035397, "loss": 0.1715, "step": 26320 }, { "epoch": 0.3700892543397287, "grad_norm": 0.4622785747051239, "learning_rate": 0.00014902269184935595, "loss": 0.206, "step": 26330 }, { "epoch": 0.37022981235504954, "grad_norm": 0.4936352074146271, "learning_rate": 0.00014898217290653886, "loss": 0.1968, "step": 26340 }, { "epoch": 0.37037037037037035, "grad_norm": 0.4342193901538849, "learning_rate": 0.00014894164338065722, "loss": 0.2211, "step": 26350 }, { "epoch": 0.3705109283856912, "grad_norm": 0.3889023959636688, "learning_rate": 0.0001489011032804678, "loss": 0.2049, "step": 26360 }, { "epoch": 0.37065148640101203, "grad_norm": 0.43299949169158936, "learning_rate": 0.00014886055261472973, "loss": 0.1946, "step": 26370 }, { "epoch": 0.37079204441633284, "grad_norm": 0.41790804266929626, "learning_rate": 0.00014881999139220432, "loss": 0.2017, "step": 26380 }, { "epoch": 0.37093260243165366, "grad_norm": 0.48441413044929504, "learning_rate": 0.0001487794196216552, "loss": 0.2093, "step": 26390 }, { "epoch": 0.37107316044697447, "grad_norm": 0.48560383915901184, "learning_rate": 0.0001487388373118483, "loss": 0.2107, "step": 26400 }, { "epoch": 0.37121371846229534, "grad_norm": 0.402222216129303, "learning_rate": 0.00014869824447155184, "loss": 0.2212, "step": 26410 }, { "epoch": 0.37135427647761615, "grad_norm": 0.32909220457077026, "learning_rate": 0.0001486576411095363, "loss": 0.1932, "step": 26420 }, { "epoch": 0.37149483449293697, "grad_norm": 0.40750375390052795, "learning_rate": 0.00014861702723457433, "loss": 0.2119, "step": 26430 }, { "epoch": 0.3716353925082578, "grad_norm": 0.6265875101089478, "learning_rate": 0.00014857640285544105, "loss": 0.2163, "step": 26440 }, { "epoch": 0.3717759505235786, "grad_norm": 0.4019491374492645, "learning_rate": 0.00014853576798091369, "loss": 0.1855, "step": 26450 }, { "epoch": 0.3719165085388994, "grad_norm": 0.38702765107154846, "learning_rate": 0.0001484951226197718, "loss": 0.1856, "step": 26460 }, { "epoch": 0.3720570665542203, "grad_norm": 0.4920539855957031, "learning_rate": 0.0001484544667807972, "loss": 0.2007, "step": 26470 }, { "epoch": 0.3721976245695411, "grad_norm": 0.33784493803977966, "learning_rate": 0.00014841380047277397, "loss": 0.2243, "step": 26480 }, { "epoch": 0.3723381825848619, "grad_norm": 0.4236350357532501, "learning_rate": 0.00014837312370448848, "loss": 0.1936, "step": 26490 }, { "epoch": 0.3724787406001827, "grad_norm": 0.40542203187942505, "learning_rate": 0.00014833243648472932, "loss": 0.2115, "step": 26500 }, { "epoch": 0.3726192986155035, "grad_norm": 0.3526892364025116, "learning_rate": 0.00014829173882228736, "loss": 0.1929, "step": 26510 }, { "epoch": 0.3727598566308244, "grad_norm": 0.30464527010917664, "learning_rate": 0.00014825103072595565, "loss": 0.1994, "step": 26520 }, { "epoch": 0.3729004146461452, "grad_norm": 0.5723264217376709, "learning_rate": 0.00014821031220452966, "loss": 0.1867, "step": 26530 }, { "epoch": 0.373040972661466, "grad_norm": 0.4225793778896332, "learning_rate": 0.00014816958326680695, "loss": 0.1961, "step": 26540 }, { "epoch": 0.37318153067678683, "grad_norm": 0.3909800350666046, "learning_rate": 0.0001481288439215874, "loss": 0.1813, "step": 26550 }, { "epoch": 0.37332208869210765, "grad_norm": 0.6097882986068726, "learning_rate": 0.00014808809417767317, "loss": 0.2281, "step": 26560 }, { "epoch": 0.3734626467074285, "grad_norm": 0.34307539463043213, "learning_rate": 0.0001480473340438686, "loss": 0.2191, "step": 26570 }, { "epoch": 0.3736032047227493, "grad_norm": 0.35505470633506775, "learning_rate": 0.0001480065635289803, "loss": 0.1825, "step": 26580 }, { "epoch": 0.37374376273807014, "grad_norm": 0.334445059299469, "learning_rate": 0.00014796578264181714, "loss": 0.1772, "step": 26590 }, { "epoch": 0.37388432075339095, "grad_norm": 0.3102055788040161, "learning_rate": 0.0001479249913911902, "loss": 0.17, "step": 26600 }, { "epoch": 0.37402487876871177, "grad_norm": 0.373024046421051, "learning_rate": 0.00014788418978591288, "loss": 0.1569, "step": 26610 }, { "epoch": 0.37416543678403263, "grad_norm": 0.47594505548477173, "learning_rate": 0.00014784337783480068, "loss": 0.1946, "step": 26620 }, { "epoch": 0.37430599479935345, "grad_norm": 0.36843621730804443, "learning_rate": 0.00014780255554667143, "loss": 0.1964, "step": 26630 }, { "epoch": 0.37444655281467426, "grad_norm": 0.44522184133529663, "learning_rate": 0.00014776172293034522, "loss": 0.1773, "step": 26640 }, { "epoch": 0.3745871108299951, "grad_norm": 0.48647576570510864, "learning_rate": 0.00014772087999464427, "loss": 0.1863, "step": 26650 }, { "epoch": 0.3747276688453159, "grad_norm": 0.45451873540878296, "learning_rate": 0.0001476800267483931, "loss": 0.1915, "step": 26660 }, { "epoch": 0.37486822686063676, "grad_norm": 0.4005815088748932, "learning_rate": 0.00014763916320041843, "loss": 0.1648, "step": 26670 }, { "epoch": 0.37500878487595757, "grad_norm": 0.4150063395500183, "learning_rate": 0.00014759828935954927, "loss": 0.1978, "step": 26680 }, { "epoch": 0.3751493428912784, "grad_norm": 0.4720441401004791, "learning_rate": 0.00014755740523461673, "loss": 0.1898, "step": 26690 }, { "epoch": 0.3752899009065992, "grad_norm": 0.4665713608264923, "learning_rate": 0.0001475165108344543, "loss": 0.183, "step": 26700 }, { "epoch": 0.37543045892192, "grad_norm": 0.4463876187801361, "learning_rate": 0.0001474756061678975, "loss": 0.1882, "step": 26710 }, { "epoch": 0.3755710169372408, "grad_norm": 0.3561939299106598, "learning_rate": 0.0001474346912437843, "loss": 0.2042, "step": 26720 }, { "epoch": 0.3757115749525617, "grad_norm": 0.4115943908691406, "learning_rate": 0.00014739376607095461, "loss": 0.217, "step": 26730 }, { "epoch": 0.3758521329678825, "grad_norm": 0.5582150220870972, "learning_rate": 0.00014735283065825082, "loss": 0.1848, "step": 26740 }, { "epoch": 0.3759926909832033, "grad_norm": 0.4477035403251648, "learning_rate": 0.00014731188501451739, "loss": 0.1843, "step": 26750 }, { "epoch": 0.37613324899852413, "grad_norm": 0.4303182363510132, "learning_rate": 0.00014727092914860096, "loss": 0.208, "step": 26760 }, { "epoch": 0.37627380701384494, "grad_norm": 0.4030746519565582, "learning_rate": 0.0001472299630693505, "loss": 0.1797, "step": 26770 }, { "epoch": 0.3764143650291658, "grad_norm": 0.37013038992881775, "learning_rate": 0.0001471889867856171, "loss": 0.1991, "step": 26780 }, { "epoch": 0.3765549230444866, "grad_norm": 0.38665181398391724, "learning_rate": 0.00014714800030625407, "loss": 0.1945, "step": 26790 }, { "epoch": 0.37669548105980744, "grad_norm": 0.5154008865356445, "learning_rate": 0.0001471070036401169, "loss": 0.1846, "step": 26800 }, { "epoch": 0.37683603907512825, "grad_norm": 0.5148115754127502, "learning_rate": 0.00014706599679606338, "loss": 0.1935, "step": 26810 }, { "epoch": 0.37697659709044906, "grad_norm": 0.33753859996795654, "learning_rate": 0.00014702497978295339, "loss": 0.1942, "step": 26820 }, { "epoch": 0.37711715510576993, "grad_norm": 0.38848909735679626, "learning_rate": 0.00014698395260964904, "loss": 0.1692, "step": 26830 }, { "epoch": 0.37725771312109074, "grad_norm": 0.43191730976104736, "learning_rate": 0.00014694291528501464, "loss": 0.1856, "step": 26840 }, { "epoch": 0.37739827113641156, "grad_norm": 0.384564608335495, "learning_rate": 0.00014690186781791674, "loss": 0.1801, "step": 26850 }, { "epoch": 0.37753882915173237, "grad_norm": 0.3669491112232208, "learning_rate": 0.00014686081021722392, "loss": 0.1794, "step": 26860 }, { "epoch": 0.3776793871670532, "grad_norm": 0.4367958903312683, "learning_rate": 0.0001468197424918072, "loss": 0.1858, "step": 26870 }, { "epoch": 0.37781994518237405, "grad_norm": 0.388022243976593, "learning_rate": 0.0001467786646505396, "loss": 0.168, "step": 26880 }, { "epoch": 0.37796050319769486, "grad_norm": 0.4472343623638153, "learning_rate": 0.00014673757670229635, "loss": 0.1801, "step": 26890 }, { "epoch": 0.3781010612130157, "grad_norm": 0.36750322580337524, "learning_rate": 0.00014669647865595494, "loss": 0.1877, "step": 26900 }, { "epoch": 0.3782416192283365, "grad_norm": 0.3891570568084717, "learning_rate": 0.00014665537052039498, "loss": 0.1732, "step": 26910 }, { "epoch": 0.3783821772436573, "grad_norm": 0.41716599464416504, "learning_rate": 0.00014661425230449825, "loss": 0.1946, "step": 26920 }, { "epoch": 0.3785227352589781, "grad_norm": 0.37398362159729004, "learning_rate": 0.00014657312401714873, "loss": 0.2045, "step": 26930 }, { "epoch": 0.378663293274299, "grad_norm": 0.3927588164806366, "learning_rate": 0.0001465319856672326, "loss": 0.1902, "step": 26940 }, { "epoch": 0.3788038512896198, "grad_norm": 0.5926169157028198, "learning_rate": 0.00014649083726363818, "loss": 0.1974, "step": 26950 }, { "epoch": 0.3789444093049406, "grad_norm": 0.3898915946483612, "learning_rate": 0.000146449678815256, "loss": 0.2059, "step": 26960 }, { "epoch": 0.3790849673202614, "grad_norm": 0.4634039103984833, "learning_rate": 0.00014640851033097873, "loss": 0.2182, "step": 26970 }, { "epoch": 0.37922552533558224, "grad_norm": 0.42778122425079346, "learning_rate": 0.00014636733181970117, "loss": 0.2049, "step": 26980 }, { "epoch": 0.3793660833509031, "grad_norm": 0.42831602692604065, "learning_rate": 0.00014632614329032037, "loss": 0.1908, "step": 26990 }, { "epoch": 0.3795066413662239, "grad_norm": 0.600955605506897, "learning_rate": 0.0001462849447517355, "loss": 0.1944, "step": 27000 }, { "epoch": 0.3795066413662239, "eval_chrf": 81.85665205492397, "eval_loss": 0.4122803807258606, "eval_runtime": 268.0612, "eval_samples_per_second": 0.373, "eval_steps_per_second": 0.015, "step": 27000 }, { "epoch": 0.37964719938154473, "grad_norm": 0.45613276958465576, "learning_rate": 0.00014624373621284785, "loss": 0.1937, "step": 27010 }, { "epoch": 0.37978775739686554, "grad_norm": 0.4588741362094879, "learning_rate": 0.000146202517682561, "loss": 0.1972, "step": 27020 }, { "epoch": 0.37992831541218636, "grad_norm": 0.605856716632843, "learning_rate": 0.00014616128916978053, "loss": 0.1955, "step": 27030 }, { "epoch": 0.3800688734275072, "grad_norm": 0.5468184351921082, "learning_rate": 0.00014612005068341427, "loss": 0.2204, "step": 27040 }, { "epoch": 0.38020943144282804, "grad_norm": 0.37999919056892395, "learning_rate": 0.00014607880223237223, "loss": 0.18, "step": 27050 }, { "epoch": 0.38034998945814885, "grad_norm": 0.37661004066467285, "learning_rate": 0.00014603754382556649, "loss": 0.1853, "step": 27060 }, { "epoch": 0.38049054747346966, "grad_norm": 0.4400460720062256, "learning_rate": 0.00014599627547191133, "loss": 0.2043, "step": 27070 }, { "epoch": 0.3806311054887905, "grad_norm": 0.3990391790866852, "learning_rate": 0.00014595499718032318, "loss": 0.1889, "step": 27080 }, { "epoch": 0.38077166350411135, "grad_norm": 0.34186676144599915, "learning_rate": 0.00014591370895972058, "loss": 0.2308, "step": 27090 }, { "epoch": 0.38091222151943216, "grad_norm": 0.42443957924842834, "learning_rate": 0.00014587241081902427, "loss": 0.1986, "step": 27100 }, { "epoch": 0.381052779534753, "grad_norm": 0.4864816665649414, "learning_rate": 0.00014583110276715714, "loss": 0.178, "step": 27110 }, { "epoch": 0.3811933375500738, "grad_norm": 0.3732251524925232, "learning_rate": 0.0001457897848130441, "loss": 0.1863, "step": 27120 }, { "epoch": 0.3813338955653946, "grad_norm": 0.33789321780204773, "learning_rate": 0.00014574845696561234, "loss": 0.203, "step": 27130 }, { "epoch": 0.38147445358071547, "grad_norm": 0.3282896876335144, "learning_rate": 0.00014570711923379115, "loss": 0.1823, "step": 27140 }, { "epoch": 0.3816150115960363, "grad_norm": 0.3856814205646515, "learning_rate": 0.00014566577162651188, "loss": 0.173, "step": 27150 }, { "epoch": 0.3817555696113571, "grad_norm": 0.2681775391101837, "learning_rate": 0.00014562441415270813, "loss": 0.1774, "step": 27160 }, { "epoch": 0.3818961276266779, "grad_norm": 0.4621484875679016, "learning_rate": 0.00014558304682131556, "loss": 0.2096, "step": 27170 }, { "epoch": 0.3820366856419987, "grad_norm": 0.4743730425834656, "learning_rate": 0.000145541669641272, "loss": 0.189, "step": 27180 }, { "epoch": 0.38217724365731953, "grad_norm": 0.4920254349708557, "learning_rate": 0.00014550028262151732, "loss": 0.2073, "step": 27190 }, { "epoch": 0.3823178016726404, "grad_norm": 0.3450794219970703, "learning_rate": 0.00014545888577099364, "loss": 0.1805, "step": 27200 }, { "epoch": 0.3824583596879612, "grad_norm": 0.4046187698841095, "learning_rate": 0.0001454174790986451, "loss": 0.1976, "step": 27210 }, { "epoch": 0.382598917703282, "grad_norm": 0.38482755422592163, "learning_rate": 0.00014537606261341802, "loss": 0.1744, "step": 27220 }, { "epoch": 0.38273947571860284, "grad_norm": 0.4715237021446228, "learning_rate": 0.00014533463632426086, "loss": 0.1769, "step": 27230 }, { "epoch": 0.38288003373392365, "grad_norm": 0.3997655212879181, "learning_rate": 0.0001452932002401241, "loss": 0.1845, "step": 27240 }, { "epoch": 0.3830205917492445, "grad_norm": 0.5396472811698914, "learning_rate": 0.00014525175436996048, "loss": 0.216, "step": 27250 }, { "epoch": 0.38316114976456533, "grad_norm": 0.4229860305786133, "learning_rate": 0.00014521029872272473, "loss": 0.1819, "step": 27260 }, { "epoch": 0.38330170777988615, "grad_norm": 0.5465221405029297, "learning_rate": 0.0001451688333073737, "loss": 0.2071, "step": 27270 }, { "epoch": 0.38344226579520696, "grad_norm": 0.46100810170173645, "learning_rate": 0.00014512735813286643, "loss": 0.1994, "step": 27280 }, { "epoch": 0.3835828238105278, "grad_norm": 0.5227595567703247, "learning_rate": 0.00014508587320816404, "loss": 0.1996, "step": 27290 }, { "epoch": 0.38372338182584864, "grad_norm": 0.36178404092788696, "learning_rate": 0.00014504437854222975, "loss": 0.1898, "step": 27300 }, { "epoch": 0.38386393984116945, "grad_norm": 0.371266633272171, "learning_rate": 0.0001450028741440288, "loss": 0.1782, "step": 27310 }, { "epoch": 0.38400449785649027, "grad_norm": 0.3904975652694702, "learning_rate": 0.0001449613600225287, "loss": 0.1673, "step": 27320 }, { "epoch": 0.3841450558718111, "grad_norm": 0.3872131109237671, "learning_rate": 0.00014491983618669888, "loss": 0.1851, "step": 27330 }, { "epoch": 0.3842856138871319, "grad_norm": 0.4446090757846832, "learning_rate": 0.0001448783026455111, "loss": 0.1924, "step": 27340 }, { "epoch": 0.38442617190245276, "grad_norm": 0.3272359073162079, "learning_rate": 0.0001448367594079389, "loss": 0.1792, "step": 27350 }, { "epoch": 0.3845667299177736, "grad_norm": 0.4467935860157013, "learning_rate": 0.0001447952064829582, "loss": 0.2026, "step": 27360 }, { "epoch": 0.3847072879330944, "grad_norm": 0.40729010105133057, "learning_rate": 0.00014475364387954692, "loss": 0.2163, "step": 27370 }, { "epoch": 0.3848478459484152, "grad_norm": 0.42603397369384766, "learning_rate": 0.000144712071606685, "loss": 0.1802, "step": 27380 }, { "epoch": 0.384988403963736, "grad_norm": 0.34360334277153015, "learning_rate": 0.00014467048967335458, "loss": 0.1806, "step": 27390 }, { "epoch": 0.3851289619790569, "grad_norm": 0.5067285895347595, "learning_rate": 0.00014462889808853973, "loss": 0.1905, "step": 27400 }, { "epoch": 0.3852695199943777, "grad_norm": 0.42074596881866455, "learning_rate": 0.00014458729686122682, "loss": 0.1928, "step": 27410 }, { "epoch": 0.3854100780096985, "grad_norm": 0.4252513647079468, "learning_rate": 0.00014454568600040418, "loss": 0.179, "step": 27420 }, { "epoch": 0.3855506360250193, "grad_norm": 0.36288729310035706, "learning_rate": 0.00014450406551506215, "loss": 0.1901, "step": 27430 }, { "epoch": 0.38569119404034014, "grad_norm": 0.4799077808856964, "learning_rate": 0.0001444624354141933, "loss": 0.1747, "step": 27440 }, { "epoch": 0.38583175205566095, "grad_norm": 0.3890087902545929, "learning_rate": 0.0001444207957067922, "loss": 0.1786, "step": 27450 }, { "epoch": 0.3859723100709818, "grad_norm": 0.3378077745437622, "learning_rate": 0.0001443791464018555, "loss": 0.1662, "step": 27460 }, { "epoch": 0.38611286808630263, "grad_norm": 0.41123804450035095, "learning_rate": 0.00014433748750838191, "loss": 0.1993, "step": 27470 }, { "epoch": 0.38625342610162344, "grad_norm": 0.34629175066947937, "learning_rate": 0.00014429581903537225, "loss": 0.1914, "step": 27480 }, { "epoch": 0.38639398411694426, "grad_norm": 0.5317151546478271, "learning_rate": 0.0001442541409918294, "loss": 0.1862, "step": 27490 }, { "epoch": 0.38653454213226507, "grad_norm": 0.4020622968673706, "learning_rate": 0.00014421245338675824, "loss": 0.2017, "step": 27500 }, { "epoch": 0.38667510014758594, "grad_norm": 0.4146219491958618, "learning_rate": 0.00014417075622916585, "loss": 0.1717, "step": 27510 }, { "epoch": 0.38681565816290675, "grad_norm": 0.45714467763900757, "learning_rate": 0.00014412904952806126, "loss": 0.2013, "step": 27520 }, { "epoch": 0.38695621617822756, "grad_norm": 0.3680949807167053, "learning_rate": 0.0001440873332924556, "loss": 0.2321, "step": 27530 }, { "epoch": 0.3870967741935484, "grad_norm": 0.5289217829704285, "learning_rate": 0.00014404560753136209, "loss": 0.1907, "step": 27540 }, { "epoch": 0.3872373322088692, "grad_norm": 0.4522726833820343, "learning_rate": 0.00014400387225379594, "loss": 0.1902, "step": 27550 }, { "epoch": 0.38737789022419006, "grad_norm": 0.37127548456192017, "learning_rate": 0.00014396212746877444, "loss": 0.1961, "step": 27560 }, { "epoch": 0.38751844823951087, "grad_norm": 0.4859474301338196, "learning_rate": 0.000143920373185317, "loss": 0.209, "step": 27570 }, { "epoch": 0.3876590062548317, "grad_norm": 0.4222281873226166, "learning_rate": 0.000143878609412445, "loss": 0.156, "step": 27580 }, { "epoch": 0.3877995642701525, "grad_norm": 0.34376925230026245, "learning_rate": 0.00014383683615918188, "loss": 0.1735, "step": 27590 }, { "epoch": 0.3879401222854733, "grad_norm": 0.44502338767051697, "learning_rate": 0.00014379505343455322, "loss": 0.1869, "step": 27600 }, { "epoch": 0.3880806803007942, "grad_norm": 0.36206844449043274, "learning_rate": 0.0001437532612475865, "loss": 0.1988, "step": 27610 }, { "epoch": 0.388221238316115, "grad_norm": 0.46434277296066284, "learning_rate": 0.00014371145960731136, "loss": 0.1987, "step": 27620 }, { "epoch": 0.3883617963314358, "grad_norm": 0.41770943999290466, "learning_rate": 0.00014366964852275942, "loss": 0.1728, "step": 27630 }, { "epoch": 0.3885023543467566, "grad_norm": 0.4312964081764221, "learning_rate": 0.00014362782800296438, "loss": 0.2, "step": 27640 }, { "epoch": 0.38864291236207743, "grad_norm": 0.508865475654602, "learning_rate": 0.00014358599805696195, "loss": 0.1996, "step": 27650 }, { "epoch": 0.38878347037739824, "grad_norm": 0.4049730896949768, "learning_rate": 0.0001435441586937899, "loss": 0.2066, "step": 27660 }, { "epoch": 0.3889240283927191, "grad_norm": 0.509647011756897, "learning_rate": 0.000143502309922488, "loss": 0.207, "step": 27670 }, { "epoch": 0.3890645864080399, "grad_norm": 0.3966078758239746, "learning_rate": 0.0001434604517520981, "loss": 0.1784, "step": 27680 }, { "epoch": 0.38920514442336074, "grad_norm": 0.4309219419956207, "learning_rate": 0.00014341858419166407, "loss": 0.2181, "step": 27690 }, { "epoch": 0.38934570243868155, "grad_norm": 0.4796231687068939, "learning_rate": 0.00014337670725023173, "loss": 0.2018, "step": 27700 }, { "epoch": 0.38948626045400236, "grad_norm": 0.40402162075042725, "learning_rate": 0.00014333482093684908, "loss": 0.2026, "step": 27710 }, { "epoch": 0.38962681846932323, "grad_norm": 0.4409323036670685, "learning_rate": 0.00014329292526056597, "loss": 0.1912, "step": 27720 }, { "epoch": 0.38976737648464405, "grad_norm": 0.4390415549278259, "learning_rate": 0.0001432510202304344, "loss": 0.2037, "step": 27730 }, { "epoch": 0.38990793449996486, "grad_norm": 0.49357882142066956, "learning_rate": 0.00014320910585550838, "loss": 0.1859, "step": 27740 }, { "epoch": 0.39004849251528567, "grad_norm": 0.414014995098114, "learning_rate": 0.00014316718214484384, "loss": 0.2, "step": 27750 }, { "epoch": 0.3901890505306065, "grad_norm": 0.45711779594421387, "learning_rate": 0.00014312524910749884, "loss": 0.1848, "step": 27760 }, { "epoch": 0.39032960854592735, "grad_norm": 0.5342926979064941, "learning_rate": 0.00014308330675253346, "loss": 0.1993, "step": 27770 }, { "epoch": 0.39047016656124817, "grad_norm": 0.4213629961013794, "learning_rate": 0.00014304135508900963, "loss": 0.1966, "step": 27780 }, { "epoch": 0.390610724576569, "grad_norm": 0.4413548409938812, "learning_rate": 0.0001429993941259915, "loss": 0.1938, "step": 27790 }, { "epoch": 0.3907512825918898, "grad_norm": 0.3546472191810608, "learning_rate": 0.00014295742387254512, "loss": 0.1861, "step": 27800 }, { "epoch": 0.3908918406072106, "grad_norm": 0.4804078936576843, "learning_rate": 0.0001429154443377385, "loss": 0.2145, "step": 27810 }, { "epoch": 0.3910323986225315, "grad_norm": 0.4604095220565796, "learning_rate": 0.00014287345553064178, "loss": 0.1848, "step": 27820 }, { "epoch": 0.3911729566378523, "grad_norm": 0.36869752407073975, "learning_rate": 0.00014283145746032705, "loss": 0.1816, "step": 27830 }, { "epoch": 0.3913135146531731, "grad_norm": 0.34594419598579407, "learning_rate": 0.00014278945013586835, "loss": 0.1638, "step": 27840 }, { "epoch": 0.3914540726684939, "grad_norm": 0.41775190830230713, "learning_rate": 0.0001427474335663418, "loss": 0.22, "step": 27850 }, { "epoch": 0.3915946306838147, "grad_norm": 0.3673890233039856, "learning_rate": 0.00014270540776082543, "loss": 0.2051, "step": 27860 }, { "epoch": 0.3917351886991356, "grad_norm": 0.379302978515625, "learning_rate": 0.00014266337272839935, "loss": 0.1975, "step": 27870 }, { "epoch": 0.3918757467144564, "grad_norm": 0.4624926447868347, "learning_rate": 0.00014262132847814565, "loss": 0.1911, "step": 27880 }, { "epoch": 0.3920163047297772, "grad_norm": 0.3992692530155182, "learning_rate": 0.00014257927501914834, "loss": 0.1886, "step": 27890 }, { "epoch": 0.39215686274509803, "grad_norm": 0.3637385964393616, "learning_rate": 0.0001425372123604935, "loss": 0.1959, "step": 27900 }, { "epoch": 0.39229742076041885, "grad_norm": 0.44345909357070923, "learning_rate": 0.00014249514051126917, "loss": 0.1772, "step": 27910 }, { "epoch": 0.39243797877573966, "grad_norm": 0.44268521666526794, "learning_rate": 0.00014245305948056534, "loss": 0.187, "step": 27920 }, { "epoch": 0.39257853679106053, "grad_norm": 0.39628198742866516, "learning_rate": 0.00014241096927747408, "loss": 0.1715, "step": 27930 }, { "epoch": 0.39271909480638134, "grad_norm": 0.4096054434776306, "learning_rate": 0.00014236886991108932, "loss": 0.1934, "step": 27940 }, { "epoch": 0.39285965282170215, "grad_norm": 0.5102948546409607, "learning_rate": 0.00014232676139050706, "loss": 0.2156, "step": 27950 }, { "epoch": 0.39300021083702297, "grad_norm": 0.485239714384079, "learning_rate": 0.00014228464372482524, "loss": 0.1939, "step": 27960 }, { "epoch": 0.3931407688523438, "grad_norm": 0.44243955612182617, "learning_rate": 0.00014224251692314377, "loss": 0.2131, "step": 27970 }, { "epoch": 0.39328132686766465, "grad_norm": 0.47501078248023987, "learning_rate": 0.00014220038099456455, "loss": 0.1921, "step": 27980 }, { "epoch": 0.39342188488298546, "grad_norm": 0.5273157358169556, "learning_rate": 0.00014215823594819146, "loss": 0.2171, "step": 27990 }, { "epoch": 0.3935624428983063, "grad_norm": 0.6149890422821045, "learning_rate": 0.00014211608179313034, "loss": 0.1935, "step": 28000 }, { "epoch": 0.3935624428983063, "eval_chrf": 77.61258146719794, "eval_loss": 0.41459283232688904, "eval_runtime": 260.8751, "eval_samples_per_second": 0.383, "eval_steps_per_second": 0.015, "step": 28000 }, { "epoch": 0.3937030009136271, "grad_norm": 0.3866507411003113, "learning_rate": 0.000142073918538489, "loss": 0.1956, "step": 28010 }, { "epoch": 0.3938435589289479, "grad_norm": 0.4531294107437134, "learning_rate": 0.00014203174619337714, "loss": 0.1747, "step": 28020 }, { "epoch": 0.39398411694426877, "grad_norm": 0.5326635241508484, "learning_rate": 0.00014198956476690663, "loss": 0.19, "step": 28030 }, { "epoch": 0.3941246749595896, "grad_norm": 0.3673665523529053, "learning_rate": 0.00014194737426819108, "loss": 0.1926, "step": 28040 }, { "epoch": 0.3942652329749104, "grad_norm": 0.39492812752723694, "learning_rate": 0.00014190517470634616, "loss": 0.1817, "step": 28050 }, { "epoch": 0.3944057909902312, "grad_norm": 0.41755416989326477, "learning_rate": 0.00014186296609048946, "loss": 0.1698, "step": 28060 }, { "epoch": 0.394546349005552, "grad_norm": 0.44416022300720215, "learning_rate": 0.0001418207484297406, "loss": 0.1725, "step": 28070 }, { "epoch": 0.3946869070208729, "grad_norm": 0.44826391339302063, "learning_rate": 0.00014177852173322112, "loss": 0.1837, "step": 28080 }, { "epoch": 0.3948274650361937, "grad_norm": 0.4329129755496979, "learning_rate": 0.00014173628601005445, "loss": 0.1838, "step": 28090 }, { "epoch": 0.3949680230515145, "grad_norm": 0.4439648389816284, "learning_rate": 0.00014169404126936603, "loss": 0.1995, "step": 28100 }, { "epoch": 0.39510858106683533, "grad_norm": 0.3365096151828766, "learning_rate": 0.00014165178752028327, "loss": 0.2013, "step": 28110 }, { "epoch": 0.39524913908215614, "grad_norm": 0.4657164216041565, "learning_rate": 0.00014160952477193547, "loss": 0.2057, "step": 28120 }, { "epoch": 0.395389697097477, "grad_norm": 0.36999014019966125, "learning_rate": 0.0001415672530334539, "loss": 0.175, "step": 28130 }, { "epoch": 0.3955302551127978, "grad_norm": 0.3687359094619751, "learning_rate": 0.00014152497231397176, "loss": 0.1886, "step": 28140 }, { "epoch": 0.39567081312811864, "grad_norm": 0.5541471242904663, "learning_rate": 0.00014148268262262426, "loss": 0.192, "step": 28150 }, { "epoch": 0.39581137114343945, "grad_norm": 0.5069437623023987, "learning_rate": 0.00014144038396854842, "loss": 0.2007, "step": 28160 }, { "epoch": 0.39595192915876026, "grad_norm": 0.4327186942100525, "learning_rate": 0.0001413980763608833, "loss": 0.1938, "step": 28170 }, { "epoch": 0.3960924871740811, "grad_norm": 0.37074199318885803, "learning_rate": 0.00014135575980876982, "loss": 0.1907, "step": 28180 }, { "epoch": 0.39623304518940194, "grad_norm": 0.3386900722980499, "learning_rate": 0.00014131343432135095, "loss": 0.1834, "step": 28190 }, { "epoch": 0.39637360320472276, "grad_norm": 0.42331430315971375, "learning_rate": 0.00014127109990777145, "loss": 0.1741, "step": 28200 }, { "epoch": 0.39651416122004357, "grad_norm": 0.4919547736644745, "learning_rate": 0.0001412287565771781, "loss": 0.1972, "step": 28210 }, { "epoch": 0.3966547192353644, "grad_norm": 0.4163661301136017, "learning_rate": 0.00014118640433871956, "loss": 0.204, "step": 28220 }, { "epoch": 0.3967952772506852, "grad_norm": 0.4148716628551483, "learning_rate": 0.00014114404320154648, "loss": 0.1884, "step": 28230 }, { "epoch": 0.39693583526600607, "grad_norm": 0.3614480197429657, "learning_rate": 0.00014110167317481133, "loss": 0.1894, "step": 28240 }, { "epoch": 0.3970763932813269, "grad_norm": 0.5240013003349304, "learning_rate": 0.00014105929426766863, "loss": 0.2107, "step": 28250 }, { "epoch": 0.3972169512966477, "grad_norm": 0.44796907901763916, "learning_rate": 0.00014101690648927464, "loss": 0.1985, "step": 28260 }, { "epoch": 0.3973575093119685, "grad_norm": 0.451960951089859, "learning_rate": 0.00014097450984878774, "loss": 0.1878, "step": 28270 }, { "epoch": 0.3974980673272893, "grad_norm": 0.5274572372436523, "learning_rate": 0.00014093210435536807, "loss": 0.2039, "step": 28280 }, { "epoch": 0.3976386253426102, "grad_norm": 0.5012400150299072, "learning_rate": 0.00014088969001817777, "loss": 0.2114, "step": 28290 }, { "epoch": 0.397779183357931, "grad_norm": 0.398480087518692, "learning_rate": 0.00014084726684638086, "loss": 0.2074, "step": 28300 }, { "epoch": 0.3979197413732518, "grad_norm": 0.5972045660018921, "learning_rate": 0.00014080483484914323, "loss": 0.2121, "step": 28310 }, { "epoch": 0.3980602993885726, "grad_norm": 0.37806838750839233, "learning_rate": 0.00014076239403563277, "loss": 0.1786, "step": 28320 }, { "epoch": 0.39820085740389344, "grad_norm": 0.31259986758232117, "learning_rate": 0.0001407199444150192, "loss": 0.1867, "step": 28330 }, { "epoch": 0.3983414154192143, "grad_norm": 0.47804269194602966, "learning_rate": 0.00014067748599647418, "loss": 0.1821, "step": 28340 }, { "epoch": 0.3984819734345351, "grad_norm": 0.30588817596435547, "learning_rate": 0.0001406350187891712, "loss": 0.1988, "step": 28350 }, { "epoch": 0.39862253144985593, "grad_norm": 0.4533221423625946, "learning_rate": 0.00014059254280228574, "loss": 0.1784, "step": 28360 }, { "epoch": 0.39876308946517675, "grad_norm": 0.41111546754837036, "learning_rate": 0.0001405500580449952, "loss": 0.2089, "step": 28370 }, { "epoch": 0.39890364748049756, "grad_norm": 0.3993707597255707, "learning_rate": 0.00014050756452647873, "loss": 0.1705, "step": 28380 }, { "epoch": 0.39904420549581837, "grad_norm": 0.398714542388916, "learning_rate": 0.00014046506225591748, "loss": 0.1802, "step": 28390 }, { "epoch": 0.39918476351113924, "grad_norm": 0.4545983076095581, "learning_rate": 0.0001404225512424945, "loss": 0.2177, "step": 28400 }, { "epoch": 0.39932532152646005, "grad_norm": 0.438765287399292, "learning_rate": 0.00014038003149539468, "loss": 0.2054, "step": 28410 }, { "epoch": 0.39946587954178087, "grad_norm": 0.49160799384117126, "learning_rate": 0.0001403375030238048, "loss": 0.1798, "step": 28420 }, { "epoch": 0.3996064375571017, "grad_norm": 0.41309601068496704, "learning_rate": 0.00014029496583691358, "loss": 0.2224, "step": 28430 }, { "epoch": 0.3997469955724225, "grad_norm": 0.43208959698677063, "learning_rate": 0.0001402524199439115, "loss": 0.2046, "step": 28440 }, { "epoch": 0.39988755358774336, "grad_norm": 0.3021363914012909, "learning_rate": 0.00014020986535399113, "loss": 0.1961, "step": 28450 }, { "epoch": 0.4000281116030642, "grad_norm": 0.4002769887447357, "learning_rate": 0.00014016730207634673, "loss": 0.1969, "step": 28460 }, { "epoch": 0.400168669618385, "grad_norm": 0.3687337040901184, "learning_rate": 0.00014012473012017446, "loss": 0.2353, "step": 28470 }, { "epoch": 0.4003092276337058, "grad_norm": 0.41458526253700256, "learning_rate": 0.00014008214949467248, "loss": 0.1902, "step": 28480 }, { "epoch": 0.4004497856490266, "grad_norm": 0.3621925413608551, "learning_rate": 0.00014003956020904065, "loss": 0.1678, "step": 28490 }, { "epoch": 0.4005903436643475, "grad_norm": 0.6078202724456787, "learning_rate": 0.00013999696227248083, "loss": 0.1908, "step": 28500 }, { "epoch": 0.4007309016796683, "grad_norm": 0.4965280592441559, "learning_rate": 0.00013995435569419678, "loss": 0.1933, "step": 28510 }, { "epoch": 0.4008714596949891, "grad_norm": 0.29492101073265076, "learning_rate": 0.00013991174048339392, "loss": 0.1781, "step": 28520 }, { "epoch": 0.4010120177103099, "grad_norm": 0.3656838536262512, "learning_rate": 0.0001398691166492798, "loss": 0.1667, "step": 28530 }, { "epoch": 0.40115257572563073, "grad_norm": 0.42332929372787476, "learning_rate": 0.00013982648420106362, "loss": 0.1977, "step": 28540 }, { "epoch": 0.4012931337409516, "grad_norm": 0.5157352685928345, "learning_rate": 0.00013978384314795655, "loss": 0.1909, "step": 28550 }, { "epoch": 0.4014336917562724, "grad_norm": 0.4120025038719177, "learning_rate": 0.00013974119349917163, "loss": 0.173, "step": 28560 }, { "epoch": 0.40157424977159323, "grad_norm": 0.3541117012500763, "learning_rate": 0.00013969853526392368, "loss": 0.1934, "step": 28570 }, { "epoch": 0.40171480778691404, "grad_norm": 0.5453256368637085, "learning_rate": 0.00013965586845142943, "loss": 0.1927, "step": 28580 }, { "epoch": 0.40185536580223485, "grad_norm": 0.43507325649261475, "learning_rate": 0.00013961319307090743, "loss": 0.1848, "step": 28590 }, { "epoch": 0.4019959238175557, "grad_norm": 0.37308594584465027, "learning_rate": 0.00013957050913157814, "loss": 0.1835, "step": 28600 }, { "epoch": 0.40213648183287654, "grad_norm": 0.4535294473171234, "learning_rate": 0.0001395278166426638, "loss": 0.2153, "step": 28610 }, { "epoch": 0.40227703984819735, "grad_norm": 0.4658803939819336, "learning_rate": 0.00013948511561338858, "loss": 0.1854, "step": 28620 }, { "epoch": 0.40241759786351816, "grad_norm": 0.4867015480995178, "learning_rate": 0.0001394424060529784, "loss": 0.2026, "step": 28630 }, { "epoch": 0.402558155878839, "grad_norm": 0.4584827125072479, "learning_rate": 0.00013939968797066103, "loss": 0.1915, "step": 28640 }, { "epoch": 0.4026987138941598, "grad_norm": 0.37534889578819275, "learning_rate": 0.0001393569613756662, "loss": 0.1783, "step": 28650 }, { "epoch": 0.40283927190948066, "grad_norm": 0.3874412477016449, "learning_rate": 0.00013931422627722534, "loss": 0.1889, "step": 28660 }, { "epoch": 0.40297982992480147, "grad_norm": 0.5786199569702148, "learning_rate": 0.0001392714826845718, "loss": 0.2024, "step": 28670 }, { "epoch": 0.4031203879401223, "grad_norm": 0.4794784188270569, "learning_rate": 0.00013922873060694075, "loss": 0.1832, "step": 28680 }, { "epoch": 0.4032609459554431, "grad_norm": 0.6809529066085815, "learning_rate": 0.00013918597005356916, "loss": 0.2182, "step": 28690 }, { "epoch": 0.4034015039707639, "grad_norm": 0.40321481227874756, "learning_rate": 0.00013914320103369587, "loss": 0.1956, "step": 28700 }, { "epoch": 0.4035420619860848, "grad_norm": 0.3870124816894531, "learning_rate": 0.0001391004235565615, "loss": 0.1873, "step": 28710 }, { "epoch": 0.4036826200014056, "grad_norm": 0.3738168179988861, "learning_rate": 0.00013905763763140858, "loss": 0.1874, "step": 28720 }, { "epoch": 0.4038231780167264, "grad_norm": 0.38982343673706055, "learning_rate": 0.00013901484326748136, "loss": 0.2103, "step": 28730 }, { "epoch": 0.4039637360320472, "grad_norm": 0.4845069348812103, "learning_rate": 0.00013897204047402605, "loss": 0.1945, "step": 28740 }, { "epoch": 0.40410429404736803, "grad_norm": 0.5125102996826172, "learning_rate": 0.00013892922926029053, "loss": 0.1922, "step": 28750 }, { "epoch": 0.4042448520626889, "grad_norm": 0.4411031901836395, "learning_rate": 0.0001388864096355246, "loss": 0.2086, "step": 28760 }, { "epoch": 0.4043854100780097, "grad_norm": 0.43539199233055115, "learning_rate": 0.00013884358160897982, "loss": 0.2, "step": 28770 }, { "epoch": 0.4045259680933305, "grad_norm": 0.388676255941391, "learning_rate": 0.00013880074518990963, "loss": 0.1872, "step": 28780 }, { "epoch": 0.40466652610865134, "grad_norm": 0.4598192572593689, "learning_rate": 0.00013875790038756923, "loss": 0.1959, "step": 28790 }, { "epoch": 0.40480708412397215, "grad_norm": 0.36540326476097107, "learning_rate": 0.00013871504721121565, "loss": 0.19, "step": 28800 }, { "epoch": 0.404947642139293, "grad_norm": 0.3289833962917328, "learning_rate": 0.0001386721856701077, "loss": 0.1977, "step": 28810 }, { "epoch": 0.40508820015461383, "grad_norm": 0.3701980710029602, "learning_rate": 0.00013862931577350606, "loss": 0.1801, "step": 28820 }, { "epoch": 0.40522875816993464, "grad_norm": 0.3710598647594452, "learning_rate": 0.00013858643753067318, "loss": 0.1626, "step": 28830 }, { "epoch": 0.40536931618525546, "grad_norm": 0.4302135109901428, "learning_rate": 0.00013854355095087328, "loss": 0.1876, "step": 28840 }, { "epoch": 0.40550987420057627, "grad_norm": 0.4434810280799866, "learning_rate": 0.00013850065604337244, "loss": 0.1891, "step": 28850 }, { "epoch": 0.40565043221589714, "grad_norm": 0.4210662841796875, "learning_rate": 0.0001384577528174385, "loss": 0.1802, "step": 28860 }, { "epoch": 0.40579099023121795, "grad_norm": 0.5197582244873047, "learning_rate": 0.00013841484128234113, "loss": 0.2159, "step": 28870 }, { "epoch": 0.40593154824653876, "grad_norm": 0.4137897193431854, "learning_rate": 0.00013837192144735175, "loss": 0.181, "step": 28880 }, { "epoch": 0.4060721062618596, "grad_norm": 0.5064562559127808, "learning_rate": 0.00013832899332174358, "loss": 0.2087, "step": 28890 }, { "epoch": 0.4062126642771804, "grad_norm": 0.4158742427825928, "learning_rate": 0.0001382860569147917, "loss": 0.2004, "step": 28900 }, { "epoch": 0.4063532222925012, "grad_norm": 0.4387105405330658, "learning_rate": 0.00013824311223577292, "loss": 0.197, "step": 28910 }, { "epoch": 0.4064937803078221, "grad_norm": 0.3631752133369446, "learning_rate": 0.00013820015929396585, "loss": 0.2, "step": 28920 }, { "epoch": 0.4066343383231429, "grad_norm": 0.456225723028183, "learning_rate": 0.00013815719809865082, "loss": 0.2216, "step": 28930 }, { "epoch": 0.4067748963384637, "grad_norm": 0.36056461930274963, "learning_rate": 0.0001381142286591101, "loss": 0.1935, "step": 28940 }, { "epoch": 0.4069154543537845, "grad_norm": 0.43140852451324463, "learning_rate": 0.00013807125098462756, "loss": 0.169, "step": 28950 }, { "epoch": 0.4070560123691053, "grad_norm": 0.29992830753326416, "learning_rate": 0.00013802826508448897, "loss": 0.1701, "step": 28960 }, { "epoch": 0.4071965703844262, "grad_norm": 0.3830731213092804, "learning_rate": 0.00013798527096798189, "loss": 0.1916, "step": 28970 }, { "epoch": 0.407337128399747, "grad_norm": 0.3855833113193512, "learning_rate": 0.00013794226864439555, "loss": 0.1848, "step": 28980 }, { "epoch": 0.4074776864150678, "grad_norm": 0.4350045621395111, "learning_rate": 0.000137899258123021, "loss": 0.1908, "step": 28990 }, { "epoch": 0.40761824443038863, "grad_norm": 0.4594937562942505, "learning_rate": 0.00013785623941315112, "loss": 0.2323, "step": 29000 }, { "epoch": 0.40761824443038863, "eval_chrf": 81.69022545145947, "eval_loss": 0.40677130222320557, "eval_runtime": 275.1454, "eval_samples_per_second": 0.363, "eval_steps_per_second": 0.015, "step": 29000 }, { "epoch": 0.40775880244570945, "grad_norm": 0.34250855445861816, "learning_rate": 0.00013781321252408048, "loss": 0.2033, "step": 29010 }, { "epoch": 0.4078993604610303, "grad_norm": 0.436296284198761, "learning_rate": 0.00013777017746510547, "loss": 0.2063, "step": 29020 }, { "epoch": 0.4080399184763511, "grad_norm": 0.4227631390094757, "learning_rate": 0.0001377271342455242, "loss": 0.1932, "step": 29030 }, { "epoch": 0.40818047649167194, "grad_norm": 0.35152482986450195, "learning_rate": 0.00013768408287463657, "loss": 0.1758, "step": 29040 }, { "epoch": 0.40832103450699275, "grad_norm": 0.4631098508834839, "learning_rate": 0.00013764102336174427, "loss": 0.2096, "step": 29050 }, { "epoch": 0.40846159252231357, "grad_norm": 0.4547802209854126, "learning_rate": 0.0001375979557161507, "loss": 0.2019, "step": 29060 }, { "epoch": 0.40860215053763443, "grad_norm": 0.3912622332572937, "learning_rate": 0.000137554879947161, "loss": 0.188, "step": 29070 }, { "epoch": 0.40874270855295525, "grad_norm": 0.43728306889533997, "learning_rate": 0.00013751179606408216, "loss": 0.2023, "step": 29080 }, { "epoch": 0.40888326656827606, "grad_norm": 0.43088382482528687, "learning_rate": 0.0001374687040762228, "loss": 0.1858, "step": 29090 }, { "epoch": 0.4090238245835969, "grad_norm": 0.33864808082580566, "learning_rate": 0.00013742560399289343, "loss": 0.1832, "step": 29100 }, { "epoch": 0.4091643825989177, "grad_norm": 0.38373056054115295, "learning_rate": 0.00013738249582340617, "loss": 0.1974, "step": 29110 }, { "epoch": 0.4093049406142385, "grad_norm": 0.4404429495334625, "learning_rate": 0.00013733937957707498, "loss": 0.1761, "step": 29120 }, { "epoch": 0.40944549862955937, "grad_norm": 0.3572157621383667, "learning_rate": 0.00013729625526321553, "loss": 0.1828, "step": 29130 }, { "epoch": 0.4095860566448802, "grad_norm": 0.4174441993236542, "learning_rate": 0.0001372531228911452, "loss": 0.1845, "step": 29140 }, { "epoch": 0.409726614660201, "grad_norm": 0.5497799515724182, "learning_rate": 0.0001372099824701832, "loss": 0.208, "step": 29150 }, { "epoch": 0.4098671726755218, "grad_norm": 0.42890477180480957, "learning_rate": 0.0001371668340096504, "loss": 0.185, "step": 29160 }, { "epoch": 0.4100077306908426, "grad_norm": 0.4199981689453125, "learning_rate": 0.0001371236775188695, "loss": 0.1879, "step": 29170 }, { "epoch": 0.4101482887061635, "grad_norm": 0.4099089205265045, "learning_rate": 0.00013708051300716478, "loss": 0.196, "step": 29180 }, { "epoch": 0.4102888467214843, "grad_norm": 0.5033042430877686, "learning_rate": 0.0001370373404838624, "loss": 0.1872, "step": 29190 }, { "epoch": 0.4104294047368051, "grad_norm": 0.36574870347976685, "learning_rate": 0.0001369941599582901, "loss": 0.19, "step": 29200 }, { "epoch": 0.4105699627521259, "grad_norm": 0.4706093668937683, "learning_rate": 0.0001369509714397776, "loss": 0.192, "step": 29210 }, { "epoch": 0.41071052076744674, "grad_norm": 0.4324891269207001, "learning_rate": 0.0001369077749376561, "loss": 0.2115, "step": 29220 }, { "epoch": 0.4108510787827676, "grad_norm": 0.38830357789993286, "learning_rate": 0.0001368645704612586, "loss": 0.1933, "step": 29230 }, { "epoch": 0.4109916367980884, "grad_norm": 0.44283023476600647, "learning_rate": 0.0001368213580199199, "loss": 0.1897, "step": 29240 }, { "epoch": 0.41113219481340924, "grad_norm": 0.3675142228603363, "learning_rate": 0.00013677813762297638, "loss": 0.2035, "step": 29250 }, { "epoch": 0.41127275282873005, "grad_norm": 0.4156498312950134, "learning_rate": 0.00013673490927976626, "loss": 0.1758, "step": 29260 }, { "epoch": 0.41141331084405086, "grad_norm": 0.43260544538497925, "learning_rate": 0.00013669167299962945, "loss": 0.177, "step": 29270 }, { "epoch": 0.41155386885937173, "grad_norm": 0.38720545172691345, "learning_rate": 0.00013664842879190756, "loss": 0.1986, "step": 29280 }, { "epoch": 0.41169442687469254, "grad_norm": 0.453714519739151, "learning_rate": 0.00013660517666594387, "loss": 0.1853, "step": 29290 }, { "epoch": 0.41183498489001336, "grad_norm": 0.3589514493942261, "learning_rate": 0.00013656191663108345, "loss": 0.2112, "step": 29300 }, { "epoch": 0.41197554290533417, "grad_norm": 0.2884405553340912, "learning_rate": 0.00013651864869667306, "loss": 0.1964, "step": 29310 }, { "epoch": 0.412116100920655, "grad_norm": 0.4473627507686615, "learning_rate": 0.00013647537287206108, "loss": 0.1715, "step": 29320 }, { "epoch": 0.41225665893597585, "grad_norm": 0.37632375955581665, "learning_rate": 0.00013643208916659774, "loss": 0.1693, "step": 29330 }, { "epoch": 0.41239721695129666, "grad_norm": 0.5020673274993896, "learning_rate": 0.00013638879758963486, "loss": 0.1828, "step": 29340 }, { "epoch": 0.4125377749666175, "grad_norm": 0.33215266466140747, "learning_rate": 0.00013634549815052597, "loss": 0.1789, "step": 29350 }, { "epoch": 0.4126783329819383, "grad_norm": 0.4781617224216461, "learning_rate": 0.00013630219085862639, "loss": 0.1757, "step": 29360 }, { "epoch": 0.4128188909972591, "grad_norm": 0.5313750505447388, "learning_rate": 0.000136258875723293, "loss": 0.1863, "step": 29370 }, { "epoch": 0.4129594490125799, "grad_norm": 0.45688632130622864, "learning_rate": 0.00013621555275388452, "loss": 0.1863, "step": 29380 }, { "epoch": 0.4131000070279008, "grad_norm": 0.38106387853622437, "learning_rate": 0.00013617222195976125, "loss": 0.1726, "step": 29390 }, { "epoch": 0.4132405650432216, "grad_norm": 0.3599858582019806, "learning_rate": 0.0001361288833502852, "loss": 0.1556, "step": 29400 }, { "epoch": 0.4133811230585424, "grad_norm": 0.5172914862632751, "learning_rate": 0.00013608553693482013, "loss": 0.1913, "step": 29410 }, { "epoch": 0.4135216810738632, "grad_norm": 0.38247478008270264, "learning_rate": 0.00013604218272273143, "loss": 0.1804, "step": 29420 }, { "epoch": 0.41366223908918404, "grad_norm": 0.3799438178539276, "learning_rate": 0.0001359988207233862, "loss": 0.1891, "step": 29430 }, { "epoch": 0.4138027971045049, "grad_norm": 0.5469621419906616, "learning_rate": 0.0001359554509461532, "loss": 0.1884, "step": 29440 }, { "epoch": 0.4139433551198257, "grad_norm": 0.40987929701805115, "learning_rate": 0.00013591207340040286, "loss": 0.182, "step": 29450 }, { "epoch": 0.41408391313514653, "grad_norm": 0.40848323702812195, "learning_rate": 0.00013586868809550737, "loss": 0.2087, "step": 29460 }, { "epoch": 0.41422447115046734, "grad_norm": 0.3570212423801422, "learning_rate": 0.00013582529504084048, "loss": 0.2001, "step": 29470 }, { "epoch": 0.41436502916578816, "grad_norm": 0.40303704142570496, "learning_rate": 0.0001357818942457777, "loss": 0.1646, "step": 29480 }, { "epoch": 0.414505587181109, "grad_norm": 0.4557267725467682, "learning_rate": 0.0001357384857196962, "loss": 0.196, "step": 29490 }, { "epoch": 0.41464614519642984, "grad_norm": 0.41009917855262756, "learning_rate": 0.00013569506947197477, "loss": 0.2005, "step": 29500 }, { "epoch": 0.41478670321175065, "grad_norm": 0.37826165556907654, "learning_rate": 0.0001356516455119939, "loss": 0.2175, "step": 29510 }, { "epoch": 0.41492726122707146, "grad_norm": 0.2804080843925476, "learning_rate": 0.00013560821384913583, "loss": 0.1681, "step": 29520 }, { "epoch": 0.4150678192423923, "grad_norm": 0.45264995098114014, "learning_rate": 0.00013556477449278427, "loss": 0.2006, "step": 29530 }, { "epoch": 0.41520837725771315, "grad_norm": 0.42463797330856323, "learning_rate": 0.00013552132745232477, "loss": 0.1771, "step": 29540 }, { "epoch": 0.41534893527303396, "grad_norm": 0.5739278793334961, "learning_rate": 0.0001354778727371445, "loss": 0.2045, "step": 29550 }, { "epoch": 0.41548949328835477, "grad_norm": 0.3454574644565582, "learning_rate": 0.0001354344103566322, "loss": 0.1861, "step": 29560 }, { "epoch": 0.4156300513036756, "grad_norm": 0.5129108428955078, "learning_rate": 0.00013539094032017837, "loss": 0.1747, "step": 29570 }, { "epoch": 0.4157706093189964, "grad_norm": 0.37665149569511414, "learning_rate": 0.00013534746263717511, "loss": 0.1862, "step": 29580 }, { "epoch": 0.41591116733431727, "grad_norm": 0.45705080032348633, "learning_rate": 0.00013530397731701622, "loss": 0.1956, "step": 29590 }, { "epoch": 0.4160517253496381, "grad_norm": 0.40519487857818604, "learning_rate": 0.00013526048436909705, "loss": 0.2066, "step": 29600 }, { "epoch": 0.4161922833649589, "grad_norm": 0.32703614234924316, "learning_rate": 0.00013521698380281473, "loss": 0.1903, "step": 29610 }, { "epoch": 0.4163328413802797, "grad_norm": 0.6017855405807495, "learning_rate": 0.00013517347562756794, "loss": 0.2394, "step": 29620 }, { "epoch": 0.4164733993956005, "grad_norm": 0.43627041578292847, "learning_rate": 0.00013512995985275706, "loss": 0.1829, "step": 29630 }, { "epoch": 0.41661395741092133, "grad_norm": 0.30025333166122437, "learning_rate": 0.00013508643648778407, "loss": 0.2177, "step": 29640 }, { "epoch": 0.4167545154262422, "grad_norm": 0.4151211977005005, "learning_rate": 0.00013504290554205262, "loss": 0.177, "step": 29650 }, { "epoch": 0.416895073441563, "grad_norm": 0.3238172233104706, "learning_rate": 0.00013499936702496794, "loss": 0.1727, "step": 29660 }, { "epoch": 0.4170356314568838, "grad_norm": 0.45996254682540894, "learning_rate": 0.000134955820945937, "loss": 0.1613, "step": 29670 }, { "epoch": 0.41717618947220464, "grad_norm": 0.4581054449081421, "learning_rate": 0.0001349122673143683, "loss": 0.1908, "step": 29680 }, { "epoch": 0.41731674748752545, "grad_norm": 0.6247484087944031, "learning_rate": 0.00013486870613967203, "loss": 0.2026, "step": 29690 }, { "epoch": 0.4174573055028463, "grad_norm": 0.3672480881214142, "learning_rate": 0.00013482513743126004, "loss": 0.2064, "step": 29700 }, { "epoch": 0.41759786351816713, "grad_norm": 0.4779052734375, "learning_rate": 0.0001347815611985457, "loss": 0.1943, "step": 29710 }, { "epoch": 0.41773842153348795, "grad_norm": 0.5254116654396057, "learning_rate": 0.00013473797745094409, "loss": 0.1954, "step": 29720 }, { "epoch": 0.41787897954880876, "grad_norm": 0.35492080450057983, "learning_rate": 0.00013469438619787192, "loss": 0.1879, "step": 29730 }, { "epoch": 0.4180195375641296, "grad_norm": 0.3733363747596741, "learning_rate": 0.00013465078744874745, "loss": 0.1801, "step": 29740 }, { "epoch": 0.41816009557945044, "grad_norm": 0.4023160934448242, "learning_rate": 0.00013460718121299062, "loss": 0.202, "step": 29750 }, { "epoch": 0.41830065359477125, "grad_norm": 0.4042220115661621, "learning_rate": 0.000134563567500023, "loss": 0.1929, "step": 29760 }, { "epoch": 0.41844121161009207, "grad_norm": 0.40776363015174866, "learning_rate": 0.00013451994631926773, "loss": 0.201, "step": 29770 }, { "epoch": 0.4185817696254129, "grad_norm": 0.5823947191238403, "learning_rate": 0.0001344763176801496, "loss": 0.192, "step": 29780 }, { "epoch": 0.4187223276407337, "grad_norm": 0.3720506429672241, "learning_rate": 0.00013443268159209491, "loss": 0.1861, "step": 29790 }, { "epoch": 0.41886288565605456, "grad_norm": 0.5004929304122925, "learning_rate": 0.0001343890380645318, "loss": 0.1978, "step": 29800 }, { "epoch": 0.4190034436713754, "grad_norm": 0.49414458870887756, "learning_rate": 0.00013434538710688972, "loss": 0.1811, "step": 29810 }, { "epoch": 0.4191440016866962, "grad_norm": 0.46427121758461, "learning_rate": 0.00013430172872859995, "loss": 0.1868, "step": 29820 }, { "epoch": 0.419284559702017, "grad_norm": 0.37025830149650574, "learning_rate": 0.00013425806293909532, "loss": 0.1786, "step": 29830 }, { "epoch": 0.4194251177173378, "grad_norm": 0.39013585448265076, "learning_rate": 0.00013421438974781018, "loss": 0.1801, "step": 29840 }, { "epoch": 0.4195656757326586, "grad_norm": 0.40397846698760986, "learning_rate": 0.00013417070916418056, "loss": 0.1753, "step": 29850 }, { "epoch": 0.4197062337479795, "grad_norm": 0.4552091956138611, "learning_rate": 0.00013412702119764406, "loss": 0.1885, "step": 29860 }, { "epoch": 0.4198467917633003, "grad_norm": 0.40092238783836365, "learning_rate": 0.00013408332585763993, "loss": 0.1877, "step": 29870 }, { "epoch": 0.4199873497786211, "grad_norm": 0.40552565455436707, "learning_rate": 0.00013403962315360887, "loss": 0.2058, "step": 29880 }, { "epoch": 0.42012790779394193, "grad_norm": 0.4105009138584137, "learning_rate": 0.00013399591309499334, "loss": 0.1835, "step": 29890 }, { "epoch": 0.42026846580926275, "grad_norm": 0.5223738551139832, "learning_rate": 0.0001339521956912373, "loss": 0.176, "step": 29900 }, { "epoch": 0.4204090238245836, "grad_norm": 0.29498058557510376, "learning_rate": 0.00013390847095178632, "loss": 0.1914, "step": 29910 }, { "epoch": 0.42054958183990443, "grad_norm": 0.41524139046669006, "learning_rate": 0.00013386473888608752, "loss": 0.1673, "step": 29920 }, { "epoch": 0.42069013985522524, "grad_norm": 0.36371296644210815, "learning_rate": 0.00013382099950358964, "loss": 0.1881, "step": 29930 }, { "epoch": 0.42083069787054606, "grad_norm": 0.34990018606185913, "learning_rate": 0.000133777252813743, "loss": 0.1753, "step": 29940 }, { "epoch": 0.42097125588586687, "grad_norm": 0.43816953897476196, "learning_rate": 0.00013373349882599947, "loss": 0.2146, "step": 29950 }, { "epoch": 0.42111181390118774, "grad_norm": 0.42917823791503906, "learning_rate": 0.0001336897375498125, "loss": 0.2063, "step": 29960 }, { "epoch": 0.42125237191650855, "grad_norm": 0.3981551229953766, "learning_rate": 0.00013364596899463719, "loss": 0.1914, "step": 29970 }, { "epoch": 0.42139292993182936, "grad_norm": 0.3675701320171356, "learning_rate": 0.0001336021931699301, "loss": 0.1757, "step": 29980 }, { "epoch": 0.4215334879471502, "grad_norm": 0.4614124596118927, "learning_rate": 0.00013355841008514944, "loss": 0.214, "step": 29990 }, { "epoch": 0.421674045962471, "grad_norm": 0.3702334761619568, "learning_rate": 0.00013351461974975496, "loss": 0.2035, "step": 30000 }, { "epoch": 0.421674045962471, "eval_chrf": 78.626866083246, "eval_loss": 0.392253577709198, "eval_runtime": 325.0765, "eval_samples_per_second": 0.308, "eval_steps_per_second": 0.012, "step": 30000 }, { "epoch": 0.42181460397779186, "grad_norm": 0.31596478819847107, "learning_rate": 0.00013347082217320794, "loss": 0.1744, "step": 30010 }, { "epoch": 0.42195516199311267, "grad_norm": 0.4133164584636688, "learning_rate": 0.0001334270173649713, "loss": 0.218, "step": 30020 }, { "epoch": 0.4220957200084335, "grad_norm": 0.40303972363471985, "learning_rate": 0.0001333832053345095, "loss": 0.1817, "step": 30030 }, { "epoch": 0.4222362780237543, "grad_norm": 0.47535112500190735, "learning_rate": 0.0001333393860912885, "loss": 0.1762, "step": 30040 }, { "epoch": 0.4223768360390751, "grad_norm": 0.4388129413127899, "learning_rate": 0.00013329555964477593, "loss": 0.1811, "step": 30050 }, { "epoch": 0.422517394054396, "grad_norm": 0.4499322772026062, "learning_rate": 0.00013325172600444084, "loss": 0.1955, "step": 30060 }, { "epoch": 0.4226579520697168, "grad_norm": 0.5237435102462769, "learning_rate": 0.00013320788517975395, "loss": 0.1679, "step": 30070 }, { "epoch": 0.4227985100850376, "grad_norm": 0.5499116778373718, "learning_rate": 0.0001331640371801875, "loss": 0.1894, "step": 30080 }, { "epoch": 0.4229390681003584, "grad_norm": 0.3641197681427002, "learning_rate": 0.00013312018201521518, "loss": 0.1929, "step": 30090 }, { "epoch": 0.42307962611567923, "grad_norm": 0.4277847707271576, "learning_rate": 0.00013307631969431243, "loss": 0.1748, "step": 30100 }, { "epoch": 0.42322018413100004, "grad_norm": 0.522125244140625, "learning_rate": 0.00013303245022695603, "loss": 0.197, "step": 30110 }, { "epoch": 0.4233607421463209, "grad_norm": 0.4268653392791748, "learning_rate": 0.00013298857362262445, "loss": 0.1718, "step": 30120 }, { "epoch": 0.4235013001616417, "grad_norm": 0.5319456458091736, "learning_rate": 0.00013294468989079764, "loss": 0.2118, "step": 30130 }, { "epoch": 0.42364185817696254, "grad_norm": 0.4997817575931549, "learning_rate": 0.0001329007990409571, "loss": 0.1686, "step": 30140 }, { "epoch": 0.42378241619228335, "grad_norm": 0.3909251391887665, "learning_rate": 0.00013285690108258584, "loss": 0.1824, "step": 30150 }, { "epoch": 0.42392297420760416, "grad_norm": 0.405780553817749, "learning_rate": 0.00013281299602516843, "loss": 0.2133, "step": 30160 }, { "epoch": 0.42406353222292503, "grad_norm": 0.37109053134918213, "learning_rate": 0.00013276908387819102, "loss": 0.2054, "step": 30170 }, { "epoch": 0.42420409023824585, "grad_norm": 0.3621840476989746, "learning_rate": 0.0001327251646511412, "loss": 0.1623, "step": 30180 }, { "epoch": 0.42434464825356666, "grad_norm": 0.367295503616333, "learning_rate": 0.00013268123835350818, "loss": 0.1961, "step": 30190 }, { "epoch": 0.42448520626888747, "grad_norm": 0.4074755609035492, "learning_rate": 0.00013263730499478261, "loss": 0.2072, "step": 30200 }, { "epoch": 0.4246257642842083, "grad_norm": 0.5425959825515747, "learning_rate": 0.00013259336458445678, "loss": 0.1978, "step": 30210 }, { "epoch": 0.42476632229952915, "grad_norm": 0.3877510726451874, "learning_rate": 0.00013254941713202432, "loss": 0.2074, "step": 30220 }, { "epoch": 0.42490688031484997, "grad_norm": 0.3716188669204712, "learning_rate": 0.00013250546264698061, "loss": 0.1942, "step": 30230 }, { "epoch": 0.4250474383301708, "grad_norm": 0.3302082121372223, "learning_rate": 0.00013246150113882234, "loss": 0.1865, "step": 30240 }, { "epoch": 0.4251879963454916, "grad_norm": 0.4123994708061218, "learning_rate": 0.0001324175326170479, "loss": 0.2006, "step": 30250 }, { "epoch": 0.4253285543608124, "grad_norm": 0.37305039167404175, "learning_rate": 0.00013237355709115706, "loss": 0.1696, "step": 30260 }, { "epoch": 0.4254691123761333, "grad_norm": 0.4247036576271057, "learning_rate": 0.00013232957457065113, "loss": 0.1904, "step": 30270 }, { "epoch": 0.4256096703914541, "grad_norm": 0.34812501072883606, "learning_rate": 0.000132285585065033, "loss": 0.1844, "step": 30280 }, { "epoch": 0.4257502284067749, "grad_norm": 0.39941826462745667, "learning_rate": 0.000132241588583807, "loss": 0.1913, "step": 30290 }, { "epoch": 0.4258907864220957, "grad_norm": 0.28224509954452515, "learning_rate": 0.000132197585136479, "loss": 0.1586, "step": 30300 }, { "epoch": 0.4260313444374165, "grad_norm": 0.4895475506782532, "learning_rate": 0.0001321535747325563, "loss": 0.173, "step": 30310 }, { "epoch": 0.4261719024527374, "grad_norm": 0.41064637899398804, "learning_rate": 0.0001321095573815479, "loss": 0.2005, "step": 30320 }, { "epoch": 0.4263124604680582, "grad_norm": 0.44213464856147766, "learning_rate": 0.000132065533092964, "loss": 0.1621, "step": 30330 }, { "epoch": 0.426453018483379, "grad_norm": 0.4305702745914459, "learning_rate": 0.00013202150187631662, "loss": 0.1737, "step": 30340 }, { "epoch": 0.42659357649869983, "grad_norm": 0.5681144595146179, "learning_rate": 0.00013197746374111902, "loss": 0.1883, "step": 30350 }, { "epoch": 0.42673413451402065, "grad_norm": 0.40257877111434937, "learning_rate": 0.0001319334186968861, "loss": 0.2054, "step": 30360 }, { "epoch": 0.42687469252934146, "grad_norm": 0.4283764958381653, "learning_rate": 0.0001318893667531342, "loss": 0.1691, "step": 30370 }, { "epoch": 0.42701525054466233, "grad_norm": 0.5085741281509399, "learning_rate": 0.0001318453079193812, "loss": 0.2176, "step": 30380 }, { "epoch": 0.42715580855998314, "grad_norm": 0.44593504071235657, "learning_rate": 0.00013180124220514637, "loss": 0.1949, "step": 30390 }, { "epoch": 0.42729636657530395, "grad_norm": 0.42691755294799805, "learning_rate": 0.00013175716961995052, "loss": 0.1924, "step": 30400 }, { "epoch": 0.42743692459062477, "grad_norm": 0.4168946444988251, "learning_rate": 0.00013171309017331604, "loss": 0.1677, "step": 30410 }, { "epoch": 0.4275774826059456, "grad_norm": 0.3309684693813324, "learning_rate": 0.00013166900387476664, "loss": 0.2184, "step": 30420 }, { "epoch": 0.42771804062126645, "grad_norm": 0.4581981897354126, "learning_rate": 0.0001316249107338276, "loss": 0.1948, "step": 30430 }, { "epoch": 0.42785859863658726, "grad_norm": 0.4287571907043457, "learning_rate": 0.00013158081076002567, "loss": 0.2007, "step": 30440 }, { "epoch": 0.4279991566519081, "grad_norm": 0.41077181696891785, "learning_rate": 0.00013153670396288904, "loss": 0.1747, "step": 30450 }, { "epoch": 0.4281397146672289, "grad_norm": 0.40592631697654724, "learning_rate": 0.00013149259035194746, "loss": 0.1802, "step": 30460 }, { "epoch": 0.4282802726825497, "grad_norm": 0.3631041646003723, "learning_rate": 0.00013144846993673207, "loss": 0.1799, "step": 30470 }, { "epoch": 0.42842083069787057, "grad_norm": 0.401694655418396, "learning_rate": 0.00013140434272677548, "loss": 0.1762, "step": 30480 }, { "epoch": 0.4285613887131914, "grad_norm": 0.4939548969268799, "learning_rate": 0.00013136020873161182, "loss": 0.1884, "step": 30490 }, { "epoch": 0.4287019467285122, "grad_norm": 0.5443631410598755, "learning_rate": 0.00013131606796077666, "loss": 0.1854, "step": 30500 }, { "epoch": 0.428842504743833, "grad_norm": 0.4527170658111572, "learning_rate": 0.000131271920423807, "loss": 0.2076, "step": 30510 }, { "epoch": 0.4289830627591538, "grad_norm": 0.42109552025794983, "learning_rate": 0.00013122776613024137, "loss": 0.2124, "step": 30520 }, { "epoch": 0.4291236207744747, "grad_norm": 0.4336777925491333, "learning_rate": 0.0001311836050896197, "loss": 0.1754, "step": 30530 }, { "epoch": 0.4292641787897955, "grad_norm": 0.2628873884677887, "learning_rate": 0.00013113943731148347, "loss": 0.1917, "step": 30540 }, { "epoch": 0.4294047368051163, "grad_norm": 0.40117138624191284, "learning_rate": 0.00013109526280537546, "loss": 0.193, "step": 30550 }, { "epoch": 0.42954529482043713, "grad_norm": 0.36322981119155884, "learning_rate": 0.00013105108158084004, "loss": 0.1799, "step": 30560 }, { "epoch": 0.42968585283575794, "grad_norm": 0.34170740842819214, "learning_rate": 0.00013100689364742296, "loss": 0.1863, "step": 30570 }, { "epoch": 0.42982641085107876, "grad_norm": 0.612261176109314, "learning_rate": 0.00013096269901467146, "loss": 0.2018, "step": 30580 }, { "epoch": 0.4299669688663996, "grad_norm": 0.42685508728027344, "learning_rate": 0.00013091849769213422, "loss": 0.1789, "step": 30590 }, { "epoch": 0.43010752688172044, "grad_norm": 0.3997404873371124, "learning_rate": 0.00013087428968936137, "loss": 0.1894, "step": 30600 }, { "epoch": 0.43024808489704125, "grad_norm": 0.38838812708854675, "learning_rate": 0.0001308300750159044, "loss": 0.1699, "step": 30610 }, { "epoch": 0.43038864291236206, "grad_norm": 0.43439820408821106, "learning_rate": 0.0001307858536813164, "loss": 0.1808, "step": 30620 }, { "epoch": 0.4305292009276829, "grad_norm": 0.3681730628013611, "learning_rate": 0.00013074162569515172, "loss": 0.1899, "step": 30630 }, { "epoch": 0.43066975894300374, "grad_norm": 0.46958011388778687, "learning_rate": 0.0001306973910669663, "loss": 0.1861, "step": 30640 }, { "epoch": 0.43081031695832456, "grad_norm": 0.39444705843925476, "learning_rate": 0.00013065314980631747, "loss": 0.1974, "step": 30650 }, { "epoch": 0.43095087497364537, "grad_norm": 0.4495857059955597, "learning_rate": 0.00013060890192276393, "loss": 0.1887, "step": 30660 }, { "epoch": 0.4310914329889662, "grad_norm": 0.417550653219223, "learning_rate": 0.00013056464742586583, "loss": 0.1893, "step": 30670 }, { "epoch": 0.431231991004287, "grad_norm": 0.4482434391975403, "learning_rate": 0.0001305203863251848, "loss": 0.1882, "step": 30680 }, { "epoch": 0.43137254901960786, "grad_norm": 0.44060251116752625, "learning_rate": 0.00013047611863028396, "loss": 0.1964, "step": 30690 }, { "epoch": 0.4315131070349287, "grad_norm": 0.3529464900493622, "learning_rate": 0.00013043184435072763, "loss": 0.18, "step": 30700 }, { "epoch": 0.4316536650502495, "grad_norm": 0.4485524296760559, "learning_rate": 0.00013038756349608176, "loss": 0.1757, "step": 30710 }, { "epoch": 0.4317942230655703, "grad_norm": 0.4147986173629761, "learning_rate": 0.00013034327607591364, "loss": 0.1702, "step": 30720 }, { "epoch": 0.4319347810808911, "grad_norm": 0.46421390771865845, "learning_rate": 0.000130298982099792, "loss": 0.1745, "step": 30730 }, { "epoch": 0.432075339096212, "grad_norm": 0.4629431962966919, "learning_rate": 0.00013025468157728696, "loss": 0.2061, "step": 30740 }, { "epoch": 0.4322158971115328, "grad_norm": 0.34252962470054626, "learning_rate": 0.0001302103745179701, "loss": 0.1708, "step": 30750 }, { "epoch": 0.4323564551268536, "grad_norm": 0.3832494020462036, "learning_rate": 0.00013016606093141432, "loss": 0.1782, "step": 30760 }, { "epoch": 0.4324970131421744, "grad_norm": 0.4208075702190399, "learning_rate": 0.00013012174082719405, "loss": 0.2094, "step": 30770 }, { "epoch": 0.43263757115749524, "grad_norm": 0.3315330445766449, "learning_rate": 0.00013007741421488503, "loss": 0.2001, "step": 30780 }, { "epoch": 0.4327781291728161, "grad_norm": 0.42039304971694946, "learning_rate": 0.00013003308110406446, "loss": 0.1908, "step": 30790 }, { "epoch": 0.4329186871881369, "grad_norm": 0.5534359216690063, "learning_rate": 0.00012998874150431099, "loss": 0.1977, "step": 30800 }, { "epoch": 0.43305924520345773, "grad_norm": 0.3723289668560028, "learning_rate": 0.00012994439542520452, "loss": 0.191, "step": 30810 }, { "epoch": 0.43319980321877855, "grad_norm": 0.5126309394836426, "learning_rate": 0.00012990004287632647, "loss": 0.1958, "step": 30820 }, { "epoch": 0.43334036123409936, "grad_norm": 0.4108158349990845, "learning_rate": 0.00012985568386725962, "loss": 0.177, "step": 30830 }, { "epoch": 0.43348091924942017, "grad_norm": 0.440837562084198, "learning_rate": 0.0001298113184075882, "loss": 0.1865, "step": 30840 }, { "epoch": 0.43362147726474104, "grad_norm": 0.43276897072792053, "learning_rate": 0.00012976694650689778, "loss": 0.1736, "step": 30850 }, { "epoch": 0.43376203528006185, "grad_norm": 0.3253236413002014, "learning_rate": 0.0001297225681747753, "loss": 0.1736, "step": 30860 }, { "epoch": 0.43390259329538267, "grad_norm": 0.4109024703502655, "learning_rate": 0.00012967818342080913, "loss": 0.1978, "step": 30870 }, { "epoch": 0.4340431513107035, "grad_norm": 0.5015186071395874, "learning_rate": 0.00012963379225458904, "loss": 0.1845, "step": 30880 }, { "epoch": 0.4341837093260243, "grad_norm": 0.35976794362068176, "learning_rate": 0.00012958939468570615, "loss": 0.1916, "step": 30890 }, { "epoch": 0.43432426734134516, "grad_norm": 0.39229390025138855, "learning_rate": 0.00012954499072375297, "loss": 0.2056, "step": 30900 }, { "epoch": 0.434464825356666, "grad_norm": 0.469139039516449, "learning_rate": 0.0001295005803783234, "loss": 0.2059, "step": 30910 }, { "epoch": 0.4346053833719868, "grad_norm": 0.45978787541389465, "learning_rate": 0.0001294561636590127, "loss": 0.2012, "step": 30920 }, { "epoch": 0.4347459413873076, "grad_norm": 0.3263724148273468, "learning_rate": 0.0001294117405754176, "loss": 0.1946, "step": 30930 }, { "epoch": 0.4348864994026284, "grad_norm": 0.4245821237564087, "learning_rate": 0.00012936731113713607, "loss": 0.1807, "step": 30940 }, { "epoch": 0.4350270574179493, "grad_norm": 0.4069032669067383, "learning_rate": 0.0001293228753537675, "loss": 0.1967, "step": 30950 }, { "epoch": 0.4351676154332701, "grad_norm": 0.3639621436595917, "learning_rate": 0.00012927843323491268, "loss": 0.1872, "step": 30960 }, { "epoch": 0.4353081734485909, "grad_norm": 0.394876629114151, "learning_rate": 0.00012923398479017378, "loss": 0.2026, "step": 30970 }, { "epoch": 0.4354487314639117, "grad_norm": 0.5878793597221375, "learning_rate": 0.0001291895300291543, "loss": 0.1807, "step": 30980 }, { "epoch": 0.43558928947923253, "grad_norm": 0.33681783080101013, "learning_rate": 0.00012914506896145907, "loss": 0.1824, "step": 30990 }, { "epoch": 0.4357298474945534, "grad_norm": 0.41459086537361145, "learning_rate": 0.00012910060159669436, "loss": 0.1984, "step": 31000 }, { "epoch": 0.4357298474945534, "eval_chrf": 77.59793143605809, "eval_loss": 0.3922773599624634, "eval_runtime": 325.7326, "eval_samples_per_second": 0.307, "eval_steps_per_second": 0.012, "step": 31000 }, { "epoch": 0.4358704055098742, "grad_norm": 0.29785963892936707, "learning_rate": 0.00012905612794446774, "loss": 0.202, "step": 31010 }, { "epoch": 0.436010963525195, "grad_norm": 0.40135979652404785, "learning_rate": 0.00012901164801438822, "loss": 0.177, "step": 31020 }, { "epoch": 0.43615152154051584, "grad_norm": 0.32746079564094543, "learning_rate": 0.00012896716181606612, "loss": 0.1813, "step": 31030 }, { "epoch": 0.43629207955583665, "grad_norm": 0.44277051091194153, "learning_rate": 0.00012892266935911301, "loss": 0.1676, "step": 31040 }, { "epoch": 0.4364326375711575, "grad_norm": 0.4225252866744995, "learning_rate": 0.000128878170653142, "loss": 0.175, "step": 31050 }, { "epoch": 0.43657319558647834, "grad_norm": 0.39410722255706787, "learning_rate": 0.0001288336657077674, "loss": 0.1797, "step": 31060 }, { "epoch": 0.43671375360179915, "grad_norm": 0.5188403129577637, "learning_rate": 0.000128789154532605, "loss": 0.18, "step": 31070 }, { "epoch": 0.43685431161711996, "grad_norm": 0.5571266412734985, "learning_rate": 0.0001287446371372718, "loss": 0.1728, "step": 31080 }, { "epoch": 0.4369948696324408, "grad_norm": 0.4622716009616852, "learning_rate": 0.0001287001135313862, "loss": 0.1775, "step": 31090 }, { "epoch": 0.4371354276477616, "grad_norm": 0.35365378856658936, "learning_rate": 0.00012865558372456798, "loss": 0.185, "step": 31100 }, { "epoch": 0.43727598566308246, "grad_norm": 0.41484931111335754, "learning_rate": 0.00012861104772643825, "loss": 0.1842, "step": 31110 }, { "epoch": 0.43741654367840327, "grad_norm": 0.4718504846096039, "learning_rate": 0.00012856650554661937, "loss": 0.1982, "step": 31120 }, { "epoch": 0.4375571016937241, "grad_norm": 0.31714683771133423, "learning_rate": 0.00012852195719473516, "loss": 0.1905, "step": 31130 }, { "epoch": 0.4376976597090449, "grad_norm": 0.3640306890010834, "learning_rate": 0.00012847740268041071, "loss": 0.2141, "step": 31140 }, { "epoch": 0.4378382177243657, "grad_norm": 0.36697953939437866, "learning_rate": 0.0001284328420132724, "loss": 0.1718, "step": 31150 }, { "epoch": 0.4379787757396866, "grad_norm": 0.4646686911582947, "learning_rate": 0.00012838827520294803, "loss": 0.1716, "step": 31160 }, { "epoch": 0.4381193337550074, "grad_norm": 0.3565073311328888, "learning_rate": 0.00012834370225906668, "loss": 0.1978, "step": 31170 }, { "epoch": 0.4382598917703282, "grad_norm": 0.3856450617313385, "learning_rate": 0.00012829912319125875, "loss": 0.1956, "step": 31180 }, { "epoch": 0.438400449785649, "grad_norm": 0.5122458934783936, "learning_rate": 0.00012825453800915595, "loss": 0.1863, "step": 31190 }, { "epoch": 0.43854100780096983, "grad_norm": 0.3763080835342407, "learning_rate": 0.00012820994672239135, "loss": 0.2008, "step": 31200 }, { "epoch": 0.4386815658162907, "grad_norm": 0.3721507489681244, "learning_rate": 0.00012816534934059933, "loss": 0.1913, "step": 31210 }, { "epoch": 0.4388221238316115, "grad_norm": 0.30980393290519714, "learning_rate": 0.00012812074587341562, "loss": 0.1676, "step": 31220 }, { "epoch": 0.4389626818469323, "grad_norm": 0.3242766559123993, "learning_rate": 0.00012807613633047712, "loss": 0.1652, "step": 31230 }, { "epoch": 0.43910323986225314, "grad_norm": 0.49657100439071655, "learning_rate": 0.00012803152072142222, "loss": 0.2039, "step": 31240 }, { "epoch": 0.43924379787757395, "grad_norm": 0.3150328993797302, "learning_rate": 0.00012798689905589052, "loss": 0.1749, "step": 31250 }, { "epoch": 0.4393843558928948, "grad_norm": 0.3681991398334503, "learning_rate": 0.00012794227134352298, "loss": 0.1781, "step": 31260 }, { "epoch": 0.43952491390821563, "grad_norm": 0.4745456576347351, "learning_rate": 0.00012789763759396184, "loss": 0.185, "step": 31270 }, { "epoch": 0.43966547192353644, "grad_norm": 0.4351559281349182, "learning_rate": 0.00012785299781685065, "loss": 0.1967, "step": 31280 }, { "epoch": 0.43980602993885726, "grad_norm": 0.3964342474937439, "learning_rate": 0.00012780835202183422, "loss": 0.1856, "step": 31290 }, { "epoch": 0.43994658795417807, "grad_norm": 0.5241504311561584, "learning_rate": 0.00012776370021855873, "loss": 0.2079, "step": 31300 }, { "epoch": 0.4400871459694989, "grad_norm": 0.45348280668258667, "learning_rate": 0.00012771904241667162, "loss": 0.1838, "step": 31310 }, { "epoch": 0.44022770398481975, "grad_norm": 0.3943793475627899, "learning_rate": 0.00012767437862582162, "loss": 0.1795, "step": 31320 }, { "epoch": 0.44036826200014056, "grad_norm": 0.3890320658683777, "learning_rate": 0.00012762970885565878, "loss": 0.1705, "step": 31330 }, { "epoch": 0.4405088200154614, "grad_norm": 0.34188640117645264, "learning_rate": 0.00012758503311583442, "loss": 0.1688, "step": 31340 }, { "epoch": 0.4406493780307822, "grad_norm": 0.5416295528411865, "learning_rate": 0.00012754035141600119, "loss": 0.1899, "step": 31350 }, { "epoch": 0.440789936046103, "grad_norm": 0.3821467161178589, "learning_rate": 0.00012749566376581294, "loss": 0.1727, "step": 31360 }, { "epoch": 0.44093049406142387, "grad_norm": 0.39596933126449585, "learning_rate": 0.00012745097017492494, "loss": 0.1685, "step": 31370 }, { "epoch": 0.4410710520767447, "grad_norm": 0.34711122512817383, "learning_rate": 0.0001274062706529936, "loss": 0.2023, "step": 31380 }, { "epoch": 0.4412116100920655, "grad_norm": 0.3162592053413391, "learning_rate": 0.0001273615652096767, "loss": 0.1958, "step": 31390 }, { "epoch": 0.4413521681073863, "grad_norm": 0.48744603991508484, "learning_rate": 0.00012731685385463328, "loss": 0.1977, "step": 31400 }, { "epoch": 0.4414927261227071, "grad_norm": 0.4292646050453186, "learning_rate": 0.00012727213659752363, "loss": 0.2317, "step": 31410 }, { "epoch": 0.441633284138028, "grad_norm": 0.40441256761550903, "learning_rate": 0.00012722741344800936, "loss": 0.1794, "step": 31420 }, { "epoch": 0.4417738421533488, "grad_norm": 0.48188072443008423, "learning_rate": 0.00012718268441575336, "loss": 0.2171, "step": 31430 }, { "epoch": 0.4419144001686696, "grad_norm": 0.46570271253585815, "learning_rate": 0.0001271379495104197, "loss": 0.1781, "step": 31440 }, { "epoch": 0.44205495818399043, "grad_norm": 0.32672974467277527, "learning_rate": 0.00012709320874167382, "loss": 0.176, "step": 31450 }, { "epoch": 0.44219551619931124, "grad_norm": 0.3220718801021576, "learning_rate": 0.0001270484621191824, "loss": 0.1883, "step": 31460 }, { "epoch": 0.4423360742146321, "grad_norm": 0.3279297947883606, "learning_rate": 0.00012700370965261336, "loss": 0.1882, "step": 31470 }, { "epoch": 0.4424766322299529, "grad_norm": 0.48345816135406494, "learning_rate": 0.00012695895135163586, "loss": 0.2158, "step": 31480 }, { "epoch": 0.44261719024527374, "grad_norm": 0.4876115620136261, "learning_rate": 0.00012691418722592042, "loss": 0.1797, "step": 31490 }, { "epoch": 0.44275774826059455, "grad_norm": 0.3962550461292267, "learning_rate": 0.00012686941728513873, "loss": 0.1706, "step": 31500 }, { "epoch": 0.44289830627591537, "grad_norm": 0.3843517005443573, "learning_rate": 0.00012682464153896373, "loss": 0.1948, "step": 31510 }, { "epoch": 0.44303886429123623, "grad_norm": 0.4024946391582489, "learning_rate": 0.00012677985999706967, "loss": 0.1807, "step": 31520 }, { "epoch": 0.44317942230655705, "grad_norm": 0.35814380645751953, "learning_rate": 0.00012673507266913205, "loss": 0.2129, "step": 31530 }, { "epoch": 0.44331998032187786, "grad_norm": 0.46133631467819214, "learning_rate": 0.00012669027956482758, "loss": 0.1855, "step": 31540 }, { "epoch": 0.4434605383371987, "grad_norm": 0.5115548968315125, "learning_rate": 0.00012664548069383423, "loss": 0.1896, "step": 31550 }, { "epoch": 0.4436010963525195, "grad_norm": 0.423533171415329, "learning_rate": 0.00012660067606583123, "loss": 0.2351, "step": 31560 }, { "epoch": 0.4437416543678403, "grad_norm": 0.35322490334510803, "learning_rate": 0.00012655586569049903, "loss": 0.1905, "step": 31570 }, { "epoch": 0.44388221238316117, "grad_norm": 0.4697679579257965, "learning_rate": 0.00012651104957751939, "loss": 0.173, "step": 31580 }, { "epoch": 0.444022770398482, "grad_norm": 0.36890703439712524, "learning_rate": 0.00012646622773657517, "loss": 0.1718, "step": 31590 }, { "epoch": 0.4441633284138028, "grad_norm": 0.37209486961364746, "learning_rate": 0.00012642140017735064, "loss": 0.2068, "step": 31600 }, { "epoch": 0.4443038864291236, "grad_norm": 0.46700626611709595, "learning_rate": 0.00012637656690953116, "loss": 0.2146, "step": 31610 }, { "epoch": 0.4444444444444444, "grad_norm": 0.40611961483955383, "learning_rate": 0.00012633172794280343, "loss": 0.1943, "step": 31620 }, { "epoch": 0.4445850024597653, "grad_norm": 0.3542831540107727, "learning_rate": 0.0001262868832868553, "loss": 0.1833, "step": 31630 }, { "epoch": 0.4447255604750861, "grad_norm": 0.4369039535522461, "learning_rate": 0.00012624203295137594, "loss": 0.178, "step": 31640 }, { "epoch": 0.4448661184904069, "grad_norm": 0.3356902003288269, "learning_rate": 0.0001261971769460556, "loss": 0.1878, "step": 31650 }, { "epoch": 0.4450066765057277, "grad_norm": 0.3247499167919159, "learning_rate": 0.00012615231528058593, "loss": 0.19, "step": 31660 }, { "epoch": 0.44514723452104854, "grad_norm": 0.3748258650302887, "learning_rate": 0.0001261074479646597, "loss": 0.1887, "step": 31670 }, { "epoch": 0.4452877925363694, "grad_norm": 0.4689333438873291, "learning_rate": 0.00012606257500797087, "loss": 0.1858, "step": 31680 }, { "epoch": 0.4454283505516902, "grad_norm": 0.5092548131942749, "learning_rate": 0.00012601769642021476, "loss": 0.1985, "step": 31690 }, { "epoch": 0.44556890856701103, "grad_norm": 0.35228753089904785, "learning_rate": 0.00012597281221108769, "loss": 0.1773, "step": 31700 }, { "epoch": 0.44570946658233185, "grad_norm": 0.5632361769676208, "learning_rate": 0.00012592792239028746, "loss": 0.1812, "step": 31710 }, { "epoch": 0.44585002459765266, "grad_norm": 0.4932362139225006, "learning_rate": 0.00012588302696751286, "loss": 0.2045, "step": 31720 }, { "epoch": 0.44599058261297353, "grad_norm": 0.41713523864746094, "learning_rate": 0.00012583812595246398, "loss": 0.1805, "step": 31730 }, { "epoch": 0.44613114062829434, "grad_norm": 0.4142841398715973, "learning_rate": 0.00012579321935484212, "loss": 0.1726, "step": 31740 }, { "epoch": 0.44627169864361516, "grad_norm": 0.4331299066543579, "learning_rate": 0.0001257483071843498, "loss": 0.1894, "step": 31750 }, { "epoch": 0.44641225665893597, "grad_norm": 0.3925873339176178, "learning_rate": 0.00012570338945069068, "loss": 0.1845, "step": 31760 }, { "epoch": 0.4465528146742568, "grad_norm": 0.4228861629962921, "learning_rate": 0.00012565846616356972, "loss": 0.2181, "step": 31770 }, { "epoch": 0.44669337268957765, "grad_norm": 0.4462876319885254, "learning_rate": 0.00012561353733269294, "loss": 0.1942, "step": 31780 }, { "epoch": 0.44683393070489846, "grad_norm": 0.3477708101272583, "learning_rate": 0.00012556860296776773, "loss": 0.2002, "step": 31790 }, { "epoch": 0.4469744887202193, "grad_norm": 0.36163148283958435, "learning_rate": 0.00012552366307850253, "loss": 0.1849, "step": 31800 }, { "epoch": 0.4471150467355401, "grad_norm": 0.4544931650161743, "learning_rate": 0.00012547871767460707, "loss": 0.1779, "step": 31810 }, { "epoch": 0.4472556047508609, "grad_norm": 0.45102718472480774, "learning_rate": 0.0001254337667657922, "loss": 0.1745, "step": 31820 }, { "epoch": 0.4473961627661817, "grad_norm": 0.4464181065559387, "learning_rate": 0.00012538881036177002, "loss": 0.2091, "step": 31830 }, { "epoch": 0.4475367207815026, "grad_norm": 0.45269858837127686, "learning_rate": 0.00012534384847225373, "loss": 0.217, "step": 31840 }, { "epoch": 0.4476772787968234, "grad_norm": 0.5486025214195251, "learning_rate": 0.00012529888110695785, "loss": 0.1976, "step": 31850 }, { "epoch": 0.4478178368121442, "grad_norm": 0.3856346309185028, "learning_rate": 0.00012525390827559796, "loss": 0.1894, "step": 31860 }, { "epoch": 0.447958394827465, "grad_norm": 0.5907379388809204, "learning_rate": 0.00012520892998789087, "loss": 0.1867, "step": 31870 }, { "epoch": 0.44809895284278584, "grad_norm": 0.5895049571990967, "learning_rate": 0.0001251639462535546, "loss": 0.1682, "step": 31880 }, { "epoch": 0.4482395108581067, "grad_norm": 0.3159005045890808, "learning_rate": 0.00012511895708230828, "loss": 0.1803, "step": 31890 }, { "epoch": 0.4483800688734275, "grad_norm": 0.4468821883201599, "learning_rate": 0.00012507396248387224, "loss": 0.1849, "step": 31900 }, { "epoch": 0.44852062688874833, "grad_norm": 0.35855212807655334, "learning_rate": 0.00012502896246796802, "loss": 0.1737, "step": 31910 }, { "epoch": 0.44866118490406914, "grad_norm": 0.44083160161972046, "learning_rate": 0.0001249839570443183, "loss": 0.1771, "step": 31920 }, { "epoch": 0.44880174291938996, "grad_norm": 0.37042614817619324, "learning_rate": 0.00012493894622264695, "loss": 0.1706, "step": 31930 }, { "epoch": 0.4489423009347108, "grad_norm": 0.9062139987945557, "learning_rate": 0.00012489393001267893, "loss": 0.1964, "step": 31940 }, { "epoch": 0.44908285895003164, "grad_norm": 0.27651646733283997, "learning_rate": 0.00012484890842414046, "loss": 0.1701, "step": 31950 }, { "epoch": 0.44922341696535245, "grad_norm": 0.5589680671691895, "learning_rate": 0.00012480388146675884, "loss": 0.199, "step": 31960 }, { "epoch": 0.44936397498067326, "grad_norm": 0.502112627029419, "learning_rate": 0.00012475884915026265, "loss": 0.2058, "step": 31970 }, { "epoch": 0.4495045329959941, "grad_norm": 0.38898777961730957, "learning_rate": 0.0001247138114843815, "loss": 0.1892, "step": 31980 }, { "epoch": 0.44964509101131495, "grad_norm": 0.5574616193771362, "learning_rate": 0.00012466876847884618, "loss": 0.1802, "step": 31990 }, { "epoch": 0.44978564902663576, "grad_norm": 0.36505675315856934, "learning_rate": 0.00012462372014338872, "loss": 0.1796, "step": 32000 }, { "epoch": 0.44978564902663576, "eval_chrf": 77.43063803877338, "eval_loss": 0.39680206775665283, "eval_runtime": 326.2019, "eval_samples_per_second": 0.307, "eval_steps_per_second": 0.012, "step": 32000 }, { "epoch": 0.44992620704195657, "grad_norm": 0.31887194514274597, "learning_rate": 0.00012457866648774222, "loss": 0.1848, "step": 32010 }, { "epoch": 0.4500667650572774, "grad_norm": 0.391029953956604, "learning_rate": 0.00012453360752164093, "loss": 0.172, "step": 32020 }, { "epoch": 0.4502073230725982, "grad_norm": 0.4374319911003113, "learning_rate": 0.0001244885432548203, "loss": 0.1822, "step": 32030 }, { "epoch": 0.450347881087919, "grad_norm": 0.31356281042099, "learning_rate": 0.00012444347369701682, "loss": 0.1942, "step": 32040 }, { "epoch": 0.4504884391032399, "grad_norm": 0.41861143708229065, "learning_rate": 0.0001243983988579683, "loss": 0.1839, "step": 32050 }, { "epoch": 0.4506289971185607, "grad_norm": 0.3170023560523987, "learning_rate": 0.00012435331874741356, "loss": 0.1956, "step": 32060 }, { "epoch": 0.4507695551338815, "grad_norm": 0.3818584680557251, "learning_rate": 0.00012430823337509253, "loss": 0.1619, "step": 32070 }, { "epoch": 0.4509101131492023, "grad_norm": 0.4189601540565491, "learning_rate": 0.0001242631427507464, "loss": 0.1769, "step": 32080 }, { "epoch": 0.45105067116452313, "grad_norm": 0.3937597870826721, "learning_rate": 0.00012421804688411738, "loss": 0.1737, "step": 32090 }, { "epoch": 0.451191229179844, "grad_norm": 0.49900174140930176, "learning_rate": 0.0001241729457849489, "loss": 0.2054, "step": 32100 }, { "epoch": 0.4513317871951648, "grad_norm": 0.5906989574432373, "learning_rate": 0.00012412783946298542, "loss": 0.1727, "step": 32110 }, { "epoch": 0.4514723452104856, "grad_norm": 0.38759422302246094, "learning_rate": 0.00012408272792797267, "loss": 0.1829, "step": 32120 }, { "epoch": 0.45161290322580644, "grad_norm": 0.4419613182544708, "learning_rate": 0.0001240376111896574, "loss": 0.1961, "step": 32130 }, { "epoch": 0.45175346124112725, "grad_norm": 0.4294855296611786, "learning_rate": 0.00012399248925778746, "loss": 0.1987, "step": 32140 }, { "epoch": 0.4518940192564481, "grad_norm": 0.5556253790855408, "learning_rate": 0.00012394736214211196, "loss": 0.174, "step": 32150 }, { "epoch": 0.45203457727176893, "grad_norm": 0.3596500754356384, "learning_rate": 0.00012390222985238095, "loss": 0.194, "step": 32160 }, { "epoch": 0.45217513528708975, "grad_norm": 0.4233664572238922, "learning_rate": 0.0001238570923983458, "loss": 0.2017, "step": 32170 }, { "epoch": 0.45231569330241056, "grad_norm": 0.33549150824546814, "learning_rate": 0.00012381194978975876, "loss": 0.15, "step": 32180 }, { "epoch": 0.4524562513177314, "grad_norm": 0.47828513383865356, "learning_rate": 0.0001237668020363734, "loss": 0.1949, "step": 32190 }, { "epoch": 0.45259680933305224, "grad_norm": 0.3744308054447174, "learning_rate": 0.0001237216491479443, "loss": 0.1867, "step": 32200 }, { "epoch": 0.45273736734837305, "grad_norm": 0.4806154668331146, "learning_rate": 0.0001236764911342272, "loss": 0.1859, "step": 32210 }, { "epoch": 0.45287792536369387, "grad_norm": 0.4275056719779968, "learning_rate": 0.0001236313280049789, "loss": 0.1814, "step": 32220 }, { "epoch": 0.4530184833790147, "grad_norm": 0.5100582838058472, "learning_rate": 0.00012358615976995727, "loss": 0.1869, "step": 32230 }, { "epoch": 0.4531590413943355, "grad_norm": 0.4076481759548187, "learning_rate": 0.0001235409864389214, "loss": 0.2026, "step": 32240 }, { "epoch": 0.45329959940965636, "grad_norm": 0.4245436489582062, "learning_rate": 0.0001234958080216314, "loss": 0.2016, "step": 32250 }, { "epoch": 0.4534401574249772, "grad_norm": 0.3572922348976135, "learning_rate": 0.00012345062452784854, "loss": 0.1825, "step": 32260 }, { "epoch": 0.453580715440298, "grad_norm": 0.37890714406967163, "learning_rate": 0.00012340543596733508, "loss": 0.1654, "step": 32270 }, { "epoch": 0.4537212734556188, "grad_norm": 0.3865712285041809, "learning_rate": 0.00012336024234985444, "loss": 0.2064, "step": 32280 }, { "epoch": 0.4538618314709396, "grad_norm": 0.4462614357471466, "learning_rate": 0.00012331504368517118, "loss": 0.195, "step": 32290 }, { "epoch": 0.4540023894862604, "grad_norm": 0.43747371435165405, "learning_rate": 0.00012326983998305085, "loss": 0.1932, "step": 32300 }, { "epoch": 0.4541429475015813, "grad_norm": 0.44260385632514954, "learning_rate": 0.0001232246312532602, "loss": 0.1591, "step": 32310 }, { "epoch": 0.4542835055169021, "grad_norm": 0.3652852475643158, "learning_rate": 0.00012317941750556694, "loss": 0.195, "step": 32320 }, { "epoch": 0.4544240635322229, "grad_norm": 0.3209255635738373, "learning_rate": 0.00012313419874974, "loss": 0.1853, "step": 32330 }, { "epoch": 0.45456462154754373, "grad_norm": 0.2987498641014099, "learning_rate": 0.00012308897499554923, "loss": 0.196, "step": 32340 }, { "epoch": 0.45470517956286455, "grad_norm": 0.37293121218681335, "learning_rate": 0.00012304374625276575, "loss": 0.1855, "step": 32350 }, { "epoch": 0.4548457375781854, "grad_norm": 0.3847970962524414, "learning_rate": 0.0001229985125311616, "loss": 0.1982, "step": 32360 }, { "epoch": 0.45498629559350623, "grad_norm": 0.39437276124954224, "learning_rate": 0.00012295327384050997, "loss": 0.197, "step": 32370 }, { "epoch": 0.45512685360882704, "grad_norm": 0.44739240407943726, "learning_rate": 0.0001229080301905851, "loss": 0.1794, "step": 32380 }, { "epoch": 0.45526741162414786, "grad_norm": 0.4419465661048889, "learning_rate": 0.00012286278159116237, "loss": 0.2031, "step": 32390 }, { "epoch": 0.45540796963946867, "grad_norm": 0.3559831976890564, "learning_rate": 0.0001228175280520181, "loss": 0.1791, "step": 32400 }, { "epoch": 0.45554852765478954, "grad_norm": 0.45665082335472107, "learning_rate": 0.0001227722695829298, "loss": 0.1759, "step": 32410 }, { "epoch": 0.45568908567011035, "grad_norm": 0.30191051959991455, "learning_rate": 0.0001227270061936759, "loss": 0.1737, "step": 32420 }, { "epoch": 0.45582964368543116, "grad_norm": 0.392424076795578, "learning_rate": 0.0001226817378940361, "loss": 0.1998, "step": 32430 }, { "epoch": 0.455970201700752, "grad_norm": 0.41360557079315186, "learning_rate": 0.00012263646469379098, "loss": 0.1944, "step": 32440 }, { "epoch": 0.4561107597160728, "grad_norm": 0.4178352355957031, "learning_rate": 0.00012259118660272225, "loss": 0.1905, "step": 32450 }, { "epoch": 0.45625131773139366, "grad_norm": 0.3767535984516144, "learning_rate": 0.0001225459036306127, "loss": 0.162, "step": 32460 }, { "epoch": 0.45639187574671447, "grad_norm": 0.5038419961929321, "learning_rate": 0.00012250061578724614, "loss": 0.2058, "step": 32470 }, { "epoch": 0.4565324337620353, "grad_norm": 0.48213034868240356, "learning_rate": 0.00012245532308240742, "loss": 0.219, "step": 32480 }, { "epoch": 0.4566729917773561, "grad_norm": 0.3669455945491791, "learning_rate": 0.00012241002552588248, "loss": 0.1882, "step": 32490 }, { "epoch": 0.4568135497926769, "grad_norm": 0.32814040780067444, "learning_rate": 0.00012236472312745827, "loss": 0.1834, "step": 32500 }, { "epoch": 0.4569541078079978, "grad_norm": 0.6646811366081238, "learning_rate": 0.00012231941589692285, "loss": 0.2112, "step": 32510 }, { "epoch": 0.4570946658233186, "grad_norm": 0.3640930652618408, "learning_rate": 0.0001222741038440652, "loss": 0.1853, "step": 32520 }, { "epoch": 0.4572352238386394, "grad_norm": 0.34037432074546814, "learning_rate": 0.00012222878697867547, "loss": 0.2059, "step": 32530 }, { "epoch": 0.4573757818539602, "grad_norm": 0.6122976541519165, "learning_rate": 0.0001221834653105448, "loss": 0.1895, "step": 32540 }, { "epoch": 0.45751633986928103, "grad_norm": 0.4294646084308624, "learning_rate": 0.00012213813884946536, "loss": 0.1903, "step": 32550 }, { "epoch": 0.45765689788460184, "grad_norm": 0.46151578426361084, "learning_rate": 0.00012209280760523035, "loss": 0.1875, "step": 32560 }, { "epoch": 0.4577974558999227, "grad_norm": 0.4326536953449249, "learning_rate": 0.00012204747158763406, "loss": 0.1583, "step": 32570 }, { "epoch": 0.4579380139152435, "grad_norm": 0.4379238188266754, "learning_rate": 0.00012200213080647174, "loss": 0.1881, "step": 32580 }, { "epoch": 0.45807857193056434, "grad_norm": 0.4234411418437958, "learning_rate": 0.00012195678527153967, "loss": 0.17, "step": 32590 }, { "epoch": 0.45821912994588515, "grad_norm": 0.3968019485473633, "learning_rate": 0.00012191143499263522, "loss": 0.1975, "step": 32600 }, { "epoch": 0.45835968796120596, "grad_norm": 0.36948105692863464, "learning_rate": 0.00012186607997955675, "loss": 0.1973, "step": 32610 }, { "epoch": 0.45850024597652683, "grad_norm": 0.43759265542030334, "learning_rate": 0.0001218207202421036, "loss": 0.1862, "step": 32620 }, { "epoch": 0.45864080399184765, "grad_norm": 0.45381754636764526, "learning_rate": 0.00012177535579007621, "loss": 0.171, "step": 32630 }, { "epoch": 0.45878136200716846, "grad_norm": 0.33697906136512756, "learning_rate": 0.00012172998663327604, "loss": 0.2045, "step": 32640 }, { "epoch": 0.45892192002248927, "grad_norm": 0.39810094237327576, "learning_rate": 0.00012168461278150545, "loss": 0.1777, "step": 32650 }, { "epoch": 0.4590624780378101, "grad_norm": 0.4088006019592285, "learning_rate": 0.00012163923424456793, "loss": 0.1849, "step": 32660 }, { "epoch": 0.45920303605313095, "grad_norm": 0.3532857596874237, "learning_rate": 0.00012159385103226793, "loss": 0.1818, "step": 32670 }, { "epoch": 0.45934359406845177, "grad_norm": 0.44034630060195923, "learning_rate": 0.00012154846315441095, "loss": 0.1774, "step": 32680 }, { "epoch": 0.4594841520837726, "grad_norm": 0.43760716915130615, "learning_rate": 0.00012150307062080345, "loss": 0.1635, "step": 32690 }, { "epoch": 0.4596247100990934, "grad_norm": 0.28532445430755615, "learning_rate": 0.00012145767344125294, "loss": 0.1867, "step": 32700 }, { "epoch": 0.4597652681144142, "grad_norm": 0.3794820308685303, "learning_rate": 0.0001214122716255679, "loss": 0.1779, "step": 32710 }, { "epoch": 0.4599058261297351, "grad_norm": 0.39578428864479065, "learning_rate": 0.00012136686518355782, "loss": 0.1737, "step": 32720 }, { "epoch": 0.4600463841450559, "grad_norm": 0.4092610776424408, "learning_rate": 0.00012132145412503319, "loss": 0.2062, "step": 32730 }, { "epoch": 0.4601869421603767, "grad_norm": 0.374067485332489, "learning_rate": 0.00012127603845980553, "loss": 0.1978, "step": 32740 }, { "epoch": 0.4603275001756975, "grad_norm": 0.42492353916168213, "learning_rate": 0.00012123061819768729, "loss": 0.1772, "step": 32750 }, { "epoch": 0.4604680581910183, "grad_norm": 0.40157759189605713, "learning_rate": 0.00012118519334849196, "loss": 0.197, "step": 32760 }, { "epoch": 0.4606086162063392, "grad_norm": 0.4197980463504791, "learning_rate": 0.00012113976392203404, "loss": 0.177, "step": 32770 }, { "epoch": 0.46074917422166, "grad_norm": 0.2869431972503662, "learning_rate": 0.00012109432992812894, "loss": 0.1828, "step": 32780 }, { "epoch": 0.4608897322369808, "grad_norm": 0.45081043243408203, "learning_rate": 0.00012104889137659317, "loss": 0.1928, "step": 32790 }, { "epoch": 0.46103029025230163, "grad_norm": 0.2808906137943268, "learning_rate": 0.00012100344827724411, "loss": 0.1725, "step": 32800 }, { "epoch": 0.46117084826762245, "grad_norm": 0.4212302565574646, "learning_rate": 0.00012095800063990018, "loss": 0.1993, "step": 32810 }, { "epoch": 0.46131140628294326, "grad_norm": 0.36177191138267517, "learning_rate": 0.00012091254847438078, "loss": 0.1844, "step": 32820 }, { "epoch": 0.46145196429826413, "grad_norm": 0.3451072871685028, "learning_rate": 0.0001208670917905063, "loss": 0.1936, "step": 32830 }, { "epoch": 0.46159252231358494, "grad_norm": 0.3625692129135132, "learning_rate": 0.00012082163059809807, "loss": 0.1739, "step": 32840 }, { "epoch": 0.46173308032890575, "grad_norm": 0.4096881151199341, "learning_rate": 0.00012077616490697842, "loss": 0.1796, "step": 32850 }, { "epoch": 0.46187363834422657, "grad_norm": 0.342422753572464, "learning_rate": 0.00012073069472697063, "loss": 0.1936, "step": 32860 }, { "epoch": 0.4620141963595474, "grad_norm": 0.39686256647109985, "learning_rate": 0.00012068522006789899, "loss": 0.1709, "step": 32870 }, { "epoch": 0.46215475437486825, "grad_norm": 0.2695378065109253, "learning_rate": 0.00012063974093958874, "loss": 0.1746, "step": 32880 }, { "epoch": 0.46229531239018906, "grad_norm": 0.4101853370666504, "learning_rate": 0.00012059425735186605, "loss": 0.1949, "step": 32890 }, { "epoch": 0.4624358704055099, "grad_norm": 0.3779439330101013, "learning_rate": 0.00012054876931455807, "loss": 0.1822, "step": 32900 }, { "epoch": 0.4625764284208307, "grad_norm": 0.438286691904068, "learning_rate": 0.00012050327683749296, "loss": 0.1837, "step": 32910 }, { "epoch": 0.4627169864361515, "grad_norm": 0.45026054978370667, "learning_rate": 0.00012045777993049978, "loss": 0.1652, "step": 32920 }, { "epoch": 0.46285754445147237, "grad_norm": 0.3140639662742615, "learning_rate": 0.0001204122786034086, "loss": 0.1658, "step": 32930 }, { "epoch": 0.4629981024667932, "grad_norm": 0.3761960566043854, "learning_rate": 0.00012036677286605038, "loss": 0.1733, "step": 32940 }, { "epoch": 0.463138660482114, "grad_norm": 0.4367549419403076, "learning_rate": 0.00012032126272825708, "loss": 0.192, "step": 32950 }, { "epoch": 0.4632792184974348, "grad_norm": 0.5385770201683044, "learning_rate": 0.00012027574819986159, "loss": 0.17, "step": 32960 }, { "epoch": 0.4634197765127556, "grad_norm": 0.40825560688972473, "learning_rate": 0.00012023022929069778, "loss": 0.1941, "step": 32970 }, { "epoch": 0.4635603345280765, "grad_norm": 0.3881412744522095, "learning_rate": 0.00012018470601060042, "loss": 0.1755, "step": 32980 }, { "epoch": 0.4637008925433973, "grad_norm": 0.38460129499435425, "learning_rate": 0.00012013917836940525, "loss": 0.1712, "step": 32990 }, { "epoch": 0.4638414505587181, "grad_norm": 0.3805694580078125, "learning_rate": 0.00012009364637694897, "loss": 0.1568, "step": 33000 }, { "epoch": 0.4638414505587181, "eval_chrf": 80.88196750831037, "eval_loss": 0.40006187558174133, "eval_runtime": 237.8765, "eval_samples_per_second": 0.42, "eval_steps_per_second": 0.017, "step": 33000 }, { "epoch": 0.46398200857403893, "grad_norm": 0.6292216777801514, "learning_rate": 0.00012004811004306917, "loss": 0.1922, "step": 33010 }, { "epoch": 0.46412256658935974, "grad_norm": 0.33237358927726746, "learning_rate": 0.00012000256937760445, "loss": 0.169, "step": 33020 }, { "epoch": 0.46426312460468055, "grad_norm": 0.38910937309265137, "learning_rate": 0.00011995702439039426, "loss": 0.1917, "step": 33030 }, { "epoch": 0.4644036826200014, "grad_norm": 0.31341931223869324, "learning_rate": 0.00011991147509127907, "loss": 0.1707, "step": 33040 }, { "epoch": 0.46454424063532224, "grad_norm": 0.3623914122581482, "learning_rate": 0.00011986592149010019, "loss": 0.2166, "step": 33050 }, { "epoch": 0.46468479865064305, "grad_norm": 0.40682142972946167, "learning_rate": 0.00011982036359669995, "loss": 0.194, "step": 33060 }, { "epoch": 0.46482535666596386, "grad_norm": 0.3650200664997101, "learning_rate": 0.00011977480142092155, "loss": 0.1685, "step": 33070 }, { "epoch": 0.4649659146812847, "grad_norm": 0.48915278911590576, "learning_rate": 0.00011972923497260912, "loss": 0.1666, "step": 33080 }, { "epoch": 0.46510647269660554, "grad_norm": 0.3498603403568268, "learning_rate": 0.00011968366426160778, "loss": 0.2049, "step": 33090 }, { "epoch": 0.46524703071192636, "grad_norm": 0.5278698801994324, "learning_rate": 0.00011963808929776343, "loss": 0.1731, "step": 33100 }, { "epoch": 0.46538758872724717, "grad_norm": 0.3445676267147064, "learning_rate": 0.00011959251009092302, "loss": 0.1904, "step": 33110 }, { "epoch": 0.465528146742568, "grad_norm": 0.3266010880470276, "learning_rate": 0.00011954692665093441, "loss": 0.1839, "step": 33120 }, { "epoch": 0.4656687047578888, "grad_norm": 0.3357848525047302, "learning_rate": 0.00011950133898764626, "loss": 0.1787, "step": 33130 }, { "epoch": 0.46580926277320966, "grad_norm": 0.3921014964580536, "learning_rate": 0.00011945574711090828, "loss": 0.1854, "step": 33140 }, { "epoch": 0.4659498207885305, "grad_norm": 0.5705327987670898, "learning_rate": 0.00011941015103057098, "loss": 0.1831, "step": 33150 }, { "epoch": 0.4660903788038513, "grad_norm": 0.3367246687412262, "learning_rate": 0.0001193645507564859, "loss": 0.1946, "step": 33160 }, { "epoch": 0.4662309368191721, "grad_norm": 0.4650691747665405, "learning_rate": 0.0001193189462985053, "loss": 0.2061, "step": 33170 }, { "epoch": 0.4663714948344929, "grad_norm": 0.44068074226379395, "learning_rate": 0.00011927333766648254, "loss": 0.1625, "step": 33180 }, { "epoch": 0.4665120528498138, "grad_norm": 0.463360071182251, "learning_rate": 0.0001192277248702718, "loss": 0.1731, "step": 33190 }, { "epoch": 0.4666526108651346, "grad_norm": 0.669105589389801, "learning_rate": 0.00011918210791972812, "loss": 0.182, "step": 33200 }, { "epoch": 0.4667931688804554, "grad_norm": 0.33570921421051025, "learning_rate": 0.0001191364868247075, "loss": 0.1923, "step": 33210 }, { "epoch": 0.4669337268957762, "grad_norm": 0.37507128715515137, "learning_rate": 0.00011909086159506679, "loss": 0.1831, "step": 33220 }, { "epoch": 0.46707428491109704, "grad_norm": 0.5032942295074463, "learning_rate": 0.00011904523224066379, "loss": 0.1956, "step": 33230 }, { "epoch": 0.4672148429264179, "grad_norm": 0.4646669924259186, "learning_rate": 0.00011899959877135714, "loss": 0.1766, "step": 33240 }, { "epoch": 0.4673554009417387, "grad_norm": 0.41880184412002563, "learning_rate": 0.00011895396119700637, "loss": 0.1866, "step": 33250 }, { "epoch": 0.46749595895705953, "grad_norm": 0.46342238783836365, "learning_rate": 0.00011890831952747193, "loss": 0.1801, "step": 33260 }, { "epoch": 0.46763651697238034, "grad_norm": 0.36706215143203735, "learning_rate": 0.00011886267377261514, "loss": 0.1787, "step": 33270 }, { "epoch": 0.46777707498770116, "grad_norm": 0.4679487347602844, "learning_rate": 0.00011881702394229819, "loss": 0.1764, "step": 33280 }, { "epoch": 0.46791763300302197, "grad_norm": 0.3672434389591217, "learning_rate": 0.00011877137004638415, "loss": 0.1868, "step": 33290 }, { "epoch": 0.46805819101834284, "grad_norm": 0.3122919201850891, "learning_rate": 0.00011872571209473699, "loss": 0.1562, "step": 33300 }, { "epoch": 0.46819874903366365, "grad_norm": 0.5655485987663269, "learning_rate": 0.00011868005009722156, "loss": 0.1907, "step": 33310 }, { "epoch": 0.46833930704898447, "grad_norm": 0.4060775935649872, "learning_rate": 0.00011863438406370355, "loss": 0.1876, "step": 33320 }, { "epoch": 0.4684798650643053, "grad_norm": 0.41255131363868713, "learning_rate": 0.00011858871400404954, "loss": 0.1886, "step": 33330 }, { "epoch": 0.4686204230796261, "grad_norm": 0.312167763710022, "learning_rate": 0.00011854303992812699, "loss": 0.1772, "step": 33340 }, { "epoch": 0.46876098109494696, "grad_norm": 0.33613449335098267, "learning_rate": 0.0001184973618458042, "loss": 0.1711, "step": 33350 }, { "epoch": 0.4689015391102678, "grad_norm": 0.39489543437957764, "learning_rate": 0.00011845167976695039, "loss": 0.1929, "step": 33360 }, { "epoch": 0.4690420971255886, "grad_norm": 0.3719608783721924, "learning_rate": 0.00011840599370143559, "loss": 0.1497, "step": 33370 }, { "epoch": 0.4691826551409094, "grad_norm": 0.5516514778137207, "learning_rate": 0.00011836030365913069, "loss": 0.1848, "step": 33380 }, { "epoch": 0.4693232131562302, "grad_norm": 0.378025084733963, "learning_rate": 0.00011831460964990748, "loss": 0.1702, "step": 33390 }, { "epoch": 0.4694637711715511, "grad_norm": 0.37435686588287354, "learning_rate": 0.00011826891168363859, "loss": 0.1629, "step": 33400 }, { "epoch": 0.4696043291868719, "grad_norm": 0.432295024394989, "learning_rate": 0.00011822320977019748, "loss": 0.1856, "step": 33410 }, { "epoch": 0.4697448872021927, "grad_norm": 0.34483829140663147, "learning_rate": 0.00011817750391945849, "loss": 0.1782, "step": 33420 }, { "epoch": 0.4698854452175135, "grad_norm": 0.5057793855667114, "learning_rate": 0.00011813179414129683, "loss": 0.1911, "step": 33430 }, { "epoch": 0.47002600323283433, "grad_norm": 0.3595276176929474, "learning_rate": 0.00011808608044558848, "loss": 0.1798, "step": 33440 }, { "epoch": 0.4701665612481552, "grad_norm": 0.38613057136535645, "learning_rate": 0.00011804036284221038, "loss": 0.1955, "step": 33450 }, { "epoch": 0.470307119263476, "grad_norm": 0.4427388310432434, "learning_rate": 0.00011799464134104021, "loss": 0.1849, "step": 33460 }, { "epoch": 0.4704476772787968, "grad_norm": 0.4762212634086609, "learning_rate": 0.00011794891595195654, "loss": 0.1781, "step": 33470 }, { "epoch": 0.47058823529411764, "grad_norm": 0.495022714138031, "learning_rate": 0.0001179031866848388, "loss": 0.1846, "step": 33480 }, { "epoch": 0.47072879330943845, "grad_norm": 0.3033413589000702, "learning_rate": 0.00011785745354956721, "loss": 0.1617, "step": 33490 }, { "epoch": 0.4708693513247593, "grad_norm": 0.5457707643508911, "learning_rate": 0.00011781171655602285, "loss": 0.1808, "step": 33500 }, { "epoch": 0.47100990934008014, "grad_norm": 0.4955344498157501, "learning_rate": 0.00011776597571408765, "loss": 0.1851, "step": 33510 }, { "epoch": 0.47115046735540095, "grad_norm": 0.44962772727012634, "learning_rate": 0.00011772023103364434, "loss": 0.1811, "step": 33520 }, { "epoch": 0.47129102537072176, "grad_norm": 0.33938634395599365, "learning_rate": 0.00011767448252457649, "loss": 0.1868, "step": 33530 }, { "epoch": 0.4714315833860426, "grad_norm": 0.42045858502388, "learning_rate": 0.0001176287301967685, "loss": 0.1927, "step": 33540 }, { "epoch": 0.4715721414013634, "grad_norm": 0.5053248405456543, "learning_rate": 0.00011758297406010563, "loss": 0.1832, "step": 33550 }, { "epoch": 0.47171269941668426, "grad_norm": 0.4107218384742737, "learning_rate": 0.00011753721412447387, "loss": 0.1985, "step": 33560 }, { "epoch": 0.47185325743200507, "grad_norm": 0.3895529508590698, "learning_rate": 0.00011749145039976015, "loss": 0.1695, "step": 33570 }, { "epoch": 0.4719938154473259, "grad_norm": 0.45933929085731506, "learning_rate": 0.00011744568289585211, "loss": 0.1757, "step": 33580 }, { "epoch": 0.4721343734626467, "grad_norm": 0.3263451159000397, "learning_rate": 0.00011739991162263828, "loss": 0.1695, "step": 33590 }, { "epoch": 0.4722749314779675, "grad_norm": 0.4005551338195801, "learning_rate": 0.00011735413659000798, "loss": 0.1895, "step": 33600 }, { "epoch": 0.4724154894932884, "grad_norm": 0.42519721388816833, "learning_rate": 0.00011730835780785132, "loss": 0.1876, "step": 33610 }, { "epoch": 0.4725560475086092, "grad_norm": 0.4660035967826843, "learning_rate": 0.00011726257528605926, "loss": 0.1904, "step": 33620 }, { "epoch": 0.47269660552393, "grad_norm": 0.34425896406173706, "learning_rate": 0.00011721678903452356, "loss": 0.184, "step": 33630 }, { "epoch": 0.4728371635392508, "grad_norm": 0.46487653255462646, "learning_rate": 0.00011717099906313675, "loss": 0.1991, "step": 33640 }, { "epoch": 0.47297772155457163, "grad_norm": 0.3566063642501831, "learning_rate": 0.00011712520538179221, "loss": 0.1717, "step": 33650 }, { "epoch": 0.4731182795698925, "grad_norm": 0.41969212889671326, "learning_rate": 0.00011707940800038408, "loss": 0.1789, "step": 33660 }, { "epoch": 0.4732588375852133, "grad_norm": 0.41644376516342163, "learning_rate": 0.00011703360692880734, "loss": 0.1804, "step": 33670 }, { "epoch": 0.4733993956005341, "grad_norm": 0.3613830506801605, "learning_rate": 0.00011698780217695771, "loss": 0.1883, "step": 33680 }, { "epoch": 0.47353995361585494, "grad_norm": 0.38608357310295105, "learning_rate": 0.0001169419937547318, "loss": 0.1838, "step": 33690 }, { "epoch": 0.47368051163117575, "grad_norm": 0.44402819871902466, "learning_rate": 0.00011689618167202692, "loss": 0.2057, "step": 33700 }, { "epoch": 0.4738210696464966, "grad_norm": 0.3911150097846985, "learning_rate": 0.00011685036593874122, "loss": 0.1801, "step": 33710 }, { "epoch": 0.47396162766181743, "grad_norm": 0.3988668620586395, "learning_rate": 0.0001168045465647736, "loss": 0.1965, "step": 33720 }, { "epoch": 0.47410218567713824, "grad_norm": 0.39634349942207336, "learning_rate": 0.00011675872356002377, "loss": 0.1985, "step": 33730 }, { "epoch": 0.47424274369245906, "grad_norm": 0.2985493838787079, "learning_rate": 0.0001167128969343923, "loss": 0.1831, "step": 33740 }, { "epoch": 0.47438330170777987, "grad_norm": 0.41120585799217224, "learning_rate": 0.00011666706669778038, "loss": 0.1944, "step": 33750 }, { "epoch": 0.4745238597231007, "grad_norm": 0.5060003399848938, "learning_rate": 0.0001166212328600901, "loss": 0.1971, "step": 33760 }, { "epoch": 0.47466441773842155, "grad_norm": 0.44410645961761475, "learning_rate": 0.0001165753954312243, "loss": 0.193, "step": 33770 }, { "epoch": 0.47480497575374236, "grad_norm": 0.3515392541885376, "learning_rate": 0.00011652955442108662, "loss": 0.1792, "step": 33780 }, { "epoch": 0.4749455337690632, "grad_norm": 0.3999476432800293, "learning_rate": 0.00011648370983958139, "loss": 0.1837, "step": 33790 }, { "epoch": 0.475086091784384, "grad_norm": 0.40338027477264404, "learning_rate": 0.00011643786169661379, "loss": 0.2021, "step": 33800 }, { "epoch": 0.4752266497997048, "grad_norm": 0.27882617712020874, "learning_rate": 0.0001163920100020898, "loss": 0.1635, "step": 33810 }, { "epoch": 0.47536720781502567, "grad_norm": 0.5009968876838684, "learning_rate": 0.00011634615476591601, "loss": 0.1913, "step": 33820 }, { "epoch": 0.4755077658303465, "grad_norm": 0.4280058741569519, "learning_rate": 0.000116300295998, "loss": 0.2067, "step": 33830 }, { "epoch": 0.4756483238456673, "grad_norm": 0.43252554535865784, "learning_rate": 0.00011625443370824989, "loss": 0.1769, "step": 33840 }, { "epoch": 0.4757888818609881, "grad_norm": 0.4498821198940277, "learning_rate": 0.00011620856790657472, "loss": 0.1958, "step": 33850 }, { "epoch": 0.4759294398763089, "grad_norm": 0.4288261830806732, "learning_rate": 0.00011616269860288422, "loss": 0.1859, "step": 33860 }, { "epoch": 0.4760699978916298, "grad_norm": 0.3644183576107025, "learning_rate": 0.00011611682580708889, "loss": 0.1715, "step": 33870 }, { "epoch": 0.4762105559069506, "grad_norm": 0.29036372900009155, "learning_rate": 0.00011607094952909998, "loss": 0.1746, "step": 33880 }, { "epoch": 0.4763511139222714, "grad_norm": 0.4704986810684204, "learning_rate": 0.0001160250697788295, "loss": 0.1794, "step": 33890 }, { "epoch": 0.47649167193759223, "grad_norm": 0.31704598665237427, "learning_rate": 0.0001159791865661902, "loss": 0.2009, "step": 33900 }, { "epoch": 0.47663222995291304, "grad_norm": 0.39006519317626953, "learning_rate": 0.00011593329990109558, "loss": 0.1608, "step": 33910 }, { "epoch": 0.4767727879682339, "grad_norm": 0.3246588706970215, "learning_rate": 0.00011588740979345993, "loss": 0.1874, "step": 33920 }, { "epoch": 0.4769133459835547, "grad_norm": 0.5718732476234436, "learning_rate": 0.0001158415162531982, "loss": 0.1862, "step": 33930 }, { "epoch": 0.47705390399887554, "grad_norm": 0.34852126240730286, "learning_rate": 0.00011579561929022613, "loss": 0.1918, "step": 33940 }, { "epoch": 0.47719446201419635, "grad_norm": 0.3804478645324707, "learning_rate": 0.00011574971891446022, "loss": 0.1837, "step": 33950 }, { "epoch": 0.47733502002951717, "grad_norm": 0.29604998230934143, "learning_rate": 0.00011570381513581768, "loss": 0.1955, "step": 33960 }, { "epoch": 0.47747557804483803, "grad_norm": 0.6176908016204834, "learning_rate": 0.00011565790796421645, "loss": 0.1702, "step": 33970 }, { "epoch": 0.47761613606015885, "grad_norm": 0.4599247872829437, "learning_rate": 0.00011561199740957521, "loss": 0.1996, "step": 33980 }, { "epoch": 0.47775669407547966, "grad_norm": 0.3114668130874634, "learning_rate": 0.00011556608348181335, "loss": 0.1835, "step": 33990 }, { "epoch": 0.4778972520908005, "grad_norm": 0.3145674765110016, "learning_rate": 0.00011552016619085104, "loss": 0.1832, "step": 34000 }, { "epoch": 0.4778972520908005, "eval_chrf": 82.71781475485383, "eval_loss": 0.3998299837112427, "eval_runtime": 202.4047, "eval_samples_per_second": 0.494, "eval_steps_per_second": 0.02, "step": 34000 }, { "epoch": 0.4780378101061213, "grad_norm": 0.4320267140865326, "learning_rate": 0.00011547424554660915, "loss": 0.188, "step": 34010 }, { "epoch": 0.4781783681214421, "grad_norm": 0.4681837558746338, "learning_rate": 0.00011542832155900924, "loss": 0.195, "step": 34020 }, { "epoch": 0.47831892613676297, "grad_norm": 0.32885053753852844, "learning_rate": 0.00011538239423797367, "loss": 0.1598, "step": 34030 }, { "epoch": 0.4784594841520838, "grad_norm": 0.4490990936756134, "learning_rate": 0.00011533646359342544, "loss": 0.1798, "step": 34040 }, { "epoch": 0.4786000421674046, "grad_norm": 0.3614675998687744, "learning_rate": 0.00011529052963528835, "loss": 0.1913, "step": 34050 }, { "epoch": 0.4787406001827254, "grad_norm": 0.5846191048622131, "learning_rate": 0.0001152445923734868, "loss": 0.1761, "step": 34060 }, { "epoch": 0.4788811581980462, "grad_norm": 0.3790275454521179, "learning_rate": 0.000115198651817946, "loss": 0.1933, "step": 34070 }, { "epoch": 0.4790217162133671, "grad_norm": 0.33133602142333984, "learning_rate": 0.00011515270797859187, "loss": 0.1782, "step": 34080 }, { "epoch": 0.4791622742286879, "grad_norm": 0.3211809992790222, "learning_rate": 0.000115106760865351, "loss": 0.1747, "step": 34090 }, { "epoch": 0.4793028322440087, "grad_norm": 0.624617874622345, "learning_rate": 0.00011506081048815067, "loss": 0.2045, "step": 34100 }, { "epoch": 0.4794433902593295, "grad_norm": 0.5131089687347412, "learning_rate": 0.00011501485685691893, "loss": 0.1975, "step": 34110 }, { "epoch": 0.47958394827465034, "grad_norm": 0.64466792345047, "learning_rate": 0.00011496889998158451, "loss": 0.2255, "step": 34120 }, { "epoch": 0.4797245062899712, "grad_norm": 0.4620768427848816, "learning_rate": 0.00011492293987207678, "loss": 0.2072, "step": 34130 }, { "epoch": 0.479865064305292, "grad_norm": 0.5024957060813904, "learning_rate": 0.00011487697653832591, "loss": 0.1834, "step": 34140 }, { "epoch": 0.48000562232061283, "grad_norm": 0.3953956663608551, "learning_rate": 0.00011483100999026268, "loss": 0.1889, "step": 34150 }, { "epoch": 0.48014618033593365, "grad_norm": 0.44761160016059875, "learning_rate": 0.00011478504023781864, "loss": 0.1971, "step": 34160 }, { "epoch": 0.48028673835125446, "grad_norm": 0.4793362319469452, "learning_rate": 0.00011473906729092596, "loss": 0.1936, "step": 34170 }, { "epoch": 0.48042729636657533, "grad_norm": 0.5443357825279236, "learning_rate": 0.00011469309115951754, "loss": 0.203, "step": 34180 }, { "epoch": 0.48056785438189614, "grad_norm": 0.5695284008979797, "learning_rate": 0.00011464711185352697, "loss": 0.1706, "step": 34190 }, { "epoch": 0.48070841239721696, "grad_norm": 0.4707348048686981, "learning_rate": 0.0001146011293828885, "loss": 0.1784, "step": 34200 }, { "epoch": 0.48084897041253777, "grad_norm": 0.43354344367980957, "learning_rate": 0.00011455514375753706, "loss": 0.1865, "step": 34210 }, { "epoch": 0.4809895284278586, "grad_norm": 0.6231280565261841, "learning_rate": 0.00011450915498740835, "loss": 0.1621, "step": 34220 }, { "epoch": 0.48113008644317945, "grad_norm": 0.36138397455215454, "learning_rate": 0.0001144631630824386, "loss": 0.1945, "step": 34230 }, { "epoch": 0.48127064445850026, "grad_norm": 0.4865313768386841, "learning_rate": 0.00011441716805256488, "loss": 0.1698, "step": 34240 }, { "epoch": 0.4814112024738211, "grad_norm": 0.35231879353523254, "learning_rate": 0.0001143711699077248, "loss": 0.204, "step": 34250 }, { "epoch": 0.4815517604891419, "grad_norm": 0.5219005942344666, "learning_rate": 0.00011432516865785672, "loss": 0.1762, "step": 34260 }, { "epoch": 0.4816923185044627, "grad_norm": 0.39501020312309265, "learning_rate": 0.00011427916431289963, "loss": 0.1715, "step": 34270 }, { "epoch": 0.4818328765197835, "grad_norm": 0.4571538269519806, "learning_rate": 0.00011423315688279319, "loss": 0.1695, "step": 34280 }, { "epoch": 0.4819734345351044, "grad_norm": 0.29014673829078674, "learning_rate": 0.0001141871463774778, "loss": 0.1737, "step": 34290 }, { "epoch": 0.4821139925504252, "grad_norm": 0.38009896874427795, "learning_rate": 0.00011414113280689442, "loss": 0.1883, "step": 34300 }, { "epoch": 0.482254550565746, "grad_norm": 0.44037485122680664, "learning_rate": 0.00011409511618098477, "loss": 0.1773, "step": 34310 }, { "epoch": 0.4823951085810668, "grad_norm": 0.30522382259368896, "learning_rate": 0.00011404909650969111, "loss": 0.1687, "step": 34320 }, { "epoch": 0.48253566659638764, "grad_norm": 0.345973402261734, "learning_rate": 0.00011400307380295648, "loss": 0.1884, "step": 34330 }, { "epoch": 0.4826762246117085, "grad_norm": 0.46845465898513794, "learning_rate": 0.00011395704807072449, "loss": 0.1864, "step": 34340 }, { "epoch": 0.4828167826270293, "grad_norm": 0.5156724452972412, "learning_rate": 0.00011391101932293946, "loss": 0.1744, "step": 34350 }, { "epoch": 0.48295734064235013, "grad_norm": 0.27931588888168335, "learning_rate": 0.00011386498756954633, "loss": 0.1949, "step": 34360 }, { "epoch": 0.48309789865767094, "grad_norm": 0.38670992851257324, "learning_rate": 0.0001138189528204907, "loss": 0.1696, "step": 34370 }, { "epoch": 0.48323845667299176, "grad_norm": 0.4227330982685089, "learning_rate": 0.00011377291508571884, "loss": 0.1929, "step": 34380 }, { "epoch": 0.4833790146883126, "grad_norm": 0.4957590401172638, "learning_rate": 0.00011372687437517761, "loss": 0.1895, "step": 34390 }, { "epoch": 0.48351957270363344, "grad_norm": 0.3703496754169464, "learning_rate": 0.00011368083069881454, "loss": 0.1713, "step": 34400 }, { "epoch": 0.48366013071895425, "grad_norm": 0.41321736574172974, "learning_rate": 0.00011363478406657782, "loss": 0.1755, "step": 34410 }, { "epoch": 0.48380068873427506, "grad_norm": 0.35261067748069763, "learning_rate": 0.00011358873448841627, "loss": 0.1685, "step": 34420 }, { "epoch": 0.4839412467495959, "grad_norm": 0.44572609663009644, "learning_rate": 0.00011354268197427931, "loss": 0.2073, "step": 34430 }, { "epoch": 0.48408180476491675, "grad_norm": 0.4484621286392212, "learning_rate": 0.00011349662653411705, "loss": 0.2035, "step": 34440 }, { "epoch": 0.48422236278023756, "grad_norm": 0.32330384850502014, "learning_rate": 0.00011345056817788023, "loss": 0.1734, "step": 34450 }, { "epoch": 0.48436292079555837, "grad_norm": 0.45375800132751465, "learning_rate": 0.00011340450691552011, "loss": 0.163, "step": 34460 }, { "epoch": 0.4845034788108792, "grad_norm": 0.36283940076828003, "learning_rate": 0.00011335844275698874, "loss": 0.1856, "step": 34470 }, { "epoch": 0.4846440368262, "grad_norm": 0.3621930181980133, "learning_rate": 0.00011331237571223872, "loss": 0.2008, "step": 34480 }, { "epoch": 0.4847845948415208, "grad_norm": 0.35948091745376587, "learning_rate": 0.00011326630579122323, "loss": 0.1699, "step": 34490 }, { "epoch": 0.4849251528568417, "grad_norm": 0.31326571106910706, "learning_rate": 0.00011322023300389617, "loss": 0.1733, "step": 34500 }, { "epoch": 0.4850657108721625, "grad_norm": 0.41531944274902344, "learning_rate": 0.00011317415736021195, "loss": 0.1616, "step": 34510 }, { "epoch": 0.4852062688874833, "grad_norm": 0.39085519313812256, "learning_rate": 0.00011312807887012566, "loss": 0.1741, "step": 34520 }, { "epoch": 0.4853468269028041, "grad_norm": 0.44744712114334106, "learning_rate": 0.00011308199754359303, "loss": 0.2084, "step": 34530 }, { "epoch": 0.48548738491812493, "grad_norm": 0.4303813576698303, "learning_rate": 0.00011303591339057034, "loss": 0.1712, "step": 34540 }, { "epoch": 0.4856279429334458, "grad_norm": 0.3569890260696411, "learning_rate": 0.0001129898264210145, "loss": 0.1827, "step": 34550 }, { "epoch": 0.4857685009487666, "grad_norm": 0.40748974680900574, "learning_rate": 0.00011294373664488307, "loss": 0.191, "step": 34560 }, { "epoch": 0.4859090589640874, "grad_norm": 0.3596547544002533, "learning_rate": 0.00011289764407213417, "loss": 0.1876, "step": 34570 }, { "epoch": 0.48604961697940824, "grad_norm": 0.4161217212677002, "learning_rate": 0.00011285154871272652, "loss": 0.1968, "step": 34580 }, { "epoch": 0.48619017499472905, "grad_norm": 0.3585597574710846, "learning_rate": 0.00011280545057661951, "loss": 0.1848, "step": 34590 }, { "epoch": 0.4863307330100499, "grad_norm": 0.4169002175331116, "learning_rate": 0.00011275934967377305, "loss": 0.1901, "step": 34600 }, { "epoch": 0.48647129102537073, "grad_norm": 0.39894920587539673, "learning_rate": 0.00011271324601414766, "loss": 0.1787, "step": 34610 }, { "epoch": 0.48661184904069155, "grad_norm": 0.46989011764526367, "learning_rate": 0.0001126671396077045, "loss": 0.2102, "step": 34620 }, { "epoch": 0.48675240705601236, "grad_norm": 0.3442949950695038, "learning_rate": 0.00011262103046440528, "loss": 0.1696, "step": 34630 }, { "epoch": 0.4868929650713332, "grad_norm": 0.35811588168144226, "learning_rate": 0.00011257491859421234, "loss": 0.1651, "step": 34640 }, { "epoch": 0.48703352308665404, "grad_norm": 0.40460866689682007, "learning_rate": 0.00011252880400708861, "loss": 0.1604, "step": 34650 }, { "epoch": 0.48717408110197485, "grad_norm": 0.3593714237213135, "learning_rate": 0.00011248268671299751, "loss": 0.2043, "step": 34660 }, { "epoch": 0.48731463911729567, "grad_norm": 0.38151001930236816, "learning_rate": 0.00011243656672190319, "loss": 0.1869, "step": 34670 }, { "epoch": 0.4874551971326165, "grad_norm": 0.3958202004432678, "learning_rate": 0.00011239044404377029, "loss": 0.17, "step": 34680 }, { "epoch": 0.4875957551479373, "grad_norm": 0.5403009653091431, "learning_rate": 0.00011234431868856406, "loss": 0.1945, "step": 34690 }, { "epoch": 0.48773631316325816, "grad_norm": 0.34916040301322937, "learning_rate": 0.00011229819066625029, "loss": 0.1911, "step": 34700 }, { "epoch": 0.487876871178579, "grad_norm": 0.40562403202056885, "learning_rate": 0.00011225205998679539, "loss": 0.1918, "step": 34710 }, { "epoch": 0.4880174291938998, "grad_norm": 0.47156044840812683, "learning_rate": 0.00011220592666016638, "loss": 0.1842, "step": 34720 }, { "epoch": 0.4881579872092206, "grad_norm": 0.3460146486759186, "learning_rate": 0.00011215979069633075, "loss": 0.1727, "step": 34730 }, { "epoch": 0.4882985452245414, "grad_norm": 0.447365403175354, "learning_rate": 0.00011211365210525664, "loss": 0.2014, "step": 34740 }, { "epoch": 0.4884391032398622, "grad_norm": 0.3855605125427246, "learning_rate": 0.00011206751089691272, "loss": 0.1682, "step": 34750 }, { "epoch": 0.4885796612551831, "grad_norm": 0.3858765959739685, "learning_rate": 0.00011202136708126825, "loss": 0.1727, "step": 34760 }, { "epoch": 0.4887202192705039, "grad_norm": 0.4090711176395416, "learning_rate": 0.00011197522066829303, "loss": 0.1872, "step": 34770 }, { "epoch": 0.4888607772858247, "grad_norm": 0.3905404806137085, "learning_rate": 0.00011192907166795741, "loss": 0.1525, "step": 34780 }, { "epoch": 0.48900133530114553, "grad_norm": 0.3775665760040283, "learning_rate": 0.00011188292009023235, "loss": 0.1803, "step": 34790 }, { "epoch": 0.48914189331646635, "grad_norm": 0.44145843386650085, "learning_rate": 0.00011183676594508937, "loss": 0.1932, "step": 34800 }, { "epoch": 0.4892824513317872, "grad_norm": 0.4132518470287323, "learning_rate": 0.00011179060924250043, "loss": 0.1701, "step": 34810 }, { "epoch": 0.48942300934710803, "grad_norm": 0.33350783586502075, "learning_rate": 0.00011174444999243819, "loss": 0.1734, "step": 34820 }, { "epoch": 0.48956356736242884, "grad_norm": 0.41543808579444885, "learning_rate": 0.00011169828820487575, "loss": 0.1971, "step": 34830 }, { "epoch": 0.48970412537774966, "grad_norm": 0.39029350876808167, "learning_rate": 0.00011165212388978685, "loss": 0.1684, "step": 34840 }, { "epoch": 0.48984468339307047, "grad_norm": 0.5022748708724976, "learning_rate": 0.00011160595705714569, "loss": 0.176, "step": 34850 }, { "epoch": 0.48998524140839134, "grad_norm": 0.3474188446998596, "learning_rate": 0.0001115597877169271, "loss": 0.1958, "step": 34860 }, { "epoch": 0.49012579942371215, "grad_norm": 0.3608366549015045, "learning_rate": 0.00011151361587910635, "loss": 0.204, "step": 34870 }, { "epoch": 0.49026635743903296, "grad_norm": 0.3608495593070984, "learning_rate": 0.00011146744155365934, "loss": 0.1718, "step": 34880 }, { "epoch": 0.4904069154543538, "grad_norm": 0.358041912317276, "learning_rate": 0.00011142126475056245, "loss": 0.1927, "step": 34890 }, { "epoch": 0.4905474734696746, "grad_norm": 0.4198744595050812, "learning_rate": 0.00011137508547979265, "loss": 0.1851, "step": 34900 }, { "epoch": 0.49068803148499546, "grad_norm": 0.36275073885917664, "learning_rate": 0.00011132890375132736, "loss": 0.1778, "step": 34910 }, { "epoch": 0.49082858950031627, "grad_norm": 0.42901909351348877, "learning_rate": 0.00011128271957514465, "loss": 0.1908, "step": 34920 }, { "epoch": 0.4909691475156371, "grad_norm": 0.5087999105453491, "learning_rate": 0.00011123653296122298, "loss": 0.1899, "step": 34930 }, { "epoch": 0.4911097055309579, "grad_norm": 0.3971782624721527, "learning_rate": 0.00011119034391954146, "loss": 0.1712, "step": 34940 }, { "epoch": 0.4912502635462787, "grad_norm": 0.4209510385990143, "learning_rate": 0.00011114415246007968, "loss": 0.1814, "step": 34950 }, { "epoch": 0.4913908215615996, "grad_norm": 0.32000836730003357, "learning_rate": 0.00011109795859281767, "loss": 0.1681, "step": 34960 }, { "epoch": 0.4915313795769204, "grad_norm": 0.43289345502853394, "learning_rate": 0.00011105176232773612, "loss": 0.175, "step": 34970 }, { "epoch": 0.4916719375922412, "grad_norm": 0.39267903566360474, "learning_rate": 0.00011100556367481616, "loss": 0.178, "step": 34980 }, { "epoch": 0.491812495607562, "grad_norm": 0.5739196538925171, "learning_rate": 0.00011095936264403945, "loss": 0.1908, "step": 34990 }, { "epoch": 0.49195305362288283, "grad_norm": 0.3983236849308014, "learning_rate": 0.00011091315924538814, "loss": 0.169, "step": 35000 }, { "epoch": 0.49195305362288283, "eval_chrf": 83.79861109162626, "eval_loss": 0.39429256319999695, "eval_runtime": 139.9694, "eval_samples_per_second": 0.714, "eval_steps_per_second": 0.029, "step": 35000 }, { "epoch": 0.49209361163820364, "grad_norm": 0.4108808636665344, "learning_rate": 0.00011086695348884493, "loss": 0.1764, "step": 35010 }, { "epoch": 0.4922341696535245, "grad_norm": 0.5308189392089844, "learning_rate": 0.00011082074538439302, "loss": 0.1863, "step": 35020 }, { "epoch": 0.4923747276688453, "grad_norm": 0.39313215017318726, "learning_rate": 0.0001107745349420161, "loss": 0.1595, "step": 35030 }, { "epoch": 0.49251528568416614, "grad_norm": 0.3336087167263031, "learning_rate": 0.00011072832217169839, "loss": 0.1688, "step": 35040 }, { "epoch": 0.49265584369948695, "grad_norm": 0.41592684388160706, "learning_rate": 0.00011068210708342459, "loss": 0.1841, "step": 35050 }, { "epoch": 0.49279640171480776, "grad_norm": 0.48767977952957153, "learning_rate": 0.00011063588968717989, "loss": 0.1711, "step": 35060 }, { "epoch": 0.49293695973012863, "grad_norm": 0.4140823185443878, "learning_rate": 0.00011058966999295002, "loss": 0.1836, "step": 35070 }, { "epoch": 0.49307751774544945, "grad_norm": 0.4304581880569458, "learning_rate": 0.00011054344801072119, "loss": 0.179, "step": 35080 }, { "epoch": 0.49321807576077026, "grad_norm": 0.43712833523750305, "learning_rate": 0.00011049722375048008, "loss": 0.1792, "step": 35090 }, { "epoch": 0.49335863377609107, "grad_norm": 0.4752121567726135, "learning_rate": 0.0001104509972222139, "loss": 0.1902, "step": 35100 }, { "epoch": 0.4934991917914119, "grad_norm": 0.4985567331314087, "learning_rate": 0.00011040476843591032, "loss": 0.1821, "step": 35110 }, { "epoch": 0.49363974980673275, "grad_norm": 0.31013724207878113, "learning_rate": 0.0001103585374015575, "loss": 0.1438, "step": 35120 }, { "epoch": 0.49378030782205357, "grad_norm": 0.46870386600494385, "learning_rate": 0.00011031230412914413, "loss": 0.1936, "step": 35130 }, { "epoch": 0.4939208658373744, "grad_norm": 0.35234829783439636, "learning_rate": 0.00011026606862865932, "loss": 0.1676, "step": 35140 }, { "epoch": 0.4940614238526952, "grad_norm": 0.34757283329963684, "learning_rate": 0.0001102198309100927, "loss": 0.1774, "step": 35150 }, { "epoch": 0.494201981868016, "grad_norm": 0.6265631914138794, "learning_rate": 0.00011017359098343434, "loss": 0.1761, "step": 35160 }, { "epoch": 0.4943425398833369, "grad_norm": 0.4004918336868286, "learning_rate": 0.00011012734885867486, "loss": 0.2101, "step": 35170 }, { "epoch": 0.4944830978986577, "grad_norm": 0.4307462275028229, "learning_rate": 0.00011008110454580529, "loss": 0.1699, "step": 35180 }, { "epoch": 0.4946236559139785, "grad_norm": 0.37476179003715515, "learning_rate": 0.00011003485805481715, "loss": 0.1985, "step": 35190 }, { "epoch": 0.4947642139292993, "grad_norm": 0.38108786940574646, "learning_rate": 0.00010998860939570243, "loss": 0.1686, "step": 35200 }, { "epoch": 0.4949047719446201, "grad_norm": 0.4117715358734131, "learning_rate": 0.0001099423585784536, "loss": 0.1774, "step": 35210 }, { "epoch": 0.49504532995994094, "grad_norm": 0.37168529629707336, "learning_rate": 0.00010989610561306363, "loss": 0.1572, "step": 35220 }, { "epoch": 0.4951858879752618, "grad_norm": 0.3484684228897095, "learning_rate": 0.00010984985050952583, "loss": 0.1696, "step": 35230 }, { "epoch": 0.4953264459905826, "grad_norm": 0.5094234347343445, "learning_rate": 0.00010980359327783412, "loss": 0.1819, "step": 35240 }, { "epoch": 0.49546700400590343, "grad_norm": 0.5020202398300171, "learning_rate": 0.0001097573339279828, "loss": 0.2098, "step": 35250 }, { "epoch": 0.49560756202122425, "grad_norm": 0.44014009833335876, "learning_rate": 0.00010971107246996664, "loss": 0.2071, "step": 35260 }, { "epoch": 0.49574812003654506, "grad_norm": 0.25457870960235596, "learning_rate": 0.00010966480891378083, "loss": 0.1948, "step": 35270 }, { "epoch": 0.4958886780518659, "grad_norm": 0.34224721789360046, "learning_rate": 0.00010961854326942114, "loss": 0.1739, "step": 35280 }, { "epoch": 0.49602923606718674, "grad_norm": 0.2786223888397217, "learning_rate": 0.00010957227554688361, "loss": 0.1844, "step": 35290 }, { "epoch": 0.49616979408250755, "grad_norm": 0.461157888174057, "learning_rate": 0.00010952600575616487, "loss": 0.1579, "step": 35300 }, { "epoch": 0.49631035209782837, "grad_norm": 0.3331812620162964, "learning_rate": 0.00010947973390726196, "loss": 0.1923, "step": 35310 }, { "epoch": 0.4964509101131492, "grad_norm": 0.45609837770462036, "learning_rate": 0.00010943346001017232, "loss": 0.1975, "step": 35320 }, { "epoch": 0.49659146812847005, "grad_norm": 0.4462905526161194, "learning_rate": 0.00010938718407489388, "loss": 0.1738, "step": 35330 }, { "epoch": 0.49673202614379086, "grad_norm": 0.3662757873535156, "learning_rate": 0.00010934090611142501, "loss": 0.1827, "step": 35340 }, { "epoch": 0.4968725841591117, "grad_norm": 0.37068015336990356, "learning_rate": 0.00010929462612976446, "loss": 0.1705, "step": 35350 }, { "epoch": 0.4970131421744325, "grad_norm": 0.4905817210674286, "learning_rate": 0.00010924834413991152, "loss": 0.2016, "step": 35360 }, { "epoch": 0.4971537001897533, "grad_norm": 0.4112740457057953, "learning_rate": 0.0001092020601518658, "loss": 0.1686, "step": 35370 }, { "epoch": 0.49729425820507417, "grad_norm": 0.31365418434143066, "learning_rate": 0.00010915577417562743, "loss": 0.1827, "step": 35380 }, { "epoch": 0.497434816220395, "grad_norm": 0.32228732109069824, "learning_rate": 0.00010910948622119695, "loss": 0.1683, "step": 35390 }, { "epoch": 0.4975753742357158, "grad_norm": 0.3350965082645416, "learning_rate": 0.00010906319629857524, "loss": 0.1751, "step": 35400 }, { "epoch": 0.4977159322510366, "grad_norm": 0.3961012065410614, "learning_rate": 0.00010901690441776377, "loss": 0.1752, "step": 35410 }, { "epoch": 0.4978564902663574, "grad_norm": 0.4828808307647705, "learning_rate": 0.00010897061058876428, "loss": 0.1969, "step": 35420 }, { "epoch": 0.4979970482816783, "grad_norm": 0.39320072531700134, "learning_rate": 0.00010892431482157903, "loss": 0.1851, "step": 35430 }, { "epoch": 0.4981376062969991, "grad_norm": 0.403592586517334, "learning_rate": 0.00010887801712621062, "loss": 0.1842, "step": 35440 }, { "epoch": 0.4982781643123199, "grad_norm": 0.5812364816665649, "learning_rate": 0.00010883171751266215, "loss": 0.1895, "step": 35450 }, { "epoch": 0.49841872232764073, "grad_norm": 0.48084115982055664, "learning_rate": 0.00010878541599093706, "loss": 0.2011, "step": 35460 }, { "epoch": 0.49855928034296154, "grad_norm": 0.5177582502365112, "learning_rate": 0.00010873911257103926, "loss": 0.1797, "step": 35470 }, { "epoch": 0.49869983835828235, "grad_norm": 0.5985971689224243, "learning_rate": 0.00010869280726297302, "loss": 0.1998, "step": 35480 }, { "epoch": 0.4988403963736032, "grad_norm": 0.48021918535232544, "learning_rate": 0.00010864650007674304, "loss": 0.1723, "step": 35490 }, { "epoch": 0.49898095438892404, "grad_norm": 0.35807913541793823, "learning_rate": 0.00010860019102235445, "loss": 0.165, "step": 35500 }, { "epoch": 0.49912151240424485, "grad_norm": 0.5980018377304077, "learning_rate": 0.0001085538801098128, "loss": 0.1654, "step": 35510 }, { "epoch": 0.49926207041956566, "grad_norm": 0.46984145045280457, "learning_rate": 0.00010850756734912391, "loss": 0.2066, "step": 35520 }, { "epoch": 0.4994026284348865, "grad_norm": 0.3486853241920471, "learning_rate": 0.00010846125275029415, "loss": 0.1677, "step": 35530 }, { "epoch": 0.49954318645020734, "grad_norm": 0.5842266082763672, "learning_rate": 0.00010841493632333021, "loss": 0.1851, "step": 35540 }, { "epoch": 0.49968374446552816, "grad_norm": 0.30013322830200195, "learning_rate": 0.00010836861807823921, "loss": 0.1776, "step": 35550 }, { "epoch": 0.49982430248084897, "grad_norm": 0.3671824634075165, "learning_rate": 0.00010832229802502864, "loss": 0.1957, "step": 35560 }, { "epoch": 0.4999648604961698, "grad_norm": 0.42613282799720764, "learning_rate": 0.00010827597617370639, "loss": 0.1783, "step": 35570 }, { "epoch": 0.5001054185114906, "grad_norm": 0.3783090114593506, "learning_rate": 0.00010822965253428074, "loss": 0.2099, "step": 35580 }, { "epoch": 0.5002459765268115, "grad_norm": 0.3716335892677307, "learning_rate": 0.00010818332711676036, "loss": 0.1741, "step": 35590 }, { "epoch": 0.5003865345421322, "grad_norm": 0.336635023355484, "learning_rate": 0.00010813699993115427, "loss": 0.189, "step": 35600 }, { "epoch": 0.5005270925574531, "grad_norm": 0.46448060870170593, "learning_rate": 0.00010809067098747191, "loss": 0.1969, "step": 35610 }, { "epoch": 0.500667650572774, "grad_norm": 0.41883373260498047, "learning_rate": 0.00010804434029572313, "loss": 0.1931, "step": 35620 }, { "epoch": 0.5008082085880947, "grad_norm": 0.3746285140514374, "learning_rate": 0.00010799800786591805, "loss": 0.2023, "step": 35630 }, { "epoch": 0.5009487666034156, "grad_norm": 0.5401817560195923, "learning_rate": 0.00010795167370806726, "loss": 0.1662, "step": 35640 }, { "epoch": 0.5010893246187363, "grad_norm": 0.34712448716163635, "learning_rate": 0.00010790533783218174, "loss": 0.1881, "step": 35650 }, { "epoch": 0.5012298826340572, "grad_norm": 0.3793562650680542, "learning_rate": 0.00010785900024827271, "loss": 0.1591, "step": 35660 }, { "epoch": 0.5013704406493781, "grad_norm": 0.3823815882205963, "learning_rate": 0.00010781266096635191, "loss": 0.1899, "step": 35670 }, { "epoch": 0.5015109986646988, "grad_norm": 0.30518558621406555, "learning_rate": 0.00010776631999643138, "loss": 0.1503, "step": 35680 }, { "epoch": 0.5016515566800197, "grad_norm": 0.4431426227092743, "learning_rate": 0.0001077199773485235, "loss": 0.1786, "step": 35690 }, { "epoch": 0.5017921146953405, "grad_norm": 0.490865021944046, "learning_rate": 0.00010767363303264106, "loss": 0.1917, "step": 35700 }, { "epoch": 0.5019326727106613, "grad_norm": 0.4799773693084717, "learning_rate": 0.00010762728705879719, "loss": 0.1869, "step": 35710 }, { "epoch": 0.5020732307259822, "grad_norm": 0.3862857520580292, "learning_rate": 0.00010758093943700538, "loss": 0.1859, "step": 35720 }, { "epoch": 0.502213788741303, "grad_norm": 0.43539267778396606, "learning_rate": 0.00010753459017727945, "loss": 0.2054, "step": 35730 }, { "epoch": 0.5023543467566238, "grad_norm": 0.43648383021354675, "learning_rate": 0.00010748823928963361, "loss": 0.1594, "step": 35740 }, { "epoch": 0.5024949047719446, "grad_norm": 0.44934573769569397, "learning_rate": 0.00010744188678408242, "loss": 0.1726, "step": 35750 }, { "epoch": 0.5026354627872655, "grad_norm": 0.4945816993713379, "learning_rate": 0.0001073955326706408, "loss": 0.1557, "step": 35760 }, { "epoch": 0.5027760208025863, "grad_norm": 0.38272902369499207, "learning_rate": 0.00010734917695932394, "loss": 0.1952, "step": 35770 }, { "epoch": 0.5029165788179071, "grad_norm": 0.4691140949726105, "learning_rate": 0.00010730281966014748, "loss": 0.1731, "step": 35780 }, { "epoch": 0.503057136833228, "grad_norm": 0.4082811176776886, "learning_rate": 0.00010725646078312734, "loss": 0.1875, "step": 35790 }, { "epoch": 0.5031976948485487, "grad_norm": 0.5158867239952087, "learning_rate": 0.00010721010033827981, "loss": 0.183, "step": 35800 }, { "epoch": 0.5033382528638696, "grad_norm": 0.399926096200943, "learning_rate": 0.00010716373833562147, "loss": 0.2157, "step": 35810 }, { "epoch": 0.5034788108791903, "grad_norm": 0.3821859359741211, "learning_rate": 0.00010711737478516933, "loss": 0.1705, "step": 35820 }, { "epoch": 0.5036193688945112, "grad_norm": 0.5483487844467163, "learning_rate": 0.00010707100969694062, "loss": 0.1743, "step": 35830 }, { "epoch": 0.5037599269098321, "grad_norm": 0.35209783911705017, "learning_rate": 0.00010702464308095297, "loss": 0.1829, "step": 35840 }, { "epoch": 0.5039004849251528, "grad_norm": 0.4123036861419678, "learning_rate": 0.00010697827494722435, "loss": 0.1919, "step": 35850 }, { "epoch": 0.5040410429404737, "grad_norm": 0.548042893409729, "learning_rate": 0.00010693190530577301, "loss": 0.206, "step": 35860 }, { "epoch": 0.5041816009557945, "grad_norm": 0.40403738617897034, "learning_rate": 0.0001068855341666176, "loss": 0.1972, "step": 35870 }, { "epoch": 0.5043221589711153, "grad_norm": 0.3454335927963257, "learning_rate": 0.000106839161539777, "loss": 0.1972, "step": 35880 }, { "epoch": 0.5044627169864362, "grad_norm": 0.2906029522418976, "learning_rate": 0.00010679278743527046, "loss": 0.1753, "step": 35890 }, { "epoch": 0.504603275001757, "grad_norm": 0.46441200375556946, "learning_rate": 0.00010674641186311756, "loss": 0.1804, "step": 35900 }, { "epoch": 0.5047438330170778, "grad_norm": 0.4945662319660187, "learning_rate": 0.00010670003483333819, "loss": 0.1874, "step": 35910 }, { "epoch": 0.5048843910323986, "grad_norm": 0.44752687215805054, "learning_rate": 0.00010665365635595252, "loss": 0.191, "step": 35920 }, { "epoch": 0.5050249490477194, "grad_norm": 0.3905443549156189, "learning_rate": 0.0001066072764409811, "loss": 0.166, "step": 35930 }, { "epoch": 0.5051655070630403, "grad_norm": 0.547477662563324, "learning_rate": 0.00010656089509844474, "loss": 0.1905, "step": 35940 }, { "epoch": 0.5053060650783611, "grad_norm": 0.3551482558250427, "learning_rate": 0.00010651451233836454, "loss": 0.1978, "step": 35950 }, { "epoch": 0.5054466230936819, "grad_norm": 0.5016312003135681, "learning_rate": 0.00010646812817076197, "loss": 0.2035, "step": 35960 }, { "epoch": 0.5055871811090027, "grad_norm": 0.33267927169799805, "learning_rate": 0.00010642174260565876, "loss": 0.1741, "step": 35970 }, { "epoch": 0.5057277391243236, "grad_norm": 0.41453447937965393, "learning_rate": 0.00010637535565307695, "loss": 0.1701, "step": 35980 }, { "epoch": 0.5058682971396444, "grad_norm": 0.4634517729282379, "learning_rate": 0.00010632896732303889, "loss": 0.1914, "step": 35990 }, { "epoch": 0.5060088551549652, "grad_norm": 0.3329765498638153, "learning_rate": 0.00010628257762556722, "loss": 0.1665, "step": 36000 }, { "epoch": 0.5060088551549652, "eval_chrf": 79.90633488812688, "eval_loss": 0.3829784393310547, "eval_runtime": 284.4675, "eval_samples_per_second": 0.352, "eval_steps_per_second": 0.014, "step": 36000 }, { "epoch": 0.506149413170286, "grad_norm": 0.432464599609375, "learning_rate": 0.00010623618657068486, "loss": 0.1979, "step": 36010 }, { "epoch": 0.5062899711856068, "grad_norm": 0.3946160078048706, "learning_rate": 0.00010618979416841505, "loss": 0.1917, "step": 36020 }, { "epoch": 0.5064305292009277, "grad_norm": 0.39581990242004395, "learning_rate": 0.00010614340042878132, "loss": 0.1966, "step": 36030 }, { "epoch": 0.5065710872162486, "grad_norm": 0.4072232246398926, "learning_rate": 0.00010609700536180749, "loss": 0.183, "step": 36040 }, { "epoch": 0.5067116452315693, "grad_norm": 0.38204532861709595, "learning_rate": 0.0001060506089775176, "loss": 0.1692, "step": 36050 }, { "epoch": 0.5068522032468902, "grad_norm": 0.553989827632904, "learning_rate": 0.00010600421128593608, "loss": 0.1958, "step": 36060 }, { "epoch": 0.5069927612622109, "grad_norm": 0.4289276599884033, "learning_rate": 0.00010595781229708756, "loss": 0.1753, "step": 36070 }, { "epoch": 0.5071333192775318, "grad_norm": 0.41937553882598877, "learning_rate": 0.00010591141202099704, "loss": 0.1925, "step": 36080 }, { "epoch": 0.5072738772928527, "grad_norm": 0.39642074704170227, "learning_rate": 0.00010586501046768967, "loss": 0.1653, "step": 36090 }, { "epoch": 0.5074144353081734, "grad_norm": 0.36946389079093933, "learning_rate": 0.00010581860764719098, "loss": 0.2015, "step": 36100 }, { "epoch": 0.5075549933234943, "grad_norm": 0.4972825348377228, "learning_rate": 0.00010577220356952676, "loss": 0.1818, "step": 36110 }, { "epoch": 0.507695551338815, "grad_norm": 0.36207687854766846, "learning_rate": 0.00010572579824472302, "loss": 0.1725, "step": 36120 }, { "epoch": 0.5078361093541359, "grad_norm": 0.4180412292480469, "learning_rate": 0.00010567939168280608, "loss": 0.1728, "step": 36130 }, { "epoch": 0.5079766673694568, "grad_norm": 0.3445020616054535, "learning_rate": 0.00010563298389380251, "loss": 0.1779, "step": 36140 }, { "epoch": 0.5081172253847775, "grad_norm": 0.41422396898269653, "learning_rate": 0.0001055865748877392, "loss": 0.1967, "step": 36150 }, { "epoch": 0.5082577834000984, "grad_norm": 0.34044012427330017, "learning_rate": 0.00010554016467464321, "loss": 0.1912, "step": 36160 }, { "epoch": 0.5083983414154192, "grad_norm": 0.30843567848205566, "learning_rate": 0.00010549375326454194, "loss": 0.1828, "step": 36170 }, { "epoch": 0.50853889943074, "grad_norm": 0.40784013271331787, "learning_rate": 0.000105447340667463, "loss": 0.1861, "step": 36180 }, { "epoch": 0.5086794574460609, "grad_norm": 0.49464085698127747, "learning_rate": 0.00010540092689343427, "loss": 0.1882, "step": 36190 }, { "epoch": 0.5088200154613817, "grad_norm": 0.4711960554122925, "learning_rate": 0.00010535451195248387, "loss": 0.184, "step": 36200 }, { "epoch": 0.5089605734767025, "grad_norm": 0.34780821204185486, "learning_rate": 0.00010530809585464023, "loss": 0.1787, "step": 36210 }, { "epoch": 0.5091011314920233, "grad_norm": 0.6330233216285706, "learning_rate": 0.00010526167860993197, "loss": 0.2015, "step": 36220 }, { "epoch": 0.5092416895073442, "grad_norm": 0.36124908924102783, "learning_rate": 0.00010521526022838797, "loss": 0.167, "step": 36230 }, { "epoch": 0.509382247522665, "grad_norm": 0.6212900876998901, "learning_rate": 0.00010516884072003739, "loss": 0.1863, "step": 36240 }, { "epoch": 0.5095228055379858, "grad_norm": 0.3643467128276825, "learning_rate": 0.00010512242009490956, "loss": 0.1832, "step": 36250 }, { "epoch": 0.5096633635533067, "grad_norm": 0.34473875164985657, "learning_rate": 0.00010507599836303415, "loss": 0.174, "step": 36260 }, { "epoch": 0.5098039215686274, "grad_norm": 0.4868568480014801, "learning_rate": 0.00010502957553444097, "loss": 0.1681, "step": 36270 }, { "epoch": 0.5099444795839483, "grad_norm": 0.3094613254070282, "learning_rate": 0.00010498315161916015, "loss": 0.167, "step": 36280 }, { "epoch": 0.5100850375992692, "grad_norm": 0.47234678268432617, "learning_rate": 0.000104936726627222, "loss": 0.1805, "step": 36290 }, { "epoch": 0.5102255956145899, "grad_norm": 0.3965536057949066, "learning_rate": 0.0001048903005686571, "loss": 0.1735, "step": 36300 }, { "epoch": 0.5103661536299108, "grad_norm": 0.4829418957233429, "learning_rate": 0.0001048438734534962, "loss": 0.1909, "step": 36310 }, { "epoch": 0.5105067116452315, "grad_norm": 0.44158634543418884, "learning_rate": 0.00010479744529177038, "loss": 0.1855, "step": 36320 }, { "epoch": 0.5106472696605524, "grad_norm": 1.4402402639389038, "learning_rate": 0.00010475101609351083, "loss": 0.1744, "step": 36330 }, { "epoch": 0.5107878276758732, "grad_norm": 0.455005019903183, "learning_rate": 0.00010470458586874906, "loss": 0.1945, "step": 36340 }, { "epoch": 0.510928385691194, "grad_norm": 0.31440451741218567, "learning_rate": 0.00010465815462751674, "loss": 0.1733, "step": 36350 }, { "epoch": 0.5110689437065149, "grad_norm": 0.40663766860961914, "learning_rate": 0.00010461172237984578, "loss": 0.1881, "step": 36360 }, { "epoch": 0.5112095017218357, "grad_norm": 0.36541232466697693, "learning_rate": 0.00010456528913576833, "loss": 0.1849, "step": 36370 }, { "epoch": 0.5113500597371565, "grad_norm": 0.28649237751960754, "learning_rate": 0.00010451885490531672, "loss": 0.17, "step": 36380 }, { "epoch": 0.5114906177524773, "grad_norm": 0.3698398768901825, "learning_rate": 0.0001044724196985235, "loss": 0.178, "step": 36390 }, { "epoch": 0.5116311757677982, "grad_norm": 0.45252224802970886, "learning_rate": 0.00010442598352542143, "loss": 0.1966, "step": 36400 }, { "epoch": 0.511771733783119, "grad_norm": 0.38496294617652893, "learning_rate": 0.00010437954639604352, "loss": 0.1808, "step": 36410 }, { "epoch": 0.5119122917984398, "grad_norm": 0.4141913056373596, "learning_rate": 0.00010433310832042296, "loss": 0.1911, "step": 36420 }, { "epoch": 0.5120528498137606, "grad_norm": 0.4155624806880951, "learning_rate": 0.0001042866693085931, "loss": 0.1737, "step": 36430 }, { "epoch": 0.5121934078290814, "grad_norm": 0.3937687575817108, "learning_rate": 0.00010424022937058752, "loss": 0.1819, "step": 36440 }, { "epoch": 0.5123339658444023, "grad_norm": 0.40030568838119507, "learning_rate": 0.00010419378851644006, "loss": 0.1696, "step": 36450 }, { "epoch": 0.5124745238597231, "grad_norm": 0.42675095796585083, "learning_rate": 0.0001041473467561847, "loss": 0.1785, "step": 36460 }, { "epoch": 0.5126150818750439, "grad_norm": 0.5587252378463745, "learning_rate": 0.00010410090409985558, "loss": 0.1895, "step": 36470 }, { "epoch": 0.5127556398903648, "grad_norm": 0.3191625475883484, "learning_rate": 0.00010405446055748712, "loss": 0.1687, "step": 36480 }, { "epoch": 0.5128961979056855, "grad_norm": 0.393315851688385, "learning_rate": 0.00010400801613911386, "loss": 0.1804, "step": 36490 }, { "epoch": 0.5130367559210064, "grad_norm": 0.3991680443286896, "learning_rate": 0.00010396157085477059, "loss": 0.1911, "step": 36500 }, { "epoch": 0.5131773139363273, "grad_norm": 0.45736780762672424, "learning_rate": 0.00010391512471449223, "loss": 0.1984, "step": 36510 }, { "epoch": 0.513317871951648, "grad_norm": 0.392051100730896, "learning_rate": 0.00010386867772831394, "loss": 0.1757, "step": 36520 }, { "epoch": 0.5134584299669689, "grad_norm": 0.3728495240211487, "learning_rate": 0.00010382222990627102, "loss": 0.1999, "step": 36530 }, { "epoch": 0.5135989879822896, "grad_norm": 0.41388094425201416, "learning_rate": 0.00010377578125839893, "loss": 0.1693, "step": 36540 }, { "epoch": 0.5137395459976105, "grad_norm": 0.35319089889526367, "learning_rate": 0.00010372933179473338, "loss": 0.1824, "step": 36550 }, { "epoch": 0.5138801040129314, "grad_norm": 0.5678279399871826, "learning_rate": 0.0001036828815253102, "loss": 0.1639, "step": 36560 }, { "epoch": 0.5140206620282521, "grad_norm": 0.40534478425979614, "learning_rate": 0.00010363643046016545, "loss": 0.1565, "step": 36570 }, { "epoch": 0.514161220043573, "grad_norm": 0.40824031829833984, "learning_rate": 0.00010358997860933528, "loss": 0.1728, "step": 36580 }, { "epoch": 0.5143017780588938, "grad_norm": 0.45494553446769714, "learning_rate": 0.00010354352598285606, "loss": 0.2081, "step": 36590 }, { "epoch": 0.5144423360742146, "grad_norm": 0.4255465269088745, "learning_rate": 0.00010349707259076433, "loss": 0.1785, "step": 36600 }, { "epoch": 0.5145828940895355, "grad_norm": 0.4779079854488373, "learning_rate": 0.0001034506184430968, "loss": 0.1776, "step": 36610 }, { "epoch": 0.5147234521048563, "grad_norm": 0.40576520562171936, "learning_rate": 0.00010340416354989033, "loss": 0.1777, "step": 36620 }, { "epoch": 0.5148640101201771, "grad_norm": 0.38735461235046387, "learning_rate": 0.00010335770792118191, "loss": 0.1616, "step": 36630 }, { "epoch": 0.5150045681354979, "grad_norm": 0.32425543665885925, "learning_rate": 0.00010331125156700876, "loss": 0.1661, "step": 36640 }, { "epoch": 0.5151451261508188, "grad_norm": 0.4369176924228668, "learning_rate": 0.0001032647944974082, "loss": 0.1912, "step": 36650 }, { "epoch": 0.5152856841661396, "grad_norm": 0.3734516501426697, "learning_rate": 0.00010321833672241772, "loss": 0.1793, "step": 36660 }, { "epoch": 0.5154262421814604, "grad_norm": 0.45591041445732117, "learning_rate": 0.00010317187825207497, "loss": 0.1922, "step": 36670 }, { "epoch": 0.5155668001967812, "grad_norm": 0.4752703905105591, "learning_rate": 0.00010312541909641773, "loss": 0.1755, "step": 36680 }, { "epoch": 0.515707358212102, "grad_norm": 0.41179245710372925, "learning_rate": 0.00010307895926548396, "loss": 0.1888, "step": 36690 }, { "epoch": 0.5158479162274229, "grad_norm": 0.5543412566184998, "learning_rate": 0.00010303249876931174, "loss": 0.1806, "step": 36700 }, { "epoch": 0.5159884742427437, "grad_norm": 0.40941378474235535, "learning_rate": 0.0001029860376179393, "loss": 0.1757, "step": 36710 }, { "epoch": 0.5161290322580645, "grad_norm": 0.4833003580570221, "learning_rate": 0.00010293957582140503, "loss": 0.197, "step": 36720 }, { "epoch": 0.5162695902733854, "grad_norm": 0.40841785073280334, "learning_rate": 0.00010289311338974743, "loss": 0.1865, "step": 36730 }, { "epoch": 0.5164101482887061, "grad_norm": 0.5991966128349304, "learning_rate": 0.00010284665033300515, "loss": 0.1688, "step": 36740 }, { "epoch": 0.516550706304027, "grad_norm": 0.31613221764564514, "learning_rate": 0.00010280018666121698, "loss": 0.1724, "step": 36750 }, { "epoch": 0.5166912643193479, "grad_norm": 0.36570432782173157, "learning_rate": 0.00010275372238442185, "loss": 0.1873, "step": 36760 }, { "epoch": 0.5168318223346686, "grad_norm": 0.2841842770576477, "learning_rate": 0.00010270725751265877, "loss": 0.1729, "step": 36770 }, { "epoch": 0.5169723803499895, "grad_norm": 0.5042742490768433, "learning_rate": 0.00010266079205596695, "loss": 0.1893, "step": 36780 }, { "epoch": 0.5171129383653102, "grad_norm": 0.3679254353046417, "learning_rate": 0.0001026143260243857, "loss": 0.1759, "step": 36790 }, { "epoch": 0.5172534963806311, "grad_norm": 0.3465472459793091, "learning_rate": 0.00010256785942795442, "loss": 0.1682, "step": 36800 }, { "epoch": 0.5173940543959519, "grad_norm": 0.39342233538627625, "learning_rate": 0.00010252139227671269, "loss": 0.1794, "step": 36810 }, { "epoch": 0.5175346124112727, "grad_norm": 0.4358043074607849, "learning_rate": 0.00010247492458070016, "loss": 0.1927, "step": 36820 }, { "epoch": 0.5176751704265936, "grad_norm": 0.3802052140235901, "learning_rate": 0.00010242845634995662, "loss": 0.1719, "step": 36830 }, { "epoch": 0.5178157284419144, "grad_norm": 0.4845336377620697, "learning_rate": 0.00010238198759452197, "loss": 0.1841, "step": 36840 }, { "epoch": 0.5179562864572352, "grad_norm": 0.5214284062385559, "learning_rate": 0.00010233551832443626, "loss": 0.1831, "step": 36850 }, { "epoch": 0.518096844472556, "grad_norm": 0.3969631493091583, "learning_rate": 0.00010228904854973957, "loss": 0.1836, "step": 36860 }, { "epoch": 0.5182374024878769, "grad_norm": 0.3800993859767914, "learning_rate": 0.00010224257828047219, "loss": 0.1623, "step": 36870 }, { "epoch": 0.5183779605031977, "grad_norm": 0.3893860876560211, "learning_rate": 0.0001021961075266744, "loss": 0.173, "step": 36880 }, { "epoch": 0.5185185185185185, "grad_norm": 0.4365367591381073, "learning_rate": 0.0001021496362983867, "loss": 0.1498, "step": 36890 }, { "epoch": 0.5186590765338394, "grad_norm": 0.44684091210365295, "learning_rate": 0.0001021031646056496, "loss": 0.1872, "step": 36900 }, { "epoch": 0.5187996345491601, "grad_norm": 0.3404183089733124, "learning_rate": 0.00010205669245850377, "loss": 0.1881, "step": 36910 }, { "epoch": 0.518940192564481, "grad_norm": 0.4390861988067627, "learning_rate": 0.00010201021986698999, "loss": 0.198, "step": 36920 }, { "epoch": 0.5190807505798019, "grad_norm": 0.6471704840660095, "learning_rate": 0.00010196374684114906, "loss": 0.1732, "step": 36930 }, { "epoch": 0.5192213085951226, "grad_norm": 0.330267995595932, "learning_rate": 0.00010191727339102197, "loss": 0.1813, "step": 36940 }, { "epoch": 0.5193618666104435, "grad_norm": 0.5875914692878723, "learning_rate": 0.00010187079952664968, "loss": 0.1752, "step": 36950 }, { "epoch": 0.5195024246257642, "grad_norm": 0.28342390060424805, "learning_rate": 0.00010182432525807337, "loss": 0.1759, "step": 36960 }, { "epoch": 0.5196429826410851, "grad_norm": 0.448543518781662, "learning_rate": 0.00010177785059533425, "loss": 0.1778, "step": 36970 }, { "epoch": 0.519783540656406, "grad_norm": 0.4849998950958252, "learning_rate": 0.00010173137554847356, "loss": 0.1858, "step": 36980 }, { "epoch": 0.5199240986717267, "grad_norm": 0.4241492450237274, "learning_rate": 0.00010168490012753272, "loss": 0.1918, "step": 36990 }, { "epoch": 0.5200646566870476, "grad_norm": 0.3861071765422821, "learning_rate": 0.00010163842434255318, "loss": 0.1836, "step": 37000 }, { "epoch": 0.5200646566870476, "eval_chrf": 80.22303827352079, "eval_loss": 0.38697686791419983, "eval_runtime": 326.8138, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.012, "step": 37000 }, { "epoch": 0.5202052147023684, "grad_norm": 0.49819403886795044, "learning_rate": 0.00010159194820357644, "loss": 0.1747, "step": 37010 }, { "epoch": 0.5203457727176892, "grad_norm": 0.34560924768447876, "learning_rate": 0.00010154547172064417, "loss": 0.1958, "step": 37020 }, { "epoch": 0.5204863307330101, "grad_norm": 0.373997300863266, "learning_rate": 0.00010149899490379801, "loss": 0.1941, "step": 37030 }, { "epoch": 0.5206268887483309, "grad_norm": 0.3896332085132599, "learning_rate": 0.00010145251776307976, "loss": 0.1825, "step": 37040 }, { "epoch": 0.5207674467636517, "grad_norm": 0.6348997354507446, "learning_rate": 0.00010140604030853121, "loss": 0.1778, "step": 37050 }, { "epoch": 0.5209080047789725, "grad_norm": 0.3897026777267456, "learning_rate": 0.00010135956255019427, "loss": 0.1704, "step": 37060 }, { "epoch": 0.5210485627942933, "grad_norm": 0.3688303232192993, "learning_rate": 0.00010131308449811093, "loss": 0.1983, "step": 37070 }, { "epoch": 0.5211891208096142, "grad_norm": 0.36505571007728577, "learning_rate": 0.00010126660616232315, "loss": 0.1873, "step": 37080 }, { "epoch": 0.521329678824935, "grad_norm": 0.3224770426750183, "learning_rate": 0.00010122012755287309, "loss": 0.1918, "step": 37090 }, { "epoch": 0.5214702368402558, "grad_norm": 0.4346891939640045, "learning_rate": 0.00010117364867980284, "loss": 0.191, "step": 37100 }, { "epoch": 0.5216107948555766, "grad_norm": 0.3521254062652588, "learning_rate": 0.00010112716955315465, "loss": 0.1638, "step": 37110 }, { "epoch": 0.5217513528708975, "grad_norm": 0.3422907292842865, "learning_rate": 0.00010108069018297073, "loss": 0.1667, "step": 37120 }, { "epoch": 0.5218919108862183, "grad_norm": 0.4173455834388733, "learning_rate": 0.0001010342105792934, "loss": 0.1714, "step": 37130 }, { "epoch": 0.5220324689015391, "grad_norm": 0.4705955386161804, "learning_rate": 0.00010098773075216505, "loss": 0.1802, "step": 37140 }, { "epoch": 0.52217302691686, "grad_norm": 0.5038504004478455, "learning_rate": 0.00010094125071162807, "loss": 0.2096, "step": 37150 }, { "epoch": 0.5223135849321807, "grad_norm": 0.4828181564807892, "learning_rate": 0.00010089477046772493, "loss": 0.1928, "step": 37160 }, { "epoch": 0.5224541429475016, "grad_norm": 0.5034831166267395, "learning_rate": 0.00010084829003049812, "loss": 0.1799, "step": 37170 }, { "epoch": 0.5225947009628225, "grad_norm": 0.40919286012649536, "learning_rate": 0.00010080180940999015, "loss": 0.1881, "step": 37180 }, { "epoch": 0.5227352589781432, "grad_norm": 0.45640450716018677, "learning_rate": 0.00010075532861624364, "loss": 0.168, "step": 37190 }, { "epoch": 0.5228758169934641, "grad_norm": 0.47407418489456177, "learning_rate": 0.0001007088476593012, "loss": 0.1887, "step": 37200 }, { "epoch": 0.5230163750087848, "grad_norm": 0.32496556639671326, "learning_rate": 0.00010066236654920546, "loss": 0.1781, "step": 37210 }, { "epoch": 0.5231569330241057, "grad_norm": 0.4471747577190399, "learning_rate": 0.00010061588529599915, "loss": 0.1884, "step": 37220 }, { "epoch": 0.5232974910394266, "grad_norm": 0.45092710852622986, "learning_rate": 0.00010056940390972495, "loss": 0.187, "step": 37230 }, { "epoch": 0.5234380490547473, "grad_norm": 0.5467546582221985, "learning_rate": 0.00010052292240042562, "loss": 0.1982, "step": 37240 }, { "epoch": 0.5235786070700682, "grad_norm": 0.3501761257648468, "learning_rate": 0.00010047644077814393, "loss": 0.1884, "step": 37250 }, { "epoch": 0.523719165085389, "grad_norm": 0.5036628842353821, "learning_rate": 0.00010042995905292267, "loss": 0.2039, "step": 37260 }, { "epoch": 0.5238597231007098, "grad_norm": 0.440433531999588, "learning_rate": 0.00010038347723480466, "loss": 0.1892, "step": 37270 }, { "epoch": 0.5240002811160306, "grad_norm": 0.4935232400894165, "learning_rate": 0.00010033699533383277, "loss": 0.1596, "step": 37280 }, { "epoch": 0.5241408391313515, "grad_norm": 0.38276660442352295, "learning_rate": 0.0001002905133600498, "loss": 0.163, "step": 37290 }, { "epoch": 0.5242813971466723, "grad_norm": 0.3699820339679718, "learning_rate": 0.00010024403132349867, "loss": 0.1836, "step": 37300 }, { "epoch": 0.5244219551619931, "grad_norm": 0.2769719064235687, "learning_rate": 0.00010019754923422225, "loss": 0.1545, "step": 37310 }, { "epoch": 0.524562513177314, "grad_norm": 0.3033786118030548, "learning_rate": 0.00010015106710226343, "loss": 0.1662, "step": 37320 }, { "epoch": 0.5247030711926347, "grad_norm": 0.49226874113082886, "learning_rate": 0.00010010458493766512, "loss": 0.2252, "step": 37330 }, { "epoch": 0.5248436292079556, "grad_norm": 0.3806842863559723, "learning_rate": 0.00010005810275047025, "loss": 0.1894, "step": 37340 }, { "epoch": 0.5249841872232764, "grad_norm": 0.38338780403137207, "learning_rate": 0.00010001162055072173, "loss": 0.1613, "step": 37350 }, { "epoch": 0.5251247452385972, "grad_norm": 0.36801716685295105, "learning_rate": 9.996513834846251e-05, "loss": 0.1724, "step": 37360 }, { "epoch": 0.5252653032539181, "grad_norm": 0.30908024311065674, "learning_rate": 9.991865615373545e-05, "loss": 0.1683, "step": 37370 }, { "epoch": 0.5254058612692388, "grad_norm": 0.4597463309764862, "learning_rate": 9.987217397658349e-05, "loss": 0.1851, "step": 37380 }, { "epoch": 0.5255464192845597, "grad_norm": 0.2701462209224701, "learning_rate": 9.982569182704959e-05, "loss": 0.1782, "step": 37390 }, { "epoch": 0.5256869772998806, "grad_norm": 0.3483943045139313, "learning_rate": 9.97792097151766e-05, "loss": 0.1924, "step": 37400 }, { "epoch": 0.5258275353152013, "grad_norm": 0.42426466941833496, "learning_rate": 9.973272765100747e-05, "loss": 0.185, "step": 37410 }, { "epoch": 0.5259680933305222, "grad_norm": 0.40943610668182373, "learning_rate": 9.968624564458506e-05, "loss": 0.1922, "step": 37420 }, { "epoch": 0.526108651345843, "grad_norm": 0.3415329158306122, "learning_rate": 9.963976370595226e-05, "loss": 0.1665, "step": 37430 }, { "epoch": 0.5262492093611638, "grad_norm": 0.4101947546005249, "learning_rate": 9.959328184515195e-05, "loss": 0.169, "step": 37440 }, { "epoch": 0.5263897673764847, "grad_norm": 0.33659279346466064, "learning_rate": 9.954680007222696e-05, "loss": 0.1846, "step": 37450 }, { "epoch": 0.5265303253918054, "grad_norm": 0.43769797682762146, "learning_rate": 9.950031839722014e-05, "loss": 0.1691, "step": 37460 }, { "epoch": 0.5266708834071263, "grad_norm": 0.34851711988449097, "learning_rate": 9.945383683017427e-05, "loss": 0.1611, "step": 37470 }, { "epoch": 0.5268114414224471, "grad_norm": 0.6938095688819885, "learning_rate": 9.940735538113216e-05, "loss": 0.2046, "step": 37480 }, { "epoch": 0.5269519994377679, "grad_norm": 0.4683268964290619, "learning_rate": 9.936087406013656e-05, "loss": 0.1661, "step": 37490 }, { "epoch": 0.5270925574530888, "grad_norm": 0.4278859496116638, "learning_rate": 9.93143928772302e-05, "loss": 0.1783, "step": 37500 }, { "epoch": 0.5272331154684096, "grad_norm": 0.42725682258605957, "learning_rate": 9.926791184245579e-05, "loss": 0.1731, "step": 37510 }, { "epoch": 0.5273736734837304, "grad_norm": 0.4099690914154053, "learning_rate": 9.922143096585598e-05, "loss": 0.1459, "step": 37520 }, { "epoch": 0.5275142314990512, "grad_norm": 0.4662937521934509, "learning_rate": 9.917495025747343e-05, "loss": 0.1759, "step": 37530 }, { "epoch": 0.5276547895143721, "grad_norm": 0.2619777321815491, "learning_rate": 9.91284697273507e-05, "loss": 0.1808, "step": 37540 }, { "epoch": 0.5277953475296929, "grad_norm": 0.4805005192756653, "learning_rate": 9.908198938553041e-05, "loss": 0.1749, "step": 37550 }, { "epoch": 0.5279359055450137, "grad_norm": 0.5037651062011719, "learning_rate": 9.903550924205504e-05, "loss": 0.1955, "step": 37560 }, { "epoch": 0.5280764635603346, "grad_norm": 0.41545727849006653, "learning_rate": 9.898902930696706e-05, "loss": 0.1804, "step": 37570 }, { "epoch": 0.5282170215756553, "grad_norm": 0.36044058203697205, "learning_rate": 9.894254959030895e-05, "loss": 0.1941, "step": 37580 }, { "epoch": 0.5283575795909762, "grad_norm": 0.44440537691116333, "learning_rate": 9.889607010212302e-05, "loss": 0.161, "step": 37590 }, { "epoch": 0.528498137606297, "grad_norm": 0.6077246069908142, "learning_rate": 9.884959085245168e-05, "loss": 0.183, "step": 37600 }, { "epoch": 0.5286386956216178, "grad_norm": 0.3023645877838135, "learning_rate": 9.880311185133719e-05, "loss": 0.1788, "step": 37610 }, { "epoch": 0.5287792536369387, "grad_norm": 0.3990058898925781, "learning_rate": 9.875663310882174e-05, "loss": 0.2035, "step": 37620 }, { "epoch": 0.5289198116522594, "grad_norm": 0.30556347966194153, "learning_rate": 9.871015463494757e-05, "loss": 0.1794, "step": 37630 }, { "epoch": 0.5290603696675803, "grad_norm": 0.3057769536972046, "learning_rate": 9.866367643975676e-05, "loss": 0.1723, "step": 37640 }, { "epoch": 0.5292009276829012, "grad_norm": 0.4252437651157379, "learning_rate": 9.861719853329135e-05, "loss": 0.182, "step": 37650 }, { "epoch": 0.5293414856982219, "grad_norm": 0.3990003764629364, "learning_rate": 9.857072092559338e-05, "loss": 0.1683, "step": 37660 }, { "epoch": 0.5294820437135428, "grad_norm": 0.444238543510437, "learning_rate": 9.852424362670474e-05, "loss": 0.1886, "step": 37670 }, { "epoch": 0.5296226017288636, "grad_norm": 0.37116536498069763, "learning_rate": 9.847776664666732e-05, "loss": 0.1867, "step": 37680 }, { "epoch": 0.5297631597441844, "grad_norm": 0.4291340112686157, "learning_rate": 9.84312899955229e-05, "loss": 0.182, "step": 37690 }, { "epoch": 0.5299037177595053, "grad_norm": 0.3295157551765442, "learning_rate": 9.838481368331318e-05, "loss": 0.1853, "step": 37700 }, { "epoch": 0.530044275774826, "grad_norm": 0.5010133385658264, "learning_rate": 9.833833772007985e-05, "loss": 0.173, "step": 37710 }, { "epoch": 0.5301848337901469, "grad_norm": 0.4085811376571655, "learning_rate": 9.829186211586447e-05, "loss": 0.1828, "step": 37720 }, { "epoch": 0.5303253918054677, "grad_norm": 0.2573600113391876, "learning_rate": 9.824538688070852e-05, "loss": 0.1752, "step": 37730 }, { "epoch": 0.5304659498207885, "grad_norm": 0.4763016104698181, "learning_rate": 9.819891202465344e-05, "loss": 0.1962, "step": 37740 }, { "epoch": 0.5306065078361094, "grad_norm": 0.47767651081085205, "learning_rate": 9.815243755774056e-05, "loss": 0.1811, "step": 37750 }, { "epoch": 0.5307470658514302, "grad_norm": 0.417579710483551, "learning_rate": 9.810596349001113e-05, "loss": 0.1826, "step": 37760 }, { "epoch": 0.530887623866751, "grad_norm": 0.3614961504936218, "learning_rate": 9.80594898315063e-05, "loss": 0.1622, "step": 37770 }, { "epoch": 0.5310281818820718, "grad_norm": 0.3221801817417145, "learning_rate": 9.801301659226716e-05, "loss": 0.1447, "step": 37780 }, { "epoch": 0.5311687398973927, "grad_norm": 0.4179418981075287, "learning_rate": 9.79665437823347e-05, "loss": 0.1655, "step": 37790 }, { "epoch": 0.5313092979127134, "grad_norm": 0.3602157235145569, "learning_rate": 9.792007141174979e-05, "loss": 0.1818, "step": 37800 }, { "epoch": 0.5314498559280343, "grad_norm": 0.30040112137794495, "learning_rate": 9.787359949055326e-05, "loss": 0.1932, "step": 37810 }, { "epoch": 0.5315904139433552, "grad_norm": 0.37365633249282837, "learning_rate": 9.782712802878577e-05, "loss": 0.1536, "step": 37820 }, { "epoch": 0.5317309719586759, "grad_norm": 0.4313228130340576, "learning_rate": 9.778065703648798e-05, "loss": 0.1934, "step": 37830 }, { "epoch": 0.5318715299739968, "grad_norm": 0.36040905117988586, "learning_rate": 9.773418652370033e-05, "loss": 0.2154, "step": 37840 }, { "epoch": 0.5320120879893175, "grad_norm": 0.4079858064651489, "learning_rate": 9.768771650046325e-05, "loss": 0.1841, "step": 37850 }, { "epoch": 0.5321526460046384, "grad_norm": 0.3194751739501953, "learning_rate": 9.764124697681702e-05, "loss": 0.1886, "step": 37860 }, { "epoch": 0.5322932040199593, "grad_norm": 0.2546814978122711, "learning_rate": 9.759477796280183e-05, "loss": 0.1865, "step": 37870 }, { "epoch": 0.53243376203528, "grad_norm": 0.4091358482837677, "learning_rate": 9.754830946845775e-05, "loss": 0.1794, "step": 37880 }, { "epoch": 0.5325743200506009, "grad_norm": 0.580310583114624, "learning_rate": 9.750184150382473e-05, "loss": 0.1813, "step": 37890 }, { "epoch": 0.5327148780659217, "grad_norm": 0.3971152901649475, "learning_rate": 9.745537407894263e-05, "loss": 0.1806, "step": 37900 }, { "epoch": 0.5328554360812425, "grad_norm": 0.34374722838401794, "learning_rate": 9.740890720385117e-05, "loss": 0.1755, "step": 37910 }, { "epoch": 0.5329959940965634, "grad_norm": 0.43604105710983276, "learning_rate": 9.736244088858997e-05, "loss": 0.1956, "step": 37920 }, { "epoch": 0.5331365521118842, "grad_norm": 0.3789943754673004, "learning_rate": 9.731597514319852e-05, "loss": 0.1944, "step": 37930 }, { "epoch": 0.533277110127205, "grad_norm": 0.26952505111694336, "learning_rate": 9.726950997771616e-05, "loss": 0.1775, "step": 37940 }, { "epoch": 0.5334176681425258, "grad_norm": 0.3544701039791107, "learning_rate": 9.722304540218216e-05, "loss": 0.1693, "step": 37950 }, { "epoch": 0.5335582261578466, "grad_norm": 0.34584537148475647, "learning_rate": 9.717658142663564e-05, "loss": 0.1771, "step": 37960 }, { "epoch": 0.5336987841731675, "grad_norm": 0.33332979679107666, "learning_rate": 9.713011806111554e-05, "loss": 0.1749, "step": 37970 }, { "epoch": 0.5338393421884883, "grad_norm": 0.3988155126571655, "learning_rate": 9.708365531566078e-05, "loss": 0.1798, "step": 37980 }, { "epoch": 0.5339799002038091, "grad_norm": 0.38039711117744446, "learning_rate": 9.703719320031001e-05, "loss": 0.1661, "step": 37990 }, { "epoch": 0.5341204582191299, "grad_norm": 0.3397109806537628, "learning_rate": 9.699073172510186e-05, "loss": 0.167, "step": 38000 }, { "epoch": 0.5341204582191299, "eval_chrf": 71.61878239659443, "eval_loss": 0.36813652515411377, "eval_runtime": 326.4744, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.012, "step": 38000 }, { "epoch": 0.5342610162344508, "grad_norm": 0.3394010663032532, "learning_rate": 9.694427090007474e-05, "loss": 0.1764, "step": 38010 }, { "epoch": 0.5344015742497716, "grad_norm": 0.24723602831363678, "learning_rate": 9.689781073526698e-05, "loss": 0.2023, "step": 38020 }, { "epoch": 0.5345421322650924, "grad_norm": 0.47091975808143616, "learning_rate": 9.685135124071675e-05, "loss": 0.1811, "step": 38030 }, { "epoch": 0.5346826902804133, "grad_norm": 0.5207058191299438, "learning_rate": 9.680489242646201e-05, "loss": 0.176, "step": 38040 }, { "epoch": 0.534823248295734, "grad_norm": 0.4883480966091156, "learning_rate": 9.675843430254068e-05, "loss": 0.1744, "step": 38050 }, { "epoch": 0.5349638063110549, "grad_norm": 0.450432151556015, "learning_rate": 9.671197687899046e-05, "loss": 0.1801, "step": 38060 }, { "epoch": 0.5351043643263758, "grad_norm": 0.3998246490955353, "learning_rate": 9.666552016584894e-05, "loss": 0.1863, "step": 38070 }, { "epoch": 0.5352449223416965, "grad_norm": 0.4299420416355133, "learning_rate": 9.661906417315349e-05, "loss": 0.1832, "step": 38080 }, { "epoch": 0.5353854803570174, "grad_norm": 0.3945608139038086, "learning_rate": 9.657260891094141e-05, "loss": 0.179, "step": 38090 }, { "epoch": 0.5355260383723381, "grad_norm": 0.409795880317688, "learning_rate": 9.652615438924979e-05, "loss": 0.1784, "step": 38100 }, { "epoch": 0.535666596387659, "grad_norm": 0.38981491327285767, "learning_rate": 9.647970061811556e-05, "loss": 0.1796, "step": 38110 }, { "epoch": 0.5358071544029799, "grad_norm": 0.47124508023262024, "learning_rate": 9.643324760757552e-05, "loss": 0.1847, "step": 38120 }, { "epoch": 0.5359477124183006, "grad_norm": 0.3736439049243927, "learning_rate": 9.638679536766627e-05, "loss": 0.19, "step": 38130 }, { "epoch": 0.5360882704336215, "grad_norm": 0.40727341175079346, "learning_rate": 9.634034390842426e-05, "loss": 0.193, "step": 38140 }, { "epoch": 0.5362288284489423, "grad_norm": 0.5049578547477722, "learning_rate": 9.629389323988578e-05, "loss": 0.1657, "step": 38150 }, { "epoch": 0.5363693864642631, "grad_norm": 0.4338602125644684, "learning_rate": 9.624744337208692e-05, "loss": 0.1883, "step": 38160 }, { "epoch": 0.536509944479584, "grad_norm": 0.3599756062030792, "learning_rate": 9.620099431506364e-05, "loss": 0.1853, "step": 38170 }, { "epoch": 0.5366505024949048, "grad_norm": 0.4722822308540344, "learning_rate": 9.615454607885168e-05, "loss": 0.1575, "step": 38180 }, { "epoch": 0.5367910605102256, "grad_norm": 0.5411520004272461, "learning_rate": 9.610809867348664e-05, "loss": 0.2003, "step": 38190 }, { "epoch": 0.5369316185255464, "grad_norm": 0.5211664438247681, "learning_rate": 9.606165210900391e-05, "loss": 0.1781, "step": 38200 }, { "epoch": 0.5370721765408673, "grad_norm": 0.4710027575492859, "learning_rate": 9.601520639543871e-05, "loss": 0.1752, "step": 38210 }, { "epoch": 0.5372127345561881, "grad_norm": 0.387617826461792, "learning_rate": 9.596876154282609e-05, "loss": 0.1564, "step": 38220 }, { "epoch": 0.5373532925715089, "grad_norm": 0.40515100955963135, "learning_rate": 9.592231756120091e-05, "loss": 0.1681, "step": 38230 }, { "epoch": 0.5374938505868297, "grad_norm": 0.49223166704177856, "learning_rate": 9.58758744605978e-05, "loss": 0.1587, "step": 38240 }, { "epoch": 0.5376344086021505, "grad_norm": 0.4113706946372986, "learning_rate": 9.582943225105127e-05, "loss": 0.1711, "step": 38250 }, { "epoch": 0.5377749666174714, "grad_norm": 0.429145485162735, "learning_rate": 9.57829909425956e-05, "loss": 0.1673, "step": 38260 }, { "epoch": 0.5379155246327921, "grad_norm": 0.5879194736480713, "learning_rate": 9.573655054526485e-05, "loss": 0.1806, "step": 38270 }, { "epoch": 0.538056082648113, "grad_norm": 0.37690460681915283, "learning_rate": 9.569011106909293e-05, "loss": 0.1814, "step": 38280 }, { "epoch": 0.5381966406634339, "grad_norm": 0.2968907356262207, "learning_rate": 9.564367252411351e-05, "loss": 0.1504, "step": 38290 }, { "epoch": 0.5383371986787546, "grad_norm": 0.44435423612594604, "learning_rate": 9.559723492036011e-05, "loss": 0.185, "step": 38300 }, { "epoch": 0.5384777566940755, "grad_norm": 0.3620116412639618, "learning_rate": 9.5550798267866e-05, "loss": 0.1562, "step": 38310 }, { "epoch": 0.5386183147093963, "grad_norm": 0.42804157733917236, "learning_rate": 9.550436257666425e-05, "loss": 0.1995, "step": 38320 }, { "epoch": 0.5387588727247171, "grad_norm": 0.4588015675544739, "learning_rate": 9.545792785678776e-05, "loss": 0.166, "step": 38330 }, { "epoch": 0.538899430740038, "grad_norm": 0.44554048776626587, "learning_rate": 9.541149411826917e-05, "loss": 0.1716, "step": 38340 }, { "epoch": 0.5390399887553587, "grad_norm": 0.5267353057861328, "learning_rate": 9.536506137114092e-05, "loss": 0.1567, "step": 38350 }, { "epoch": 0.5391805467706796, "grad_norm": 0.4399483799934387, "learning_rate": 9.531862962543528e-05, "loss": 0.1775, "step": 38360 }, { "epoch": 0.5393211047860004, "grad_norm": 0.4195297658443451, "learning_rate": 9.527219889118426e-05, "loss": 0.161, "step": 38370 }, { "epoch": 0.5394616628013212, "grad_norm": 0.4252000153064728, "learning_rate": 9.522576917841966e-05, "loss": 0.1813, "step": 38380 }, { "epoch": 0.5396022208166421, "grad_norm": 0.3439667224884033, "learning_rate": 9.517934049717304e-05, "loss": 0.1732, "step": 38390 }, { "epoch": 0.5397427788319629, "grad_norm": 0.4506164789199829, "learning_rate": 9.51329128574758e-05, "loss": 0.193, "step": 38400 }, { "epoch": 0.5398833368472837, "grad_norm": 0.54718416929245, "learning_rate": 9.508648626935903e-05, "loss": 0.2013, "step": 38410 }, { "epoch": 0.5400238948626045, "grad_norm": 0.4310196340084076, "learning_rate": 9.504006074285367e-05, "loss": 0.1659, "step": 38420 }, { "epoch": 0.5401644528779254, "grad_norm": 0.380362868309021, "learning_rate": 9.499363628799039e-05, "loss": 0.1869, "step": 38430 }, { "epoch": 0.5403050108932462, "grad_norm": 0.32745522260665894, "learning_rate": 9.494721291479961e-05, "loss": 0.1787, "step": 38440 }, { "epoch": 0.540445568908567, "grad_norm": 0.43625086545944214, "learning_rate": 9.490079063331156e-05, "loss": 0.1669, "step": 38450 }, { "epoch": 0.5405861269238879, "grad_norm": 0.4483635723590851, "learning_rate": 9.485436945355623e-05, "loss": 0.1531, "step": 38460 }, { "epoch": 0.5407266849392086, "grad_norm": 0.45638909935951233, "learning_rate": 9.480794938556334e-05, "loss": 0.1668, "step": 38470 }, { "epoch": 0.5408672429545295, "grad_norm": 0.3815574645996094, "learning_rate": 9.476153043936239e-05, "loss": 0.1885, "step": 38480 }, { "epoch": 0.5410078009698503, "grad_norm": 0.3492136299610138, "learning_rate": 9.471511262498263e-05, "loss": 0.1687, "step": 38490 }, { "epoch": 0.5411483589851711, "grad_norm": 0.3178686499595642, "learning_rate": 9.466869595245308e-05, "loss": 0.1733, "step": 38500 }, { "epoch": 0.541288917000492, "grad_norm": 0.4572628140449524, "learning_rate": 9.46222804318025e-05, "loss": 0.1669, "step": 38510 }, { "epoch": 0.5414294750158127, "grad_norm": 0.34754234552383423, "learning_rate": 9.45758660730594e-05, "loss": 0.185, "step": 38520 }, { "epoch": 0.5415700330311336, "grad_norm": 0.43663638830184937, "learning_rate": 9.452945288625206e-05, "loss": 0.1672, "step": 38530 }, { "epoch": 0.5417105910464545, "grad_norm": 0.3496999740600586, "learning_rate": 9.448304088140847e-05, "loss": 0.1825, "step": 38540 }, { "epoch": 0.5418511490617752, "grad_norm": 0.40601423382759094, "learning_rate": 9.44366300685564e-05, "loss": 0.1899, "step": 38550 }, { "epoch": 0.5419917070770961, "grad_norm": 0.4218822419643402, "learning_rate": 9.439022045772335e-05, "loss": 0.1852, "step": 38560 }, { "epoch": 0.5421322650924169, "grad_norm": 0.3982556462287903, "learning_rate": 9.434381205893653e-05, "loss": 0.169, "step": 38570 }, { "epoch": 0.5422728231077377, "grad_norm": 0.5806414484977722, "learning_rate": 9.429740488222295e-05, "loss": 0.1743, "step": 38580 }, { "epoch": 0.5424133811230586, "grad_norm": 0.4485076069831848, "learning_rate": 9.425099893760931e-05, "loss": 0.1708, "step": 38590 }, { "epoch": 0.5425539391383793, "grad_norm": 0.39736509323120117, "learning_rate": 9.420459423512204e-05, "loss": 0.163, "step": 38600 }, { "epoch": 0.5426944971537002, "grad_norm": 0.34437084197998047, "learning_rate": 9.415819078478736e-05, "loss": 0.1705, "step": 38610 }, { "epoch": 0.542835055169021, "grad_norm": 0.3532562553882599, "learning_rate": 9.411178859663112e-05, "loss": 0.1522, "step": 38620 }, { "epoch": 0.5429756131843418, "grad_norm": 0.38678795099258423, "learning_rate": 9.406538768067901e-05, "loss": 0.188, "step": 38630 }, { "epoch": 0.5431161711996627, "grad_norm": 0.456844687461853, "learning_rate": 9.401898804695635e-05, "loss": 0.1839, "step": 38640 }, { "epoch": 0.5432567292149835, "grad_norm": 0.3993634581565857, "learning_rate": 9.397258970548823e-05, "loss": 0.1612, "step": 38650 }, { "epoch": 0.5433972872303043, "grad_norm": 0.42853468656539917, "learning_rate": 9.392619266629947e-05, "loss": 0.2004, "step": 38660 }, { "epoch": 0.5435378452456251, "grad_norm": 0.4220224618911743, "learning_rate": 9.387979693941457e-05, "loss": 0.1661, "step": 38670 }, { "epoch": 0.543678403260946, "grad_norm": 0.3359472453594208, "learning_rate": 9.383340253485779e-05, "loss": 0.1551, "step": 38680 }, { "epoch": 0.5438189612762668, "grad_norm": 0.4105564057826996, "learning_rate": 9.378700946265305e-05, "loss": 0.1752, "step": 38690 }, { "epoch": 0.5439595192915876, "grad_norm": 0.4355560541152954, "learning_rate": 9.37406177328241e-05, "loss": 0.1838, "step": 38700 }, { "epoch": 0.5441000773069085, "grad_norm": 0.5953741669654846, "learning_rate": 9.369422735539421e-05, "loss": 0.207, "step": 38710 }, { "epoch": 0.5442406353222292, "grad_norm": 0.42537426948547363, "learning_rate": 9.364783834038648e-05, "loss": 0.1863, "step": 38720 }, { "epoch": 0.5443811933375501, "grad_norm": 0.36153459548950195, "learning_rate": 9.360145069782373e-05, "loss": 0.1701, "step": 38730 }, { "epoch": 0.5445217513528708, "grad_norm": 0.36473673582077026, "learning_rate": 9.355506443772846e-05, "loss": 0.1773, "step": 38740 }, { "epoch": 0.5446623093681917, "grad_norm": 0.32480740547180176, "learning_rate": 9.350867957012283e-05, "loss": 0.1631, "step": 38750 }, { "epoch": 0.5448028673835126, "grad_norm": 0.6103132367134094, "learning_rate": 9.346229610502874e-05, "loss": 0.1778, "step": 38760 }, { "epoch": 0.5449434253988333, "grad_norm": 0.4113089144229889, "learning_rate": 9.341591405246782e-05, "loss": 0.1755, "step": 38770 }, { "epoch": 0.5450839834141542, "grad_norm": 0.3067460358142853, "learning_rate": 9.336953342246129e-05, "loss": 0.174, "step": 38780 }, { "epoch": 0.545224541429475, "grad_norm": 0.6269018650054932, "learning_rate": 9.332315422503017e-05, "loss": 0.197, "step": 38790 }, { "epoch": 0.5453650994447958, "grad_norm": 0.38305601477622986, "learning_rate": 9.32767764701951e-05, "loss": 0.1696, "step": 38800 }, { "epoch": 0.5455056574601167, "grad_norm": 0.3979334831237793, "learning_rate": 9.323040016797646e-05, "loss": 0.1744, "step": 38810 }, { "epoch": 0.5456462154754375, "grad_norm": 0.46121445298194885, "learning_rate": 9.318402532839427e-05, "loss": 0.1849, "step": 38820 }, { "epoch": 0.5457867734907583, "grad_norm": 0.40946972370147705, "learning_rate": 9.313765196146829e-05, "loss": 0.1805, "step": 38830 }, { "epoch": 0.5459273315060791, "grad_norm": 0.522844135761261, "learning_rate": 9.309128007721788e-05, "loss": 0.179, "step": 38840 }, { "epoch": 0.5460678895214, "grad_norm": 0.3552819788455963, "learning_rate": 9.304490968566216e-05, "loss": 0.1784, "step": 38850 }, { "epoch": 0.5462084475367208, "grad_norm": 0.34201109409332275, "learning_rate": 9.299854079681989e-05, "loss": 0.1754, "step": 38860 }, { "epoch": 0.5463490055520416, "grad_norm": 0.48454296588897705, "learning_rate": 9.295217342070949e-05, "loss": 0.1848, "step": 38870 }, { "epoch": 0.5464895635673624, "grad_norm": 0.3617215156555176, "learning_rate": 9.290580756734909e-05, "loss": 0.1616, "step": 38880 }, { "epoch": 0.5466301215826832, "grad_norm": 0.45073843002319336, "learning_rate": 9.285944324675648e-05, "loss": 0.2035, "step": 38890 }, { "epoch": 0.5467706795980041, "grad_norm": 0.37417346239089966, "learning_rate": 9.28130804689491e-05, "loss": 0.1646, "step": 38900 }, { "epoch": 0.5469112376133249, "grad_norm": 0.4643031656742096, "learning_rate": 9.276671924394407e-05, "loss": 0.1408, "step": 38910 }, { "epoch": 0.5470517956286457, "grad_norm": 0.34645259380340576, "learning_rate": 9.272035958175816e-05, "loss": 0.1635, "step": 38920 }, { "epoch": 0.5471923536439666, "grad_norm": 0.39230334758758545, "learning_rate": 9.267400149240786e-05, "loss": 0.1831, "step": 38930 }, { "epoch": 0.5473329116592873, "grad_norm": 0.3278043270111084, "learning_rate": 9.262764498590924e-05, "loss": 0.17, "step": 38940 }, { "epoch": 0.5474734696746082, "grad_norm": 0.3736383318901062, "learning_rate": 9.258129007227807e-05, "loss": 0.1762, "step": 38950 }, { "epoch": 0.5476140276899291, "grad_norm": 0.3995749056339264, "learning_rate": 9.253493676152976e-05, "loss": 0.1861, "step": 38960 }, { "epoch": 0.5477545857052498, "grad_norm": 0.3844054341316223, "learning_rate": 9.248858506367943e-05, "loss": 0.177, "step": 38970 }, { "epoch": 0.5478951437205707, "grad_norm": 0.3187495470046997, "learning_rate": 9.244223498874174e-05, "loss": 0.1734, "step": 38980 }, { "epoch": 0.5480357017358914, "grad_norm": 0.3380083441734314, "learning_rate": 9.239588654673111e-05, "loss": 0.1782, "step": 38990 }, { "epoch": 0.5481762597512123, "grad_norm": 0.3558598458766937, "learning_rate": 9.234953974766156e-05, "loss": 0.1777, "step": 39000 }, { "epoch": 0.5481762597512123, "eval_chrf": 84.03459253857896, "eval_loss": 0.3780422806739807, "eval_runtime": 173.4269, "eval_samples_per_second": 0.577, "eval_steps_per_second": 0.023, "step": 39000 }, { "epoch": 0.5483168177665332, "grad_norm": 0.3646630048751831, "learning_rate": 9.230319460154672e-05, "loss": 0.1859, "step": 39010 }, { "epoch": 0.5484573757818539, "grad_norm": 0.3137366473674774, "learning_rate": 9.225685111839995e-05, "loss": 0.1758, "step": 39020 }, { "epoch": 0.5485979337971748, "grad_norm": 0.4131186008453369, "learning_rate": 9.221050930823418e-05, "loss": 0.1904, "step": 39030 }, { "epoch": 0.5487384918124956, "grad_norm": 0.42204728722572327, "learning_rate": 9.216416918106198e-05, "loss": 0.1864, "step": 39040 }, { "epoch": 0.5488790498278164, "grad_norm": 0.44460195302963257, "learning_rate": 9.211783074689559e-05, "loss": 0.185, "step": 39050 }, { "epoch": 0.5490196078431373, "grad_norm": 0.3441852331161499, "learning_rate": 9.207149401574688e-05, "loss": 0.175, "step": 39060 }, { "epoch": 0.5491601658584581, "grad_norm": 0.2591150104999542, "learning_rate": 9.202515899762733e-05, "loss": 0.1828, "step": 39070 }, { "epoch": 0.5493007238737789, "grad_norm": 0.47425776720046997, "learning_rate": 9.197882570254806e-05, "loss": 0.1655, "step": 39080 }, { "epoch": 0.5494412818890997, "grad_norm": 0.4549548029899597, "learning_rate": 9.193249414051983e-05, "loss": 0.1806, "step": 39090 }, { "epoch": 0.5495818399044206, "grad_norm": 0.3400246500968933, "learning_rate": 9.188616432155301e-05, "loss": 0.145, "step": 39100 }, { "epoch": 0.5497223979197414, "grad_norm": 0.39775407314300537, "learning_rate": 9.183983625565759e-05, "loss": 0.1753, "step": 39110 }, { "epoch": 0.5498629559350622, "grad_norm": 0.32858631014823914, "learning_rate": 9.17935099528432e-05, "loss": 0.1635, "step": 39120 }, { "epoch": 0.550003513950383, "grad_norm": 0.4243503212928772, "learning_rate": 9.174718542311907e-05, "loss": 0.1822, "step": 39130 }, { "epoch": 0.5501440719657038, "grad_norm": 0.421855092048645, "learning_rate": 9.170086267649406e-05, "loss": 0.1888, "step": 39140 }, { "epoch": 0.5502846299810247, "grad_norm": 0.38638654351234436, "learning_rate": 9.165454172297664e-05, "loss": 0.1737, "step": 39150 }, { "epoch": 0.5504251879963455, "grad_norm": 0.43157637119293213, "learning_rate": 9.16082225725749e-05, "loss": 0.1757, "step": 39160 }, { "epoch": 0.5505657460116663, "grad_norm": 0.3671276271343231, "learning_rate": 9.156190523529651e-05, "loss": 0.1755, "step": 39170 }, { "epoch": 0.5507063040269872, "grad_norm": 0.3911401033401489, "learning_rate": 9.151558972114879e-05, "loss": 0.1654, "step": 39180 }, { "epoch": 0.5508468620423079, "grad_norm": 0.3790930509567261, "learning_rate": 9.146927604013866e-05, "loss": 0.1944, "step": 39190 }, { "epoch": 0.5509874200576288, "grad_norm": 0.41435009241104126, "learning_rate": 9.142296420227261e-05, "loss": 0.1716, "step": 39200 }, { "epoch": 0.5511279780729497, "grad_norm": 0.35916468501091003, "learning_rate": 9.137665421755677e-05, "loss": 0.1821, "step": 39210 }, { "epoch": 0.5512685360882704, "grad_norm": 0.3221915066242218, "learning_rate": 9.133034609599683e-05, "loss": 0.1985, "step": 39220 }, { "epoch": 0.5514090941035913, "grad_norm": 0.417850524187088, "learning_rate": 9.128403984759811e-05, "loss": 0.2018, "step": 39230 }, { "epoch": 0.551549652118912, "grad_norm": 0.46724241971969604, "learning_rate": 9.123773548236553e-05, "loss": 0.172, "step": 39240 }, { "epoch": 0.5516902101342329, "grad_norm": 0.3086174726486206, "learning_rate": 9.119143301030356e-05, "loss": 0.1723, "step": 39250 }, { "epoch": 0.5518307681495537, "grad_norm": 0.3772790729999542, "learning_rate": 9.114513244141632e-05, "loss": 0.1934, "step": 39260 }, { "epoch": 0.5519713261648745, "grad_norm": 0.2803363502025604, "learning_rate": 9.109883378570747e-05, "loss": 0.1863, "step": 39270 }, { "epoch": 0.5521118841801954, "grad_norm": 0.3337017595767975, "learning_rate": 9.105253705318028e-05, "loss": 0.1671, "step": 39280 }, { "epoch": 0.5522524421955162, "grad_norm": 0.3834514617919922, "learning_rate": 9.100624225383761e-05, "loss": 0.177, "step": 39290 }, { "epoch": 0.552393000210837, "grad_norm": 0.37610965967178345, "learning_rate": 9.095994939768187e-05, "loss": 0.1801, "step": 39300 }, { "epoch": 0.5525335582261578, "grad_norm": 0.3597879111766815, "learning_rate": 9.09136584947151e-05, "loss": 0.1818, "step": 39310 }, { "epoch": 0.5526741162414787, "grad_norm": 0.499024897813797, "learning_rate": 9.086736955493887e-05, "loss": 0.2017, "step": 39320 }, { "epoch": 0.5528146742567995, "grad_norm": 0.2908949553966522, "learning_rate": 9.082108258835433e-05, "loss": 0.158, "step": 39330 }, { "epoch": 0.5529552322721203, "grad_norm": 0.31243860721588135, "learning_rate": 9.077479760496226e-05, "loss": 0.1815, "step": 39340 }, { "epoch": 0.5530957902874412, "grad_norm": 0.44429317116737366, "learning_rate": 9.072851461476296e-05, "loss": 0.2251, "step": 39350 }, { "epoch": 0.5532363483027619, "grad_norm": 0.373526006937027, "learning_rate": 9.068223362775627e-05, "loss": 0.1629, "step": 39360 }, { "epoch": 0.5533769063180828, "grad_norm": 0.3967413604259491, "learning_rate": 9.063595465394172e-05, "loss": 0.1601, "step": 39370 }, { "epoch": 0.5535174643334037, "grad_norm": 0.41019967198371887, "learning_rate": 9.058967770331822e-05, "loss": 0.1644, "step": 39380 }, { "epoch": 0.5536580223487244, "grad_norm": 0.4954618811607361, "learning_rate": 9.054340278588443e-05, "loss": 0.1681, "step": 39390 }, { "epoch": 0.5537985803640453, "grad_norm": 0.44948747754096985, "learning_rate": 9.049712991163842e-05, "loss": 0.1729, "step": 39400 }, { "epoch": 0.553939138379366, "grad_norm": 0.37606337666511536, "learning_rate": 9.045085909057795e-05, "loss": 0.179, "step": 39410 }, { "epoch": 0.5540796963946869, "grad_norm": 0.3375130891799927, "learning_rate": 9.040459033270021e-05, "loss": 0.2018, "step": 39420 }, { "epoch": 0.5542202544100078, "grad_norm": 0.4176686704158783, "learning_rate": 9.035832364800205e-05, "loss": 0.1786, "step": 39430 }, { "epoch": 0.5543608124253285, "grad_norm": 0.33619213104248047, "learning_rate": 9.031205904647981e-05, "loss": 0.1892, "step": 39440 }, { "epoch": 0.5545013704406494, "grad_norm": 0.3469635248184204, "learning_rate": 9.02657965381294e-05, "loss": 0.1484, "step": 39450 }, { "epoch": 0.5546419284559702, "grad_norm": 0.4018300771713257, "learning_rate": 9.021953613294628e-05, "loss": 0.1497, "step": 39460 }, { "epoch": 0.554782486471291, "grad_norm": 0.42926666140556335, "learning_rate": 9.017327784092544e-05, "loss": 0.1942, "step": 39470 }, { "epoch": 0.5549230444866119, "grad_norm": 0.37829723954200745, "learning_rate": 9.012702167206144e-05, "loss": 0.1916, "step": 39480 }, { "epoch": 0.5550636025019327, "grad_norm": 0.37087535858154297, "learning_rate": 9.008076763634834e-05, "loss": 0.1876, "step": 39490 }, { "epoch": 0.5552041605172535, "grad_norm": 0.3491499722003937, "learning_rate": 9.00345157437798e-05, "loss": 0.1668, "step": 39500 }, { "epoch": 0.5553447185325743, "grad_norm": 0.37351173162460327, "learning_rate": 8.998826600434893e-05, "loss": 0.183, "step": 39510 }, { "epoch": 0.5554852765478951, "grad_norm": 0.469526082277298, "learning_rate": 8.994201842804848e-05, "loss": 0.207, "step": 39520 }, { "epoch": 0.555625834563216, "grad_norm": 0.4910909831523895, "learning_rate": 8.989577302487066e-05, "loss": 0.1552, "step": 39530 }, { "epoch": 0.5557663925785368, "grad_norm": 0.42745739221572876, "learning_rate": 8.98495298048072e-05, "loss": 0.1805, "step": 39540 }, { "epoch": 0.5559069505938576, "grad_norm": 0.493329256772995, "learning_rate": 8.980328877784943e-05, "loss": 0.1839, "step": 39550 }, { "epoch": 0.5560475086091784, "grad_norm": 0.48328346014022827, "learning_rate": 8.975704995398816e-05, "loss": 0.1947, "step": 39560 }, { "epoch": 0.5561880666244993, "grad_norm": 0.4481086730957031, "learning_rate": 8.97108133432137e-05, "loss": 0.1766, "step": 39570 }, { "epoch": 0.5563286246398201, "grad_norm": 0.38591402769088745, "learning_rate": 8.966457895551592e-05, "loss": 0.1893, "step": 39580 }, { "epoch": 0.5564691826551409, "grad_norm": 0.43421241641044617, "learning_rate": 8.961834680088423e-05, "loss": 0.1892, "step": 39590 }, { "epoch": 0.5566097406704618, "grad_norm": 0.371267169713974, "learning_rate": 8.957211688930748e-05, "loss": 0.1862, "step": 39600 }, { "epoch": 0.5567502986857825, "grad_norm": 0.4000300467014313, "learning_rate": 8.952588923077409e-05, "loss": 0.1742, "step": 39610 }, { "epoch": 0.5568908567011034, "grad_norm": 0.47955048084259033, "learning_rate": 8.947966383527203e-05, "loss": 0.1993, "step": 39620 }, { "epoch": 0.5570314147164243, "grad_norm": 0.3886945843696594, "learning_rate": 8.943344071278869e-05, "loss": 0.1999, "step": 39630 }, { "epoch": 0.557171972731745, "grad_norm": 0.3787805438041687, "learning_rate": 8.938721987331104e-05, "loss": 0.1777, "step": 39640 }, { "epoch": 0.5573125307470659, "grad_norm": 0.36745595932006836, "learning_rate": 8.934100132682551e-05, "loss": 0.1904, "step": 39650 }, { "epoch": 0.5574530887623866, "grad_norm": 0.3356489837169647, "learning_rate": 8.929478508331807e-05, "loss": 0.164, "step": 39660 }, { "epoch": 0.5575936467777075, "grad_norm": 0.4322313666343689, "learning_rate": 8.92485711527742e-05, "loss": 0.1652, "step": 39670 }, { "epoch": 0.5577342047930284, "grad_norm": 0.4178856611251831, "learning_rate": 8.920235954517882e-05, "loss": 0.194, "step": 39680 }, { "epoch": 0.5578747628083491, "grad_norm": 0.3862941563129425, "learning_rate": 8.915615027051642e-05, "loss": 0.1811, "step": 39690 }, { "epoch": 0.55801532082367, "grad_norm": 0.4267183840274811, "learning_rate": 8.910994333877091e-05, "loss": 0.2013, "step": 39700 }, { "epoch": 0.5581558788389908, "grad_norm": 0.32371628284454346, "learning_rate": 8.90637387599258e-05, "loss": 0.2189, "step": 39710 }, { "epoch": 0.5582964368543116, "grad_norm": 0.3845933973789215, "learning_rate": 8.901753654396398e-05, "loss": 0.1729, "step": 39720 }, { "epoch": 0.5584369948696324, "grad_norm": 0.34567537903785706, "learning_rate": 8.89713367008679e-05, "loss": 0.19, "step": 39730 }, { "epoch": 0.5585775528849533, "grad_norm": 0.34996098279953003, "learning_rate": 8.892513924061948e-05, "loss": 0.177, "step": 39740 }, { "epoch": 0.5587181109002741, "grad_norm": 0.44245201349258423, "learning_rate": 8.88789441732001e-05, "loss": 0.181, "step": 39750 }, { "epoch": 0.5588586689155949, "grad_norm": 0.45229363441467285, "learning_rate": 8.883275150859065e-05, "loss": 0.1667, "step": 39760 }, { "epoch": 0.5589992269309157, "grad_norm": 0.41712188720703125, "learning_rate": 8.87865612567715e-05, "loss": 0.1905, "step": 39770 }, { "epoch": 0.5591397849462365, "grad_norm": 0.3251127600669861, "learning_rate": 8.87403734277225e-05, "loss": 0.1748, "step": 39780 }, { "epoch": 0.5592803429615574, "grad_norm": 0.4091569781303406, "learning_rate": 8.869418803142297e-05, "loss": 0.2082, "step": 39790 }, { "epoch": 0.5594209009768782, "grad_norm": 0.5211314558982849, "learning_rate": 8.864800507785168e-05, "loss": 0.1671, "step": 39800 }, { "epoch": 0.559561458992199, "grad_norm": 0.5784714818000793, "learning_rate": 8.860182457698692e-05, "loss": 0.1808, "step": 39810 }, { "epoch": 0.5597020170075199, "grad_norm": 0.31065332889556885, "learning_rate": 8.855564653880644e-05, "loss": 0.1769, "step": 39820 }, { "epoch": 0.5598425750228406, "grad_norm": 0.39571863412857056, "learning_rate": 8.85094709732874e-05, "loss": 0.2061, "step": 39830 }, { "epoch": 0.5599831330381615, "grad_norm": 0.5271860361099243, "learning_rate": 8.84632978904065e-05, "loss": 0.1812, "step": 39840 }, { "epoch": 0.5601236910534824, "grad_norm": 0.3915952742099762, "learning_rate": 8.841712730013988e-05, "loss": 0.1686, "step": 39850 }, { "epoch": 0.5602642490688031, "grad_norm": 0.35233601927757263, "learning_rate": 8.837095921246312e-05, "loss": 0.1561, "step": 39860 }, { "epoch": 0.560404807084124, "grad_norm": 0.36162757873535156, "learning_rate": 8.832479363735128e-05, "loss": 0.1867, "step": 39870 }, { "epoch": 0.5605453650994447, "grad_norm": 0.35949790477752686, "learning_rate": 8.827863058477886e-05, "loss": 0.1749, "step": 39880 }, { "epoch": 0.5606859231147656, "grad_norm": 0.45237404108047485, "learning_rate": 8.823247006471985e-05, "loss": 0.1818, "step": 39890 }, { "epoch": 0.5608264811300865, "grad_norm": 0.39162522554397583, "learning_rate": 8.818631208714765e-05, "loss": 0.1728, "step": 39900 }, { "epoch": 0.5609670391454072, "grad_norm": 0.28499725461006165, "learning_rate": 8.814015666203514e-05, "loss": 0.1625, "step": 39910 }, { "epoch": 0.5611075971607281, "grad_norm": 0.36940038204193115, "learning_rate": 8.809400379935466e-05, "loss": 0.1648, "step": 39920 }, { "epoch": 0.5612481551760489, "grad_norm": 0.38717323541641235, "learning_rate": 8.804785350907791e-05, "loss": 0.1416, "step": 39930 }, { "epoch": 0.5613887131913697, "grad_norm": 0.3635973036289215, "learning_rate": 8.800170580117617e-05, "loss": 0.1642, "step": 39940 }, { "epoch": 0.5615292712066906, "grad_norm": 0.49187415838241577, "learning_rate": 8.795556068562005e-05, "loss": 0.1688, "step": 39950 }, { "epoch": 0.5616698292220114, "grad_norm": 0.362467497587204, "learning_rate": 8.790941817237966e-05, "loss": 0.1737, "step": 39960 }, { "epoch": 0.5618103872373322, "grad_norm": 0.3785790205001831, "learning_rate": 8.786327827142453e-05, "loss": 0.1499, "step": 39970 }, { "epoch": 0.561950945252653, "grad_norm": 0.39637821912765503, "learning_rate": 8.781714099272362e-05, "loss": 0.1574, "step": 39980 }, { "epoch": 0.5620915032679739, "grad_norm": 0.47537532448768616, "learning_rate": 8.77710063462453e-05, "loss": 0.1842, "step": 39990 }, { "epoch": 0.5622320612832947, "grad_norm": 0.42900022864341736, "learning_rate": 8.772487434195745e-05, "loss": 0.1754, "step": 40000 }, { "epoch": 0.5622320612832947, "eval_chrf": 64.4424962345778, "eval_loss": 0.3777381181716919, "eval_runtime": 326.6983, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.012, "step": 40000 }, { "epoch": 0.5623726192986155, "grad_norm": 0.4063546657562256, "learning_rate": 8.767874498982728e-05, "loss": 0.1794, "step": 40010 }, { "epoch": 0.5625131773139364, "grad_norm": 0.35100147128105164, "learning_rate": 8.763261829982153e-05, "loss": 0.1845, "step": 40020 }, { "epoch": 0.5626537353292571, "grad_norm": 0.3833320736885071, "learning_rate": 8.758649428190626e-05, "loss": 0.1744, "step": 40030 }, { "epoch": 0.562794293344578, "grad_norm": 0.3445558547973633, "learning_rate": 8.754037294604707e-05, "loss": 0.1708, "step": 40040 }, { "epoch": 0.5629348513598988, "grad_norm": 0.47707948088645935, "learning_rate": 8.749425430220882e-05, "loss": 0.1587, "step": 40050 }, { "epoch": 0.5630754093752196, "grad_norm": 0.4903023838996887, "learning_rate": 8.744813836035593e-05, "loss": 0.174, "step": 40060 }, { "epoch": 0.5632159673905405, "grad_norm": 0.36455291509628296, "learning_rate": 8.740202513045219e-05, "loss": 0.1656, "step": 40070 }, { "epoch": 0.5633565254058612, "grad_norm": 0.4470309913158417, "learning_rate": 8.735591462246078e-05, "loss": 0.1922, "step": 40080 }, { "epoch": 0.5634970834211821, "grad_norm": 0.488297700881958, "learning_rate": 8.730980684634435e-05, "loss": 0.1852, "step": 40090 }, { "epoch": 0.563637641436503, "grad_norm": 0.5019252896308899, "learning_rate": 8.726370181206492e-05, "loss": 0.1811, "step": 40100 }, { "epoch": 0.5637781994518237, "grad_norm": 0.3459615111351013, "learning_rate": 8.721759952958388e-05, "loss": 0.1762, "step": 40110 }, { "epoch": 0.5639187574671446, "grad_norm": 0.3986132740974426, "learning_rate": 8.71715000088621e-05, "loss": 0.1808, "step": 40120 }, { "epoch": 0.5640593154824654, "grad_norm": 0.6364266872406006, "learning_rate": 8.712540325985982e-05, "loss": 0.1837, "step": 40130 }, { "epoch": 0.5641998734977862, "grad_norm": 0.3040315508842468, "learning_rate": 8.707930929253668e-05, "loss": 0.1474, "step": 40140 }, { "epoch": 0.5643404315131071, "grad_norm": 0.39072078466415405, "learning_rate": 8.703321811685169e-05, "loss": 0.2028, "step": 40150 }, { "epoch": 0.5644809895284278, "grad_norm": 0.5064664483070374, "learning_rate": 8.698712974276333e-05, "loss": 0.1691, "step": 40160 }, { "epoch": 0.5646215475437487, "grad_norm": 0.32676252722740173, "learning_rate": 8.694104418022941e-05, "loss": 0.1771, "step": 40170 }, { "epoch": 0.5647621055590695, "grad_norm": 0.3531136214733124, "learning_rate": 8.689496143920716e-05, "loss": 0.1792, "step": 40180 }, { "epoch": 0.5649026635743903, "grad_norm": 0.38171055912971497, "learning_rate": 8.684888152965321e-05, "loss": 0.186, "step": 40190 }, { "epoch": 0.5650432215897112, "grad_norm": 0.4591136574745178, "learning_rate": 8.680280446152352e-05, "loss": 0.1693, "step": 40200 }, { "epoch": 0.565183779605032, "grad_norm": 0.3574647605419159, "learning_rate": 8.675673024477353e-05, "loss": 0.1526, "step": 40210 }, { "epoch": 0.5653243376203528, "grad_norm": 0.3667439818382263, "learning_rate": 8.671065888935797e-05, "loss": 0.158, "step": 40220 }, { "epoch": 0.5654648956356736, "grad_norm": 0.3794707655906677, "learning_rate": 8.666459040523102e-05, "loss": 0.1534, "step": 40230 }, { "epoch": 0.5656054536509945, "grad_norm": 0.3461852967739105, "learning_rate": 8.66185248023462e-05, "loss": 0.1937, "step": 40240 }, { "epoch": 0.5657460116663152, "grad_norm": 0.32846784591674805, "learning_rate": 8.657246209065644e-05, "loss": 0.185, "step": 40250 }, { "epoch": 0.5658865696816361, "grad_norm": 0.4151070713996887, "learning_rate": 8.652640228011401e-05, "loss": 0.1666, "step": 40260 }, { "epoch": 0.566027127696957, "grad_norm": 0.43426674604415894, "learning_rate": 8.648034538067058e-05, "loss": 0.1796, "step": 40270 }, { "epoch": 0.5661676857122777, "grad_norm": 0.3492185175418854, "learning_rate": 8.643429140227718e-05, "loss": 0.1485, "step": 40280 }, { "epoch": 0.5663082437275986, "grad_norm": 0.5607163906097412, "learning_rate": 8.63882403548842e-05, "loss": 0.1965, "step": 40290 }, { "epoch": 0.5664488017429193, "grad_norm": 0.42045024037361145, "learning_rate": 8.634219224844142e-05, "loss": 0.165, "step": 40300 }, { "epoch": 0.5665893597582402, "grad_norm": 0.315655380487442, "learning_rate": 8.629614709289798e-05, "loss": 0.1908, "step": 40310 }, { "epoch": 0.5667299177735611, "grad_norm": 0.47120293974876404, "learning_rate": 8.625010489820234e-05, "loss": 0.1913, "step": 40320 }, { "epoch": 0.5668704757888818, "grad_norm": 0.5474007725715637, "learning_rate": 8.62040656743024e-05, "loss": 0.1632, "step": 40330 }, { "epoch": 0.5670110338042027, "grad_norm": 0.35841497778892517, "learning_rate": 8.615802943114533e-05, "loss": 0.16, "step": 40340 }, { "epoch": 0.5671515918195235, "grad_norm": 0.507109522819519, "learning_rate": 8.611199617867772e-05, "loss": 0.1864, "step": 40350 }, { "epoch": 0.5672921498348443, "grad_norm": 0.43633168935775757, "learning_rate": 8.606596592684548e-05, "loss": 0.1734, "step": 40360 }, { "epoch": 0.5674327078501652, "grad_norm": 0.4774210751056671, "learning_rate": 8.60199386855939e-05, "loss": 0.1757, "step": 40370 }, { "epoch": 0.567573265865486, "grad_norm": 0.4148516058921814, "learning_rate": 8.597391446486758e-05, "loss": 0.1808, "step": 40380 }, { "epoch": 0.5677138238808068, "grad_norm": 0.33463791012763977, "learning_rate": 8.592789327461052e-05, "loss": 0.1974, "step": 40390 }, { "epoch": 0.5678543818961276, "grad_norm": 0.4078878164291382, "learning_rate": 8.588187512476603e-05, "loss": 0.1585, "step": 40400 }, { "epoch": 0.5679949399114484, "grad_norm": 0.383430540561676, "learning_rate": 8.583586002527676e-05, "loss": 0.1604, "step": 40410 }, { "epoch": 0.5681354979267693, "grad_norm": 0.4105486571788788, "learning_rate": 8.57898479860847e-05, "loss": 0.1751, "step": 40420 }, { "epoch": 0.5682760559420901, "grad_norm": 0.5492495894432068, "learning_rate": 8.574383901713121e-05, "loss": 0.1764, "step": 40430 }, { "epoch": 0.5684166139574109, "grad_norm": 0.4135695993900299, "learning_rate": 8.569783312835697e-05, "loss": 0.1687, "step": 40440 }, { "epoch": 0.5685571719727317, "grad_norm": 0.3660704493522644, "learning_rate": 8.565183032970195e-05, "loss": 0.1766, "step": 40450 }, { "epoch": 0.5686977299880526, "grad_norm": 0.5003263354301453, "learning_rate": 8.560583063110553e-05, "loss": 0.192, "step": 40460 }, { "epoch": 0.5688382880033734, "grad_norm": 0.37117767333984375, "learning_rate": 8.555983404250638e-05, "loss": 0.1699, "step": 40470 }, { "epoch": 0.5689788460186942, "grad_norm": 0.4867563843727112, "learning_rate": 8.551384057384247e-05, "loss": 0.1613, "step": 40480 }, { "epoch": 0.5691194040340151, "grad_norm": 0.36081835627555847, "learning_rate": 8.546785023505117e-05, "loss": 0.1786, "step": 40490 }, { "epoch": 0.5692599620493358, "grad_norm": 0.46900445222854614, "learning_rate": 8.542186303606909e-05, "loss": 0.1884, "step": 40500 }, { "epoch": 0.5694005200646567, "grad_norm": 0.5062784552574158, "learning_rate": 8.537587898683224e-05, "loss": 0.195, "step": 40510 }, { "epoch": 0.5695410780799776, "grad_norm": 0.3653715252876282, "learning_rate": 8.532989809727588e-05, "loss": 0.1945, "step": 40520 }, { "epoch": 0.5696816360952983, "grad_norm": 0.2945951521396637, "learning_rate": 8.528392037733464e-05, "loss": 0.1703, "step": 40530 }, { "epoch": 0.5698221941106192, "grad_norm": 0.33782869577407837, "learning_rate": 8.523794583694243e-05, "loss": 0.1915, "step": 40540 }, { "epoch": 0.5699627521259399, "grad_norm": 0.3650352954864502, "learning_rate": 8.51919744860325e-05, "loss": 0.1746, "step": 40550 }, { "epoch": 0.5701033101412608, "grad_norm": 0.3990078866481781, "learning_rate": 8.514600633453737e-05, "loss": 0.1675, "step": 40560 }, { "epoch": 0.5702438681565817, "grad_norm": 0.43076345324516296, "learning_rate": 8.510004139238893e-05, "loss": 0.1861, "step": 40570 }, { "epoch": 0.5703844261719024, "grad_norm": 0.39436662197113037, "learning_rate": 8.505407966951833e-05, "loss": 0.172, "step": 40580 }, { "epoch": 0.5705249841872233, "grad_norm": 0.34969374537467957, "learning_rate": 8.500812117585602e-05, "loss": 0.1758, "step": 40590 }, { "epoch": 0.5706655422025441, "grad_norm": 0.3330826759338379, "learning_rate": 8.496216592133181e-05, "loss": 0.1647, "step": 40600 }, { "epoch": 0.5708061002178649, "grad_norm": 0.5729359984397888, "learning_rate": 8.491621391587473e-05, "loss": 0.2057, "step": 40610 }, { "epoch": 0.5709466582331858, "grad_norm": 0.3391762375831604, "learning_rate": 8.487026516941318e-05, "loss": 0.1642, "step": 40620 }, { "epoch": 0.5710872162485066, "grad_norm": 0.4327235221862793, "learning_rate": 8.482431969187479e-05, "loss": 0.1756, "step": 40630 }, { "epoch": 0.5712277742638274, "grad_norm": 0.4161527454853058, "learning_rate": 8.477837749318655e-05, "loss": 0.1593, "step": 40640 }, { "epoch": 0.5713683322791482, "grad_norm": 0.3668680489063263, "learning_rate": 8.473243858327469e-05, "loss": 0.1821, "step": 40650 }, { "epoch": 0.571508890294469, "grad_norm": 0.43897321820259094, "learning_rate": 8.468650297206476e-05, "loss": 0.1796, "step": 40660 }, { "epoch": 0.5716494483097899, "grad_norm": 0.44199275970458984, "learning_rate": 8.464057066948158e-05, "loss": 0.1754, "step": 40670 }, { "epoch": 0.5717900063251107, "grad_norm": 0.4629787802696228, "learning_rate": 8.459464168544927e-05, "loss": 0.1847, "step": 40680 }, { "epoch": 0.5719305643404315, "grad_norm": 0.3117261826992035, "learning_rate": 8.454871602989119e-05, "loss": 0.1559, "step": 40690 }, { "epoch": 0.5720711223557523, "grad_norm": 0.3957635760307312, "learning_rate": 8.450279371273006e-05, "loss": 0.1875, "step": 40700 }, { "epoch": 0.5722116803710732, "grad_norm": 0.31855207681655884, "learning_rate": 8.44568747438878e-05, "loss": 0.1644, "step": 40710 }, { "epoch": 0.5723522383863939, "grad_norm": 0.38155150413513184, "learning_rate": 8.441095913328568e-05, "loss": 0.1728, "step": 40720 }, { "epoch": 0.5724927964017148, "grad_norm": 0.4814320504665375, "learning_rate": 8.436504689084416e-05, "loss": 0.1698, "step": 40730 }, { "epoch": 0.5726333544170357, "grad_norm": 0.5537896752357483, "learning_rate": 8.431913802648304e-05, "loss": 0.2047, "step": 40740 }, { "epoch": 0.5727739124323564, "grad_norm": 0.32907259464263916, "learning_rate": 8.427323255012136e-05, "loss": 0.1641, "step": 40750 }, { "epoch": 0.5729144704476773, "grad_norm": 0.48211991786956787, "learning_rate": 8.422733047167743e-05, "loss": 0.2014, "step": 40760 }, { "epoch": 0.573055028462998, "grad_norm": 0.41999366879463196, "learning_rate": 8.418143180106883e-05, "loss": 0.1858, "step": 40770 }, { "epoch": 0.5731955864783189, "grad_norm": 0.3663288354873657, "learning_rate": 8.413553654821243e-05, "loss": 0.1854, "step": 40780 }, { "epoch": 0.5733361444936398, "grad_norm": 0.3534975051879883, "learning_rate": 8.408964472302432e-05, "loss": 0.1972, "step": 40790 }, { "epoch": 0.5734767025089605, "grad_norm": 0.43506941199302673, "learning_rate": 8.404375633541985e-05, "loss": 0.1823, "step": 40800 }, { "epoch": 0.5736172605242814, "grad_norm": 0.3940921723842621, "learning_rate": 8.399787139531367e-05, "loss": 0.1743, "step": 40810 }, { "epoch": 0.5737578185396022, "grad_norm": 0.34778329730033875, "learning_rate": 8.395198991261963e-05, "loss": 0.163, "step": 40820 }, { "epoch": 0.573898376554923, "grad_norm": 0.691506028175354, "learning_rate": 8.390611189725088e-05, "loss": 0.1815, "step": 40830 }, { "epoch": 0.5740389345702439, "grad_norm": 0.37461793422698975, "learning_rate": 8.386023735911981e-05, "loss": 0.2127, "step": 40840 }, { "epoch": 0.5741794925855647, "grad_norm": 0.4207349121570587, "learning_rate": 8.381436630813803e-05, "loss": 0.1821, "step": 40850 }, { "epoch": 0.5743200506008855, "grad_norm": 0.36333221197128296, "learning_rate": 8.376849875421642e-05, "loss": 0.1753, "step": 40860 }, { "epoch": 0.5744606086162063, "grad_norm": 0.4035714566707611, "learning_rate": 8.372263470726512e-05, "loss": 0.1559, "step": 40870 }, { "epoch": 0.5746011666315272, "grad_norm": 0.3609062135219574, "learning_rate": 8.367677417719348e-05, "loss": 0.1753, "step": 40880 }, { "epoch": 0.574741724646848, "grad_norm": 0.4326991140842438, "learning_rate": 8.36309171739101e-05, "loss": 0.1652, "step": 40890 }, { "epoch": 0.5748822826621688, "grad_norm": 0.4023519456386566, "learning_rate": 8.358506370732282e-05, "loss": 0.1817, "step": 40900 }, { "epoch": 0.5750228406774897, "grad_norm": 0.35921162366867065, "learning_rate": 8.353921378733873e-05, "loss": 0.1642, "step": 40910 }, { "epoch": 0.5751633986928104, "grad_norm": 0.3350284695625305, "learning_rate": 8.349336742386413e-05, "loss": 0.1715, "step": 40920 }, { "epoch": 0.5753039567081313, "grad_norm": 0.4562940001487732, "learning_rate": 8.344752462680458e-05, "loss": 0.1768, "step": 40930 }, { "epoch": 0.5754445147234521, "grad_norm": 0.3363032341003418, "learning_rate": 8.340168540606483e-05, "loss": 0.1793, "step": 40940 }, { "epoch": 0.5755850727387729, "grad_norm": 0.29547351598739624, "learning_rate": 8.33558497715489e-05, "loss": 0.1704, "step": 40950 }, { "epoch": 0.5757256307540938, "grad_norm": 0.31880930066108704, "learning_rate": 8.331001773315998e-05, "loss": 0.185, "step": 40960 }, { "epoch": 0.5758661887694145, "grad_norm": 0.4245101809501648, "learning_rate": 8.326418930080057e-05, "loss": 0.1812, "step": 40970 }, { "epoch": 0.5760067467847354, "grad_norm": 0.42613980174064636, "learning_rate": 8.321836448437232e-05, "loss": 0.179, "step": 40980 }, { "epoch": 0.5761473048000563, "grad_norm": 0.5401775240898132, "learning_rate": 8.317254329377608e-05, "loss": 0.1773, "step": 40990 }, { "epoch": 0.576287862815377, "grad_norm": 0.3493726849555969, "learning_rate": 8.312672573891202e-05, "loss": 0.1827, "step": 41000 }, { "epoch": 0.576287862815377, "eval_chrf": 70.05708677995977, "eval_loss": 0.37228840589523315, "eval_runtime": 326.7373, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.012, "step": 41000 }, { "epoch": 0.5764284208306979, "grad_norm": 0.3397007882595062, "learning_rate": 8.30809118296794e-05, "loss": 0.1789, "step": 41010 }, { "epoch": 0.5765689788460187, "grad_norm": 0.3283292055130005, "learning_rate": 8.303510157597678e-05, "loss": 0.1847, "step": 41020 }, { "epoch": 0.5767095368613395, "grad_norm": 0.3661724925041199, "learning_rate": 8.29892949877019e-05, "loss": 0.203, "step": 41030 }, { "epoch": 0.5768500948766604, "grad_norm": 0.39149966835975647, "learning_rate": 8.29434920747517e-05, "loss": 0.1894, "step": 41040 }, { "epoch": 0.5769906528919811, "grad_norm": 0.41224539279937744, "learning_rate": 8.289769284702234e-05, "loss": 0.1874, "step": 41050 }, { "epoch": 0.577131210907302, "grad_norm": 0.33227643370628357, "learning_rate": 8.285189731440919e-05, "loss": 0.1756, "step": 41060 }, { "epoch": 0.5772717689226228, "grad_norm": 0.40387648344039917, "learning_rate": 8.280610548680678e-05, "loss": 0.1678, "step": 41070 }, { "epoch": 0.5774123269379436, "grad_norm": 0.3667382001876831, "learning_rate": 8.27603173741089e-05, "loss": 0.1744, "step": 41080 }, { "epoch": 0.5775528849532645, "grad_norm": 0.2799052894115448, "learning_rate": 8.271453298620851e-05, "loss": 0.1808, "step": 41090 }, { "epoch": 0.5776934429685853, "grad_norm": 0.4208914041519165, "learning_rate": 8.266875233299773e-05, "loss": 0.1794, "step": 41100 }, { "epoch": 0.5778340009839061, "grad_norm": 0.31498298048973083, "learning_rate": 8.262297542436793e-05, "loss": 0.1706, "step": 41110 }, { "epoch": 0.5779745589992269, "grad_norm": 0.4450007379055023, "learning_rate": 8.257720227020965e-05, "loss": 0.1936, "step": 41120 }, { "epoch": 0.5781151170145478, "grad_norm": 0.3582266867160797, "learning_rate": 8.253143288041261e-05, "loss": 0.1942, "step": 41130 }, { "epoch": 0.5782556750298686, "grad_norm": 0.4047289788722992, "learning_rate": 8.248566726486571e-05, "loss": 0.1622, "step": 41140 }, { "epoch": 0.5783962330451894, "grad_norm": 0.3665313422679901, "learning_rate": 8.243990543345707e-05, "loss": 0.1777, "step": 41150 }, { "epoch": 0.5785367910605103, "grad_norm": 0.3635713458061218, "learning_rate": 8.239414739607394e-05, "loss": 0.1694, "step": 41160 }, { "epoch": 0.578677349075831, "grad_norm": 0.3851402997970581, "learning_rate": 8.23483931626028e-05, "loss": 0.1754, "step": 41170 }, { "epoch": 0.5788179070911519, "grad_norm": 0.36724674701690674, "learning_rate": 8.230264274292929e-05, "loss": 0.1605, "step": 41180 }, { "epoch": 0.5789584651064726, "grad_norm": 0.39243537187576294, "learning_rate": 8.225689614693821e-05, "loss": 0.169, "step": 41190 }, { "epoch": 0.5790990231217935, "grad_norm": 0.4184189736843109, "learning_rate": 8.221115338451355e-05, "loss": 0.1934, "step": 41200 }, { "epoch": 0.5792395811371144, "grad_norm": 0.37976041436195374, "learning_rate": 8.216541446553848e-05, "loss": 0.1907, "step": 41210 }, { "epoch": 0.5793801391524351, "grad_norm": 0.4090244472026825, "learning_rate": 8.211967939989531e-05, "loss": 0.1817, "step": 41220 }, { "epoch": 0.579520697167756, "grad_norm": 0.4144601821899414, "learning_rate": 8.207394819746558e-05, "loss": 0.1642, "step": 41230 }, { "epoch": 0.5796612551830768, "grad_norm": 0.3041282594203949, "learning_rate": 8.202822086812989e-05, "loss": 0.1777, "step": 41240 }, { "epoch": 0.5798018131983976, "grad_norm": 0.34671542048454285, "learning_rate": 8.19824974217681e-05, "loss": 0.1616, "step": 41250 }, { "epoch": 0.5799423712137185, "grad_norm": 0.39715781807899475, "learning_rate": 8.19367778682592e-05, "loss": 0.1927, "step": 41260 }, { "epoch": 0.5800829292290393, "grad_norm": 0.43676459789276123, "learning_rate": 8.189106221748131e-05, "loss": 0.1739, "step": 41270 }, { "epoch": 0.5802234872443601, "grad_norm": 0.4754163324832916, "learning_rate": 8.184535047931175e-05, "loss": 0.1809, "step": 41280 }, { "epoch": 0.5803640452596809, "grad_norm": 0.3058910667896271, "learning_rate": 8.179964266362697e-05, "loss": 0.1597, "step": 41290 }, { "epoch": 0.5805046032750018, "grad_norm": 0.30070939660072327, "learning_rate": 8.175393878030257e-05, "loss": 0.1652, "step": 41300 }, { "epoch": 0.5806451612903226, "grad_norm": 0.4284345507621765, "learning_rate": 8.170823883921335e-05, "loss": 0.1975, "step": 41310 }, { "epoch": 0.5807857193056434, "grad_norm": 0.4294290542602539, "learning_rate": 8.166254285023315e-05, "loss": 0.1803, "step": 41320 }, { "epoch": 0.5809262773209642, "grad_norm": 0.4316389560699463, "learning_rate": 8.161685082323508e-05, "loss": 0.1586, "step": 41330 }, { "epoch": 0.581066835336285, "grad_norm": 0.35033249855041504, "learning_rate": 8.157116276809132e-05, "loss": 0.1688, "step": 41340 }, { "epoch": 0.5812073933516059, "grad_norm": 0.2849494218826294, "learning_rate": 8.152547869467319e-05, "loss": 0.1864, "step": 41350 }, { "epoch": 0.5813479513669267, "grad_norm": 0.4948350489139557, "learning_rate": 8.14797986128512e-05, "loss": 0.171, "step": 41360 }, { "epoch": 0.5814885093822475, "grad_norm": 0.3462991714477539, "learning_rate": 8.143412253249494e-05, "loss": 0.1831, "step": 41370 }, { "epoch": 0.5816290673975684, "grad_norm": 0.539348840713501, "learning_rate": 8.138845046347316e-05, "loss": 0.1776, "step": 41380 }, { "epoch": 0.5817696254128891, "grad_norm": 0.32116562128067017, "learning_rate": 8.134278241565383e-05, "loss": 0.1731, "step": 41390 }, { "epoch": 0.58191018342821, "grad_norm": 0.4358501732349396, "learning_rate": 8.129711839890383e-05, "loss": 0.1815, "step": 41400 }, { "epoch": 0.5820507414435309, "grad_norm": 0.3794027864933014, "learning_rate": 8.125145842308938e-05, "loss": 0.1849, "step": 41410 }, { "epoch": 0.5821912994588516, "grad_norm": 0.4954479932785034, "learning_rate": 8.120580249807573e-05, "loss": 0.1944, "step": 41420 }, { "epoch": 0.5823318574741725, "grad_norm": 0.40640848875045776, "learning_rate": 8.11601506337273e-05, "loss": 0.1725, "step": 41430 }, { "epoch": 0.5824724154894932, "grad_norm": 0.4593553841114044, "learning_rate": 8.11145028399076e-05, "loss": 0.1695, "step": 41440 }, { "epoch": 0.5826129735048141, "grad_norm": 0.47762438654899597, "learning_rate": 8.106885912647926e-05, "loss": 0.1697, "step": 41450 }, { "epoch": 0.582753531520135, "grad_norm": 0.3857489228248596, "learning_rate": 8.102321950330404e-05, "loss": 0.1781, "step": 41460 }, { "epoch": 0.5828940895354557, "grad_norm": 0.2961033582687378, "learning_rate": 8.097758398024283e-05, "loss": 0.147, "step": 41470 }, { "epoch": 0.5830346475507766, "grad_norm": 0.42926403880119324, "learning_rate": 8.093195256715561e-05, "loss": 0.1745, "step": 41480 }, { "epoch": 0.5831752055660974, "grad_norm": 0.345276415348053, "learning_rate": 8.088632527390148e-05, "loss": 0.1595, "step": 41490 }, { "epoch": 0.5833157635814182, "grad_norm": 0.33365002274513245, "learning_rate": 8.084070211033866e-05, "loss": 0.1625, "step": 41500 }, { "epoch": 0.5834563215967391, "grad_norm": 0.393058717250824, "learning_rate": 8.079508308632447e-05, "loss": 0.1745, "step": 41510 }, { "epoch": 0.5835968796120599, "grad_norm": 0.299457311630249, "learning_rate": 8.07494682117153e-05, "loss": 0.1485, "step": 41520 }, { "epoch": 0.5837374376273807, "grad_norm": 0.4067396819591522, "learning_rate": 8.070385749636672e-05, "loss": 0.1664, "step": 41530 }, { "epoch": 0.5838779956427015, "grad_norm": 0.4003329873085022, "learning_rate": 8.065825095013333e-05, "loss": 0.1511, "step": 41540 }, { "epoch": 0.5840185536580224, "grad_norm": 0.4504907727241516, "learning_rate": 8.061264858286888e-05, "loss": 0.174, "step": 41550 }, { "epoch": 0.5841591116733432, "grad_norm": 0.3335106074810028, "learning_rate": 8.056705040442619e-05, "loss": 0.1903, "step": 41560 }, { "epoch": 0.584299669688664, "grad_norm": 0.40294119715690613, "learning_rate": 8.052145642465717e-05, "loss": 0.1739, "step": 41570 }, { "epoch": 0.5844402277039848, "grad_norm": 0.31343063712120056, "learning_rate": 8.047586665341284e-05, "loss": 0.1546, "step": 41580 }, { "epoch": 0.5845807857193056, "grad_norm": 0.3426019847393036, "learning_rate": 8.043028110054327e-05, "loss": 0.1707, "step": 41590 }, { "epoch": 0.5847213437346265, "grad_norm": 0.44034767150878906, "learning_rate": 8.038469977589773e-05, "loss": 0.1704, "step": 41600 }, { "epoch": 0.5848619017499473, "grad_norm": 0.4241983890533447, "learning_rate": 8.033912268932446e-05, "loss": 0.1471, "step": 41610 }, { "epoch": 0.5850024597652681, "grad_norm": 0.5022419691085815, "learning_rate": 8.029354985067078e-05, "loss": 0.1903, "step": 41620 }, { "epoch": 0.585143017780589, "grad_norm": 0.4251757860183716, "learning_rate": 8.024798126978321e-05, "loss": 0.193, "step": 41630 }, { "epoch": 0.5852835757959097, "grad_norm": 0.37948426604270935, "learning_rate": 8.020241695650723e-05, "loss": 0.1887, "step": 41640 }, { "epoch": 0.5854241338112306, "grad_norm": 0.3809700608253479, "learning_rate": 8.015685692068744e-05, "loss": 0.1804, "step": 41650 }, { "epoch": 0.5855646918265515, "grad_norm": 0.43913763761520386, "learning_rate": 8.011130117216755e-05, "loss": 0.1549, "step": 41660 }, { "epoch": 0.5857052498418722, "grad_norm": 0.25458937883377075, "learning_rate": 8.006574972079028e-05, "loss": 0.1705, "step": 41670 }, { "epoch": 0.5858458078571931, "grad_norm": 0.29581257700920105, "learning_rate": 8.002020257639748e-05, "loss": 0.1491, "step": 41680 }, { "epoch": 0.5859863658725138, "grad_norm": 0.43577784299850464, "learning_rate": 7.997465974883003e-05, "loss": 0.1953, "step": 41690 }, { "epoch": 0.5861269238878347, "grad_norm": 0.23517319560050964, "learning_rate": 7.992912124792789e-05, "loss": 0.1517, "step": 41700 }, { "epoch": 0.5862674819031555, "grad_norm": 0.43467965722084045, "learning_rate": 7.988358708353009e-05, "loss": 0.1815, "step": 41710 }, { "epoch": 0.5864080399184763, "grad_norm": 0.5543309450149536, "learning_rate": 7.983805726547471e-05, "loss": 0.1531, "step": 41720 }, { "epoch": 0.5865485979337972, "grad_norm": 0.27722403407096863, "learning_rate": 7.979253180359893e-05, "loss": 0.1457, "step": 41730 }, { "epoch": 0.586689155949118, "grad_norm": 0.41455915570259094, "learning_rate": 7.974701070773893e-05, "loss": 0.174, "step": 41740 }, { "epoch": 0.5868297139644388, "grad_norm": 0.30396610498428345, "learning_rate": 7.970149398773001e-05, "loss": 0.1733, "step": 41750 }, { "epoch": 0.5869702719797596, "grad_norm": 0.41698095202445984, "learning_rate": 7.965598165340644e-05, "loss": 0.1778, "step": 41760 }, { "epoch": 0.5871108299950805, "grad_norm": 0.504375696182251, "learning_rate": 7.961047371460162e-05, "loss": 0.1687, "step": 41770 }, { "epoch": 0.5872513880104013, "grad_norm": 0.43992507457733154, "learning_rate": 7.956497018114798e-05, "loss": 0.1843, "step": 41780 }, { "epoch": 0.5873919460257221, "grad_norm": 0.35274389386177063, "learning_rate": 7.951947106287699e-05, "loss": 0.1744, "step": 41790 }, { "epoch": 0.587532504041043, "grad_norm": 0.38785210251808167, "learning_rate": 7.947397636961915e-05, "loss": 0.1676, "step": 41800 }, { "epoch": 0.5876730620563637, "grad_norm": 0.44539543986320496, "learning_rate": 7.942848611120404e-05, "loss": 0.1705, "step": 41810 }, { "epoch": 0.5878136200716846, "grad_norm": 0.39283066987991333, "learning_rate": 7.938300029746025e-05, "loss": 0.1576, "step": 41820 }, { "epoch": 0.5879541780870055, "grad_norm": 0.39538851380348206, "learning_rate": 7.933751893821544e-05, "loss": 0.1693, "step": 41830 }, { "epoch": 0.5880947361023262, "grad_norm": 0.3279722034931183, "learning_rate": 7.929204204329627e-05, "loss": 0.1867, "step": 41840 }, { "epoch": 0.5882352941176471, "grad_norm": 0.3832647502422333, "learning_rate": 7.924656962252846e-05, "loss": 0.1659, "step": 41850 }, { "epoch": 0.5883758521329678, "grad_norm": 0.41260239481925964, "learning_rate": 7.920110168573678e-05, "loss": 0.1773, "step": 41860 }, { "epoch": 0.5885164101482887, "grad_norm": 0.32211098074913025, "learning_rate": 7.915563824274497e-05, "loss": 0.1869, "step": 41870 }, { "epoch": 0.5886569681636096, "grad_norm": 0.33297044038772583, "learning_rate": 7.911017930337589e-05, "loss": 0.1659, "step": 41880 }, { "epoch": 0.5887975261789303, "grad_norm": 0.372994989156723, "learning_rate": 7.906472487745133e-05, "loss": 0.1707, "step": 41890 }, { "epoch": 0.5889380841942512, "grad_norm": 0.5005825757980347, "learning_rate": 7.901927497479219e-05, "loss": 0.1782, "step": 41900 }, { "epoch": 0.589078642209572, "grad_norm": 0.5057122707366943, "learning_rate": 7.897382960521833e-05, "loss": 0.178, "step": 41910 }, { "epoch": 0.5892192002248928, "grad_norm": 0.466781884431839, "learning_rate": 7.892838877854866e-05, "loss": 0.1734, "step": 41920 }, { "epoch": 0.5893597582402137, "grad_norm": 0.45633605122566223, "learning_rate": 7.888295250460111e-05, "loss": 0.1942, "step": 41930 }, { "epoch": 0.5895003162555345, "grad_norm": 0.5042839050292969, "learning_rate": 7.883752079319259e-05, "loss": 0.1671, "step": 41940 }, { "epoch": 0.5896408742708553, "grad_norm": 0.4372533857822418, "learning_rate": 7.87920936541391e-05, "loss": 0.1834, "step": 41950 }, { "epoch": 0.5897814322861761, "grad_norm": 0.4715641438961029, "learning_rate": 7.874667109725557e-05, "loss": 0.161, "step": 41960 }, { "epoch": 0.589921990301497, "grad_norm": 0.49999579787254333, "learning_rate": 7.8701253132356e-05, "loss": 0.1812, "step": 41970 }, { "epoch": 0.5900625483168178, "grad_norm": 0.38548150658607483, "learning_rate": 7.865583976925336e-05, "loss": 0.2009, "step": 41980 }, { "epoch": 0.5902031063321386, "grad_norm": 0.5082098245620728, "learning_rate": 7.861043101775963e-05, "loss": 0.1661, "step": 41990 }, { "epoch": 0.5903436643474594, "grad_norm": 0.5039867162704468, "learning_rate": 7.856502688768582e-05, "loss": 0.2032, "step": 42000 }, { "epoch": 0.5903436643474594, "eval_chrf": 69.27115948936047, "eval_loss": 0.363955020904541, "eval_runtime": 324.8691, "eval_samples_per_second": 0.308, "eval_steps_per_second": 0.012, "step": 42000 }, { "epoch": 0.5904842223627802, "grad_norm": 0.45851778984069824, "learning_rate": 7.851962738884191e-05, "loss": 0.1649, "step": 42010 }, { "epoch": 0.5906247803781011, "grad_norm": 0.422050803899765, "learning_rate": 7.847423253103691e-05, "loss": 0.1759, "step": 42020 }, { "epoch": 0.5907653383934219, "grad_norm": 0.4894850254058838, "learning_rate": 7.84288423240788e-05, "loss": 0.1693, "step": 42030 }, { "epoch": 0.5909058964087427, "grad_norm": 0.33519265055656433, "learning_rate": 7.838345677777457e-05, "loss": 0.1721, "step": 42040 }, { "epoch": 0.5910464544240636, "grad_norm": 0.45269614458084106, "learning_rate": 7.83380759019302e-05, "loss": 0.1921, "step": 42050 }, { "epoch": 0.5911870124393843, "grad_norm": 0.4886782765388489, "learning_rate": 7.82926997063507e-05, "loss": 0.1732, "step": 42060 }, { "epoch": 0.5913275704547052, "grad_norm": 0.39376261830329895, "learning_rate": 7.824732820083993e-05, "loss": 0.1578, "step": 42070 }, { "epoch": 0.591468128470026, "grad_norm": 0.39648663997650146, "learning_rate": 7.82019613952009e-05, "loss": 0.1759, "step": 42080 }, { "epoch": 0.5916086864853468, "grad_norm": 0.4441344141960144, "learning_rate": 7.815659929923554e-05, "loss": 0.1838, "step": 42090 }, { "epoch": 0.5917492445006677, "grad_norm": 0.3926589787006378, "learning_rate": 7.811124192274473e-05, "loss": 0.1902, "step": 42100 }, { "epoch": 0.5918898025159884, "grad_norm": 0.43851348757743835, "learning_rate": 7.806588927552841e-05, "loss": 0.1802, "step": 42110 }, { "epoch": 0.5920303605313093, "grad_norm": 0.39181485772132874, "learning_rate": 7.802054136738544e-05, "loss": 0.1659, "step": 42120 }, { "epoch": 0.5921709185466302, "grad_norm": 0.5086449384689331, "learning_rate": 7.797519820811363e-05, "loss": 0.1996, "step": 42130 }, { "epoch": 0.5923114765619509, "grad_norm": 0.26584893465042114, "learning_rate": 7.792985980750984e-05, "loss": 0.1756, "step": 42140 }, { "epoch": 0.5924520345772718, "grad_norm": 0.3105525076389313, "learning_rate": 7.788452617536984e-05, "loss": 0.1816, "step": 42150 }, { "epoch": 0.5925925925925926, "grad_norm": 0.37008169293403625, "learning_rate": 7.783919732148841e-05, "loss": 0.161, "step": 42160 }, { "epoch": 0.5927331506079134, "grad_norm": 0.36043819785118103, "learning_rate": 7.779387325565926e-05, "loss": 0.191, "step": 42170 }, { "epoch": 0.5928737086232342, "grad_norm": 0.39846110343933105, "learning_rate": 7.774855398767511e-05, "loss": 0.1795, "step": 42180 }, { "epoch": 0.593014266638555, "grad_norm": 0.3892119526863098, "learning_rate": 7.77032395273276e-05, "loss": 0.1741, "step": 42190 }, { "epoch": 0.5931548246538759, "grad_norm": 0.3905502259731293, "learning_rate": 7.765792988440737e-05, "loss": 0.1506, "step": 42200 }, { "epoch": 0.5932953826691967, "grad_norm": 0.5239992141723633, "learning_rate": 7.761262506870397e-05, "loss": 0.201, "step": 42210 }, { "epoch": 0.5934359406845175, "grad_norm": 0.34727463126182556, "learning_rate": 7.756732509000597e-05, "loss": 0.1549, "step": 42220 }, { "epoch": 0.5935764986998383, "grad_norm": 0.3456290662288666, "learning_rate": 7.752202995810083e-05, "loss": 0.1571, "step": 42230 }, { "epoch": 0.5937170567151592, "grad_norm": 0.4618767499923706, "learning_rate": 7.7476739682775e-05, "loss": 0.1798, "step": 42240 }, { "epoch": 0.59385761473048, "grad_norm": 0.3260291814804077, "learning_rate": 7.74314542738139e-05, "loss": 0.1549, "step": 42250 }, { "epoch": 0.5939981727458008, "grad_norm": 0.28016695380210876, "learning_rate": 7.738617374100187e-05, "loss": 0.1613, "step": 42260 }, { "epoch": 0.5941387307611217, "grad_norm": 0.4371799826622009, "learning_rate": 7.734089809412217e-05, "loss": 0.1774, "step": 42270 }, { "epoch": 0.5942792887764424, "grad_norm": 0.4408225417137146, "learning_rate": 7.729562734295705e-05, "loss": 0.1873, "step": 42280 }, { "epoch": 0.5944198467917633, "grad_norm": 0.33245933055877686, "learning_rate": 7.72503614972877e-05, "loss": 0.1552, "step": 42290 }, { "epoch": 0.5945604048070842, "grad_norm": 0.38343554735183716, "learning_rate": 7.720510056689423e-05, "loss": 0.1553, "step": 42300 }, { "epoch": 0.5947009628224049, "grad_norm": 0.43015557527542114, "learning_rate": 7.715984456155567e-05, "loss": 0.1812, "step": 42310 }, { "epoch": 0.5948415208377258, "grad_norm": 0.2760184407234192, "learning_rate": 7.711459349105004e-05, "loss": 0.1672, "step": 42320 }, { "epoch": 0.5949820788530465, "grad_norm": 0.48811301589012146, "learning_rate": 7.706934736515425e-05, "loss": 0.166, "step": 42330 }, { "epoch": 0.5951226368683674, "grad_norm": 0.4176845848560333, "learning_rate": 7.702410619364417e-05, "loss": 0.1685, "step": 42340 }, { "epoch": 0.5952631948836883, "grad_norm": 0.33408379554748535, "learning_rate": 7.697886998629457e-05, "loss": 0.1521, "step": 42350 }, { "epoch": 0.595403752899009, "grad_norm": 0.4298063814640045, "learning_rate": 7.693363875287918e-05, "loss": 0.1593, "step": 42360 }, { "epoch": 0.5955443109143299, "grad_norm": 0.4347199499607086, "learning_rate": 7.68884125031706e-05, "loss": 0.1955, "step": 42370 }, { "epoch": 0.5956848689296507, "grad_norm": 0.39815792441368103, "learning_rate": 7.684319124694045e-05, "loss": 0.1786, "step": 42380 }, { "epoch": 0.5958254269449715, "grad_norm": 0.274598628282547, "learning_rate": 7.679797499395916e-05, "loss": 0.1785, "step": 42390 }, { "epoch": 0.5959659849602924, "grad_norm": 0.3358634114265442, "learning_rate": 7.675276375399618e-05, "loss": 0.1747, "step": 42400 }, { "epoch": 0.5961065429756132, "grad_norm": 0.3847486674785614, "learning_rate": 7.670755753681977e-05, "loss": 0.1581, "step": 42410 }, { "epoch": 0.596247100990934, "grad_norm": 0.4903138279914856, "learning_rate": 7.666235635219721e-05, "loss": 0.1654, "step": 42420 }, { "epoch": 0.5963876590062548, "grad_norm": 0.4884909391403198, "learning_rate": 7.661716020989464e-05, "loss": 0.1845, "step": 42430 }, { "epoch": 0.5965282170215757, "grad_norm": 0.4085238575935364, "learning_rate": 7.65719691196771e-05, "loss": 0.1645, "step": 42440 }, { "epoch": 0.5966687750368965, "grad_norm": 0.3783973157405853, "learning_rate": 7.652678309130857e-05, "loss": 0.1779, "step": 42450 }, { "epoch": 0.5968093330522173, "grad_norm": 0.3126957416534424, "learning_rate": 7.648160213455191e-05, "loss": 0.1647, "step": 42460 }, { "epoch": 0.5969498910675382, "grad_norm": 0.3112151324748993, "learning_rate": 7.64364262591689e-05, "loss": 0.1688, "step": 42470 }, { "epoch": 0.5970904490828589, "grad_norm": 0.27285337448120117, "learning_rate": 7.639125547492023e-05, "loss": 0.1805, "step": 42480 }, { "epoch": 0.5972310070981798, "grad_norm": 0.44554203748703003, "learning_rate": 7.634608979156545e-05, "loss": 0.1633, "step": 42490 }, { "epoch": 0.5973715651135006, "grad_norm": 0.33010488748550415, "learning_rate": 7.630092921886305e-05, "loss": 0.1624, "step": 42500 }, { "epoch": 0.5975121231288214, "grad_norm": 0.29994094371795654, "learning_rate": 7.625577376657042e-05, "loss": 0.1679, "step": 42510 }, { "epoch": 0.5976526811441423, "grad_norm": 0.4038458466529846, "learning_rate": 7.62106234444438e-05, "loss": 0.1776, "step": 42520 }, { "epoch": 0.597793239159463, "grad_norm": 0.3720175623893738, "learning_rate": 7.616547826223835e-05, "loss": 0.1886, "step": 42530 }, { "epoch": 0.5979337971747839, "grad_norm": 0.39369431138038635, "learning_rate": 7.612033822970813e-05, "loss": 0.1736, "step": 42540 }, { "epoch": 0.5980743551901048, "grad_norm": 0.28870657086372375, "learning_rate": 7.607520335660606e-05, "loss": 0.1575, "step": 42550 }, { "epoch": 0.5982149132054255, "grad_norm": 0.5239557027816772, "learning_rate": 7.603007365268397e-05, "loss": 0.1843, "step": 42560 }, { "epoch": 0.5983554712207464, "grad_norm": 0.40606215596199036, "learning_rate": 7.598494912769255e-05, "loss": 0.1622, "step": 42570 }, { "epoch": 0.5984960292360672, "grad_norm": 0.33013060688972473, "learning_rate": 7.593982979138139e-05, "loss": 0.1497, "step": 42580 }, { "epoch": 0.598636587251388, "grad_norm": 0.4070757031440735, "learning_rate": 7.589471565349895e-05, "loss": 0.1736, "step": 42590 }, { "epoch": 0.5987771452667089, "grad_norm": 0.4686710834503174, "learning_rate": 7.584960672379257e-05, "loss": 0.1712, "step": 42600 }, { "epoch": 0.5989177032820296, "grad_norm": 0.34591057896614075, "learning_rate": 7.580450301200845e-05, "loss": 0.1806, "step": 42610 }, { "epoch": 0.5990582612973505, "grad_norm": 0.46998921036720276, "learning_rate": 7.575940452789171e-05, "loss": 0.1556, "step": 42620 }, { "epoch": 0.5991988193126713, "grad_norm": 0.37321600317955017, "learning_rate": 7.571431128118627e-05, "loss": 0.1842, "step": 42630 }, { "epoch": 0.5993393773279921, "grad_norm": 0.34320640563964844, "learning_rate": 7.566922328163499e-05, "loss": 0.1618, "step": 42640 }, { "epoch": 0.5994799353433129, "grad_norm": 0.5494560599327087, "learning_rate": 7.562414053897952e-05, "loss": 0.2126, "step": 42650 }, { "epoch": 0.5996204933586338, "grad_norm": 0.4479246139526367, "learning_rate": 7.557906306296046e-05, "loss": 0.1835, "step": 42660 }, { "epoch": 0.5997610513739546, "grad_norm": 0.451515793800354, "learning_rate": 7.553399086331719e-05, "loss": 0.2045, "step": 42670 }, { "epoch": 0.5999016093892754, "grad_norm": 0.6131281852722168, "learning_rate": 7.5488923949788e-05, "loss": 0.1606, "step": 42680 }, { "epoch": 0.6000421674045963, "grad_norm": 0.36279651522636414, "learning_rate": 7.544386233211003e-05, "loss": 0.1586, "step": 42690 }, { "epoch": 0.600182725419917, "grad_norm": 0.3585605025291443, "learning_rate": 7.539880602001927e-05, "loss": 0.165, "step": 42700 }, { "epoch": 0.6003232834352379, "grad_norm": 0.3063223361968994, "learning_rate": 7.535375502325057e-05, "loss": 0.1623, "step": 42710 }, { "epoch": 0.6004638414505588, "grad_norm": 0.3708120882511139, "learning_rate": 7.530870935153762e-05, "loss": 0.1526, "step": 42720 }, { "epoch": 0.6006043994658795, "grad_norm": 0.34393540024757385, "learning_rate": 7.5263669014613e-05, "loss": 0.1814, "step": 42730 }, { "epoch": 0.6007449574812004, "grad_norm": 0.5064597129821777, "learning_rate": 7.521863402220802e-05, "loss": 0.1827, "step": 42740 }, { "epoch": 0.6008855154965211, "grad_norm": 0.41531017422676086, "learning_rate": 7.517360438405298e-05, "loss": 0.1799, "step": 42750 }, { "epoch": 0.601026073511842, "grad_norm": 0.35172322392463684, "learning_rate": 7.512858010987697e-05, "loss": 0.1679, "step": 42760 }, { "epoch": 0.6011666315271629, "grad_norm": 0.27190056443214417, "learning_rate": 7.508356120940787e-05, "loss": 0.1644, "step": 42770 }, { "epoch": 0.6013071895424836, "grad_norm": 0.2985529601573944, "learning_rate": 7.503854769237249e-05, "loss": 0.1759, "step": 42780 }, { "epoch": 0.6014477475578045, "grad_norm": 0.3518924415111542, "learning_rate": 7.499353956849639e-05, "loss": 0.19, "step": 42790 }, { "epoch": 0.6015883055731253, "grad_norm": 0.4358084797859192, "learning_rate": 7.494853684750402e-05, "loss": 0.1802, "step": 42800 }, { "epoch": 0.6017288635884461, "grad_norm": 0.4318670630455017, "learning_rate": 7.490353953911863e-05, "loss": 0.1691, "step": 42810 }, { "epoch": 0.601869421603767, "grad_norm": 0.33650708198547363, "learning_rate": 7.485854765306235e-05, "loss": 0.184, "step": 42820 }, { "epoch": 0.6020099796190878, "grad_norm": 0.3853902518749237, "learning_rate": 7.481356119905607e-05, "loss": 0.1895, "step": 42830 }, { "epoch": 0.6021505376344086, "grad_norm": 0.32648152112960815, "learning_rate": 7.476858018681957e-05, "loss": 0.1454, "step": 42840 }, { "epoch": 0.6022910956497294, "grad_norm": 0.3527396023273468, "learning_rate": 7.472360462607141e-05, "loss": 0.1506, "step": 42850 }, { "epoch": 0.6024316536650502, "grad_norm": 0.38165199756622314, "learning_rate": 7.4678634526529e-05, "loss": 0.1669, "step": 42860 }, { "epoch": 0.6025722116803711, "grad_norm": 0.3815620243549347, "learning_rate": 7.463366989790852e-05, "loss": 0.1751, "step": 42870 }, { "epoch": 0.6027127696956919, "grad_norm": 0.4380703270435333, "learning_rate": 7.458871074992506e-05, "loss": 0.1818, "step": 42880 }, { "epoch": 0.6028533277110127, "grad_norm": 0.33199450373649597, "learning_rate": 7.454375709229244e-05, "loss": 0.1681, "step": 42890 }, { "epoch": 0.6029938857263335, "grad_norm": 0.24013389647006989, "learning_rate": 7.449880893472333e-05, "loss": 0.1786, "step": 42900 }, { "epoch": 0.6031344437416544, "grad_norm": 0.3959920406341553, "learning_rate": 7.445386628692921e-05, "loss": 0.1861, "step": 42910 }, { "epoch": 0.6032750017569752, "grad_norm": 0.41576969623565674, "learning_rate": 7.440892915862036e-05, "loss": 0.1805, "step": 42920 }, { "epoch": 0.603415559772296, "grad_norm": 0.47591328620910645, "learning_rate": 7.43639975595059e-05, "loss": 0.1849, "step": 42930 }, { "epoch": 0.6035561177876169, "grad_norm": 0.43059951066970825, "learning_rate": 7.431907149929369e-05, "loss": 0.1777, "step": 42940 }, { "epoch": 0.6036966758029376, "grad_norm": 0.3965088725090027, "learning_rate": 7.427415098769045e-05, "loss": 0.1807, "step": 42950 }, { "epoch": 0.6038372338182585, "grad_norm": 0.34018439054489136, "learning_rate": 7.422923603440168e-05, "loss": 0.1672, "step": 42960 }, { "epoch": 0.6039777918335794, "grad_norm": 0.2599017024040222, "learning_rate": 7.418432664913171e-05, "loss": 0.171, "step": 42970 }, { "epoch": 0.6041183498489001, "grad_norm": 0.43904367089271545, "learning_rate": 7.413942284158362e-05, "loss": 0.1837, "step": 42980 }, { "epoch": 0.604258907864221, "grad_norm": 0.4200136363506317, "learning_rate": 7.40945246214593e-05, "loss": 0.1575, "step": 42990 }, { "epoch": 0.6043994658795417, "grad_norm": 0.4445408582687378, "learning_rate": 7.404963199845942e-05, "loss": 0.1594, "step": 43000 }, { "epoch": 0.6043994658795417, "eval_chrf": 83.57044035800844, "eval_loss": 0.36899685859680176, "eval_runtime": 164.275, "eval_samples_per_second": 0.609, "eval_steps_per_second": 0.024, "step": 43000 }, { "epoch": 0.6045400238948626, "grad_norm": 0.3644404113292694, "learning_rate": 7.400474498228351e-05, "loss": 0.1529, "step": 43010 }, { "epoch": 0.6046805819101835, "grad_norm": 0.3766258955001831, "learning_rate": 7.395986358262978e-05, "loss": 0.1922, "step": 43020 }, { "epoch": 0.6048211399255042, "grad_norm": 0.36604171991348267, "learning_rate": 7.391498780919532e-05, "loss": 0.1608, "step": 43030 }, { "epoch": 0.6049616979408251, "grad_norm": 0.38928326964378357, "learning_rate": 7.387011767167597e-05, "loss": 0.1642, "step": 43040 }, { "epoch": 0.6051022559561459, "grad_norm": 0.24062176048755646, "learning_rate": 7.382525317976634e-05, "loss": 0.161, "step": 43050 }, { "epoch": 0.6052428139714667, "grad_norm": 0.33095383644104004, "learning_rate": 7.378039434315983e-05, "loss": 0.1712, "step": 43060 }, { "epoch": 0.6053833719867876, "grad_norm": 0.3314269781112671, "learning_rate": 7.37355411715486e-05, "loss": 0.1719, "step": 43070 }, { "epoch": 0.6055239300021084, "grad_norm": 0.4529399573802948, "learning_rate": 7.369069367462362e-05, "loss": 0.1719, "step": 43080 }, { "epoch": 0.6056644880174292, "grad_norm": 0.25623270869255066, "learning_rate": 7.364585186207464e-05, "loss": 0.1728, "step": 43090 }, { "epoch": 0.60580504603275, "grad_norm": 0.4020390808582306, "learning_rate": 7.360101574359014e-05, "loss": 0.1715, "step": 43100 }, { "epoch": 0.6059456040480709, "grad_norm": 0.3411688506603241, "learning_rate": 7.355618532885738e-05, "loss": 0.1634, "step": 43110 }, { "epoch": 0.6060861620633917, "grad_norm": 0.34853067994117737, "learning_rate": 7.351136062756241e-05, "loss": 0.1778, "step": 43120 }, { "epoch": 0.6062267200787125, "grad_norm": 0.4465995132923126, "learning_rate": 7.346654164939002e-05, "loss": 0.1649, "step": 43130 }, { "epoch": 0.6063672780940333, "grad_norm": 0.37591084837913513, "learning_rate": 7.342172840402381e-05, "loss": 0.164, "step": 43140 }, { "epoch": 0.6065078361093541, "grad_norm": 0.4197112023830414, "learning_rate": 7.337692090114607e-05, "loss": 0.2018, "step": 43150 }, { "epoch": 0.606648394124675, "grad_norm": 0.27792155742645264, "learning_rate": 7.333211915043791e-05, "loss": 0.1351, "step": 43160 }, { "epoch": 0.6067889521399957, "grad_norm": 0.2809070646762848, "learning_rate": 7.328732316157918e-05, "loss": 0.1657, "step": 43170 }, { "epoch": 0.6069295101553166, "grad_norm": 0.3960578739643097, "learning_rate": 7.324253294424845e-05, "loss": 0.1856, "step": 43180 }, { "epoch": 0.6070700681706375, "grad_norm": 0.39426571130752563, "learning_rate": 7.319774850812312e-05, "loss": 0.1759, "step": 43190 }, { "epoch": 0.6072106261859582, "grad_norm": 0.39615365862846375, "learning_rate": 7.315296986287922e-05, "loss": 0.1794, "step": 43200 }, { "epoch": 0.6073511842012791, "grad_norm": 0.42621809244155884, "learning_rate": 7.310819701819168e-05, "loss": 0.1744, "step": 43210 }, { "epoch": 0.6074917422165999, "grad_norm": 0.5943285822868347, "learning_rate": 7.306342998373406e-05, "loss": 0.178, "step": 43220 }, { "epoch": 0.6076323002319207, "grad_norm": 0.4223845601081848, "learning_rate": 7.30186687691787e-05, "loss": 0.1743, "step": 43230 }, { "epoch": 0.6077728582472416, "grad_norm": 0.3097108006477356, "learning_rate": 7.297391338419671e-05, "loss": 0.1815, "step": 43240 }, { "epoch": 0.6079134162625623, "grad_norm": 0.3980135917663574, "learning_rate": 7.292916383845789e-05, "loss": 0.1806, "step": 43250 }, { "epoch": 0.6080539742778832, "grad_norm": 0.44541212916374207, "learning_rate": 7.288442014163084e-05, "loss": 0.1992, "step": 43260 }, { "epoch": 0.608194532293204, "grad_norm": 0.3464401066303253, "learning_rate": 7.283968230338282e-05, "loss": 0.1724, "step": 43270 }, { "epoch": 0.6083350903085248, "grad_norm": 0.44827163219451904, "learning_rate": 7.279495033337988e-05, "loss": 0.1543, "step": 43280 }, { "epoch": 0.6084756483238457, "grad_norm": 0.4815289378166199, "learning_rate": 7.275022424128681e-05, "loss": 0.1797, "step": 43290 }, { "epoch": 0.6086162063391665, "grad_norm": 0.35351064801216125, "learning_rate": 7.270550403676708e-05, "loss": 0.1675, "step": 43300 }, { "epoch": 0.6087567643544873, "grad_norm": 0.46254482865333557, "learning_rate": 7.266078972948292e-05, "loss": 0.1866, "step": 43310 }, { "epoch": 0.6088973223698081, "grad_norm": 0.5490948557853699, "learning_rate": 7.26160813290953e-05, "loss": 0.1682, "step": 43320 }, { "epoch": 0.609037880385129, "grad_norm": 0.31148210167884827, "learning_rate": 7.257137884526388e-05, "loss": 0.1824, "step": 43330 }, { "epoch": 0.6091784384004498, "grad_norm": 0.47011902928352356, "learning_rate": 7.252668228764708e-05, "loss": 0.179, "step": 43340 }, { "epoch": 0.6093189964157706, "grad_norm": 0.3612685799598694, "learning_rate": 7.248199166590197e-05, "loss": 0.1729, "step": 43350 }, { "epoch": 0.6094595544310915, "grad_norm": 0.36253541707992554, "learning_rate": 7.24373069896844e-05, "loss": 0.1797, "step": 43360 }, { "epoch": 0.6096001124464122, "grad_norm": 0.41139015555381775, "learning_rate": 7.239262826864896e-05, "loss": 0.171, "step": 43370 }, { "epoch": 0.6097406704617331, "grad_norm": 0.39285191893577576, "learning_rate": 7.234795551244888e-05, "loss": 0.1687, "step": 43380 }, { "epoch": 0.609881228477054, "grad_norm": 0.3595815598964691, "learning_rate": 7.230328873073613e-05, "loss": 0.1706, "step": 43390 }, { "epoch": 0.6100217864923747, "grad_norm": 0.3913259506225586, "learning_rate": 7.225862793316144e-05, "loss": 0.1642, "step": 43400 }, { "epoch": 0.6101623445076956, "grad_norm": 0.39592108130455017, "learning_rate": 7.221397312937413e-05, "loss": 0.2039, "step": 43410 }, { "epoch": 0.6103029025230163, "grad_norm": 0.4641571640968323, "learning_rate": 7.21693243290223e-05, "loss": 0.1731, "step": 43420 }, { "epoch": 0.6104434605383372, "grad_norm": 0.29540741443634033, "learning_rate": 7.21246815417528e-05, "loss": 0.1505, "step": 43430 }, { "epoch": 0.6105840185536581, "grad_norm": 0.4817897379398346, "learning_rate": 7.20800447772111e-05, "loss": 0.1769, "step": 43440 }, { "epoch": 0.6107245765689788, "grad_norm": 0.4189295768737793, "learning_rate": 7.20354140450414e-05, "loss": 0.1536, "step": 43450 }, { "epoch": 0.6108651345842997, "grad_norm": 0.49806585907936096, "learning_rate": 7.199078935488658e-05, "loss": 0.1635, "step": 43460 }, { "epoch": 0.6110056925996205, "grad_norm": 0.3175036609172821, "learning_rate": 7.194617071638827e-05, "loss": 0.1414, "step": 43470 }, { "epoch": 0.6111462506149413, "grad_norm": 0.3762895464897156, "learning_rate": 7.190155813918671e-05, "loss": 0.1642, "step": 43480 }, { "epoch": 0.6112868086302622, "grad_norm": 0.410321444272995, "learning_rate": 7.185695163292089e-05, "loss": 0.161, "step": 43490 }, { "epoch": 0.611427366645583, "grad_norm": 0.4362190067768097, "learning_rate": 7.181235120722847e-05, "loss": 0.1781, "step": 43500 }, { "epoch": 0.6115679246609038, "grad_norm": 0.4020962417125702, "learning_rate": 7.17677568717458e-05, "loss": 0.1691, "step": 43510 }, { "epoch": 0.6117084826762246, "grad_norm": 0.4284510314464569, "learning_rate": 7.172316863610789e-05, "loss": 0.1645, "step": 43520 }, { "epoch": 0.6118490406915454, "grad_norm": 0.346312940120697, "learning_rate": 7.167858650994848e-05, "loss": 0.1721, "step": 43530 }, { "epoch": 0.6119895987068663, "grad_norm": 0.4400968551635742, "learning_rate": 7.163401050289994e-05, "loss": 0.1648, "step": 43540 }, { "epoch": 0.6121301567221871, "grad_norm": 0.34509405493736267, "learning_rate": 7.158944062459335e-05, "loss": 0.1639, "step": 43550 }, { "epoch": 0.6122707147375079, "grad_norm": 0.4468303322792053, "learning_rate": 7.154487688465848e-05, "loss": 0.1668, "step": 43560 }, { "epoch": 0.6124112727528287, "grad_norm": 0.36385270953178406, "learning_rate": 7.150031929272369e-05, "loss": 0.1767, "step": 43570 }, { "epoch": 0.6125518307681496, "grad_norm": 0.48590368032455444, "learning_rate": 7.145576785841613e-05, "loss": 0.1792, "step": 43580 }, { "epoch": 0.6126923887834704, "grad_norm": 0.3635001480579376, "learning_rate": 7.141122259136153e-05, "loss": 0.1671, "step": 43590 }, { "epoch": 0.6128329467987912, "grad_norm": 0.3440009653568268, "learning_rate": 7.136668350118434e-05, "loss": 0.1436, "step": 43600 }, { "epoch": 0.6129735048141121, "grad_norm": 0.48527681827545166, "learning_rate": 7.132215059750764e-05, "loss": 0.1812, "step": 43610 }, { "epoch": 0.6131140628294328, "grad_norm": 0.33638450503349304, "learning_rate": 7.12776238899532e-05, "loss": 0.182, "step": 43620 }, { "epoch": 0.6132546208447537, "grad_norm": 0.3818179965019226, "learning_rate": 7.12331033881414e-05, "loss": 0.1757, "step": 43630 }, { "epoch": 0.6133951788600744, "grad_norm": 0.4085322916507721, "learning_rate": 7.118858910169138e-05, "loss": 0.1645, "step": 43640 }, { "epoch": 0.6135357368753953, "grad_norm": 0.3118310272693634, "learning_rate": 7.114408104022082e-05, "loss": 0.1631, "step": 43650 }, { "epoch": 0.6136762948907162, "grad_norm": 0.3122931718826294, "learning_rate": 7.109957921334614e-05, "loss": 0.1626, "step": 43660 }, { "epoch": 0.6138168529060369, "grad_norm": 0.34794265031814575, "learning_rate": 7.105508363068238e-05, "loss": 0.1772, "step": 43670 }, { "epoch": 0.6139574109213578, "grad_norm": 0.3584219515323639, "learning_rate": 7.101059430184322e-05, "loss": 0.1818, "step": 43680 }, { "epoch": 0.6140979689366786, "grad_norm": 0.3252063989639282, "learning_rate": 7.096611123644104e-05, "loss": 0.1755, "step": 43690 }, { "epoch": 0.6142385269519994, "grad_norm": 0.4239749610424042, "learning_rate": 7.092163444408677e-05, "loss": 0.1489, "step": 43700 }, { "epoch": 0.6143790849673203, "grad_norm": 0.3646966218948364, "learning_rate": 7.087716393439009e-05, "loss": 0.1814, "step": 43710 }, { "epoch": 0.6145196429826411, "grad_norm": 0.3831283748149872, "learning_rate": 7.083269971695926e-05, "loss": 0.1858, "step": 43720 }, { "epoch": 0.6146602009979619, "grad_norm": 0.342099130153656, "learning_rate": 7.07882418014012e-05, "loss": 0.1801, "step": 43730 }, { "epoch": 0.6148007590132827, "grad_norm": 0.4391648471355438, "learning_rate": 7.074379019732147e-05, "loss": 0.165, "step": 43740 }, { "epoch": 0.6149413170286036, "grad_norm": 0.3105049133300781, "learning_rate": 7.069934491432426e-05, "loss": 0.1685, "step": 43750 }, { "epoch": 0.6150818750439244, "grad_norm": 0.3759618103504181, "learning_rate": 7.065490596201239e-05, "loss": 0.1519, "step": 43760 }, { "epoch": 0.6152224330592452, "grad_norm": 0.4365192651748657, "learning_rate": 7.061047334998733e-05, "loss": 0.1562, "step": 43770 }, { "epoch": 0.615362991074566, "grad_norm": 0.4824438691139221, "learning_rate": 7.056604708784915e-05, "loss": 0.1674, "step": 43780 }, { "epoch": 0.6155035490898868, "grad_norm": 0.40726444125175476, "learning_rate": 7.05216271851966e-05, "loss": 0.1879, "step": 43790 }, { "epoch": 0.6156441071052077, "grad_norm": 0.42368054389953613, "learning_rate": 7.047721365162699e-05, "loss": 0.1888, "step": 43800 }, { "epoch": 0.6157846651205285, "grad_norm": 0.4291306734085083, "learning_rate": 7.043280649673628e-05, "loss": 0.1765, "step": 43810 }, { "epoch": 0.6159252231358493, "grad_norm": 0.2989252805709839, "learning_rate": 7.038840573011909e-05, "loss": 0.1806, "step": 43820 }, { "epoch": 0.6160657811511702, "grad_norm": 0.43362295627593994, "learning_rate": 7.034401136136861e-05, "loss": 0.1802, "step": 43830 }, { "epoch": 0.6162063391664909, "grad_norm": 0.33333566784858704, "learning_rate": 7.029962340007669e-05, "loss": 0.1729, "step": 43840 }, { "epoch": 0.6163468971818118, "grad_norm": 0.4206397235393524, "learning_rate": 7.025524185583372e-05, "loss": 0.164, "step": 43850 }, { "epoch": 0.6164874551971327, "grad_norm": 0.28481581807136536, "learning_rate": 7.02108667382288e-05, "loss": 0.1713, "step": 43860 }, { "epoch": 0.6166280132124534, "grad_norm": 0.43364596366882324, "learning_rate": 7.01664980568496e-05, "loss": 0.1958, "step": 43870 }, { "epoch": 0.6167685712277743, "grad_norm": 0.4091656804084778, "learning_rate": 7.012213582128237e-05, "loss": 0.1409, "step": 43880 }, { "epoch": 0.616909129243095, "grad_norm": 0.4062401354312897, "learning_rate": 7.007778004111198e-05, "loss": 0.1775, "step": 43890 }, { "epoch": 0.6170496872584159, "grad_norm": 0.39458316564559937, "learning_rate": 7.003343072592197e-05, "loss": 0.1877, "step": 43900 }, { "epoch": 0.6171902452737368, "grad_norm": 0.5522444248199463, "learning_rate": 6.998908788529439e-05, "loss": 0.1857, "step": 43910 }, { "epoch": 0.6173308032890575, "grad_norm": 0.4108028709888458, "learning_rate": 6.994475152880993e-05, "loss": 0.1999, "step": 43920 }, { "epoch": 0.6174713613043784, "grad_norm": 0.3741578459739685, "learning_rate": 6.990042166604791e-05, "loss": 0.1678, "step": 43930 }, { "epoch": 0.6176119193196992, "grad_norm": 0.2947121262550354, "learning_rate": 6.985609830658621e-05, "loss": 0.1519, "step": 43940 }, { "epoch": 0.61775247733502, "grad_norm": 0.4122995138168335, "learning_rate": 6.98117814600013e-05, "loss": 0.1519, "step": 43950 }, { "epoch": 0.6178930353503409, "grad_norm": 0.3719595670700073, "learning_rate": 6.976747113586827e-05, "loss": 0.1843, "step": 43960 }, { "epoch": 0.6180335933656617, "grad_norm": 0.2966946065425873, "learning_rate": 6.972316734376078e-05, "loss": 0.1717, "step": 43970 }, { "epoch": 0.6181741513809825, "grad_norm": 0.3694175183773041, "learning_rate": 6.967887009325108e-05, "loss": 0.1816, "step": 43980 }, { "epoch": 0.6183147093963033, "grad_norm": 0.4243808388710022, "learning_rate": 6.963457939391002e-05, "loss": 0.1524, "step": 43990 }, { "epoch": 0.6184552674116242, "grad_norm": 0.3591419756412506, "learning_rate": 6.959029525530704e-05, "loss": 0.1861, "step": 44000 }, { "epoch": 0.6184552674116242, "eval_chrf": 77.63238475835136, "eval_loss": 0.3709178864955902, "eval_runtime": 327.2596, "eval_samples_per_second": 0.306, "eval_steps_per_second": 0.012, "step": 44000 }, { "epoch": 0.618595825426945, "grad_norm": 0.3396461009979248, "learning_rate": 6.954601768701013e-05, "loss": 0.2045, "step": 44010 }, { "epoch": 0.6187363834422658, "grad_norm": 0.430901437997818, "learning_rate": 6.95017466985859e-05, "loss": 0.1812, "step": 44020 }, { "epoch": 0.6188769414575866, "grad_norm": 0.56734699010849, "learning_rate": 6.945748229959948e-05, "loss": 0.1798, "step": 44030 }, { "epoch": 0.6190174994729074, "grad_norm": 0.426201730966568, "learning_rate": 6.941322449961465e-05, "loss": 0.1879, "step": 44040 }, { "epoch": 0.6191580574882283, "grad_norm": 0.27214428782463074, "learning_rate": 6.936897330819372e-05, "loss": 0.1441, "step": 44050 }, { "epoch": 0.6192986155035491, "grad_norm": 0.550829291343689, "learning_rate": 6.932472873489758e-05, "loss": 0.1682, "step": 44060 }, { "epoch": 0.6194391735188699, "grad_norm": 0.3202691376209259, "learning_rate": 6.928049078928569e-05, "loss": 0.1624, "step": 44070 }, { "epoch": 0.6195797315341908, "grad_norm": 0.30396291613578796, "learning_rate": 6.92362594809161e-05, "loss": 0.1768, "step": 44080 }, { "epoch": 0.6197202895495115, "grad_norm": 0.2941432297229767, "learning_rate": 6.919203481934536e-05, "loss": 0.1744, "step": 44090 }, { "epoch": 0.6198608475648324, "grad_norm": 0.49695345759391785, "learning_rate": 6.914781681412866e-05, "loss": 0.1689, "step": 44100 }, { "epoch": 0.6200014055801532, "grad_norm": 0.35379379987716675, "learning_rate": 6.910360547481971e-05, "loss": 0.1465, "step": 44110 }, { "epoch": 0.620141963595474, "grad_norm": 0.43449699878692627, "learning_rate": 6.90594008109708e-05, "loss": 0.1933, "step": 44120 }, { "epoch": 0.6202825216107949, "grad_norm": 0.3357935845851898, "learning_rate": 6.901520283213277e-05, "loss": 0.1513, "step": 44130 }, { "epoch": 0.6204230796261156, "grad_norm": 0.3763403296470642, "learning_rate": 6.897101154785499e-05, "loss": 0.1661, "step": 44140 }, { "epoch": 0.6205636376414365, "grad_norm": 0.40166908502578735, "learning_rate": 6.892682696768545e-05, "loss": 0.175, "step": 44150 }, { "epoch": 0.6207041956567573, "grad_norm": 0.42627114057540894, "learning_rate": 6.88826491011706e-05, "loss": 0.2007, "step": 44160 }, { "epoch": 0.6208447536720781, "grad_norm": 0.3611266016960144, "learning_rate": 6.883847795785552e-05, "loss": 0.1616, "step": 44170 }, { "epoch": 0.620985311687399, "grad_norm": 0.5528517365455627, "learning_rate": 6.879431354728378e-05, "loss": 0.1873, "step": 44180 }, { "epoch": 0.6211258697027198, "grad_norm": 0.37437355518341064, "learning_rate": 6.875015587899755e-05, "loss": 0.1667, "step": 44190 }, { "epoch": 0.6212664277180406, "grad_norm": 0.27261611819267273, "learning_rate": 6.870600496253752e-05, "loss": 0.1907, "step": 44200 }, { "epoch": 0.6214069857333614, "grad_norm": 0.30701377987861633, "learning_rate": 6.86618608074429e-05, "loss": 0.1683, "step": 44210 }, { "epoch": 0.6215475437486823, "grad_norm": 0.4434259235858917, "learning_rate": 6.861772342325144e-05, "loss": 0.1506, "step": 44220 }, { "epoch": 0.6216881017640031, "grad_norm": 0.48307085037231445, "learning_rate": 6.857359281949946e-05, "loss": 0.1743, "step": 44230 }, { "epoch": 0.6218286597793239, "grad_norm": 0.43294084072113037, "learning_rate": 6.852946900572179e-05, "loss": 0.1671, "step": 44240 }, { "epoch": 0.6219692177946448, "grad_norm": 0.49979066848754883, "learning_rate": 6.848535199145179e-05, "loss": 0.21, "step": 44250 }, { "epoch": 0.6221097758099655, "grad_norm": 0.4027746319770813, "learning_rate": 6.84412417862214e-05, "loss": 0.1741, "step": 44260 }, { "epoch": 0.6222503338252864, "grad_norm": 0.5052631497383118, "learning_rate": 6.8397138399561e-05, "loss": 0.1953, "step": 44270 }, { "epoch": 0.6223908918406073, "grad_norm": 0.34749501943588257, "learning_rate": 6.835304184099958e-05, "loss": 0.1759, "step": 44280 }, { "epoch": 0.622531449855928, "grad_norm": 0.5017284750938416, "learning_rate": 6.83089521200646e-05, "loss": 0.1775, "step": 44290 }, { "epoch": 0.6226720078712489, "grad_norm": 0.40785008668899536, "learning_rate": 6.826486924628209e-05, "loss": 0.1786, "step": 44300 }, { "epoch": 0.6228125658865696, "grad_norm": 0.46408775448799133, "learning_rate": 6.822079322917656e-05, "loss": 0.1796, "step": 44310 }, { "epoch": 0.6229531239018905, "grad_norm": 0.3966285288333893, "learning_rate": 6.817672407827103e-05, "loss": 0.1831, "step": 44320 }, { "epoch": 0.6230936819172114, "grad_norm": 0.45978012681007385, "learning_rate": 6.813266180308711e-05, "loss": 0.1756, "step": 44330 }, { "epoch": 0.6232342399325321, "grad_norm": 0.4069959223270416, "learning_rate": 6.808860641314481e-05, "loss": 0.163, "step": 44340 }, { "epoch": 0.623374797947853, "grad_norm": 0.4341197609901428, "learning_rate": 6.804455791796277e-05, "loss": 0.1828, "step": 44350 }, { "epoch": 0.6235153559631738, "grad_norm": 0.406414657831192, "learning_rate": 6.800051632705807e-05, "loss": 0.1813, "step": 44360 }, { "epoch": 0.6236559139784946, "grad_norm": 0.3163243532180786, "learning_rate": 6.79564816499463e-05, "loss": 0.1713, "step": 44370 }, { "epoch": 0.6237964719938155, "grad_norm": 0.3498212397098541, "learning_rate": 6.791245389614159e-05, "loss": 0.158, "step": 44380 }, { "epoch": 0.6239370300091363, "grad_norm": 0.48263484239578247, "learning_rate": 6.786843307515655e-05, "loss": 0.1524, "step": 44390 }, { "epoch": 0.6240775880244571, "grad_norm": 0.4155692756175995, "learning_rate": 6.782441919650229e-05, "loss": 0.1712, "step": 44400 }, { "epoch": 0.6242181460397779, "grad_norm": 0.35613593459129333, "learning_rate": 6.778041226968843e-05, "loss": 0.1769, "step": 44410 }, { "epoch": 0.6243587040550987, "grad_norm": 0.434958815574646, "learning_rate": 6.77364123042231e-05, "loss": 0.177, "step": 44420 }, { "epoch": 0.6244992620704196, "grad_norm": 0.44713306427001953, "learning_rate": 6.76924193096129e-05, "loss": 0.1778, "step": 44430 }, { "epoch": 0.6246398200857404, "grad_norm": 0.3982578217983246, "learning_rate": 6.764843329536293e-05, "loss": 0.1426, "step": 44440 }, { "epoch": 0.6247803781010612, "grad_norm": 0.3782222867012024, "learning_rate": 6.760445427097678e-05, "loss": 0.1883, "step": 44450 }, { "epoch": 0.624920936116382, "grad_norm": 0.3814140856266022, "learning_rate": 6.756048224595656e-05, "loss": 0.1851, "step": 44460 }, { "epoch": 0.6250614941317029, "grad_norm": 0.3756048381328583, "learning_rate": 6.751651722980283e-05, "loss": 0.1625, "step": 44470 }, { "epoch": 0.6252020521470237, "grad_norm": 0.3470192551612854, "learning_rate": 6.747255923201465e-05, "loss": 0.1604, "step": 44480 }, { "epoch": 0.6253426101623445, "grad_norm": 0.44632065296173096, "learning_rate": 6.742860826208956e-05, "loss": 0.202, "step": 44490 }, { "epoch": 0.6254831681776654, "grad_norm": 0.24534133076667786, "learning_rate": 6.73846643295236e-05, "loss": 0.1708, "step": 44500 }, { "epoch": 0.6256237261929861, "grad_norm": 0.40814921259880066, "learning_rate": 6.734072744381129e-05, "loss": 0.1739, "step": 44510 }, { "epoch": 0.625764284208307, "grad_norm": 0.4645785987377167, "learning_rate": 6.729679761444553e-05, "loss": 0.1732, "step": 44520 }, { "epoch": 0.6259048422236279, "grad_norm": 0.4125882685184479, "learning_rate": 6.725287485091789e-05, "loss": 0.1726, "step": 44530 }, { "epoch": 0.6260454002389486, "grad_norm": 0.3475102186203003, "learning_rate": 6.720895916271821e-05, "loss": 0.1671, "step": 44540 }, { "epoch": 0.6261859582542695, "grad_norm": 0.4807623028755188, "learning_rate": 6.716505055933496e-05, "loss": 0.1769, "step": 44550 }, { "epoch": 0.6263265162695902, "grad_norm": 0.46677112579345703, "learning_rate": 6.712114905025498e-05, "loss": 0.1957, "step": 44560 }, { "epoch": 0.6264670742849111, "grad_norm": 0.33543965220451355, "learning_rate": 6.707725464496358e-05, "loss": 0.1889, "step": 44570 }, { "epoch": 0.626607632300232, "grad_norm": 0.30811792612075806, "learning_rate": 6.70333673529446e-05, "loss": 0.1725, "step": 44580 }, { "epoch": 0.6267481903155527, "grad_norm": 0.43859946727752686, "learning_rate": 6.698948718368033e-05, "loss": 0.1575, "step": 44590 }, { "epoch": 0.6268887483308736, "grad_norm": 0.3518104553222656, "learning_rate": 6.694561414665145e-05, "loss": 0.1745, "step": 44600 }, { "epoch": 0.6270293063461944, "grad_norm": 0.30487966537475586, "learning_rate": 6.690174825133717e-05, "loss": 0.1695, "step": 44610 }, { "epoch": 0.6271698643615152, "grad_norm": 0.49385231733322144, "learning_rate": 6.685788950721512e-05, "loss": 0.1716, "step": 44620 }, { "epoch": 0.627310422376836, "grad_norm": 0.4758278727531433, "learning_rate": 6.681403792376142e-05, "loss": 0.1658, "step": 44630 }, { "epoch": 0.6274509803921569, "grad_norm": 0.4662957489490509, "learning_rate": 6.677019351045061e-05, "loss": 0.1829, "step": 44640 }, { "epoch": 0.6275915384074777, "grad_norm": 0.41147729754447937, "learning_rate": 6.67263562767557e-05, "loss": 0.1669, "step": 44650 }, { "epoch": 0.6277320964227985, "grad_norm": 0.36372533440589905, "learning_rate": 6.668252623214812e-05, "loss": 0.1452, "step": 44660 }, { "epoch": 0.6278726544381193, "grad_norm": 0.41888707876205444, "learning_rate": 6.663870338609779e-05, "loss": 0.1693, "step": 44670 }, { "epoch": 0.6280132124534401, "grad_norm": 0.35983574390411377, "learning_rate": 6.659488774807304e-05, "loss": 0.1639, "step": 44680 }, { "epoch": 0.628153770468761, "grad_norm": 0.3690883219242096, "learning_rate": 6.655107932754068e-05, "loss": 0.1577, "step": 44690 }, { "epoch": 0.6282943284840818, "grad_norm": 0.4388582408428192, "learning_rate": 6.65072781339659e-05, "loss": 0.1529, "step": 44700 }, { "epoch": 0.6284348864994026, "grad_norm": 0.42833447456359863, "learning_rate": 6.646348417681239e-05, "loss": 0.1642, "step": 44710 }, { "epoch": 0.6285754445147235, "grad_norm": 0.2447792887687683, "learning_rate": 6.641969746554225e-05, "loss": 0.1692, "step": 44720 }, { "epoch": 0.6287160025300442, "grad_norm": 0.33198803663253784, "learning_rate": 6.637591800961598e-05, "loss": 0.1718, "step": 44730 }, { "epoch": 0.6288565605453651, "grad_norm": 0.2678699195384979, "learning_rate": 6.63321458184926e-05, "loss": 0.1855, "step": 44740 }, { "epoch": 0.628997118560686, "grad_norm": 0.32302653789520264, "learning_rate": 6.628838090162955e-05, "loss": 0.1678, "step": 44750 }, { "epoch": 0.6291376765760067, "grad_norm": 0.35951146483421326, "learning_rate": 6.624462326848252e-05, "loss": 0.1565, "step": 44760 }, { "epoch": 0.6292782345913276, "grad_norm": 0.3611389100551605, "learning_rate": 6.620087292850584e-05, "loss": 0.1743, "step": 44770 }, { "epoch": 0.6294187926066483, "grad_norm": 0.41236069798469543, "learning_rate": 6.615712989115219e-05, "loss": 0.1638, "step": 44780 }, { "epoch": 0.6295593506219692, "grad_norm": 0.30151498317718506, "learning_rate": 6.611339416587265e-05, "loss": 0.1568, "step": 44790 }, { "epoch": 0.6296999086372901, "grad_norm": 0.35278046131134033, "learning_rate": 6.606966576211678e-05, "loss": 0.1721, "step": 44800 }, { "epoch": 0.6298404666526108, "grad_norm": 0.5340271592140198, "learning_rate": 6.602594468933245e-05, "loss": 0.1837, "step": 44810 }, { "epoch": 0.6299810246679317, "grad_norm": 0.24998606741428375, "learning_rate": 6.598223095696608e-05, "loss": 0.1609, "step": 44820 }, { "epoch": 0.6301215826832525, "grad_norm": 0.3351782262325287, "learning_rate": 6.593852457446242e-05, "loss": 0.1543, "step": 44830 }, { "epoch": 0.6302621406985733, "grad_norm": 0.3094533681869507, "learning_rate": 6.589482555126461e-05, "loss": 0.1765, "step": 44840 }, { "epoch": 0.6304026987138942, "grad_norm": 0.4538188576698303, "learning_rate": 6.58511338968143e-05, "loss": 0.1815, "step": 44850 }, { "epoch": 0.630543256729215, "grad_norm": 0.47430428862571716, "learning_rate": 6.580744962055145e-05, "loss": 0.1754, "step": 44860 }, { "epoch": 0.6306838147445358, "grad_norm": 0.3575490117073059, "learning_rate": 6.576377273191447e-05, "loss": 0.1666, "step": 44870 }, { "epoch": 0.6308243727598566, "grad_norm": 0.4652651250362396, "learning_rate": 6.572010324034016e-05, "loss": 0.18, "step": 44880 }, { "epoch": 0.6309649307751775, "grad_norm": 0.25143569707870483, "learning_rate": 6.567644115526374e-05, "loss": 0.1638, "step": 44890 }, { "epoch": 0.6311054887904983, "grad_norm": 0.40408599376678467, "learning_rate": 6.563278648611884e-05, "loss": 0.1695, "step": 44900 }, { "epoch": 0.6312460468058191, "grad_norm": 0.5499407052993774, "learning_rate": 6.558913924233743e-05, "loss": 0.1978, "step": 44910 }, { "epoch": 0.63138660482114, "grad_norm": 0.38111111521720886, "learning_rate": 6.554549943334992e-05, "loss": 0.1811, "step": 44920 }, { "epoch": 0.6315271628364607, "grad_norm": 0.3579551875591278, "learning_rate": 6.550186706858513e-05, "loss": 0.172, "step": 44930 }, { "epoch": 0.6316677208517816, "grad_norm": 0.34631818532943726, "learning_rate": 6.54582421574702e-05, "loss": 0.1627, "step": 44940 }, { "epoch": 0.6318082788671024, "grad_norm": 0.4256238043308258, "learning_rate": 6.541462470943076e-05, "loss": 0.1887, "step": 44950 }, { "epoch": 0.6319488368824232, "grad_norm": 0.4322136640548706, "learning_rate": 6.537101473389075e-05, "loss": 0.1734, "step": 44960 }, { "epoch": 0.6320893948977441, "grad_norm": 0.3775302469730377, "learning_rate": 6.532741224027252e-05, "loss": 0.1997, "step": 44970 }, { "epoch": 0.6322299529130648, "grad_norm": 0.34045374393463135, "learning_rate": 6.528381723799682e-05, "loss": 0.1798, "step": 44980 }, { "epoch": 0.6323705109283857, "grad_norm": 0.30288296937942505, "learning_rate": 6.524022973648273e-05, "loss": 0.1743, "step": 44990 }, { "epoch": 0.6325110689437066, "grad_norm": 0.35040807723999023, "learning_rate": 6.519664974514778e-05, "loss": 0.1677, "step": 45000 }, { "epoch": 0.6325110689437066, "eval_chrf": 80.21349446857397, "eval_loss": 0.36095374822616577, "eval_runtime": 256.1043, "eval_samples_per_second": 0.39, "eval_steps_per_second": 0.016, "step": 45000 }, { "epoch": 0.6326516269590273, "grad_norm": 0.31671202182769775, "learning_rate": 6.515307727340782e-05, "loss": 0.1726, "step": 45010 }, { "epoch": 0.6327921849743482, "grad_norm": 0.2751798629760742, "learning_rate": 6.510951233067711e-05, "loss": 0.1577, "step": 45020 }, { "epoch": 0.632932742989669, "grad_norm": 0.34134364128112793, "learning_rate": 6.506595492636827e-05, "loss": 0.1356, "step": 45030 }, { "epoch": 0.6330733010049898, "grad_norm": 0.37104201316833496, "learning_rate": 6.502240506989228e-05, "loss": 0.1797, "step": 45040 }, { "epoch": 0.6332138590203107, "grad_norm": 0.3534786105155945, "learning_rate": 6.49788627706585e-05, "loss": 0.1464, "step": 45050 }, { "epoch": 0.6333544170356314, "grad_norm": 0.3754238784313202, "learning_rate": 6.493532803807468e-05, "loss": 0.1658, "step": 45060 }, { "epoch": 0.6334949750509523, "grad_norm": 0.3704669177532196, "learning_rate": 6.48918008815469e-05, "loss": 0.18, "step": 45070 }, { "epoch": 0.6336355330662731, "grad_norm": 0.5426111817359924, "learning_rate": 6.48482813104796e-05, "loss": 0.1762, "step": 45080 }, { "epoch": 0.6337760910815939, "grad_norm": 0.39849957823753357, "learning_rate": 6.480476933427562e-05, "loss": 0.166, "step": 45090 }, { "epoch": 0.6339166490969147, "grad_norm": 0.4700013995170593, "learning_rate": 6.476126496233611e-05, "loss": 0.1613, "step": 45100 }, { "epoch": 0.6340572071122356, "grad_norm": 0.30370020866394043, "learning_rate": 6.471776820406066e-05, "loss": 0.1578, "step": 45110 }, { "epoch": 0.6341977651275564, "grad_norm": 0.38362917304039, "learning_rate": 6.467427906884709e-05, "loss": 0.1761, "step": 45120 }, { "epoch": 0.6343383231428772, "grad_norm": 0.3319981098175049, "learning_rate": 6.46307975660917e-05, "loss": 0.1836, "step": 45130 }, { "epoch": 0.6344788811581981, "grad_norm": 0.3726891875267029, "learning_rate": 6.458732370518903e-05, "loss": 0.1803, "step": 45140 }, { "epoch": 0.6346194391735188, "grad_norm": 0.49176207184791565, "learning_rate": 6.454385749553205e-05, "loss": 0.1562, "step": 45150 }, { "epoch": 0.6347599971888397, "grad_norm": 0.4453577697277069, "learning_rate": 6.450039894651205e-05, "loss": 0.1788, "step": 45160 }, { "epoch": 0.6349005552041606, "grad_norm": 0.3720984160900116, "learning_rate": 6.445694806751868e-05, "loss": 0.1886, "step": 45170 }, { "epoch": 0.6350411132194813, "grad_norm": 0.37454113364219666, "learning_rate": 6.441350486793986e-05, "loss": 0.1628, "step": 45180 }, { "epoch": 0.6351816712348022, "grad_norm": 0.3713572025299072, "learning_rate": 6.437006935716195e-05, "loss": 0.1683, "step": 45190 }, { "epoch": 0.6353222292501229, "grad_norm": 0.3631324768066406, "learning_rate": 6.43266415445696e-05, "loss": 0.1829, "step": 45200 }, { "epoch": 0.6354627872654438, "grad_norm": 0.39969420433044434, "learning_rate": 6.42832214395458e-05, "loss": 0.1791, "step": 45210 }, { "epoch": 0.6356033452807647, "grad_norm": 0.35384640097618103, "learning_rate": 6.423980905147187e-05, "loss": 0.1793, "step": 45220 }, { "epoch": 0.6357439032960854, "grad_norm": 0.3424532115459442, "learning_rate": 6.419640438972748e-05, "loss": 0.1981, "step": 45230 }, { "epoch": 0.6358844613114063, "grad_norm": 0.3682054579257965, "learning_rate": 6.415300746369062e-05, "loss": 0.1606, "step": 45240 }, { "epoch": 0.6360250193267271, "grad_norm": 0.44392141699790955, "learning_rate": 6.410961828273758e-05, "loss": 0.1649, "step": 45250 }, { "epoch": 0.6361655773420479, "grad_norm": 0.43493548035621643, "learning_rate": 6.406623685624304e-05, "loss": 0.1514, "step": 45260 }, { "epoch": 0.6363061353573688, "grad_norm": 0.37985971570014954, "learning_rate": 6.402286319357997e-05, "loss": 0.1608, "step": 45270 }, { "epoch": 0.6364466933726896, "grad_norm": 0.3957103490829468, "learning_rate": 6.397949730411966e-05, "loss": 0.1817, "step": 45280 }, { "epoch": 0.6365872513880104, "grad_norm": 0.38979092240333557, "learning_rate": 6.393613919723168e-05, "loss": 0.188, "step": 45290 }, { "epoch": 0.6367278094033312, "grad_norm": 0.41796258091926575, "learning_rate": 6.389278888228402e-05, "loss": 0.1813, "step": 45300 }, { "epoch": 0.636868367418652, "grad_norm": 0.48477429151535034, "learning_rate": 6.38494463686429e-05, "loss": 0.1702, "step": 45310 }, { "epoch": 0.6370089254339729, "grad_norm": 0.45257049798965454, "learning_rate": 6.380611166567288e-05, "loss": 0.1675, "step": 45320 }, { "epoch": 0.6371494834492937, "grad_norm": 0.41892340779304504, "learning_rate": 6.376278478273685e-05, "loss": 0.1669, "step": 45330 }, { "epoch": 0.6372900414646145, "grad_norm": 0.36784976720809937, "learning_rate": 6.371946572919598e-05, "loss": 0.1787, "step": 45340 }, { "epoch": 0.6374305994799353, "grad_norm": 0.4263574779033661, "learning_rate": 6.367615451440976e-05, "loss": 0.1565, "step": 45350 }, { "epoch": 0.6375711574952562, "grad_norm": 0.6635227203369141, "learning_rate": 6.3632851147736e-05, "loss": 0.1909, "step": 45360 }, { "epoch": 0.637711715510577, "grad_norm": 0.5651838183403015, "learning_rate": 6.358955563853082e-05, "loss": 0.1652, "step": 45370 }, { "epoch": 0.6378522735258978, "grad_norm": 0.3701779544353485, "learning_rate": 6.35462679961486e-05, "loss": 0.1761, "step": 45380 }, { "epoch": 0.6379928315412187, "grad_norm": 0.30128878355026245, "learning_rate": 6.350298822994207e-05, "loss": 0.1413, "step": 45390 }, { "epoch": 0.6381333895565394, "grad_norm": 0.44980573654174805, "learning_rate": 6.345971634926222e-05, "loss": 0.1779, "step": 45400 }, { "epoch": 0.6382739475718603, "grad_norm": 0.3776741325855255, "learning_rate": 6.341645236345836e-05, "loss": 0.1827, "step": 45410 }, { "epoch": 0.6384145055871812, "grad_norm": 0.4075419306755066, "learning_rate": 6.337319628187813e-05, "loss": 0.1579, "step": 45420 }, { "epoch": 0.6385550636025019, "grad_norm": 0.45170167088508606, "learning_rate": 6.33299481138673e-05, "loss": 0.1862, "step": 45430 }, { "epoch": 0.6386956216178228, "grad_norm": 0.42756417393684387, "learning_rate": 6.328670786877013e-05, "loss": 0.1575, "step": 45440 }, { "epoch": 0.6388361796331435, "grad_norm": 0.4477543830871582, "learning_rate": 6.324347555592909e-05, "loss": 0.1777, "step": 45450 }, { "epoch": 0.6389767376484644, "grad_norm": 0.34204620122909546, "learning_rate": 6.320025118468491e-05, "loss": 0.166, "step": 45460 }, { "epoch": 0.6391172956637853, "grad_norm": 0.4834466874599457, "learning_rate": 6.315703476437662e-05, "loss": 0.1582, "step": 45470 }, { "epoch": 0.639257853679106, "grad_norm": 0.399301141500473, "learning_rate": 6.311382630434158e-05, "loss": 0.157, "step": 45480 }, { "epoch": 0.6393984116944269, "grad_norm": 0.43653443455696106, "learning_rate": 6.307062581391533e-05, "loss": 0.1949, "step": 45490 }, { "epoch": 0.6395389697097477, "grad_norm": 0.3309311866760254, "learning_rate": 6.30274333024318e-05, "loss": 0.1794, "step": 45500 }, { "epoch": 0.6396795277250685, "grad_norm": 0.3255656659603119, "learning_rate": 6.298424877922311e-05, "loss": 0.1794, "step": 45510 }, { "epoch": 0.6398200857403894, "grad_norm": 0.3199480473995209, "learning_rate": 6.294107225361969e-05, "loss": 0.1477, "step": 45520 }, { "epoch": 0.6399606437557102, "grad_norm": 0.33728545904159546, "learning_rate": 6.289790373495027e-05, "loss": 0.1704, "step": 45530 }, { "epoch": 0.640101201771031, "grad_norm": 0.41263148188591003, "learning_rate": 6.285474323254179e-05, "loss": 0.1498, "step": 45540 }, { "epoch": 0.6402417597863518, "grad_norm": 0.45552393794059753, "learning_rate": 6.281159075571948e-05, "loss": 0.1771, "step": 45550 }, { "epoch": 0.6403823178016727, "grad_norm": 0.3347470462322235, "learning_rate": 6.276844631380687e-05, "loss": 0.1627, "step": 45560 }, { "epoch": 0.6405228758169934, "grad_norm": 0.4019238352775574, "learning_rate": 6.27253099161257e-05, "loss": 0.1787, "step": 45570 }, { "epoch": 0.6406634338323143, "grad_norm": 0.40482115745544434, "learning_rate": 6.268218157199604e-05, "loss": 0.1616, "step": 45580 }, { "epoch": 0.6408039918476351, "grad_norm": 0.4995248019695282, "learning_rate": 6.263906129073614e-05, "loss": 0.1627, "step": 45590 }, { "epoch": 0.6409445498629559, "grad_norm": 0.471030592918396, "learning_rate": 6.259594908166255e-05, "loss": 0.1831, "step": 45600 }, { "epoch": 0.6410851078782768, "grad_norm": 0.32379043102264404, "learning_rate": 6.25528449540901e-05, "loss": 0.1687, "step": 45610 }, { "epoch": 0.6412256658935975, "grad_norm": 0.36522582173347473, "learning_rate": 6.250974891733182e-05, "loss": 0.1673, "step": 45620 }, { "epoch": 0.6413662239089184, "grad_norm": 0.2776927649974823, "learning_rate": 6.246666098069904e-05, "loss": 0.1723, "step": 45630 }, { "epoch": 0.6415067819242393, "grad_norm": 0.43431001901626587, "learning_rate": 6.242358115350128e-05, "loss": 0.1905, "step": 45640 }, { "epoch": 0.64164733993956, "grad_norm": 0.38113686442375183, "learning_rate": 6.23805094450464e-05, "loss": 0.1844, "step": 45650 }, { "epoch": 0.6417878979548809, "grad_norm": 0.369980126619339, "learning_rate": 6.233744586464042e-05, "loss": 0.1753, "step": 45660 }, { "epoch": 0.6419284559702017, "grad_norm": 0.4596186578273773, "learning_rate": 6.229439042158765e-05, "loss": 0.1886, "step": 45670 }, { "epoch": 0.6420690139855225, "grad_norm": 0.40157437324523926, "learning_rate": 6.225134312519061e-05, "loss": 0.1618, "step": 45680 }, { "epoch": 0.6422095720008434, "grad_norm": 0.4568968713283539, "learning_rate": 6.220830398475009e-05, "loss": 0.1746, "step": 45690 }, { "epoch": 0.6423501300161641, "grad_norm": 0.4889320731163025, "learning_rate": 6.216527300956512e-05, "loss": 0.1655, "step": 45700 }, { "epoch": 0.642490688031485, "grad_norm": 0.34276077151298523, "learning_rate": 6.212225020893292e-05, "loss": 0.1794, "step": 45710 }, { "epoch": 0.6426312460468058, "grad_norm": 0.35472655296325684, "learning_rate": 6.207923559214899e-05, "loss": 0.146, "step": 45720 }, { "epoch": 0.6427718040621266, "grad_norm": 0.36485761404037476, "learning_rate": 6.203622916850706e-05, "loss": 0.1783, "step": 45730 }, { "epoch": 0.6429123620774475, "grad_norm": 0.46038782596588135, "learning_rate": 6.199323094729905e-05, "loss": 0.1774, "step": 45740 }, { "epoch": 0.6430529200927683, "grad_norm": 0.3016781508922577, "learning_rate": 6.195024093781518e-05, "loss": 0.1374, "step": 45750 }, { "epoch": 0.6431934781080891, "grad_norm": 0.4437497556209564, "learning_rate": 6.19072591493438e-05, "loss": 0.165, "step": 45760 }, { "epoch": 0.6433340361234099, "grad_norm": 0.5395594239234924, "learning_rate": 6.186428559117156e-05, "loss": 0.1723, "step": 45770 }, { "epoch": 0.6434745941387308, "grad_norm": 0.3817726969718933, "learning_rate": 6.18213202725833e-05, "loss": 0.1703, "step": 45780 }, { "epoch": 0.6436151521540516, "grad_norm": 0.36339154839515686, "learning_rate": 6.17783632028621e-05, "loss": 0.1683, "step": 45790 }, { "epoch": 0.6437557101693724, "grad_norm": 0.4736917018890381, "learning_rate": 6.173541439128923e-05, "loss": 0.1922, "step": 45800 }, { "epoch": 0.6438962681846933, "grad_norm": 0.4476756453514099, "learning_rate": 6.169247384714417e-05, "loss": 0.1716, "step": 45810 }, { "epoch": 0.644036826200014, "grad_norm": 0.4236280918121338, "learning_rate": 6.164954157970467e-05, "loss": 0.1825, "step": 45820 }, { "epoch": 0.6441773842153349, "grad_norm": 0.4264964163303375, "learning_rate": 6.160661759824664e-05, "loss": 0.1683, "step": 45830 }, { "epoch": 0.6443179422306557, "grad_norm": 0.33900779485702515, "learning_rate": 6.156370191204421e-05, "loss": 0.1681, "step": 45840 }, { "epoch": 0.6444585002459765, "grad_norm": 0.3525594472885132, "learning_rate": 6.152079453036974e-05, "loss": 0.1855, "step": 45850 }, { "epoch": 0.6445990582612974, "grad_norm": 0.4360409379005432, "learning_rate": 6.147789546249375e-05, "loss": 0.1769, "step": 45860 }, { "epoch": 0.6447396162766181, "grad_norm": 0.45855650305747986, "learning_rate": 6.143500471768502e-05, "loss": 0.1655, "step": 45870 }, { "epoch": 0.644880174291939, "grad_norm": 0.4100463390350342, "learning_rate": 6.139212230521047e-05, "loss": 0.1807, "step": 45880 }, { "epoch": 0.6450207323072599, "grad_norm": 0.4619031548500061, "learning_rate": 6.13492482343353e-05, "loss": 0.1843, "step": 45890 }, { "epoch": 0.6451612903225806, "grad_norm": 0.3914530873298645, "learning_rate": 6.130638251432281e-05, "loss": 0.1784, "step": 45900 }, { "epoch": 0.6453018483379015, "grad_norm": 0.3291589021682739, "learning_rate": 6.126352515443457e-05, "loss": 0.171, "step": 45910 }, { "epoch": 0.6454424063532223, "grad_norm": 0.3202427327632904, "learning_rate": 6.122067616393035e-05, "loss": 0.1779, "step": 45920 }, { "epoch": 0.6455829643685431, "grad_norm": 0.37621304392814636, "learning_rate": 6.117783555206802e-05, "loss": 0.1576, "step": 45930 }, { "epoch": 0.645723522383864, "grad_norm": 0.3314257860183716, "learning_rate": 6.113500332810376e-05, "loss": 0.2038, "step": 45940 }, { "epoch": 0.6458640803991847, "grad_norm": 0.3285437822341919, "learning_rate": 6.109217950129183e-05, "loss": 0.1762, "step": 45950 }, { "epoch": 0.6460046384145056, "grad_norm": 0.3959253430366516, "learning_rate": 6.104936408088478e-05, "loss": 0.1664, "step": 45960 }, { "epoch": 0.6461451964298264, "grad_norm": 0.46875542402267456, "learning_rate": 6.100655707613323e-05, "loss": 0.1462, "step": 45970 }, { "epoch": 0.6462857544451472, "grad_norm": 0.3809106647968292, "learning_rate": 6.096375849628608e-05, "loss": 0.1649, "step": 45980 }, { "epoch": 0.6464263124604681, "grad_norm": 0.3985312581062317, "learning_rate": 6.092096835059038e-05, "loss": 0.1776, "step": 45990 }, { "epoch": 0.6465668704757889, "grad_norm": 0.32413896918296814, "learning_rate": 6.08781866482913e-05, "loss": 0.1569, "step": 46000 }, { "epoch": 0.6465668704757889, "eval_chrf": 82.38197137761941, "eval_loss": 0.3631060719490051, "eval_runtime": 276.4599, "eval_samples_per_second": 0.362, "eval_steps_per_second": 0.014, "step": 46000 }, { "epoch": 0.6467074284911097, "grad_norm": 0.4690287113189697, "learning_rate": 6.083541339863228e-05, "loss": 0.1541, "step": 46010 }, { "epoch": 0.6468479865064305, "grad_norm": 0.40396568179130554, "learning_rate": 6.0792648610854875e-05, "loss": 0.1696, "step": 46020 }, { "epoch": 0.6469885445217514, "grad_norm": 0.43039193749427795, "learning_rate": 6.0749892294198806e-05, "loss": 0.1696, "step": 46030 }, { "epoch": 0.6471291025370722, "grad_norm": 0.33346179127693176, "learning_rate": 6.0707144457902e-05, "loss": 0.1659, "step": 46040 }, { "epoch": 0.647269660552393, "grad_norm": 0.2777289152145386, "learning_rate": 6.066440511120053e-05, "loss": 0.1637, "step": 46050 }, { "epoch": 0.6474102185677139, "grad_norm": 0.35530346632003784, "learning_rate": 6.062167426332863e-05, "loss": 0.1926, "step": 46060 }, { "epoch": 0.6475507765830346, "grad_norm": 0.33783388137817383, "learning_rate": 6.0578951923518725e-05, "loss": 0.1676, "step": 46070 }, { "epoch": 0.6476913345983555, "grad_norm": 0.2598033845424652, "learning_rate": 6.053623810100135e-05, "loss": 0.186, "step": 46080 }, { "epoch": 0.6478318926136762, "grad_norm": 0.3518681228160858, "learning_rate": 6.0493532805005294e-05, "loss": 0.1792, "step": 46090 }, { "epoch": 0.6479724506289971, "grad_norm": 0.4977501630783081, "learning_rate": 6.045083604475739e-05, "loss": 0.1836, "step": 46100 }, { "epoch": 0.648113008644318, "grad_norm": 0.5222976803779602, "learning_rate": 6.040814782948267e-05, "loss": 0.1719, "step": 46110 }, { "epoch": 0.6482535666596387, "grad_norm": 0.5117230415344238, "learning_rate": 6.036546816840438e-05, "loss": 0.1886, "step": 46120 }, { "epoch": 0.6483941246749596, "grad_norm": 0.41326743364334106, "learning_rate": 6.032279707074382e-05, "loss": 0.182, "step": 46130 }, { "epoch": 0.6485346826902804, "grad_norm": 0.3465782701969147, "learning_rate": 6.028013454572051e-05, "loss": 0.1791, "step": 46140 }, { "epoch": 0.6486752407056012, "grad_norm": 0.44033685326576233, "learning_rate": 6.0237480602552085e-05, "loss": 0.214, "step": 46150 }, { "epoch": 0.6488157987209221, "grad_norm": 0.3798646926879883, "learning_rate": 6.019483525045435e-05, "loss": 0.1723, "step": 46160 }, { "epoch": 0.6489563567362429, "grad_norm": 0.49568066000938416, "learning_rate": 6.015219849864122e-05, "loss": 0.1687, "step": 46170 }, { "epoch": 0.6490969147515637, "grad_norm": 0.28596997261047363, "learning_rate": 6.010957035632479e-05, "loss": 0.174, "step": 46180 }, { "epoch": 0.6492374727668845, "grad_norm": 0.3705233037471771, "learning_rate": 6.0066950832715254e-05, "loss": 0.1724, "step": 46190 }, { "epoch": 0.6493780307822054, "grad_norm": 0.392262727022171, "learning_rate": 6.002433993702099e-05, "loss": 0.1595, "step": 46200 }, { "epoch": 0.6495185887975262, "grad_norm": 0.3774138391017914, "learning_rate": 5.998173767844847e-05, "loss": 0.1484, "step": 46210 }, { "epoch": 0.649659146812847, "grad_norm": 0.3614377975463867, "learning_rate": 5.993914406620231e-05, "loss": 0.1693, "step": 46220 }, { "epoch": 0.6497997048281678, "grad_norm": 0.37364089488983154, "learning_rate": 5.98965591094853e-05, "loss": 0.1599, "step": 46230 }, { "epoch": 0.6499402628434886, "grad_norm": 0.4179651439189911, "learning_rate": 5.98539828174983e-05, "loss": 0.1796, "step": 46240 }, { "epoch": 0.6500808208588095, "grad_norm": 0.38695430755615234, "learning_rate": 5.981141519944031e-05, "loss": 0.1643, "step": 46250 }, { "epoch": 0.6502213788741303, "grad_norm": 0.5926967263221741, "learning_rate": 5.97688562645085e-05, "loss": 0.1841, "step": 46260 }, { "epoch": 0.6503619368894511, "grad_norm": 0.3806653320789337, "learning_rate": 5.9726306021898106e-05, "loss": 0.1817, "step": 46270 }, { "epoch": 0.650502494904772, "grad_norm": 0.3245481550693512, "learning_rate": 5.9683764480802526e-05, "loss": 0.1733, "step": 46280 }, { "epoch": 0.6506430529200927, "grad_norm": 0.33144858479499817, "learning_rate": 5.964123165041327e-05, "loss": 0.1667, "step": 46290 }, { "epoch": 0.6507836109354136, "grad_norm": 0.41733354330062866, "learning_rate": 5.959870753991995e-05, "loss": 0.1931, "step": 46300 }, { "epoch": 0.6509241689507345, "grad_norm": 0.45859894156455994, "learning_rate": 5.955619215851031e-05, "loss": 0.166, "step": 46310 }, { "epoch": 0.6510647269660552, "grad_norm": 0.39740046858787537, "learning_rate": 5.95136855153702e-05, "loss": 0.1539, "step": 46320 }, { "epoch": 0.6512052849813761, "grad_norm": 0.4183885455131531, "learning_rate": 5.947118761968359e-05, "loss": 0.1568, "step": 46330 }, { "epoch": 0.6513458429966968, "grad_norm": 0.369146466255188, "learning_rate": 5.942869848063255e-05, "loss": 0.1804, "step": 46340 }, { "epoch": 0.6514864010120177, "grad_norm": 0.3931564390659332, "learning_rate": 5.9386218107397266e-05, "loss": 0.1845, "step": 46350 }, { "epoch": 0.6516269590273386, "grad_norm": 0.43380603194236755, "learning_rate": 5.9343746509156016e-05, "loss": 0.1913, "step": 46360 }, { "epoch": 0.6517675170426593, "grad_norm": 0.3476782441139221, "learning_rate": 5.9301283695085205e-05, "loss": 0.169, "step": 46370 }, { "epoch": 0.6519080750579802, "grad_norm": 0.40904736518859863, "learning_rate": 5.925882967435932e-05, "loss": 0.1661, "step": 46380 }, { "epoch": 0.652048633073301, "grad_norm": 0.3820328712463379, "learning_rate": 5.9216384456150966e-05, "loss": 0.1722, "step": 46390 }, { "epoch": 0.6521891910886218, "grad_norm": 0.427290141582489, "learning_rate": 5.9173948049630836e-05, "loss": 0.1763, "step": 46400 }, { "epoch": 0.6523297491039427, "grad_norm": 0.3932560682296753, "learning_rate": 5.913152046396771e-05, "loss": 0.1875, "step": 46410 }, { "epoch": 0.6524703071192635, "grad_norm": 0.37783360481262207, "learning_rate": 5.908910170832846e-05, "loss": 0.1606, "step": 46420 }, { "epoch": 0.6526108651345843, "grad_norm": 0.35456210374832153, "learning_rate": 5.9046691791878106e-05, "loss": 0.1847, "step": 46430 }, { "epoch": 0.6527514231499051, "grad_norm": 0.4274060130119324, "learning_rate": 5.9004290723779665e-05, "loss": 0.1705, "step": 46440 }, { "epoch": 0.652891981165226, "grad_norm": 0.41579386591911316, "learning_rate": 5.8961898513194335e-05, "loss": 0.1713, "step": 46450 }, { "epoch": 0.6530325391805468, "grad_norm": 0.41511207818984985, "learning_rate": 5.8919515169281315e-05, "loss": 0.1773, "step": 46460 }, { "epoch": 0.6531730971958676, "grad_norm": 0.3858306109905243, "learning_rate": 5.887714070119794e-05, "loss": 0.1742, "step": 46470 }, { "epoch": 0.6533136552111884, "grad_norm": 0.3599134087562561, "learning_rate": 5.883477511809965e-05, "loss": 0.1605, "step": 46480 }, { "epoch": 0.6534542132265092, "grad_norm": 0.4521912932395935, "learning_rate": 5.87924184291399e-05, "loss": 0.16, "step": 46490 }, { "epoch": 0.6535947712418301, "grad_norm": 0.4453141689300537, "learning_rate": 5.8750070643470245e-05, "loss": 0.1583, "step": 46500 }, { "epoch": 0.6537353292571509, "grad_norm": 0.34592360258102417, "learning_rate": 5.870773177024036e-05, "loss": 0.1864, "step": 46510 }, { "epoch": 0.6538758872724717, "grad_norm": 0.32892781496047974, "learning_rate": 5.8665401818597955e-05, "loss": 0.1605, "step": 46520 }, { "epoch": 0.6540164452877926, "grad_norm": 0.3231342136859894, "learning_rate": 5.86230807976888e-05, "loss": 0.1699, "step": 46530 }, { "epoch": 0.6541570033031133, "grad_norm": 0.45281505584716797, "learning_rate": 5.858076871665676e-05, "loss": 0.1741, "step": 46540 }, { "epoch": 0.6542975613184342, "grad_norm": 0.47716307640075684, "learning_rate": 5.8538465584643776e-05, "loss": 0.1722, "step": 46550 }, { "epoch": 0.654438119333755, "grad_norm": 0.38653871417045593, "learning_rate": 5.849617141078985e-05, "loss": 0.1508, "step": 46560 }, { "epoch": 0.6545786773490758, "grad_norm": 0.3510001003742218, "learning_rate": 5.8453886204233e-05, "loss": 0.1739, "step": 46570 }, { "epoch": 0.6547192353643967, "grad_norm": 0.39627891778945923, "learning_rate": 5.841160997410937e-05, "loss": 0.1895, "step": 46580 }, { "epoch": 0.6548597933797174, "grad_norm": 0.5046212077140808, "learning_rate": 5.8369342729553176e-05, "loss": 0.149, "step": 46590 }, { "epoch": 0.6550003513950383, "grad_norm": 0.3888484537601471, "learning_rate": 5.8327084479696593e-05, "loss": 0.1654, "step": 46600 }, { "epoch": 0.6551409094103591, "grad_norm": 0.2727382779121399, "learning_rate": 5.828483523366997e-05, "loss": 0.1564, "step": 46610 }, { "epoch": 0.6552814674256799, "grad_norm": 0.3435800075531006, "learning_rate": 5.824259500060162e-05, "loss": 0.17, "step": 46620 }, { "epoch": 0.6554220254410008, "grad_norm": 0.33127716183662415, "learning_rate": 5.8200363789617996e-05, "loss": 0.1951, "step": 46630 }, { "epoch": 0.6555625834563216, "grad_norm": 0.4050454795360565, "learning_rate": 5.815814160984349e-05, "loss": 0.153, "step": 46640 }, { "epoch": 0.6557031414716424, "grad_norm": 0.3367047607898712, "learning_rate": 5.811592847040067e-05, "loss": 0.1765, "step": 46650 }, { "epoch": 0.6558436994869632, "grad_norm": 0.4014679193496704, "learning_rate": 5.807372438041003e-05, "loss": 0.157, "step": 46660 }, { "epoch": 0.6559842575022841, "grad_norm": 0.32407405972480774, "learning_rate": 5.803152934899022e-05, "loss": 0.1822, "step": 46670 }, { "epoch": 0.6561248155176049, "grad_norm": 0.5178097486495972, "learning_rate": 5.798934338525781e-05, "loss": 0.1719, "step": 46680 }, { "epoch": 0.6562653735329257, "grad_norm": 0.36699798703193665, "learning_rate": 5.794716649832756e-05, "loss": 0.1617, "step": 46690 }, { "epoch": 0.6564059315482466, "grad_norm": 0.3077496588230133, "learning_rate": 5.790499869731212e-05, "loss": 0.1518, "step": 46700 }, { "epoch": 0.6565464895635673, "grad_norm": 0.393049418926239, "learning_rate": 5.786283999132229e-05, "loss": 0.1715, "step": 46710 }, { "epoch": 0.6566870475788882, "grad_norm": 0.3637532889842987, "learning_rate": 5.782069038946681e-05, "loss": 0.1926, "step": 46720 }, { "epoch": 0.656827605594209, "grad_norm": 0.3915221393108368, "learning_rate": 5.777854990085258e-05, "loss": 0.1512, "step": 46730 }, { "epoch": 0.6569681636095298, "grad_norm": 0.27357760071754456, "learning_rate": 5.773641853458436e-05, "loss": 0.1722, "step": 46740 }, { "epoch": 0.6571087216248507, "grad_norm": 0.49531319737434387, "learning_rate": 5.7694296299765106e-05, "loss": 0.1715, "step": 46750 }, { "epoch": 0.6572492796401714, "grad_norm": 0.3616010248661041, "learning_rate": 5.7652183205495726e-05, "loss": 0.153, "step": 46760 }, { "epoch": 0.6573898376554923, "grad_norm": 0.3260374963283539, "learning_rate": 5.761007926087507e-05, "loss": 0.1732, "step": 46770 }, { "epoch": 0.6575303956708132, "grad_norm": 0.3030858039855957, "learning_rate": 5.7567984475000205e-05, "loss": 0.1587, "step": 46780 }, { "epoch": 0.6576709536861339, "grad_norm": 0.29215386509895325, "learning_rate": 5.752589885696602e-05, "loss": 0.1574, "step": 46790 }, { "epoch": 0.6578115117014548, "grad_norm": 0.4393480122089386, "learning_rate": 5.7483822415865584e-05, "loss": 0.1534, "step": 46800 }, { "epoch": 0.6579520697167756, "grad_norm": 0.3719373345375061, "learning_rate": 5.7441755160789845e-05, "loss": 0.1771, "step": 46810 }, { "epoch": 0.6580926277320964, "grad_norm": 0.3512132465839386, "learning_rate": 5.739969710082792e-05, "loss": 0.1745, "step": 46820 }, { "epoch": 0.6582331857474173, "grad_norm": 0.42287617921829224, "learning_rate": 5.7357648245066756e-05, "loss": 0.1796, "step": 46830 }, { "epoch": 0.658373743762738, "grad_norm": 0.39335954189300537, "learning_rate": 5.731560860259151e-05, "loss": 0.1852, "step": 46840 }, { "epoch": 0.6585143017780589, "grad_norm": 0.29379117488861084, "learning_rate": 5.727357818248513e-05, "loss": 0.1512, "step": 46850 }, { "epoch": 0.6586548597933797, "grad_norm": 0.34768614172935486, "learning_rate": 5.723155699382881e-05, "loss": 0.1621, "step": 46860 }, { "epoch": 0.6587954178087005, "grad_norm": 0.4134244918823242, "learning_rate": 5.718954504570152e-05, "loss": 0.1711, "step": 46870 }, { "epoch": 0.6589359758240214, "grad_norm": 0.4068963825702667, "learning_rate": 5.714754234718044e-05, "loss": 0.2003, "step": 46880 }, { "epoch": 0.6590765338393422, "grad_norm": 0.3430961072444916, "learning_rate": 5.710554890734054e-05, "loss": 0.1627, "step": 46890 }, { "epoch": 0.659217091854663, "grad_norm": 0.4104665517807007, "learning_rate": 5.7063564735255025e-05, "loss": 0.155, "step": 46900 }, { "epoch": 0.6593576498699838, "grad_norm": 0.464220255613327, "learning_rate": 5.702158983999488e-05, "loss": 0.1849, "step": 46910 }, { "epoch": 0.6594982078853047, "grad_norm": 0.37425777316093445, "learning_rate": 5.697962423062925e-05, "loss": 0.1815, "step": 46920 }, { "epoch": 0.6596387659006255, "grad_norm": 0.3134264051914215, "learning_rate": 5.693766791622512e-05, "loss": 0.1735, "step": 46930 }, { "epoch": 0.6597793239159463, "grad_norm": 0.3093084990978241, "learning_rate": 5.689572090584765e-05, "loss": 0.1631, "step": 46940 }, { "epoch": 0.6599198819312672, "grad_norm": 0.4233850836753845, "learning_rate": 5.685378320855981e-05, "loss": 0.1952, "step": 46950 }, { "epoch": 0.6600604399465879, "grad_norm": 0.3595212697982788, "learning_rate": 5.68118548334227e-05, "loss": 0.179, "step": 46960 }, { "epoch": 0.6602009979619088, "grad_norm": 0.3647650480270386, "learning_rate": 5.676993578949527e-05, "loss": 0.1656, "step": 46970 }, { "epoch": 0.6603415559772297, "grad_norm": 0.4111272394657135, "learning_rate": 5.6728026085834606e-05, "loss": 0.2024, "step": 46980 }, { "epoch": 0.6604821139925504, "grad_norm": 0.48057103157043457, "learning_rate": 5.668612573149564e-05, "loss": 0.1677, "step": 46990 }, { "epoch": 0.6606226720078713, "grad_norm": 0.37313178181648254, "learning_rate": 5.664423473553141e-05, "loss": 0.1945, "step": 47000 }, { "epoch": 0.6606226720078713, "eval_chrf": 78.54251639053005, "eval_loss": 0.37721508741378784, "eval_runtime": 258.2804, "eval_samples_per_second": 0.387, "eval_steps_per_second": 0.015, "step": 47000 }, { "epoch": 0.660763230023192, "grad_norm": 0.39650222659111023, "learning_rate": 5.660235310699278e-05, "loss": 0.1679, "step": 47010 }, { "epoch": 0.6609037880385129, "grad_norm": 0.3072584867477417, "learning_rate": 5.6560480854928745e-05, "loss": 0.1588, "step": 47020 }, { "epoch": 0.6610443460538337, "grad_norm": 0.40000805258750916, "learning_rate": 5.6518617988386135e-05, "loss": 0.1671, "step": 47030 }, { "epoch": 0.6611849040691545, "grad_norm": 0.3673750162124634, "learning_rate": 5.647676451640991e-05, "loss": 0.1754, "step": 47040 }, { "epoch": 0.6613254620844754, "grad_norm": 0.2628951370716095, "learning_rate": 5.643492044804282e-05, "loss": 0.1567, "step": 47050 }, { "epoch": 0.6614660200997962, "grad_norm": 0.4595640301704407, "learning_rate": 5.639308579232576e-05, "loss": 0.1686, "step": 47060 }, { "epoch": 0.661606578115117, "grad_norm": 0.5201691389083862, "learning_rate": 5.63512605582974e-05, "loss": 0.1877, "step": 47070 }, { "epoch": 0.6617471361304378, "grad_norm": 0.39629697799682617, "learning_rate": 5.63094447549946e-05, "loss": 0.1544, "step": 47080 }, { "epoch": 0.6618876941457587, "grad_norm": 0.44209417700767517, "learning_rate": 5.626763839145196e-05, "loss": 0.1511, "step": 47090 }, { "epoch": 0.6620282521610795, "grad_norm": 0.4663390815258026, "learning_rate": 5.6225841476702226e-05, "loss": 0.1534, "step": 47100 }, { "epoch": 0.6621688101764003, "grad_norm": 0.38166019320487976, "learning_rate": 5.618405401977593e-05, "loss": 0.175, "step": 47110 }, { "epoch": 0.6623093681917211, "grad_norm": 0.2989104986190796, "learning_rate": 5.6142276029701746e-05, "loss": 0.1669, "step": 47120 }, { "epoch": 0.6624499262070419, "grad_norm": 0.28844520449638367, "learning_rate": 5.6100507515506103e-05, "loss": 0.1688, "step": 47130 }, { "epoch": 0.6625904842223628, "grad_norm": 0.3244578242301941, "learning_rate": 5.60587484862136e-05, "loss": 0.1666, "step": 47140 }, { "epoch": 0.6627310422376836, "grad_norm": 0.27320772409439087, "learning_rate": 5.6016998950846576e-05, "loss": 0.1865, "step": 47150 }, { "epoch": 0.6628716002530044, "grad_norm": 0.3454894423484802, "learning_rate": 5.5975258918425476e-05, "loss": 0.1937, "step": 47160 }, { "epoch": 0.6630121582683253, "grad_norm": 0.31500983238220215, "learning_rate": 5.593352839796858e-05, "loss": 0.1714, "step": 47170 }, { "epoch": 0.663152716283646, "grad_norm": 0.43122321367263794, "learning_rate": 5.589180739849223e-05, "loss": 0.1715, "step": 47180 }, { "epoch": 0.6632932742989669, "grad_norm": 0.2885819375514984, "learning_rate": 5.5850095929010573e-05, "loss": 0.1891, "step": 47190 }, { "epoch": 0.6634338323142878, "grad_norm": 0.2869061529636383, "learning_rate": 5.5808393998535835e-05, "loss": 0.1577, "step": 47200 }, { "epoch": 0.6635743903296085, "grad_norm": 0.3633897602558136, "learning_rate": 5.576670161607804e-05, "loss": 0.1766, "step": 47210 }, { "epoch": 0.6637149483449294, "grad_norm": 0.40465348958969116, "learning_rate": 5.5725018790645336e-05, "loss": 0.1677, "step": 47220 }, { "epoch": 0.6638555063602501, "grad_norm": 0.3205110728740692, "learning_rate": 5.5683345531243546e-05, "loss": 0.1826, "step": 47230 }, { "epoch": 0.663996064375571, "grad_norm": 0.46258682012557983, "learning_rate": 5.564168184687673e-05, "loss": 0.1679, "step": 47240 }, { "epoch": 0.6641366223908919, "grad_norm": 0.40163615345954895, "learning_rate": 5.560002774654658e-05, "loss": 0.1627, "step": 47250 }, { "epoch": 0.6642771804062126, "grad_norm": 0.37548112869262695, "learning_rate": 5.5558383239252974e-05, "loss": 0.1596, "step": 47260 }, { "epoch": 0.6644177384215335, "grad_norm": 0.5590593218803406, "learning_rate": 5.551674833399353e-05, "loss": 0.1625, "step": 47270 }, { "epoch": 0.6645582964368543, "grad_norm": 0.4699946939945221, "learning_rate": 5.5475123039763924e-05, "loss": 0.2086, "step": 47280 }, { "epoch": 0.6646988544521751, "grad_norm": 0.32331383228302, "learning_rate": 5.5433507365557624e-05, "loss": 0.1633, "step": 47290 }, { "epoch": 0.664839412467496, "grad_norm": 0.3101024925708771, "learning_rate": 5.539190132036618e-05, "loss": 0.1909, "step": 47300 }, { "epoch": 0.6649799704828168, "grad_norm": 0.40582162141799927, "learning_rate": 5.5350304913178884e-05, "loss": 0.1667, "step": 47310 }, { "epoch": 0.6651205284981376, "grad_norm": 0.41470903158187866, "learning_rate": 5.530871815298313e-05, "loss": 0.1964, "step": 47320 }, { "epoch": 0.6652610865134584, "grad_norm": 0.36652326583862305, "learning_rate": 5.526714104876403e-05, "loss": 0.1833, "step": 47330 }, { "epoch": 0.6654016445287793, "grad_norm": 0.4002378284931183, "learning_rate": 5.522557360950482e-05, "loss": 0.215, "step": 47340 }, { "epoch": 0.6655422025441001, "grad_norm": 0.38589349389076233, "learning_rate": 5.518401584418644e-05, "loss": 0.1753, "step": 47350 }, { "epoch": 0.6656827605594209, "grad_norm": 0.395865797996521, "learning_rate": 5.514246776178793e-05, "loss": 0.1807, "step": 47360 }, { "epoch": 0.6658233185747418, "grad_norm": 0.3177359104156494, "learning_rate": 5.5100929371286056e-05, "loss": 0.1795, "step": 47370 }, { "epoch": 0.6659638765900625, "grad_norm": 0.3520699143409729, "learning_rate": 5.505940068165569e-05, "loss": 0.1686, "step": 47380 }, { "epoch": 0.6661044346053834, "grad_norm": 0.331185519695282, "learning_rate": 5.501788170186939e-05, "loss": 0.1641, "step": 47390 }, { "epoch": 0.6662449926207042, "grad_norm": 0.36551252007484436, "learning_rate": 5.497637244089784e-05, "loss": 0.1515, "step": 47400 }, { "epoch": 0.666385550636025, "grad_norm": 0.4092820882797241, "learning_rate": 5.493487290770939e-05, "loss": 0.1857, "step": 47410 }, { "epoch": 0.6665261086513459, "grad_norm": 0.34370318055152893, "learning_rate": 5.489338311127052e-05, "loss": 0.1575, "step": 47420 }, { "epoch": 0.6666666666666666, "grad_norm": 0.3167739808559418, "learning_rate": 5.48519030605454e-05, "loss": 0.184, "step": 47430 }, { "epoch": 0.6668072246819875, "grad_norm": 0.40686434507369995, "learning_rate": 5.481043276449632e-05, "loss": 0.1823, "step": 47440 }, { "epoch": 0.6669477826973084, "grad_norm": 0.3953606188297272, "learning_rate": 5.476897223208317e-05, "loss": 0.1836, "step": 47450 }, { "epoch": 0.6670883407126291, "grad_norm": 0.4484312832355499, "learning_rate": 5.472752147226401e-05, "loss": 0.1475, "step": 47460 }, { "epoch": 0.66722889872795, "grad_norm": 0.41706743836402893, "learning_rate": 5.468608049399457e-05, "loss": 0.1721, "step": 47470 }, { "epoch": 0.6673694567432708, "grad_norm": 0.31754666566848755, "learning_rate": 5.46446493062287e-05, "loss": 0.15, "step": 47480 }, { "epoch": 0.6675100147585916, "grad_norm": 0.4365607500076294, "learning_rate": 5.4603227917917875e-05, "loss": 0.1654, "step": 47490 }, { "epoch": 0.6676505727739125, "grad_norm": 0.3824831545352936, "learning_rate": 5.4561816338011676e-05, "loss": 0.1589, "step": 47500 }, { "epoch": 0.6677911307892332, "grad_norm": 0.3625919818878174, "learning_rate": 5.452041457545739e-05, "loss": 0.1783, "step": 47510 }, { "epoch": 0.6679316888045541, "grad_norm": 0.5666170716285706, "learning_rate": 5.447902263920034e-05, "loss": 0.1971, "step": 47520 }, { "epoch": 0.6680722468198749, "grad_norm": 0.37668928503990173, "learning_rate": 5.4437640538183564e-05, "loss": 0.188, "step": 47530 }, { "epoch": 0.6682128048351957, "grad_norm": 0.41840583086013794, "learning_rate": 5.439626828134816e-05, "loss": 0.1758, "step": 47540 }, { "epoch": 0.6683533628505165, "grad_norm": 0.41168487071990967, "learning_rate": 5.435490587763288e-05, "loss": 0.1548, "step": 47550 }, { "epoch": 0.6684939208658374, "grad_norm": 0.49045082926750183, "learning_rate": 5.431355333597459e-05, "loss": 0.173, "step": 47560 }, { "epoch": 0.6686344788811582, "grad_norm": 0.3818732798099518, "learning_rate": 5.427221066530777e-05, "loss": 0.1629, "step": 47570 }, { "epoch": 0.668775036896479, "grad_norm": 0.39010149240493774, "learning_rate": 5.423087787456502e-05, "loss": 0.1632, "step": 47580 }, { "epoch": 0.6689155949117999, "grad_norm": 0.3519304692745209, "learning_rate": 5.418955497267657e-05, "loss": 0.1769, "step": 47590 }, { "epoch": 0.6690561529271206, "grad_norm": 0.49452972412109375, "learning_rate": 5.414824196857073e-05, "loss": 0.1943, "step": 47600 }, { "epoch": 0.6691967109424415, "grad_norm": 0.34571000933647156, "learning_rate": 5.4106938871173494e-05, "loss": 0.1466, "step": 47610 }, { "epoch": 0.6693372689577624, "grad_norm": 0.41626954078674316, "learning_rate": 5.406564568940884e-05, "loss": 0.1544, "step": 47620 }, { "epoch": 0.6694778269730831, "grad_norm": 0.47072553634643555, "learning_rate": 5.402436243219851e-05, "loss": 0.1589, "step": 47630 }, { "epoch": 0.669618384988404, "grad_norm": 0.414405882358551, "learning_rate": 5.398308910846217e-05, "loss": 0.1486, "step": 47640 }, { "epoch": 0.6697589430037247, "grad_norm": 0.3934914171695709, "learning_rate": 5.394182572711728e-05, "loss": 0.1696, "step": 47650 }, { "epoch": 0.6698995010190456, "grad_norm": 0.4239861071109772, "learning_rate": 5.3900572297079256e-05, "loss": 0.1819, "step": 47660 }, { "epoch": 0.6700400590343665, "grad_norm": 0.41308853030204773, "learning_rate": 5.385932882726121e-05, "loss": 0.1623, "step": 47670 }, { "epoch": 0.6701806170496872, "grad_norm": 0.3246447741985321, "learning_rate": 5.381809532657427e-05, "loss": 0.1765, "step": 47680 }, { "epoch": 0.6703211750650081, "grad_norm": 0.3457357883453369, "learning_rate": 5.377687180392723e-05, "loss": 0.1595, "step": 47690 }, { "epoch": 0.6704617330803289, "grad_norm": 0.34478646516799927, "learning_rate": 5.373565826822693e-05, "loss": 0.1643, "step": 47700 }, { "epoch": 0.6706022910956497, "grad_norm": 0.3674975633621216, "learning_rate": 5.369445472837786e-05, "loss": 0.1793, "step": 47710 }, { "epoch": 0.6707428491109706, "grad_norm": 0.3989918828010559, "learning_rate": 5.36532611932825e-05, "loss": 0.1775, "step": 47720 }, { "epoch": 0.6708834071262914, "grad_norm": 0.3287481367588043, "learning_rate": 5.3612077671841047e-05, "loss": 0.1665, "step": 47730 }, { "epoch": 0.6710239651416122, "grad_norm": 0.27225741744041443, "learning_rate": 5.357090417295165e-05, "loss": 0.1644, "step": 47740 }, { "epoch": 0.671164523156933, "grad_norm": 0.353710412979126, "learning_rate": 5.352974070551018e-05, "loss": 0.1814, "step": 47750 }, { "epoch": 0.6713050811722538, "grad_norm": 0.44788897037506104, "learning_rate": 5.348858727841045e-05, "loss": 0.1553, "step": 47760 }, { "epoch": 0.6714456391875747, "grad_norm": 0.3965194523334503, "learning_rate": 5.344744390054399e-05, "loss": 0.1557, "step": 47770 }, { "epoch": 0.6715861972028955, "grad_norm": 0.28312981128692627, "learning_rate": 5.340631058080029e-05, "loss": 0.1725, "step": 47780 }, { "epoch": 0.6717267552182163, "grad_norm": 0.4658781588077545, "learning_rate": 5.336518732806651e-05, "loss": 0.1649, "step": 47790 }, { "epoch": 0.6718673132335371, "grad_norm": 0.3657032251358032, "learning_rate": 5.332407415122782e-05, "loss": 0.1773, "step": 47800 }, { "epoch": 0.672007871248858, "grad_norm": 0.3435717225074768, "learning_rate": 5.3282971059167005e-05, "loss": 0.1594, "step": 47810 }, { "epoch": 0.6721484292641788, "grad_norm": 0.3758085072040558, "learning_rate": 5.3241878060764886e-05, "loss": 0.1731, "step": 47820 }, { "epoch": 0.6722889872794996, "grad_norm": 0.3348824977874756, "learning_rate": 5.3200795164899887e-05, "loss": 0.1649, "step": 47830 }, { "epoch": 0.6724295452948205, "grad_norm": 0.2984980642795563, "learning_rate": 5.315972238044847e-05, "loss": 0.1716, "step": 47840 }, { "epoch": 0.6725701033101412, "grad_norm": 0.46859562397003174, "learning_rate": 5.31186597162847e-05, "loss": 0.1545, "step": 47850 }, { "epoch": 0.6727106613254621, "grad_norm": 0.35370686650276184, "learning_rate": 5.307760718128065e-05, "loss": 0.1857, "step": 47860 }, { "epoch": 0.672851219340783, "grad_norm": 0.4333016574382782, "learning_rate": 5.303656478430601e-05, "loss": 0.1738, "step": 47870 }, { "epoch": 0.6729917773561037, "grad_norm": 0.39497295022010803, "learning_rate": 5.299553253422849e-05, "loss": 0.1655, "step": 47880 }, { "epoch": 0.6731323353714246, "grad_norm": 0.2927321791648865, "learning_rate": 5.2954510439913395e-05, "loss": 0.1507, "step": 47890 }, { "epoch": 0.6732728933867453, "grad_norm": 0.4232181906700134, "learning_rate": 5.2913498510224027e-05, "loss": 0.168, "step": 47900 }, { "epoch": 0.6734134514020662, "grad_norm": 0.3505028784275055, "learning_rate": 5.2872496754021304e-05, "loss": 0.1633, "step": 47910 }, { "epoch": 0.6735540094173871, "grad_norm": 0.30001300573349, "learning_rate": 5.2831505180164177e-05, "loss": 0.1564, "step": 47920 }, { "epoch": 0.6736945674327078, "grad_norm": 0.45110681653022766, "learning_rate": 5.279052379750913e-05, "loss": 0.1703, "step": 47930 }, { "epoch": 0.6738351254480287, "grad_norm": 0.3818683922290802, "learning_rate": 5.274955261491068e-05, "loss": 0.1792, "step": 47940 }, { "epoch": 0.6739756834633495, "grad_norm": 0.5145854353904724, "learning_rate": 5.270859164122097e-05, "loss": 0.1965, "step": 47950 }, { "epoch": 0.6741162414786703, "grad_norm": 0.4271232783794403, "learning_rate": 5.266764088529009e-05, "loss": 0.1831, "step": 47960 }, { "epoch": 0.6742567994939912, "grad_norm": 0.40935248136520386, "learning_rate": 5.2626700355965754e-05, "loss": 0.1641, "step": 47970 }, { "epoch": 0.674397357509312, "grad_norm": 0.31136754155158997, "learning_rate": 5.258577006209362e-05, "loss": 0.1696, "step": 47980 }, { "epoch": 0.6745379155246328, "grad_norm": 0.4646071195602417, "learning_rate": 5.254485001251701e-05, "loss": 0.1812, "step": 47990 }, { "epoch": 0.6746784735399536, "grad_norm": 0.34271240234375, "learning_rate": 5.250394021607715e-05, "loss": 0.1756, "step": 48000 }, { "epoch": 0.6746784735399536, "eval_chrf": 73.59441090909657, "eval_loss": 0.3677200675010681, "eval_runtime": 324.8817, "eval_samples_per_second": 0.308, "eval_steps_per_second": 0.012, "step": 48000 }, { "epoch": 0.6748190315552745, "grad_norm": 0.3200497329235077, "learning_rate": 5.246304068161292e-05, "loss": 0.1691, "step": 48010 }, { "epoch": 0.6749595895705952, "grad_norm": 0.45317354798316956, "learning_rate": 5.2422151417961144e-05, "loss": 0.1373, "step": 48020 }, { "epoch": 0.6751001475859161, "grad_norm": 0.4153939187526703, "learning_rate": 5.238127243395624e-05, "loss": 0.2004, "step": 48030 }, { "epoch": 0.675240705601237, "grad_norm": 0.42276909947395325, "learning_rate": 5.234040373843059e-05, "loss": 0.163, "step": 48040 }, { "epoch": 0.6753812636165577, "grad_norm": 0.4025135040283203, "learning_rate": 5.229954534021417e-05, "loss": 0.1785, "step": 48050 }, { "epoch": 0.6755218216318786, "grad_norm": 0.2253536731004715, "learning_rate": 5.225869724813493e-05, "loss": 0.1644, "step": 48060 }, { "epoch": 0.6756623796471993, "grad_norm": 0.3587108254432678, "learning_rate": 5.22178594710184e-05, "loss": 0.1794, "step": 48070 }, { "epoch": 0.6758029376625202, "grad_norm": 0.413657546043396, "learning_rate": 5.2177032017688046e-05, "loss": 0.1876, "step": 48080 }, { "epoch": 0.6759434956778411, "grad_norm": 0.39810359477996826, "learning_rate": 5.213621489696494e-05, "loss": 0.1671, "step": 48090 }, { "epoch": 0.6760840536931618, "grad_norm": 0.3903466463088989, "learning_rate": 5.20954081176681e-05, "loss": 0.1659, "step": 48100 }, { "epoch": 0.6762246117084827, "grad_norm": 0.43574848771095276, "learning_rate": 5.205461168861418e-05, "loss": 0.1618, "step": 48110 }, { "epoch": 0.6763651697238034, "grad_norm": 0.2700211703777313, "learning_rate": 5.201382561861759e-05, "loss": 0.1716, "step": 48120 }, { "epoch": 0.6765057277391243, "grad_norm": 0.3783707320690155, "learning_rate": 5.197304991649064e-05, "loss": 0.1645, "step": 48130 }, { "epoch": 0.6766462857544452, "grad_norm": 0.3352111577987671, "learning_rate": 5.1932284591043226e-05, "loss": 0.1508, "step": 48140 }, { "epoch": 0.676786843769766, "grad_norm": 0.42214563488960266, "learning_rate": 5.1891529651083146e-05, "loss": 0.2041, "step": 48150 }, { "epoch": 0.6769274017850868, "grad_norm": 0.385493665933609, "learning_rate": 5.185078510541583e-05, "loss": 0.196, "step": 48160 }, { "epoch": 0.6770679598004076, "grad_norm": 0.5575045347213745, "learning_rate": 5.18100509628446e-05, "loss": 0.1541, "step": 48170 }, { "epoch": 0.6772085178157284, "grad_norm": 0.3971259593963623, "learning_rate": 5.176932723217038e-05, "loss": 0.1716, "step": 48180 }, { "epoch": 0.6773490758310493, "grad_norm": 0.5190660953521729, "learning_rate": 5.172861392219199e-05, "loss": 0.1586, "step": 48190 }, { "epoch": 0.6774896338463701, "grad_norm": 0.3985922932624817, "learning_rate": 5.168791104170586e-05, "loss": 0.1964, "step": 48200 }, { "epoch": 0.6776301918616909, "grad_norm": 0.42941612005233765, "learning_rate": 5.164721859950631e-05, "loss": 0.1662, "step": 48210 }, { "epoch": 0.6777707498770117, "grad_norm": 0.4269723892211914, "learning_rate": 5.1606536604385235e-05, "loss": 0.169, "step": 48220 }, { "epoch": 0.6779113078923326, "grad_norm": 0.373107373714447, "learning_rate": 5.156586506513248e-05, "loss": 0.1754, "step": 48230 }, { "epoch": 0.6780518659076534, "grad_norm": 0.37568169832229614, "learning_rate": 5.15252039905354e-05, "loss": 0.1838, "step": 48240 }, { "epoch": 0.6781924239229742, "grad_norm": 0.5764591097831726, "learning_rate": 5.148455338937931e-05, "loss": 0.1815, "step": 48250 }, { "epoch": 0.678332981938295, "grad_norm": 0.4133480191230774, "learning_rate": 5.1443913270447076e-05, "loss": 0.169, "step": 48260 }, { "epoch": 0.6784735399536158, "grad_norm": 0.5136486291885376, "learning_rate": 5.1403283642519454e-05, "loss": 0.169, "step": 48270 }, { "epoch": 0.6786140979689367, "grad_norm": 0.3314149081707001, "learning_rate": 5.1362664514374794e-05, "loss": 0.1474, "step": 48280 }, { "epoch": 0.6787546559842575, "grad_norm": 0.3599088191986084, "learning_rate": 5.132205589478931e-05, "loss": 0.159, "step": 48290 }, { "epoch": 0.6788952139995783, "grad_norm": 0.34585297107696533, "learning_rate": 5.1281457792536835e-05, "loss": 0.1665, "step": 48300 }, { "epoch": 0.6790357720148992, "grad_norm": 0.3417344093322754, "learning_rate": 5.1240870216389004e-05, "loss": 0.1624, "step": 48310 }, { "epoch": 0.6791763300302199, "grad_norm": 0.3887704312801361, "learning_rate": 5.1200293175115096e-05, "loss": 0.19, "step": 48320 }, { "epoch": 0.6793168880455408, "grad_norm": 0.3785271942615509, "learning_rate": 5.115972667748227e-05, "loss": 0.1849, "step": 48330 }, { "epoch": 0.6794574460608617, "grad_norm": 0.37237548828125, "learning_rate": 5.111917073225519e-05, "loss": 0.1611, "step": 48340 }, { "epoch": 0.6795980040761824, "grad_norm": 0.39942654967308044, "learning_rate": 5.107862534819643e-05, "loss": 0.2033, "step": 48350 }, { "epoch": 0.6797385620915033, "grad_norm": 0.34830865263938904, "learning_rate": 5.103809053406615e-05, "loss": 0.1488, "step": 48360 }, { "epoch": 0.679879120106824, "grad_norm": 0.2858423888683319, "learning_rate": 5.0997566298622355e-05, "loss": 0.1555, "step": 48370 }, { "epoch": 0.6800196781221449, "grad_norm": 0.3308117687702179, "learning_rate": 5.0957052650620605e-05, "loss": 0.1709, "step": 48380 }, { "epoch": 0.6801602361374658, "grad_norm": 0.33425357937812805, "learning_rate": 5.091654959881433e-05, "loss": 0.1811, "step": 48390 }, { "epoch": 0.6803007941527865, "grad_norm": 0.2872956097126007, "learning_rate": 5.087605715195456e-05, "loss": 0.1591, "step": 48400 }, { "epoch": 0.6804413521681074, "grad_norm": 0.35558682680130005, "learning_rate": 5.083557531879011e-05, "loss": 0.1506, "step": 48410 }, { "epoch": 0.6805819101834282, "grad_norm": 0.3197231590747833, "learning_rate": 5.0795104108067406e-05, "loss": 0.1683, "step": 48420 }, { "epoch": 0.680722468198749, "grad_norm": 0.2837541699409485, "learning_rate": 5.075464352853071e-05, "loss": 0.1529, "step": 48430 }, { "epoch": 0.6808630262140699, "grad_norm": 0.5361260175704956, "learning_rate": 5.0714193588921846e-05, "loss": 0.1495, "step": 48440 }, { "epoch": 0.6810035842293907, "grad_norm": 0.39931023120880127, "learning_rate": 5.067375429798047e-05, "loss": 0.1781, "step": 48450 }, { "epoch": 0.6811441422447115, "grad_norm": 0.4552576541900635, "learning_rate": 5.063332566444381e-05, "loss": 0.1786, "step": 48460 }, { "epoch": 0.6812847002600323, "grad_norm": 0.39349377155303955, "learning_rate": 5.059290769704694e-05, "loss": 0.1717, "step": 48470 }, { "epoch": 0.6814252582753532, "grad_norm": 0.4419980049133301, "learning_rate": 5.0552500404522465e-05, "loss": 0.1721, "step": 48480 }, { "epoch": 0.6815658162906739, "grad_norm": 0.3095633089542389, "learning_rate": 5.051210379560083e-05, "loss": 0.1397, "step": 48490 }, { "epoch": 0.6817063743059948, "grad_norm": 0.4042552709579468, "learning_rate": 5.047171787901004e-05, "loss": 0.1854, "step": 48500 }, { "epoch": 0.6818469323213157, "grad_norm": 0.3907172381877899, "learning_rate": 5.0431342663475935e-05, "loss": 0.2011, "step": 48510 }, { "epoch": 0.6819874903366364, "grad_norm": 0.3393981158733368, "learning_rate": 5.039097815772188e-05, "loss": 0.1706, "step": 48520 }, { "epoch": 0.6821280483519573, "grad_norm": 0.4830770194530487, "learning_rate": 5.035062437046909e-05, "loss": 0.1828, "step": 48530 }, { "epoch": 0.682268606367278, "grad_norm": 0.45952385663986206, "learning_rate": 5.031028131043632e-05, "loss": 0.1731, "step": 48540 }, { "epoch": 0.6824091643825989, "grad_norm": 0.511646032333374, "learning_rate": 5.026994898634014e-05, "loss": 0.1683, "step": 48550 }, { "epoch": 0.6825497223979198, "grad_norm": 0.2801165282726288, "learning_rate": 5.022962740689465e-05, "loss": 0.1728, "step": 48560 }, { "epoch": 0.6826902804132405, "grad_norm": 0.33137014508247375, "learning_rate": 5.018931658081179e-05, "loss": 0.1684, "step": 48570 }, { "epoch": 0.6828308384285614, "grad_norm": 0.30291131138801575, "learning_rate": 5.014901651680102e-05, "loss": 0.156, "step": 48580 }, { "epoch": 0.6829713964438822, "grad_norm": 0.4711996614933014, "learning_rate": 5.010872722356964e-05, "loss": 0.1823, "step": 48590 }, { "epoch": 0.683111954459203, "grad_norm": 0.29929518699645996, "learning_rate": 5.006844870982245e-05, "loss": 0.1776, "step": 48600 }, { "epoch": 0.6832525124745239, "grad_norm": 0.4039543569087982, "learning_rate": 5.002818098426207e-05, "loss": 0.1739, "step": 48610 }, { "epoch": 0.6833930704898447, "grad_norm": 0.3180190622806549, "learning_rate": 4.998792405558869e-05, "loss": 0.1673, "step": 48620 }, { "epoch": 0.6835336285051655, "grad_norm": 0.4465638995170593, "learning_rate": 4.9947677932500225e-05, "loss": 0.1812, "step": 48630 }, { "epoch": 0.6836741865204863, "grad_norm": 0.4132784605026245, "learning_rate": 4.990744262369219e-05, "loss": 0.1746, "step": 48640 }, { "epoch": 0.6838147445358072, "grad_norm": 0.4020783305168152, "learning_rate": 4.9867218137857884e-05, "loss": 0.1977, "step": 48650 }, { "epoch": 0.683955302551128, "grad_norm": 0.44012129306793213, "learning_rate": 4.98270044836881e-05, "loss": 0.1664, "step": 48660 }, { "epoch": 0.6840958605664488, "grad_norm": 0.3492875099182129, "learning_rate": 4.978680166987145e-05, "loss": 0.1712, "step": 48670 }, { "epoch": 0.6842364185817696, "grad_norm": 0.41620054841041565, "learning_rate": 4.9746609705094085e-05, "loss": 0.1763, "step": 48680 }, { "epoch": 0.6843769765970904, "grad_norm": 0.3826924264431, "learning_rate": 4.970642859803992e-05, "loss": 0.1799, "step": 48690 }, { "epoch": 0.6845175346124113, "grad_norm": 0.4146118760108948, "learning_rate": 4.966625835739038e-05, "loss": 0.1771, "step": 48700 }, { "epoch": 0.6846580926277321, "grad_norm": 0.44485095143318176, "learning_rate": 4.962609899182473e-05, "loss": 0.1632, "step": 48710 }, { "epoch": 0.6847986506430529, "grad_norm": 0.3078020513057709, "learning_rate": 4.95859505100197e-05, "loss": 0.1611, "step": 48720 }, { "epoch": 0.6849392086583738, "grad_norm": 0.3447209596633911, "learning_rate": 4.95458129206498e-05, "loss": 0.1548, "step": 48730 }, { "epoch": 0.6850797666736945, "grad_norm": 0.38146939873695374, "learning_rate": 4.950568623238709e-05, "loss": 0.1693, "step": 48740 }, { "epoch": 0.6852203246890154, "grad_norm": 0.3679945468902588, "learning_rate": 4.94655704539014e-05, "loss": 0.1837, "step": 48750 }, { "epoch": 0.6853608827043363, "grad_norm": 0.32762065529823303, "learning_rate": 4.942546559386003e-05, "loss": 0.1629, "step": 48760 }, { "epoch": 0.685501440719657, "grad_norm": 0.35465124249458313, "learning_rate": 4.93853716609281e-05, "loss": 0.1759, "step": 48770 }, { "epoch": 0.6856419987349779, "grad_norm": 0.41711315512657166, "learning_rate": 4.9345288663768266e-05, "loss": 0.161, "step": 48780 }, { "epoch": 0.6857825567502986, "grad_norm": 0.36827749013900757, "learning_rate": 4.930521661104083e-05, "loss": 0.1477, "step": 48790 }, { "epoch": 0.6859231147656195, "grad_norm": 0.39657124876976013, "learning_rate": 4.9265155511403685e-05, "loss": 0.1675, "step": 48800 }, { "epoch": 0.6860636727809404, "grad_norm": 0.3166278004646301, "learning_rate": 4.9225105373512505e-05, "loss": 0.1622, "step": 48810 }, { "epoch": 0.6862042307962611, "grad_norm": 0.28927573561668396, "learning_rate": 4.918506620602043e-05, "loss": 0.161, "step": 48820 }, { "epoch": 0.686344788811582, "grad_norm": 0.3811091482639313, "learning_rate": 4.914503801757836e-05, "loss": 0.1602, "step": 48830 }, { "epoch": 0.6864853468269028, "grad_norm": 0.364082008600235, "learning_rate": 4.910502081683471e-05, "loss": 0.185, "step": 48840 }, { "epoch": 0.6866259048422236, "grad_norm": 0.38235294818878174, "learning_rate": 4.9065014612435657e-05, "loss": 0.183, "step": 48850 }, { "epoch": 0.6867664628575445, "grad_norm": 0.40850192308425903, "learning_rate": 4.902501941302483e-05, "loss": 0.1853, "step": 48860 }, { "epoch": 0.6869070208728653, "grad_norm": 0.4095821976661682, "learning_rate": 4.898503522724366e-05, "loss": 0.1591, "step": 48870 }, { "epoch": 0.6870475788881861, "grad_norm": 0.45164576172828674, "learning_rate": 4.894506206373102e-05, "loss": 0.1602, "step": 48880 }, { "epoch": 0.6871881369035069, "grad_norm": 0.36443573236465454, "learning_rate": 4.890509993112358e-05, "loss": 0.1652, "step": 48890 }, { "epoch": 0.6873286949188278, "grad_norm": 0.3586047887802124, "learning_rate": 4.886514883805546e-05, "loss": 0.1623, "step": 48900 }, { "epoch": 0.6874692529341486, "grad_norm": 0.37332266569137573, "learning_rate": 4.882520879315856e-05, "loss": 0.1845, "step": 48910 }, { "epoch": 0.6876098109494694, "grad_norm": 0.3771437406539917, "learning_rate": 4.878527980506221e-05, "loss": 0.1746, "step": 48920 }, { "epoch": 0.6877503689647902, "grad_norm": 0.37698593735694885, "learning_rate": 4.874536188239354e-05, "loss": 0.1782, "step": 48930 }, { "epoch": 0.687890926980111, "grad_norm": 0.33789968490600586, "learning_rate": 4.870545503377711e-05, "loss": 0.1572, "step": 48940 }, { "epoch": 0.6880314849954319, "grad_norm": 0.3339237570762634, "learning_rate": 4.866555926783527e-05, "loss": 0.1485, "step": 48950 }, { "epoch": 0.6881720430107527, "grad_norm": 0.5001163482666016, "learning_rate": 4.862567459318778e-05, "loss": 0.157, "step": 48960 }, { "epoch": 0.6883126010260735, "grad_norm": 0.3400193750858307, "learning_rate": 4.8585801018452194e-05, "loss": 0.1636, "step": 48970 }, { "epoch": 0.6884531590413944, "grad_norm": 0.4310280382633209, "learning_rate": 4.8545938552243495e-05, "loss": 0.193, "step": 48980 }, { "epoch": 0.6885937170567151, "grad_norm": 0.36668428778648376, "learning_rate": 4.850608720317442e-05, "loss": 0.1676, "step": 48990 }, { "epoch": 0.688734275072036, "grad_norm": 0.44923144578933716, "learning_rate": 4.846624697985518e-05, "loss": 0.1674, "step": 49000 }, { "epoch": 0.688734275072036, "eval_chrf": 79.95379135798156, "eval_loss": 0.3630891442298889, "eval_runtime": 295.3513, "eval_samples_per_second": 0.339, "eval_steps_per_second": 0.014, "step": 49000 }, { "epoch": 0.6888748330873568, "grad_norm": 0.33405956625938416, "learning_rate": 4.842641789089368e-05, "loss": 0.1702, "step": 49010 }, { "epoch": 0.6890153911026776, "grad_norm": 0.33272942900657654, "learning_rate": 4.838659994489532e-05, "loss": 0.1571, "step": 49020 }, { "epoch": 0.6891559491179985, "grad_norm": 0.4708268642425537, "learning_rate": 4.8346793150463196e-05, "loss": 0.1676, "step": 49030 }, { "epoch": 0.6892965071333192, "grad_norm": 0.3452412486076355, "learning_rate": 4.830699751619789e-05, "loss": 0.1723, "step": 49040 }, { "epoch": 0.6894370651486401, "grad_norm": 0.30366259813308716, "learning_rate": 4.8267213050697714e-05, "loss": 0.1711, "step": 49050 }, { "epoch": 0.6895776231639609, "grad_norm": 0.42754900455474854, "learning_rate": 4.822743976255838e-05, "loss": 0.1847, "step": 49060 }, { "epoch": 0.6897181811792817, "grad_norm": 0.4515030086040497, "learning_rate": 4.8187677660373364e-05, "loss": 0.1846, "step": 49070 }, { "epoch": 0.6898587391946026, "grad_norm": 0.30266475677490234, "learning_rate": 4.814792675273359e-05, "loss": 0.1607, "step": 49080 }, { "epoch": 0.6899992972099234, "grad_norm": 0.340821772813797, "learning_rate": 4.810818704822767e-05, "loss": 0.1925, "step": 49090 }, { "epoch": 0.6901398552252442, "grad_norm": 0.28537577390670776, "learning_rate": 4.8068458555441687e-05, "loss": 0.1834, "step": 49100 }, { "epoch": 0.690280413240565, "grad_norm": 0.44687119126319885, "learning_rate": 4.8028741282959435e-05, "loss": 0.1698, "step": 49110 }, { "epoch": 0.6904209712558859, "grad_norm": 0.40084707736968994, "learning_rate": 4.7989035239362126e-05, "loss": 0.1839, "step": 49120 }, { "epoch": 0.6905615292712067, "grad_norm": 0.47335416078567505, "learning_rate": 4.79493404332287e-05, "loss": 0.1514, "step": 49130 }, { "epoch": 0.6907020872865275, "grad_norm": 0.4480488896369934, "learning_rate": 4.7909656873135534e-05, "loss": 0.195, "step": 49140 }, { "epoch": 0.6908426453018484, "grad_norm": 0.4829513430595398, "learning_rate": 4.786998456765671e-05, "loss": 0.155, "step": 49150 }, { "epoch": 0.6909832033171691, "grad_norm": 0.3073470890522003, "learning_rate": 4.783032352536373e-05, "loss": 0.1567, "step": 49160 }, { "epoch": 0.69112376133249, "grad_norm": 0.4633159041404724, "learning_rate": 4.7790673754825824e-05, "loss": 0.1781, "step": 49170 }, { "epoch": 0.6912643193478109, "grad_norm": 0.3136443495750427, "learning_rate": 4.775103526460961e-05, "loss": 0.1568, "step": 49180 }, { "epoch": 0.6914048773631316, "grad_norm": 0.370098352432251, "learning_rate": 4.7711408063279454e-05, "loss": 0.142, "step": 49190 }, { "epoch": 0.6915454353784525, "grad_norm": 0.3789938688278198, "learning_rate": 4.76717921593971e-05, "loss": 0.1763, "step": 49200 }, { "epoch": 0.6916859933937732, "grad_norm": 0.4118127226829529, "learning_rate": 4.763218756152201e-05, "loss": 0.1727, "step": 49210 }, { "epoch": 0.6918265514090941, "grad_norm": 0.4234118163585663, "learning_rate": 4.759259427821109e-05, "loss": 0.1654, "step": 49220 }, { "epoch": 0.691967109424415, "grad_norm": 0.7092957496643066, "learning_rate": 4.7553012318018896e-05, "loss": 0.1703, "step": 49230 }, { "epoch": 0.6921076674397357, "grad_norm": 0.49885180592536926, "learning_rate": 4.751344168949742e-05, "loss": 0.1686, "step": 49240 }, { "epoch": 0.6922482254550566, "grad_norm": 0.45480483770370483, "learning_rate": 4.7473882401196346e-05, "loss": 0.1517, "step": 49250 }, { "epoch": 0.6923887834703774, "grad_norm": 0.27201780676841736, "learning_rate": 4.743433446166275e-05, "loss": 0.146, "step": 49260 }, { "epoch": 0.6925293414856982, "grad_norm": 0.36466437578201294, "learning_rate": 4.739479787944143e-05, "loss": 0.1629, "step": 49270 }, { "epoch": 0.6926698995010191, "grad_norm": 0.2820543348789215, "learning_rate": 4.735527266307457e-05, "loss": 0.169, "step": 49280 }, { "epoch": 0.6928104575163399, "grad_norm": 0.3912178874015808, "learning_rate": 4.731575882110202e-05, "loss": 0.1655, "step": 49290 }, { "epoch": 0.6929510155316607, "grad_norm": 0.37822580337524414, "learning_rate": 4.7276256362061075e-05, "loss": 0.1568, "step": 49300 }, { "epoch": 0.6930915735469815, "grad_norm": 0.32967841625213623, "learning_rate": 4.723676529448668e-05, "loss": 0.1857, "step": 49310 }, { "epoch": 0.6932321315623023, "grad_norm": 0.816712498664856, "learning_rate": 4.7197285626911156e-05, "loss": 0.1935, "step": 49320 }, { "epoch": 0.6933726895776232, "grad_norm": 0.4365677833557129, "learning_rate": 4.715781736786459e-05, "loss": 0.1751, "step": 49330 }, { "epoch": 0.693513247592944, "grad_norm": 0.37101316452026367, "learning_rate": 4.711836052587435e-05, "loss": 0.1572, "step": 49340 }, { "epoch": 0.6936538056082648, "grad_norm": 0.41701507568359375, "learning_rate": 4.707891510946556e-05, "loss": 0.1634, "step": 49350 }, { "epoch": 0.6937943636235856, "grad_norm": 0.32426103949546814, "learning_rate": 4.70394811271607e-05, "loss": 0.1639, "step": 49360 }, { "epoch": 0.6939349216389065, "grad_norm": 0.3698156774044037, "learning_rate": 4.700005858747993e-05, "loss": 0.1431, "step": 49370 }, { "epoch": 0.6940754796542273, "grad_norm": 0.3618178367614746, "learning_rate": 4.69606474989408e-05, "loss": 0.1568, "step": 49380 }, { "epoch": 0.6942160376695481, "grad_norm": 0.29728826880455017, "learning_rate": 4.692124787005852e-05, "loss": 0.1765, "step": 49390 }, { "epoch": 0.694356595684869, "grad_norm": 0.3886052072048187, "learning_rate": 4.688185970934565e-05, "loss": 0.1636, "step": 49400 }, { "epoch": 0.6944971537001897, "grad_norm": 0.43441951274871826, "learning_rate": 4.6842483025312513e-05, "loss": 0.1608, "step": 49410 }, { "epoch": 0.6946377117155106, "grad_norm": 0.47952476143836975, "learning_rate": 4.680311782646668e-05, "loss": 0.1713, "step": 49420 }, { "epoch": 0.6947782697308315, "grad_norm": 0.33909785747528076, "learning_rate": 4.676376412131349e-05, "loss": 0.1552, "step": 49430 }, { "epoch": 0.6949188277461522, "grad_norm": 0.3531533181667328, "learning_rate": 4.67244219183556e-05, "loss": 0.1422, "step": 49440 }, { "epoch": 0.6950593857614731, "grad_norm": 0.39314037561416626, "learning_rate": 4.6685091226093334e-05, "loss": 0.187, "step": 49450 }, { "epoch": 0.6951999437767938, "grad_norm": 0.46536579728126526, "learning_rate": 4.664577205302444e-05, "loss": 0.1507, "step": 49460 }, { "epoch": 0.6953405017921147, "grad_norm": 0.27574050426483154, "learning_rate": 4.660646440764416e-05, "loss": 0.1515, "step": 49470 }, { "epoch": 0.6954810598074355, "grad_norm": 0.32661160826683044, "learning_rate": 4.656716829844534e-05, "loss": 0.1662, "step": 49480 }, { "epoch": 0.6956216178227563, "grad_norm": 0.356479674577713, "learning_rate": 4.652788373391822e-05, "loss": 0.1804, "step": 49490 }, { "epoch": 0.6957621758380772, "grad_norm": 0.349311888217926, "learning_rate": 4.648861072255068e-05, "loss": 0.1711, "step": 49500 }, { "epoch": 0.695902733853398, "grad_norm": 0.3784264028072357, "learning_rate": 4.644934927282795e-05, "loss": 0.1633, "step": 49510 }, { "epoch": 0.6960432918687188, "grad_norm": 0.463031142950058, "learning_rate": 4.6410099393232906e-05, "loss": 0.1661, "step": 49520 }, { "epoch": 0.6961838498840396, "grad_norm": 0.4308297634124756, "learning_rate": 4.63708610922458e-05, "loss": 0.169, "step": 49530 }, { "epoch": 0.6963244078993605, "grad_norm": 0.4579606056213379, "learning_rate": 4.63316343783445e-05, "loss": 0.1648, "step": 49540 }, { "epoch": 0.6964649659146813, "grad_norm": 0.3730986714363098, "learning_rate": 4.629241926000425e-05, "loss": 0.1761, "step": 49550 }, { "epoch": 0.6966055239300021, "grad_norm": 0.41617172956466675, "learning_rate": 4.625321574569791e-05, "loss": 0.1989, "step": 49560 }, { "epoch": 0.696746081945323, "grad_norm": 0.42932072281837463, "learning_rate": 4.6214023843895705e-05, "loss": 0.1723, "step": 49570 }, { "epoch": 0.6968866399606437, "grad_norm": 0.29537931084632874, "learning_rate": 4.617484356306549e-05, "loss": 0.1579, "step": 49580 }, { "epoch": 0.6970271979759646, "grad_norm": 0.40688666701316833, "learning_rate": 4.6135674911672466e-05, "loss": 0.1583, "step": 49590 }, { "epoch": 0.6971677559912854, "grad_norm": 0.33323273062705994, "learning_rate": 4.609651789817947e-05, "loss": 0.1639, "step": 49600 }, { "epoch": 0.6973083140066062, "grad_norm": 0.39615142345428467, "learning_rate": 4.605737253104667e-05, "loss": 0.1729, "step": 49610 }, { "epoch": 0.6974488720219271, "grad_norm": 0.5205367207527161, "learning_rate": 4.6018238818731865e-05, "loss": 0.1976, "step": 49620 }, { "epoch": 0.6975894300372478, "grad_norm": 0.5548632144927979, "learning_rate": 4.597911676969019e-05, "loss": 0.1675, "step": 49630 }, { "epoch": 0.6977299880525687, "grad_norm": 0.3631393313407898, "learning_rate": 4.594000639237443e-05, "loss": 0.1538, "step": 49640 }, { "epoch": 0.6978705460678896, "grad_norm": 0.3613353371620178, "learning_rate": 4.5900907695234654e-05, "loss": 0.1595, "step": 49650 }, { "epoch": 0.6980111040832103, "grad_norm": 0.4969308376312256, "learning_rate": 4.5861820686718596e-05, "loss": 0.1557, "step": 49660 }, { "epoch": 0.6981516620985312, "grad_norm": 0.2675575613975525, "learning_rate": 4.5822745375271294e-05, "loss": 0.1673, "step": 49670 }, { "epoch": 0.698292220113852, "grad_norm": 0.4694734811782837, "learning_rate": 4.578368176933543e-05, "loss": 0.1487, "step": 49680 }, { "epoch": 0.6984327781291728, "grad_norm": 0.32156407833099365, "learning_rate": 4.5744629877350975e-05, "loss": 0.1498, "step": 49690 }, { "epoch": 0.6985733361444937, "grad_norm": 0.3179536759853363, "learning_rate": 4.5705589707755545e-05, "loss": 0.1644, "step": 49700 }, { "epoch": 0.6987138941598144, "grad_norm": 0.46308329701423645, "learning_rate": 4.566656126898407e-05, "loss": 0.1727, "step": 49710 }, { "epoch": 0.6988544521751353, "grad_norm": 0.45487168431282043, "learning_rate": 4.5627544569469083e-05, "loss": 0.1752, "step": 49720 }, { "epoch": 0.6989950101904561, "grad_norm": 0.4241594076156616, "learning_rate": 4.5588539617640446e-05, "loss": 0.1655, "step": 49730 }, { "epoch": 0.6991355682057769, "grad_norm": 0.36288541555404663, "learning_rate": 4.554954642192562e-05, "loss": 0.1685, "step": 49740 }, { "epoch": 0.6992761262210978, "grad_norm": 0.324632465839386, "learning_rate": 4.551056499074937e-05, "loss": 0.1819, "step": 49750 }, { "epoch": 0.6994166842364186, "grad_norm": 0.2858855724334717, "learning_rate": 4.54715953325341e-05, "loss": 0.1636, "step": 49760 }, { "epoch": 0.6995572422517394, "grad_norm": 0.47081872820854187, "learning_rate": 4.543263745569949e-05, "loss": 0.1658, "step": 49770 }, { "epoch": 0.6996978002670602, "grad_norm": 0.34186944365501404, "learning_rate": 4.539369136866284e-05, "loss": 0.171, "step": 49780 }, { "epoch": 0.6998383582823811, "grad_norm": 0.40782099962234497, "learning_rate": 4.535475707983874e-05, "loss": 0.1792, "step": 49790 }, { "epoch": 0.6999789162977019, "grad_norm": 0.407845139503479, "learning_rate": 4.531583459763939e-05, "loss": 0.1389, "step": 49800 }, { "epoch": 0.7001194743130227, "grad_norm": 0.4455857276916504, "learning_rate": 4.527692393047429e-05, "loss": 0.1733, "step": 49810 }, { "epoch": 0.7002600323283436, "grad_norm": 0.31916528940200806, "learning_rate": 4.5238025086750544e-05, "loss": 0.1562, "step": 49820 }, { "epoch": 0.7004005903436643, "grad_norm": 0.34428226947784424, "learning_rate": 4.5199138074872525e-05, "loss": 0.1694, "step": 49830 }, { "epoch": 0.7005411483589852, "grad_norm": 0.384682834148407, "learning_rate": 4.5160262903242235e-05, "loss": 0.1586, "step": 49840 }, { "epoch": 0.700681706374306, "grad_norm": 0.4845108091831207, "learning_rate": 4.512139958025894e-05, "loss": 0.1692, "step": 49850 }, { "epoch": 0.7008222643896268, "grad_norm": 0.40447092056274414, "learning_rate": 4.50825481143195e-05, "loss": 0.1537, "step": 49860 }, { "epoch": 0.7009628224049477, "grad_norm": 0.42161500453948975, "learning_rate": 4.504370851381807e-05, "loss": 0.1729, "step": 49870 }, { "epoch": 0.7011033804202684, "grad_norm": 0.38627347350120544, "learning_rate": 4.500488078714641e-05, "loss": 0.1649, "step": 49880 }, { "epoch": 0.7012439384355893, "grad_norm": 0.5103524923324585, "learning_rate": 4.496606494269352e-05, "loss": 0.1773, "step": 49890 }, { "epoch": 0.7013844964509102, "grad_norm": 0.337223619222641, "learning_rate": 4.492726098884602e-05, "loss": 0.1598, "step": 49900 }, { "epoch": 0.7015250544662309, "grad_norm": 0.4297640025615692, "learning_rate": 4.4888468933987806e-05, "loss": 0.1828, "step": 49910 }, { "epoch": 0.7016656124815518, "grad_norm": 0.31979233026504517, "learning_rate": 4.484968878650032e-05, "loss": 0.1836, "step": 49920 }, { "epoch": 0.7018061704968726, "grad_norm": 0.28272655606269836, "learning_rate": 4.481092055476233e-05, "loss": 0.1734, "step": 49930 }, { "epoch": 0.7019467285121934, "grad_norm": 0.41349393129348755, "learning_rate": 4.477216424715015e-05, "loss": 0.166, "step": 49940 }, { "epoch": 0.7020872865275142, "grad_norm": 0.34932541847229004, "learning_rate": 4.473341987203737e-05, "loss": 0.1565, "step": 49950 }, { "epoch": 0.702227844542835, "grad_norm": 0.369492769241333, "learning_rate": 4.469468743779516e-05, "loss": 0.1618, "step": 49960 }, { "epoch": 0.7023684025581559, "grad_norm": 0.38204425573349, "learning_rate": 4.4655966952791964e-05, "loss": 0.1648, "step": 49970 }, { "epoch": 0.7025089605734767, "grad_norm": 0.40265777707099915, "learning_rate": 4.461725842539376e-05, "loss": 0.187, "step": 49980 }, { "epoch": 0.7026495185887975, "grad_norm": 0.38271403312683105, "learning_rate": 4.4578561863963856e-05, "loss": 0.1768, "step": 49990 }, { "epoch": 0.7027900766041183, "grad_norm": 0.39733636379241943, "learning_rate": 4.453987727686305e-05, "loss": 0.1746, "step": 50000 }, { "epoch": 0.7027900766041183, "eval_chrf": 76.08611862854853, "eval_loss": 0.3601585030555725, "eval_runtime": 325.1544, "eval_samples_per_second": 0.308, "eval_steps_per_second": 0.012, "step": 50000 } ], "logging_steps": 10, "max_steps": 71145, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1762575999305253e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }