| { |
| "best_global_step": 18000, |
| "best_metric": 87.79281231152616, |
| "best_model_checkpoint": "checkpoints_7B_lora_translated/ru-kz-final/checkpoint-18000", |
| "epoch": 1.041572321347704, |
| "eval_steps": 1000, |
| "global_step": 23000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.00045285753102074087, |
| "grad_norm": 3.4883720874786377, |
| "learning_rate": 8.148483476686284e-07, |
| "loss": 5.0788, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.0009057150620414817, |
| "grad_norm": 4.04372501373291, |
| "learning_rate": 1.720235400633771e-06, |
| "loss": 5.133, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0013585725930622225, |
| "grad_norm": 4.182616233825684, |
| "learning_rate": 2.6256224535989136e-06, |
| "loss": 5.1652, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0018114301240829635, |
| "grad_norm": 4.259949684143066, |
| "learning_rate": 3.5310095065640563e-06, |
| "loss": 4.9479, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.002264287655103704, |
| "grad_norm": 4.507060527801514, |
| "learning_rate": 4.4363965595291986e-06, |
| "loss": 4.9178, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.002717145186124445, |
| "grad_norm": 7.0745086669921875, |
| "learning_rate": 5.341783612494342e-06, |
| "loss": 4.558, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.003170002717145186, |
| "grad_norm": 9.023025512695312, |
| "learning_rate": 6.247170665459484e-06, |
| "loss": 3.7948, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.003622860248165927, |
| "grad_norm": 7.761899948120117, |
| "learning_rate": 7.152557718424627e-06, |
| "loss": 3.2346, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.004075717779186668, |
| "grad_norm": 4.877920150756836, |
| "learning_rate": 8.05794477138977e-06, |
| "loss": 2.0696, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.004528575310207408, |
| "grad_norm": 1.9245119094848633, |
| "learning_rate": 8.963331824354912e-06, |
| "loss": 1.4604, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00498143284122815, |
| "grad_norm": 2.389509439468384, |
| "learning_rate": 9.868718877320054e-06, |
| "loss": 1.2735, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.00543429037224889, |
| "grad_norm": 1.2380106449127197, |
| "learning_rate": 1.0774105930285198e-05, |
| "loss": 1.1165, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0058871479032696315, |
| "grad_norm": 0.7591245770454407, |
| "learning_rate": 1.167949298325034e-05, |
| "loss": 0.9562, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.006340005434290372, |
| "grad_norm": 0.6966114044189453, |
| "learning_rate": 1.2584880036215482e-05, |
| "loss": 0.8385, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.006792862965311113, |
| "grad_norm": 0.6013082265853882, |
| "learning_rate": 1.3490267089180624e-05, |
| "loss": 0.7487, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.007245720496331854, |
| "grad_norm": 0.5370333790779114, |
| "learning_rate": 1.4395654142145767e-05, |
| "loss": 0.7016, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.007698578027352595, |
| "grad_norm": 0.4565121531486511, |
| "learning_rate": 1.530104119511091e-05, |
| "loss": 0.6324, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.008151435558373336, |
| "grad_norm": 0.4546894431114197, |
| "learning_rate": 1.6206428248076053e-05, |
| "loss": 0.6874, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.008604293089394076, |
| "grad_norm": 0.38576531410217285, |
| "learning_rate": 1.7111815301041197e-05, |
| "loss": 0.6267, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.009057150620414817, |
| "grad_norm": 0.4952828884124756, |
| "learning_rate": 1.8017202354006337e-05, |
| "loss": 0.5919, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.009510008151435559, |
| "grad_norm": 0.3792784512042999, |
| "learning_rate": 1.892258940697148e-05, |
| "loss": 0.6007, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0099628656824563, |
| "grad_norm": 0.46156740188598633, |
| "learning_rate": 1.9827976459936622e-05, |
| "loss": 0.5762, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.01041572321347704, |
| "grad_norm": 0.4529961347579956, |
| "learning_rate": 2.0733363512901766e-05, |
| "loss": 0.55, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.01086858074449778, |
| "grad_norm": 0.47309136390686035, |
| "learning_rate": 2.163875056586691e-05, |
| "loss": 0.5293, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.011321438275518523, |
| "grad_norm": 0.4536254405975342, |
| "learning_rate": 2.254413761883205e-05, |
| "loss": 0.4997, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.011774295806539263, |
| "grad_norm": 0.6276798248291016, |
| "learning_rate": 2.3449524671797194e-05, |
| "loss": 0.541, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.012227153337560004, |
| "grad_norm": 0.5525098443031311, |
| "learning_rate": 2.4354911724762335e-05, |
| "loss": 0.538, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.012680010868580744, |
| "grad_norm": 0.40901291370391846, |
| "learning_rate": 2.5260298777727482e-05, |
| "loss": 0.5161, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.013132868399601485, |
| "grad_norm": 0.46389591693878174, |
| "learning_rate": 2.6165685830692623e-05, |
| "loss": 0.4911, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.013585725930622227, |
| "grad_norm": 0.5157365798950195, |
| "learning_rate": 2.7071072883657767e-05, |
| "loss": 0.5064, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.014038583461642967, |
| "grad_norm": 0.5174415707588196, |
| "learning_rate": 2.7976459936622907e-05, |
| "loss": 0.4649, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.014491440992663708, |
| "grad_norm": 0.5348315834999084, |
| "learning_rate": 2.888184698958805e-05, |
| "loss": 0.4376, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.014944298523684448, |
| "grad_norm": 0.592741072177887, |
| "learning_rate": 2.9787234042553192e-05, |
| "loss": 0.4873, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.01539715605470519, |
| "grad_norm": 0.6220499277114868, |
| "learning_rate": 3.0692621095518336e-05, |
| "loss": 0.4712, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.01585001358572593, |
| "grad_norm": 0.6278738975524902, |
| "learning_rate": 3.159800814848348e-05, |
| "loss": 0.4564, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.01630287111674667, |
| "grad_norm": 0.7244141697883606, |
| "learning_rate": 3.2503395201448624e-05, |
| "loss": 0.4701, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.016755728647767414, |
| "grad_norm": 0.6748137474060059, |
| "learning_rate": 3.340878225441376e-05, |
| "loss": 0.4641, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.017208586178788152, |
| "grad_norm": 0.5243514180183411, |
| "learning_rate": 3.4314169307378905e-05, |
| "loss": 0.445, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.017661443709808895, |
| "grad_norm": 0.7680507302284241, |
| "learning_rate": 3.521955636034405e-05, |
| "loss": 0.4426, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.018114301240829633, |
| "grad_norm": 0.5714876651763916, |
| "learning_rate": 3.612494341330919e-05, |
| "loss": 0.4399, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.018567158771850376, |
| "grad_norm": 0.8051562905311584, |
| "learning_rate": 3.703033046627433e-05, |
| "loss": 0.4358, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.019020016302871118, |
| "grad_norm": 0.6920434236526489, |
| "learning_rate": 3.793571751923948e-05, |
| "loss": 0.4504, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.019472873833891857, |
| "grad_norm": 0.548530101776123, |
| "learning_rate": 3.884110457220462e-05, |
| "loss": 0.4391, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.0199257313649126, |
| "grad_norm": 0.8991818428039551, |
| "learning_rate": 3.974649162516976e-05, |
| "loss": 0.4269, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.02037858889593334, |
| "grad_norm": 0.7665138244628906, |
| "learning_rate": 4.0651878678134906e-05, |
| "loss": 0.4311, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.02083144642695408, |
| "grad_norm": 0.7382726669311523, |
| "learning_rate": 4.155726573110005e-05, |
| "loss": 0.3808, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.021284303957974822, |
| "grad_norm": 0.7119817733764648, |
| "learning_rate": 4.246265278406519e-05, |
| "loss": 0.4154, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.02173716148899556, |
| "grad_norm": 0.6397897005081177, |
| "learning_rate": 4.336803983703033e-05, |
| "loss": 0.4125, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.022190019020016303, |
| "grad_norm": 0.6061927080154419, |
| "learning_rate": 4.4273426889995475e-05, |
| "loss": 0.4004, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.022642876551037045, |
| "grad_norm": 0.6459689140319824, |
| "learning_rate": 4.517881394296062e-05, |
| "loss": 0.3976, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.023095734082057784, |
| "grad_norm": 0.8386490345001221, |
| "learning_rate": 4.6084200995925756e-05, |
| "loss": 0.4249, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.023548591613078526, |
| "grad_norm": 0.6931592226028442, |
| "learning_rate": 4.6989588048890906e-05, |
| "loss": 0.4099, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.024001449144099265, |
| "grad_norm": 0.6584775447845459, |
| "learning_rate": 4.7894975101856044e-05, |
| "loss": 0.405, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.024454306675120007, |
| "grad_norm": 0.728175938129425, |
| "learning_rate": 4.880036215482119e-05, |
| "loss": 0.4054, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.02490716420614075, |
| "grad_norm": 0.8367078304290771, |
| "learning_rate": 4.970574920778633e-05, |
| "loss": 0.3941, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.025360021737161488, |
| "grad_norm": 0.8447272777557373, |
| "learning_rate": 5.061113626075147e-05, |
| "loss": 0.3893, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.02581287926818223, |
| "grad_norm": 0.8425686359405518, |
| "learning_rate": 5.151652331371661e-05, |
| "loss": 0.4156, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.02626573679920297, |
| "grad_norm": 0.8669265508651733, |
| "learning_rate": 5.2421910366681757e-05, |
| "loss": 0.4148, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.02671859433022371, |
| "grad_norm": 0.7871622443199158, |
| "learning_rate": 5.332729741964691e-05, |
| "loss": 0.3944, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.027171451861244453, |
| "grad_norm": 0.7147888541221619, |
| "learning_rate": 5.423268447261205e-05, |
| "loss": 0.4041, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.027624309392265192, |
| "grad_norm": 0.8785873651504517, |
| "learning_rate": 5.513807152557718e-05, |
| "loss": 0.3851, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.028077166923285934, |
| "grad_norm": 0.7281315326690674, |
| "learning_rate": 5.604345857854233e-05, |
| "loss": 0.4027, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.028530024454306677, |
| "grad_norm": 0.9609182476997375, |
| "learning_rate": 5.6948845631507476e-05, |
| "loss": 0.3872, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.028982881985327415, |
| "grad_norm": 0.8392449021339417, |
| "learning_rate": 5.785423268447262e-05, |
| "loss": 0.4003, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.029435739516348158, |
| "grad_norm": 0.7625656723976135, |
| "learning_rate": 5.875961973743776e-05, |
| "loss": 0.3884, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.029888597047368896, |
| "grad_norm": 0.8860791921615601, |
| "learning_rate": 5.96650067904029e-05, |
| "loss": 0.4055, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.03034145457838964, |
| "grad_norm": 0.7659401893615723, |
| "learning_rate": 6.0570393843368045e-05, |
| "loss": 0.3866, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.03079431210941038, |
| "grad_norm": 0.8208166360855103, |
| "learning_rate": 6.14757808963332e-05, |
| "loss": 0.3746, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.03124716964043112, |
| "grad_norm": 0.9270297288894653, |
| "learning_rate": 6.238116794929832e-05, |
| "loss": 0.3617, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.03170002717145186, |
| "grad_norm": 0.8403503894805908, |
| "learning_rate": 6.328655500226347e-05, |
| "loss": 0.4067, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0321528847024726, |
| "grad_norm": 0.8086050152778625, |
| "learning_rate": 6.419194205522862e-05, |
| "loss": 0.3664, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.03260574223349334, |
| "grad_norm": 0.9025173187255859, |
| "learning_rate": 6.509732910819376e-05, |
| "loss": 0.3812, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.033058599764514085, |
| "grad_norm": 0.8973671197891235, |
| "learning_rate": 6.60027161611589e-05, |
| "loss": 0.3865, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.03351145729553483, |
| "grad_norm": 0.7736189365386963, |
| "learning_rate": 6.690810321412405e-05, |
| "loss": 0.3643, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.03396431482655556, |
| "grad_norm": 0.7647067308425903, |
| "learning_rate": 6.781349026708918e-05, |
| "loss": 0.394, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.034417172357576305, |
| "grad_norm": 0.8811724185943604, |
| "learning_rate": 6.871887732005433e-05, |
| "loss": 0.3877, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.03487002988859705, |
| "grad_norm": 0.7932597398757935, |
| "learning_rate": 6.962426437301947e-05, |
| "loss": 0.3703, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.03532288741961779, |
| "grad_norm": 0.7502638697624207, |
| "learning_rate": 7.052965142598461e-05, |
| "loss": 0.3616, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.03577574495063853, |
| "grad_norm": 0.9235597252845764, |
| "learning_rate": 7.143503847894976e-05, |
| "loss": 0.3856, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.03622860248165927, |
| "grad_norm": 0.839526355266571, |
| "learning_rate": 7.23404255319149e-05, |
| "loss": 0.366, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.03668146001268001, |
| "grad_norm": 0.9115842580795288, |
| "learning_rate": 7.324581258488003e-05, |
| "loss": 0.3608, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.03713431754370075, |
| "grad_norm": 0.8510302901268005, |
| "learning_rate": 7.415119963784518e-05, |
| "loss": 0.3471, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.03758717507472149, |
| "grad_norm": 0.8537122011184692, |
| "learning_rate": 7.505658669081032e-05, |
| "loss": 0.3643, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.038040032605742236, |
| "grad_norm": 0.9007611870765686, |
| "learning_rate": 7.596197374377547e-05, |
| "loss": 0.3695, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.03849289013676298, |
| "grad_norm": 0.8921451568603516, |
| "learning_rate": 7.686736079674061e-05, |
| "loss": 0.3751, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.03894574766778371, |
| "grad_norm": 0.7536324858665466, |
| "learning_rate": 7.777274784970575e-05, |
| "loss": 0.36, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.039398605198804455, |
| "grad_norm": 0.6539735198020935, |
| "learning_rate": 7.86781349026709e-05, |
| "loss": 0.3678, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.0398514627298252, |
| "grad_norm": 0.7920020222663879, |
| "learning_rate": 7.958352195563605e-05, |
| "loss": 0.3668, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.04030432026084594, |
| "grad_norm": 1.0227323770523071, |
| "learning_rate": 8.048890900860117e-05, |
| "loss": 0.36, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.04075717779186668, |
| "grad_norm": 0.7640708684921265, |
| "learning_rate": 8.139429606156632e-05, |
| "loss": 0.3253, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.04121003532288742, |
| "grad_norm": 0.7461748123168945, |
| "learning_rate": 8.229968311453147e-05, |
| "loss": 0.3713, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.04166289285390816, |
| "grad_norm": 0.7150042653083801, |
| "learning_rate": 8.320507016749661e-05, |
| "loss": 0.349, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.0421157503849289, |
| "grad_norm": 0.7429611682891846, |
| "learning_rate": 8.411045722046175e-05, |
| "loss": 0.3226, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.042568607915949644, |
| "grad_norm": 0.7522969841957092, |
| "learning_rate": 8.50158442734269e-05, |
| "loss": 0.3427, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.043021465446970386, |
| "grad_norm": 0.8997257947921753, |
| "learning_rate": 8.592123132639204e-05, |
| "loss": 0.3471, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.04347432297799112, |
| "grad_norm": 0.8524265885353088, |
| "learning_rate": 8.682661837935719e-05, |
| "loss": 0.3817, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.043927180509011864, |
| "grad_norm": 0.6731559634208679, |
| "learning_rate": 8.773200543232232e-05, |
| "loss": 0.3432, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.044380038040032606, |
| "grad_norm": 0.8779836893081665, |
| "learning_rate": 8.863739248528746e-05, |
| "loss": 0.3283, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.04483289557105335, |
| "grad_norm": 0.6812531352043152, |
| "learning_rate": 8.954277953825261e-05, |
| "loss": 0.3326, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.04528575310207409, |
| "grad_norm": 0.7251217365264893, |
| "learning_rate": 9.044816659121775e-05, |
| "loss": 0.3444, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.04528575310207409, |
| "eval_chrf": 66.89902036813824, |
| "eval_loss": 0.4501848816871643, |
| "eval_runtime": 26.7238, |
| "eval_samples_per_second": 0.374, |
| "eval_steps_per_second": 0.037, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.045738610633094826, |
| "grad_norm": 0.9436652660369873, |
| "learning_rate": 9.135355364418289e-05, |
| "loss": 0.3641, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.04619146816411557, |
| "grad_norm": 0.711843729019165, |
| "learning_rate": 9.225894069714804e-05, |
| "loss": 0.3503, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.04664432569513631, |
| "grad_norm": 0.7915553450584412, |
| "learning_rate": 9.316432775011317e-05, |
| "loss": 0.3693, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.04709718322615705, |
| "grad_norm": 0.6526038646697998, |
| "learning_rate": 9.406971480307832e-05, |
| "loss": 0.3255, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.047550040757177794, |
| "grad_norm": 0.6790297031402588, |
| "learning_rate": 9.497510185604346e-05, |
| "loss": 0.3277, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.04800289828819853, |
| "grad_norm": 0.8189061284065247, |
| "learning_rate": 9.58804889090086e-05, |
| "loss": 0.3457, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.04845575581921927, |
| "grad_norm": 0.7368476986885071, |
| "learning_rate": 9.678587596197375e-05, |
| "loss": 0.3014, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.048908613350240014, |
| "grad_norm": 0.998906135559082, |
| "learning_rate": 9.76912630149389e-05, |
| "loss": 0.3247, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.049361470881260756, |
| "grad_norm": 0.7826228737831116, |
| "learning_rate": 9.859665006790402e-05, |
| "loss": 0.3101, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.0498143284122815, |
| "grad_norm": 0.7113918662071228, |
| "learning_rate": 9.950203712086917e-05, |
| "loss": 0.3164, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.050267185943302234, |
| "grad_norm": 0.8249057531356812, |
| "learning_rate": 0.00010040742417383431, |
| "loss": 0.3567, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.050720043474322976, |
| "grad_norm": 0.636049211025238, |
| "learning_rate": 0.00010131281122679945, |
| "loss": 0.3037, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.05117290100534372, |
| "grad_norm": 0.7211443185806274, |
| "learning_rate": 0.0001022181982797646, |
| "loss": 0.3284, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.05162575853636446, |
| "grad_norm": 0.7342743277549744, |
| "learning_rate": 0.00010312358533272975, |
| "loss": 0.3229, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.0520786160673852, |
| "grad_norm": 0.7419087886810303, |
| "learning_rate": 0.00010402897238569489, |
| "loss": 0.3253, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.05253147359840594, |
| "grad_norm": 0.8502224087715149, |
| "learning_rate": 0.00010493435943866004, |
| "loss": 0.3233, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.05298433112942668, |
| "grad_norm": 0.5899739861488342, |
| "learning_rate": 0.00010583974649162517, |
| "loss": 0.3287, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.05343718866044742, |
| "grad_norm": 0.7512941360473633, |
| "learning_rate": 0.00010674513354459033, |
| "loss": 0.3311, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.053890046191468165, |
| "grad_norm": 1.5729906558990479, |
| "learning_rate": 0.00010765052059755548, |
| "loss": 0.3218, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.05434290372248891, |
| "grad_norm": 0.8467279076576233, |
| "learning_rate": 0.0001085559076505206, |
| "loss": 0.3308, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.05479576125350965, |
| "grad_norm": 0.7849757671356201, |
| "learning_rate": 0.00010946129470348574, |
| "loss": 0.3203, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.055248618784530384, |
| "grad_norm": 0.8003832697868347, |
| "learning_rate": 0.00011036668175645089, |
| "loss": 0.3489, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.05570147631555113, |
| "grad_norm": 0.8363836407661438, |
| "learning_rate": 0.00011127206880941602, |
| "loss": 0.3116, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.05615433384657187, |
| "grad_norm": 0.6694333553314209, |
| "learning_rate": 0.00011217745586238118, |
| "loss": 0.3133, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.05660719137759261, |
| "grad_norm": 0.6896982789039612, |
| "learning_rate": 0.00011308284291534633, |
| "loss": 0.3219, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.05706004890861335, |
| "grad_norm": 0.631035566329956, |
| "learning_rate": 0.00011398822996831146, |
| "loss": 0.3516, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.05751290643963409, |
| "grad_norm": 0.7124539017677307, |
| "learning_rate": 0.00011489361702127661, |
| "loss": 0.3234, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.05796576397065483, |
| "grad_norm": 0.6392337083816528, |
| "learning_rate": 0.00011579900407424174, |
| "loss": 0.3552, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.05841862150167557, |
| "grad_norm": 0.5689034461975098, |
| "learning_rate": 0.00011670439112720688, |
| "loss": 0.3237, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.058871479032696315, |
| "grad_norm": 0.7601428031921387, |
| "learning_rate": 0.00011760977818017203, |
| "loss": 0.3261, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.05932433656371706, |
| "grad_norm": 0.8485261797904968, |
| "learning_rate": 0.00011851516523313718, |
| "loss": 0.3211, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.05977719409473779, |
| "grad_norm": 0.616400957107544, |
| "learning_rate": 0.00011942055228610231, |
| "loss": 0.3439, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.060230051625758535, |
| "grad_norm": 0.8092880845069885, |
| "learning_rate": 0.00012032593933906746, |
| "loss": 0.3118, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.06068290915677928, |
| "grad_norm": 0.7503390908241272, |
| "learning_rate": 0.0001212313263920326, |
| "loss": 0.3077, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.06113576668780002, |
| "grad_norm": 0.7002999186515808, |
| "learning_rate": 0.00012213671344499775, |
| "loss": 0.2747, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.06158862421882076, |
| "grad_norm": 0.7599196434020996, |
| "learning_rate": 0.00012304210049796286, |
| "loss": 0.3072, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.0620414817498415, |
| "grad_norm": 0.7310253381729126, |
| "learning_rate": 0.00012394748755092803, |
| "loss": 0.3286, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.06249433928086224, |
| "grad_norm": 0.8364066481590271, |
| "learning_rate": 0.00012485287460389316, |
| "loss": 0.3164, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.06294719681188299, |
| "grad_norm": 0.6382613182067871, |
| "learning_rate": 0.0001257582616568583, |
| "loss": 0.2996, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.06340005434290372, |
| "grad_norm": 0.8997701406478882, |
| "learning_rate": 0.00012666364870982347, |
| "loss": 0.3191, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.06385291187392446, |
| "grad_norm": 0.8146569728851318, |
| "learning_rate": 0.0001275690357627886, |
| "loss": 0.3146, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.0643057694049452, |
| "grad_norm": 0.6676255464553833, |
| "learning_rate": 0.00012847442281575374, |
| "loss": 0.3181, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.06475862693596594, |
| "grad_norm": 0.5385376811027527, |
| "learning_rate": 0.0001293798098687189, |
| "loss": 0.2957, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.06521148446698669, |
| "grad_norm": 0.8492608070373535, |
| "learning_rate": 0.00013028519692168401, |
| "loss": 0.3162, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.06566434199800743, |
| "grad_norm": 0.9273693561553955, |
| "learning_rate": 0.00013119058397464915, |
| "loss": 0.3402, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.06611719952902817, |
| "grad_norm": 0.5114811658859253, |
| "learning_rate": 0.00013209597102761432, |
| "loss": 0.3088, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.06657005706004891, |
| "grad_norm": 0.6595302224159241, |
| "learning_rate": 0.00013300135808057945, |
| "loss": 0.3436, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.06702291459106965, |
| "grad_norm": 0.7126308679580688, |
| "learning_rate": 0.0001339067451335446, |
| "loss": 0.3141, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.0674757721220904, |
| "grad_norm": 0.6146643161773682, |
| "learning_rate": 0.00013481213218650975, |
| "loss": 0.2798, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.06792862965311112, |
| "grad_norm": 0.8765645623207092, |
| "learning_rate": 0.0001357175192394749, |
| "loss": 0.3122, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.06838148718413187, |
| "grad_norm": 0.6217523217201233, |
| "learning_rate": 0.00013662290629244003, |
| "loss": 0.3218, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.06883434471515261, |
| "grad_norm": 0.5067332983016968, |
| "learning_rate": 0.00013752829334540517, |
| "loss": 0.3046, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.06928720224617335, |
| "grad_norm": 0.6011443734169006, |
| "learning_rate": 0.0001384336803983703, |
| "loss": 0.3341, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.0697400597771941, |
| "grad_norm": 0.5284327268600464, |
| "learning_rate": 0.00013933906745133544, |
| "loss": 0.3042, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.07019291730821484, |
| "grad_norm": 0.5794896483421326, |
| "learning_rate": 0.0001402444545043006, |
| "loss": 0.3021, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.07064577483923558, |
| "grad_norm": 0.5913949012756348, |
| "learning_rate": 0.00014114984155726574, |
| "loss": 0.2878, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.07109863237025632, |
| "grad_norm": 0.8013747334480286, |
| "learning_rate": 0.00014205522861023088, |
| "loss": 0.2926, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.07155148990127706, |
| "grad_norm": 0.4837814271450043, |
| "learning_rate": 0.00014296061566319604, |
| "loss": 0.2954, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.0720043474322978, |
| "grad_norm": 0.6365007162094116, |
| "learning_rate": 0.00014386600271616118, |
| "loss": 0.3182, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.07245720496331853, |
| "grad_norm": 0.6202572584152222, |
| "learning_rate": 0.0001447713897691263, |
| "loss": 0.3072, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.07291006249433928, |
| "grad_norm": 0.7837294936180115, |
| "learning_rate": 0.00014567677682209145, |
| "loss": 0.3225, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.07336292002536002, |
| "grad_norm": 0.5507187843322754, |
| "learning_rate": 0.0001465821638750566, |
| "loss": 0.2954, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.07381577755638076, |
| "grad_norm": 0.5234072804450989, |
| "learning_rate": 0.00014748755092802173, |
| "loss": 0.3037, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.0742686350874015, |
| "grad_norm": 0.6388247013092041, |
| "learning_rate": 0.00014839293798098687, |
| "loss": 0.3214, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.07472149261842224, |
| "grad_norm": 0.5577211380004883, |
| "learning_rate": 0.00014929832503395203, |
| "loss": 0.296, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.07517435014944299, |
| "grad_norm": 0.5393535494804382, |
| "learning_rate": 0.00015020371208691717, |
| "loss": 0.2965, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.07562720768046373, |
| "grad_norm": 0.6293423771858215, |
| "learning_rate": 0.0001511090991398823, |
| "loss": 0.2917, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.07608006521148447, |
| "grad_norm": 0.49211928248405457, |
| "learning_rate": 0.00015201448619284747, |
| "loss": 0.284, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.07653292274250521, |
| "grad_norm": 0.5974680185317993, |
| "learning_rate": 0.00015291987324581258, |
| "loss": 0.2852, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.07698578027352596, |
| "grad_norm": 0.6545078754425049, |
| "learning_rate": 0.00015382526029877772, |
| "loss": 0.2782, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.07743863780454668, |
| "grad_norm": 0.5832185745239258, |
| "learning_rate": 0.00015473064735174288, |
| "loss": 0.3127, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.07789149533556743, |
| "grad_norm": 0.5393053293228149, |
| "learning_rate": 0.00015563603440470802, |
| "loss": 0.303, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.07834435286658817, |
| "grad_norm": 0.6128617525100708, |
| "learning_rate": 0.00015654142145767315, |
| "loss": 0.3028, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.07879721039760891, |
| "grad_norm": 0.6199638843536377, |
| "learning_rate": 0.00015744680851063832, |
| "loss": 0.2893, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.07925006792862965, |
| "grad_norm": 0.5525530576705933, |
| "learning_rate": 0.00015835219556360346, |
| "loss": 0.2994, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.0797029254596504, |
| "grad_norm": 0.5762396454811096, |
| "learning_rate": 0.0001592575826165686, |
| "loss": 0.3125, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.08015578299067114, |
| "grad_norm": 0.5460029244422913, |
| "learning_rate": 0.00016016296966953373, |
| "loss": 0.2752, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.08060864052169188, |
| "grad_norm": 0.5735179781913757, |
| "learning_rate": 0.00016106835672249887, |
| "loss": 0.2883, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.08106149805271262, |
| "grad_norm": 0.4762778878211975, |
| "learning_rate": 0.000161973743775464, |
| "loss": 0.2747, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.08151435558373336, |
| "grad_norm": 0.7053053975105286, |
| "learning_rate": 0.00016287913082842917, |
| "loss": 0.2996, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.08196721311475409, |
| "grad_norm": 0.5661677718162537, |
| "learning_rate": 0.0001637845178813943, |
| "loss": 0.2828, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.08242007064577483, |
| "grad_norm": 0.7437049150466919, |
| "learning_rate": 0.00016468990493435944, |
| "loss": 0.2991, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.08287292817679558, |
| "grad_norm": 0.6861435770988464, |
| "learning_rate": 0.0001655952919873246, |
| "loss": 0.2893, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.08332578570781632, |
| "grad_norm": 0.49107825756073, |
| "learning_rate": 0.00016650067904028974, |
| "loss": 0.2813, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.08377864323883706, |
| "grad_norm": 0.6133277416229248, |
| "learning_rate": 0.00016740606609325485, |
| "loss": 0.2687, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.0842315007698578, |
| "grad_norm": 0.5676704049110413, |
| "learning_rate": 0.00016831145314622002, |
| "loss": 0.2734, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.08468435830087855, |
| "grad_norm": 0.6072496175765991, |
| "learning_rate": 0.00016921684019918516, |
| "loss": 0.2867, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.08513721583189929, |
| "grad_norm": 0.5643963813781738, |
| "learning_rate": 0.0001701222272521503, |
| "loss": 0.2812, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.08559007336292003, |
| "grad_norm": 0.5513191223144531, |
| "learning_rate": 0.00017102761430511546, |
| "loss": 0.2674, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.08604293089394077, |
| "grad_norm": 0.6839463114738464, |
| "learning_rate": 0.0001719330013580806, |
| "loss": 0.2905, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.0864957884249615, |
| "grad_norm": 0.6099688410758972, |
| "learning_rate": 0.00017283838841104573, |
| "loss": 0.2887, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.08694864595598224, |
| "grad_norm": 0.611600399017334, |
| "learning_rate": 0.0001737437754640109, |
| "loss": 0.2977, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.08740150348700298, |
| "grad_norm": 0.5446760654449463, |
| "learning_rate": 0.000174649162516976, |
| "loss": 0.3095, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.08785436101802373, |
| "grad_norm": 0.5957568883895874, |
| "learning_rate": 0.00017555454956994114, |
| "loss": 0.275, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.08830721854904447, |
| "grad_norm": 0.4382849633693695, |
| "learning_rate": 0.0001764599366229063, |
| "loss": 0.2902, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.08876007608006521, |
| "grad_norm": 0.6389471292495728, |
| "learning_rate": 0.00017736532367587144, |
| "loss": 0.3022, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.08921293361108595, |
| "grad_norm": 0.5382727980613708, |
| "learning_rate": 0.00017827071072883658, |
| "loss": 0.2796, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.0896657911421067, |
| "grad_norm": 0.5546545386314392, |
| "learning_rate": 0.00017917609778180175, |
| "loss": 0.3119, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.09011864867312744, |
| "grad_norm": 0.5750550627708435, |
| "learning_rate": 0.00018008148483476688, |
| "loss": 0.2573, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.09057150620414818, |
| "grad_norm": 0.5644318461418152, |
| "learning_rate": 0.00018098687188773202, |
| "loss": 0.2822, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.09057150620414818, |
| "eval_chrf": 51.4571346316286, |
| "eval_loss": 0.29403626918792725, |
| "eval_runtime": 26.2584, |
| "eval_samples_per_second": 0.381, |
| "eval_steps_per_second": 0.038, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.09102436373516892, |
| "grad_norm": 0.5000986456871033, |
| "learning_rate": 0.00018189225894069716, |
| "loss": 0.2905, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.09147722126618965, |
| "grad_norm": 0.5213611125946045, |
| "learning_rate": 0.0001827976459936623, |
| "loss": 0.2849, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.0919300787972104, |
| "grad_norm": 0.6767968535423279, |
| "learning_rate": 0.00018370303304662743, |
| "loss": 0.2831, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.09238293632823114, |
| "grad_norm": 0.4952644109725952, |
| "learning_rate": 0.00018460842009959257, |
| "loss": 0.2778, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.09283579385925188, |
| "grad_norm": 0.6272353529930115, |
| "learning_rate": 0.00018551380715255773, |
| "loss": 0.3121, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.09328865139027262, |
| "grad_norm": 0.5801631808280945, |
| "learning_rate": 0.00018641919420552287, |
| "loss": 0.2918, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.09374150892129336, |
| "grad_norm": 0.47861814498901367, |
| "learning_rate": 0.000187324581258488, |
| "loss": 0.2693, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.0941943664523141, |
| "grad_norm": 0.5323045253753662, |
| "learning_rate": 0.00018822996831145317, |
| "loss": 0.2807, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.09464722398333485, |
| "grad_norm": 0.7477088570594788, |
| "learning_rate": 0.00018913535536441828, |
| "loss": 0.2658, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.09510008151435559, |
| "grad_norm": 0.5277721881866455, |
| "learning_rate": 0.00019004074241738342, |
| "loss": 0.2895, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.09555293904537633, |
| "grad_norm": 0.781351625919342, |
| "learning_rate": 0.00019094612947034858, |
| "loss": 0.2667, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.09600579657639706, |
| "grad_norm": 0.4823305010795593, |
| "learning_rate": 0.00019185151652331372, |
| "loss": 0.2772, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.0964586541074178, |
| "grad_norm": 0.5646785497665405, |
| "learning_rate": 0.00019275690357627886, |
| "loss": 0.2634, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.09691151163843854, |
| "grad_norm": 0.5562229156494141, |
| "learning_rate": 0.00019366229062924402, |
| "loss": 0.2482, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.09736436916945929, |
| "grad_norm": 0.3854808509349823, |
| "learning_rate": 0.00019456767768220916, |
| "loss": 0.2549, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.09781722670048003, |
| "grad_norm": 0.4851052165031433, |
| "learning_rate": 0.0001954730647351743, |
| "loss": 0.3053, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.09827008423150077, |
| "grad_norm": 0.6279167532920837, |
| "learning_rate": 0.00019637845178813943, |
| "loss": 0.2529, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.09872294176252151, |
| "grad_norm": 0.5062315464019775, |
| "learning_rate": 0.00019728383884110457, |
| "loss": 0.2813, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.09917579929354225, |
| "grad_norm": 0.7650270462036133, |
| "learning_rate": 0.0001981892258940697, |
| "loss": 0.2782, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.099628656824563, |
| "grad_norm": 0.684183657169342, |
| "learning_rate": 0.00019909461294703487, |
| "loss": 0.2933, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.10008151435558374, |
| "grad_norm": 0.4574642479419708, |
| "learning_rate": 0.0002, |
| "loss": 0.2812, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.10053437188660447, |
| "grad_norm": 0.45532846450805664, |
| "learning_rate": 0.0001999999719648891, |
| "loss": 0.277, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.10098722941762521, |
| "grad_norm": 0.5651437640190125, |
| "learning_rate": 0.0001999998878595721, |
| "loss": 0.2575, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.10144008694864595, |
| "grad_norm": 0.5053766369819641, |
| "learning_rate": 0.00019999974768409614, |
| "loss": 0.3048, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.1018929444796667, |
| "grad_norm": 0.6162516474723816, |
| "learning_rate": 0.00019999955143853982, |
| "loss": 0.2515, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.10234580201068744, |
| "grad_norm": 0.5970934629440308, |
| "learning_rate": 0.00019999929912301322, |
| "loss": 0.3024, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.10279865954170818, |
| "grad_norm": 0.6110081672668457, |
| "learning_rate": 0.00019999899073765774, |
| "loss": 0.2766, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.10325151707272892, |
| "grad_norm": 0.5295965075492859, |
| "learning_rate": 0.00019999862628264633, |
| "loss": 0.228, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.10370437460374966, |
| "grad_norm": 0.41794171929359436, |
| "learning_rate": 0.00019999820575818336, |
| "loss": 0.2703, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.1041572321347704, |
| "grad_norm": 0.5175167918205261, |
| "learning_rate": 0.00019999772916450464, |
| "loss": 0.2896, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.10461008966579115, |
| "grad_norm": 0.6496297121047974, |
| "learning_rate": 0.00019999719650187729, |
| "loss": 0.2708, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.10506294719681188, |
| "grad_norm": 0.5578742027282715, |
| "learning_rate": 0.0001999966077706001, |
| "loss": 0.2749, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.10551580472783262, |
| "grad_norm": 0.5760526657104492, |
| "learning_rate": 0.00019999596297100308, |
| "loss": 0.2867, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.10596866225885336, |
| "grad_norm": 0.5972622632980347, |
| "learning_rate": 0.00019999526210344784, |
| "loss": 0.2513, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.1064215197898741, |
| "grad_norm": 0.6074303388595581, |
| "learning_rate": 0.0001999945051683273, |
| "loss": 0.2624, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.10687437732089484, |
| "grad_norm": 0.460184782743454, |
| "learning_rate": 0.00019999369216606593, |
| "loss": 0.2671, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.10732723485191559, |
| "grad_norm": 0.5181593298912048, |
| "learning_rate": 0.00019999282309711953, |
| "loss": 0.2701, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.10778009238293633, |
| "grad_norm": 0.600666880607605, |
| "learning_rate": 0.00019999189796197545, |
| "loss": 0.2891, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.10823294991395707, |
| "grad_norm": 0.49728748202323914, |
| "learning_rate": 0.0001999909167611523, |
| "loss": 0.2754, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.10868580744497781, |
| "grad_norm": 0.5172849297523499, |
| "learning_rate": 0.00019998987949520038, |
| "loss": 0.2507, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.10913866497599856, |
| "grad_norm": 0.43115153908729553, |
| "learning_rate": 0.00019998878616470122, |
| "loss": 0.2827, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.1095915225070193, |
| "grad_norm": 0.49532899260520935, |
| "learning_rate": 0.00019998763677026786, |
| "loss": 0.2568, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.11004438003804003, |
| "grad_norm": 0.9006966352462769, |
| "learning_rate": 0.00019998643131254474, |
| "loss": 0.2636, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.11049723756906077, |
| "grad_norm": 0.40261346101760864, |
| "learning_rate": 0.00019998516979220782, |
| "loss": 0.238, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.11095009510008151, |
| "grad_norm": 0.5829101204872131, |
| "learning_rate": 0.0001999838522099644, |
| "loss": 0.2649, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.11140295263110225, |
| "grad_norm": 0.5074893236160278, |
| "learning_rate": 0.00019998247856655325, |
| "loss": 0.2833, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.111855810162123, |
| "grad_norm": 0.544731616973877, |
| "learning_rate": 0.00019998104886274462, |
| "loss": 0.2562, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.11230866769314374, |
| "grad_norm": 0.5525861978530884, |
| "learning_rate": 0.00019997956309934006, |
| "loss": 0.2751, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.11276152522416448, |
| "grad_norm": 0.5456953644752502, |
| "learning_rate": 0.00019997802127717272, |
| "loss": 0.2757, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.11321438275518522, |
| "grad_norm": 0.4306289553642273, |
| "learning_rate": 0.00019997642339710707, |
| "loss": 0.2539, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.11366724028620596, |
| "grad_norm": 0.4044501781463623, |
| "learning_rate": 0.00019997476946003907, |
| "loss": 0.2585, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.1141200978172267, |
| "grad_norm": 0.4749915599822998, |
| "learning_rate": 0.00019997305946689605, |
| "loss": 0.2666, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.11457295534824743, |
| "grad_norm": 0.4891221523284912, |
| "learning_rate": 0.0001999712934186368, |
| "loss": 0.2489, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.11502581287926818, |
| "grad_norm": 0.6200125813484192, |
| "learning_rate": 0.0001999694713162516, |
| "loss": 0.2787, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.11547867041028892, |
| "grad_norm": 0.49395912885665894, |
| "learning_rate": 0.00019996759316076208, |
| "loss": 0.2397, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.11593152794130966, |
| "grad_norm": 0.5160639882087708, |
| "learning_rate": 0.0001999656589532213, |
| "loss": 0.2616, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.1163843854723304, |
| "grad_norm": 0.473092257976532, |
| "learning_rate": 0.0001999636686947138, |
| "loss": 0.2634, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.11683724300335115, |
| "grad_norm": 0.6124957799911499, |
| "learning_rate": 0.00019996162238635555, |
| "loss": 0.2649, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.11729010053437189, |
| "grad_norm": 0.36893054842948914, |
| "learning_rate": 0.00019995952002929387, |
| "loss": 0.2634, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.11774295806539263, |
| "grad_norm": 0.5636609196662903, |
| "learning_rate": 0.0001999573616247076, |
| "loss": 0.2693, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.11819581559641337, |
| "grad_norm": 0.539794921875, |
| "learning_rate": 0.0001999551471738069, |
| "loss": 0.2305, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.11864867312743411, |
| "grad_norm": 0.3865772485733032, |
| "learning_rate": 0.00019995287667783348, |
| "loss": 0.2461, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.11910153065845484, |
| "grad_norm": 0.4933566451072693, |
| "learning_rate": 0.00019995055013806039, |
| "loss": 0.2805, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.11955438818947559, |
| "grad_norm": 0.492123544216156, |
| "learning_rate": 0.0001999481675557921, |
| "loss": 0.256, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.12000724572049633, |
| "grad_norm": 0.5840746760368347, |
| "learning_rate": 0.0001999457289323646, |
| "loss": 0.2478, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.12046010325151707, |
| "grad_norm": 0.47510668635368347, |
| "learning_rate": 0.00019994323426914513, |
| "loss": 0.2809, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.12091296078253781, |
| "grad_norm": 0.45924919843673706, |
| "learning_rate": 0.00019994068356753256, |
| "loss": 0.2281, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.12136581831355855, |
| "grad_norm": 0.5342031121253967, |
| "learning_rate": 0.00019993807682895697, |
| "loss": 0.2509, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.1218186758445793, |
| "grad_norm": 0.6540825963020325, |
| "learning_rate": 0.00019993541405488005, |
| "loss": 0.2748, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.12227153337560004, |
| "grad_norm": 0.7478682398796082, |
| "learning_rate": 0.00019993269524679478, |
| "loss": 0.258, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.12272439090662078, |
| "grad_norm": 0.477021723985672, |
| "learning_rate": 0.00019992992040622563, |
| "loss": 0.2692, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.12317724843764152, |
| "grad_norm": 0.39322835206985474, |
| "learning_rate": 0.0001999270895347284, |
| "loss": 0.2497, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.12363010596866227, |
| "grad_norm": 0.5442454218864441, |
| "learning_rate": 0.00019992420263389047, |
| "loss": 0.2625, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.124082963499683, |
| "grad_norm": 0.6335331797599792, |
| "learning_rate": 0.00019992125970533041, |
| "loss": 0.2844, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.12453582103070374, |
| "grad_norm": 0.5638910531997681, |
| "learning_rate": 0.00019991826075069842, |
| "loss": 0.2569, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.12498867856172448, |
| "grad_norm": 0.4456355571746826, |
| "learning_rate": 0.00019991520577167596, |
| "loss": 0.2745, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.12544153609274522, |
| "grad_norm": 0.40395674109458923, |
| "learning_rate": 0.000199912094769976, |
| "loss": 0.2483, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.12589439362376598, |
| "grad_norm": 0.48974549770355225, |
| "learning_rate": 0.0001999089277473429, |
| "loss": 0.2388, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.1263472511547867, |
| "grad_norm": 0.5266743302345276, |
| "learning_rate": 0.00019990570470555235, |
| "loss": 0.2382, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.12680010868580743, |
| "grad_norm": 0.5559818744659424, |
| "learning_rate": 0.00019990242564641157, |
| "loss": 0.2503, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.1272529662168282, |
| "grad_norm": 0.47886645793914795, |
| "learning_rate": 0.00019989909057175912, |
| "loss": 0.2432, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.12770582374784892, |
| "grad_norm": 0.5434776544570923, |
| "learning_rate": 0.000199895699483465, |
| "loss": 0.2484, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.12815868127886967, |
| "grad_norm": 0.5014550089836121, |
| "learning_rate": 0.00019989225238343058, |
| "loss": 0.2514, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.1286115388098904, |
| "grad_norm": 0.5822556018829346, |
| "learning_rate": 0.00019988874927358868, |
| "loss": 0.265, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.12906439634091116, |
| "grad_norm": 0.4825868606567383, |
| "learning_rate": 0.00019988519015590346, |
| "loss": 0.2876, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.1295172538719319, |
| "grad_norm": 0.4180533289909363, |
| "learning_rate": 0.00019988157503237058, |
| "loss": 0.2546, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.12997011140295264, |
| "grad_norm": 0.3579380512237549, |
| "learning_rate": 0.000199877903905017, |
| "loss": 0.2413, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.13042296893397337, |
| "grad_norm": 0.4857385456562042, |
| "learning_rate": 0.00019987417677590113, |
| "loss": 0.2704, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.1308758264649941, |
| "grad_norm": 0.5797598958015442, |
| "learning_rate": 0.0001998703936471128, |
| "loss": 0.2314, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.13132868399601486, |
| "grad_norm": 0.6053803563117981, |
| "learning_rate": 0.00019986655452077328, |
| "loss": 0.2407, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.13178154152703558, |
| "grad_norm": 0.5609238743782043, |
| "learning_rate": 0.00019986265939903505, |
| "loss": 0.2219, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.13223439905805634, |
| "grad_norm": 0.6816825270652771, |
| "learning_rate": 0.0001998587082840822, |
| "loss": 0.2516, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.13268725658907707, |
| "grad_norm": 0.6507164239883423, |
| "learning_rate": 0.0001998547011781301, |
| "loss": 0.2366, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.13314011412009782, |
| "grad_norm": 0.4358994662761688, |
| "learning_rate": 0.00019985063808342557, |
| "loss": 0.2484, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.13359297165111855, |
| "grad_norm": 0.5335454940795898, |
| "learning_rate": 0.00019984651900224675, |
| "loss": 0.2438, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.1340458291821393, |
| "grad_norm": 0.45652270317077637, |
| "learning_rate": 0.00019984234393690325, |
| "loss": 0.2508, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.13449868671316004, |
| "grad_norm": 0.4294167160987854, |
| "learning_rate": 0.00019983811288973603, |
| "loss": 0.2393, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.1349515442441808, |
| "grad_norm": 0.548907995223999, |
| "learning_rate": 0.00019983382586311746, |
| "loss": 0.237, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.13540440177520152, |
| "grad_norm": 0.48326581716537476, |
| "learning_rate": 0.00019982948285945126, |
| "loss": 0.2604, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.13585725930622225, |
| "grad_norm": 0.5535202622413635, |
| "learning_rate": 0.0001998250838811726, |
| "loss": 0.2646, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.13585725930622225, |
| "eval_chrf": 56.01199333702116, |
| "eval_loss": 0.23407021164894104, |
| "eval_runtime": 26.3619, |
| "eval_samples_per_second": 0.379, |
| "eval_steps_per_second": 0.038, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.136310116837243, |
| "grad_norm": 0.46452951431274414, |
| "learning_rate": 0.00019982062893074794, |
| "loss": 0.2292, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.13676297436826373, |
| "grad_norm": 0.6923738121986389, |
| "learning_rate": 0.00019981611801067525, |
| "loss": 0.2519, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.1372158318992845, |
| "grad_norm": 0.4062545895576477, |
| "learning_rate": 0.0001998115511234837, |
| "loss": 0.2558, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.13766868943030522, |
| "grad_norm": 0.5292578339576721, |
| "learning_rate": 0.0001998069282717341, |
| "loss": 0.2708, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.13812154696132597, |
| "grad_norm": 0.5739614963531494, |
| "learning_rate": 0.00019980224945801843, |
| "loss": 0.2355, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.1385744044923467, |
| "grad_norm": 0.6753970384597778, |
| "learning_rate": 0.00019979751468496005, |
| "loss": 0.2308, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.13902726202336746, |
| "grad_norm": 0.5382646322250366, |
| "learning_rate": 0.00019979272395521388, |
| "loss": 0.2623, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.1394801195543882, |
| "grad_norm": 0.5424644351005554, |
| "learning_rate": 0.00019978787727146596, |
| "loss": 0.2346, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.13993297708540894, |
| "grad_norm": 0.4321116507053375, |
| "learning_rate": 0.00019978297463643394, |
| "loss": 0.2457, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.14038583461642967, |
| "grad_norm": 0.4374447762966156, |
| "learning_rate": 0.0001997780160528667, |
| "loss": 0.2509, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.1408386921474504, |
| "grad_norm": 0.46546927094459534, |
| "learning_rate": 0.00019977300152354451, |
| "loss": 0.258, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.14129154967847116, |
| "grad_norm": 0.5650977492332458, |
| "learning_rate": 0.00019976793105127904, |
| "loss": 0.2502, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.14174440720949188, |
| "grad_norm": 0.39362677931785583, |
| "learning_rate": 0.00019976280463891336, |
| "loss": 0.2315, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.14219726474051264, |
| "grad_norm": 0.4962209463119507, |
| "learning_rate": 0.0001997576222893218, |
| "loss": 0.2504, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.14265012227153337, |
| "grad_norm": 0.44957464933395386, |
| "learning_rate": 0.00019975238400541013, |
| "loss": 0.2581, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.14310297980255413, |
| "grad_norm": 0.50096595287323, |
| "learning_rate": 0.00019974708979011548, |
| "loss": 0.2487, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.14355583733357485, |
| "grad_norm": 0.47329550981521606, |
| "learning_rate": 0.00019974173964640632, |
| "loss": 0.2276, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.1440086948645956, |
| "grad_norm": 0.36653953790664673, |
| "learning_rate": 0.0001997363335772825, |
| "loss": 0.2566, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.14446155239561634, |
| "grad_norm": 0.5317615866661072, |
| "learning_rate": 0.00019973087158577522, |
| "loss": 0.2684, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.14491440992663707, |
| "grad_norm": 0.5648652911186218, |
| "learning_rate": 0.00019972535367494698, |
| "loss": 0.226, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.14536726745765782, |
| "grad_norm": 0.4121849834918976, |
| "learning_rate": 0.00019971977984789172, |
| "loss": 0.257, |
| "step": 3210 |
| }, |
| { |
| "epoch": 0.14582012498867855, |
| "grad_norm": 0.7073838114738464, |
| "learning_rate": 0.00019971415010773473, |
| "loss": 0.2362, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.1462729825196993, |
| "grad_norm": 0.5668673515319824, |
| "learning_rate": 0.00019970846445763258, |
| "loss": 0.2478, |
| "step": 3230 |
| }, |
| { |
| "epoch": 0.14672584005072004, |
| "grad_norm": 0.5943505764007568, |
| "learning_rate": 0.0001997027229007732, |
| "loss": 0.2309, |
| "step": 3240 |
| }, |
| { |
| "epoch": 0.1471786975817408, |
| "grad_norm": 0.6767163872718811, |
| "learning_rate": 0.00019969692544037594, |
| "loss": 0.2482, |
| "step": 3250 |
| }, |
| { |
| "epoch": 0.14763155511276152, |
| "grad_norm": 0.5584532022476196, |
| "learning_rate": 0.00019969107207969144, |
| "loss": 0.2383, |
| "step": 3260 |
| }, |
| { |
| "epoch": 0.14808441264378228, |
| "grad_norm": 0.5313519239425659, |
| "learning_rate": 0.00019968516282200171, |
| "loss": 0.2211, |
| "step": 3270 |
| }, |
| { |
| "epoch": 0.148537270174803, |
| "grad_norm": 0.5229864120483398, |
| "learning_rate": 0.00019967919767062005, |
| "loss": 0.2616, |
| "step": 3280 |
| }, |
| { |
| "epoch": 0.14899012770582376, |
| "grad_norm": 0.4079675078392029, |
| "learning_rate": 0.00019967317662889113, |
| "loss": 0.2674, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.1494429852368445, |
| "grad_norm": 0.44019877910614014, |
| "learning_rate": 0.000199667099700191, |
| "loss": 0.241, |
| "step": 3300 |
| }, |
| { |
| "epoch": 0.14989584276786522, |
| "grad_norm": 0.6310319304466248, |
| "learning_rate": 0.00019966096688792695, |
| "loss": 0.245, |
| "step": 3310 |
| }, |
| { |
| "epoch": 0.15034870029888597, |
| "grad_norm": 0.5826947093009949, |
| "learning_rate": 0.0001996547781955377, |
| "loss": 0.2588, |
| "step": 3320 |
| }, |
| { |
| "epoch": 0.1508015578299067, |
| "grad_norm": 0.5099973678588867, |
| "learning_rate": 0.00019964853362649328, |
| "loss": 0.2528, |
| "step": 3330 |
| }, |
| { |
| "epoch": 0.15125441536092746, |
| "grad_norm": 0.5420626401901245, |
| "learning_rate": 0.000199642233184295, |
| "loss": 0.2732, |
| "step": 3340 |
| }, |
| { |
| "epoch": 0.15170727289194819, |
| "grad_norm": 0.5267677903175354, |
| "learning_rate": 0.00019963587687247555, |
| "loss": 0.2206, |
| "step": 3350 |
| }, |
| { |
| "epoch": 0.15216013042296894, |
| "grad_norm": 0.46923214197158813, |
| "learning_rate": 0.0001996294646945989, |
| "loss": 0.2431, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.15261298795398967, |
| "grad_norm": 0.5497370362281799, |
| "learning_rate": 0.00019962299665426042, |
| "loss": 0.2626, |
| "step": 3370 |
| }, |
| { |
| "epoch": 0.15306584548501043, |
| "grad_norm": 0.4924209713935852, |
| "learning_rate": 0.00019961647275508674, |
| "loss": 0.2491, |
| "step": 3380 |
| }, |
| { |
| "epoch": 0.15351870301603115, |
| "grad_norm": 0.4276602268218994, |
| "learning_rate": 0.00019960989300073578, |
| "loss": 0.261, |
| "step": 3390 |
| }, |
| { |
| "epoch": 0.1539715605470519, |
| "grad_norm": 0.45627403259277344, |
| "learning_rate": 0.0001996032573948969, |
| "loss": 0.2398, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.15442441807807264, |
| "grad_norm": 0.5323420166969299, |
| "learning_rate": 0.00019959656594129062, |
| "loss": 0.232, |
| "step": 3410 |
| }, |
| { |
| "epoch": 0.15487727560909337, |
| "grad_norm": 0.4109100103378296, |
| "learning_rate": 0.0001995898186436689, |
| "loss": 0.2539, |
| "step": 3420 |
| }, |
| { |
| "epoch": 0.15533013314011412, |
| "grad_norm": 0.7459518313407898, |
| "learning_rate": 0.00019958301550581495, |
| "loss": 0.2421, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.15578299067113485, |
| "grad_norm": 0.3366219103336334, |
| "learning_rate": 0.00019957615653154332, |
| "loss": 0.2295, |
| "step": 3440 |
| }, |
| { |
| "epoch": 0.1562358482021556, |
| "grad_norm": 0.5722688436508179, |
| "learning_rate": 0.00019956924172469983, |
| "loss": 0.2135, |
| "step": 3450 |
| }, |
| { |
| "epoch": 0.15668870573317634, |
| "grad_norm": 0.47314661741256714, |
| "learning_rate": 0.00019956227108916164, |
| "loss": 0.2399, |
| "step": 3460 |
| }, |
| { |
| "epoch": 0.1571415632641971, |
| "grad_norm": 0.40694594383239746, |
| "learning_rate": 0.0001995552446288372, |
| "loss": 0.2349, |
| "step": 3470 |
| }, |
| { |
| "epoch": 0.15759442079521782, |
| "grad_norm": 0.44671499729156494, |
| "learning_rate": 0.00019954816234766626, |
| "loss": 0.2225, |
| "step": 3480 |
| }, |
| { |
| "epoch": 0.15804727832623858, |
| "grad_norm": 0.4138961434364319, |
| "learning_rate": 0.00019954102424961986, |
| "loss": 0.241, |
| "step": 3490 |
| }, |
| { |
| "epoch": 0.1585001358572593, |
| "grad_norm": 0.40024974942207336, |
| "learning_rate": 0.00019953383033870033, |
| "loss": 0.2592, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.15895299338828003, |
| "grad_norm": 0.43061670660972595, |
| "learning_rate": 0.0001995265806189414, |
| "loss": 0.2329, |
| "step": 3510 |
| }, |
| { |
| "epoch": 0.1594058509193008, |
| "grad_norm": 0.47086796164512634, |
| "learning_rate": 0.00019951927509440792, |
| "loss": 0.2208, |
| "step": 3520 |
| }, |
| { |
| "epoch": 0.15985870845032152, |
| "grad_norm": 0.4552053213119507, |
| "learning_rate": 0.00019951191376919614, |
| "loss": 0.2148, |
| "step": 3530 |
| }, |
| { |
| "epoch": 0.16031156598134227, |
| "grad_norm": 0.5751559138298035, |
| "learning_rate": 0.00019950449664743358, |
| "loss": 0.2623, |
| "step": 3540 |
| }, |
| { |
| "epoch": 0.160764423512363, |
| "grad_norm": 0.4023667573928833, |
| "learning_rate": 0.000199497023733279, |
| "loss": 0.2547, |
| "step": 3550 |
| }, |
| { |
| "epoch": 0.16121728104338376, |
| "grad_norm": 0.37084415555000305, |
| "learning_rate": 0.00019948949503092252, |
| "loss": 0.2386, |
| "step": 3560 |
| }, |
| { |
| "epoch": 0.1616701385744045, |
| "grad_norm": 0.46690812706947327, |
| "learning_rate": 0.0001994819105445855, |
| "loss": 0.2482, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.16212299610542524, |
| "grad_norm": 0.3902026414871216, |
| "learning_rate": 0.00019947427027852053, |
| "loss": 0.2395, |
| "step": 3580 |
| }, |
| { |
| "epoch": 0.16257585363644597, |
| "grad_norm": 0.4588131010532379, |
| "learning_rate": 0.00019946657423701157, |
| "loss": 0.2341, |
| "step": 3590 |
| }, |
| { |
| "epoch": 0.16302871116746673, |
| "grad_norm": 0.6853991150856018, |
| "learning_rate": 0.00019945882242437382, |
| "loss": 0.237, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.16348156869848746, |
| "grad_norm": 0.493629515171051, |
| "learning_rate": 0.0001994510148449537, |
| "loss": 0.2584, |
| "step": 3610 |
| }, |
| { |
| "epoch": 0.16393442622950818, |
| "grad_norm": 0.33976632356643677, |
| "learning_rate": 0.00019944315150312894, |
| "loss": 0.2475, |
| "step": 3620 |
| }, |
| { |
| "epoch": 0.16438728376052894, |
| "grad_norm": 0.4430384635925293, |
| "learning_rate": 0.00019943523240330854, |
| "loss": 0.2257, |
| "step": 3630 |
| }, |
| { |
| "epoch": 0.16484014129154967, |
| "grad_norm": 0.5202488303184509, |
| "learning_rate": 0.00019942725754993278, |
| "loss": 0.2005, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.16529299882257042, |
| "grad_norm": 0.4460958242416382, |
| "learning_rate": 0.00019941922694747318, |
| "loss": 0.231, |
| "step": 3650 |
| }, |
| { |
| "epoch": 0.16574585635359115, |
| "grad_norm": 0.46459195017814636, |
| "learning_rate": 0.00019941114060043248, |
| "loss": 0.1931, |
| "step": 3660 |
| }, |
| { |
| "epoch": 0.1661987138846119, |
| "grad_norm": 0.4212459325790405, |
| "learning_rate": 0.0001994029985133447, |
| "loss": 0.2344, |
| "step": 3670 |
| }, |
| { |
| "epoch": 0.16665157141563264, |
| "grad_norm": 0.4669646620750427, |
| "learning_rate": 0.00019939480069077517, |
| "loss": 0.1844, |
| "step": 3680 |
| }, |
| { |
| "epoch": 0.1671044289466534, |
| "grad_norm": 0.40480050444602966, |
| "learning_rate": 0.0001993865471373204, |
| "loss": 0.234, |
| "step": 3690 |
| }, |
| { |
| "epoch": 0.16755728647767412, |
| "grad_norm": 0.433941513299942, |
| "learning_rate": 0.0001993782378576082, |
| "loss": 0.2179, |
| "step": 3700 |
| }, |
| { |
| "epoch": 0.16801014400869488, |
| "grad_norm": 0.39012041687965393, |
| "learning_rate": 0.0001993698728562976, |
| "loss": 0.227, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.1684630015397156, |
| "grad_norm": 0.5125910639762878, |
| "learning_rate": 0.00019936145213807885, |
| "loss": 0.2222, |
| "step": 3720 |
| }, |
| { |
| "epoch": 0.16891585907073633, |
| "grad_norm": 0.3955945074558258, |
| "learning_rate": 0.0001993529757076735, |
| "loss": 0.2464, |
| "step": 3730 |
| }, |
| { |
| "epoch": 0.1693687166017571, |
| "grad_norm": 0.5229743123054504, |
| "learning_rate": 0.00019934444356983427, |
| "loss": 0.2408, |
| "step": 3740 |
| }, |
| { |
| "epoch": 0.16982157413277782, |
| "grad_norm": 0.35481345653533936, |
| "learning_rate": 0.00019933585572934516, |
| "loss": 0.2252, |
| "step": 3750 |
| }, |
| { |
| "epoch": 0.17027443166379858, |
| "grad_norm": 0.3383258581161499, |
| "learning_rate": 0.0001993272121910214, |
| "loss": 0.2363, |
| "step": 3760 |
| }, |
| { |
| "epoch": 0.1707272891948193, |
| "grad_norm": 0.5481933951377869, |
| "learning_rate": 0.00019931851295970944, |
| "loss": 0.2396, |
| "step": 3770 |
| }, |
| { |
| "epoch": 0.17118014672584006, |
| "grad_norm": 0.603874921798706, |
| "learning_rate": 0.00019930975804028693, |
| "loss": 0.2247, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.1716330042568608, |
| "grad_norm": 0.44633063673973083, |
| "learning_rate": 0.00019930094743766282, |
| "loss": 0.2293, |
| "step": 3790 |
| }, |
| { |
| "epoch": 0.17208586178788154, |
| "grad_norm": 0.4310886561870575, |
| "learning_rate": 0.0001992920811567772, |
| "loss": 0.2277, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.17253871931890227, |
| "grad_norm": 0.4108830988407135, |
| "learning_rate": 0.0001992831592026014, |
| "loss": 0.2671, |
| "step": 3810 |
| }, |
| { |
| "epoch": 0.172991576849923, |
| "grad_norm": 0.4433537423610687, |
| "learning_rate": 0.00019927418158013801, |
| "loss": 0.2579, |
| "step": 3820 |
| }, |
| { |
| "epoch": 0.17344443438094376, |
| "grad_norm": 0.49078381061553955, |
| "learning_rate": 0.00019926514829442083, |
| "loss": 0.2291, |
| "step": 3830 |
| }, |
| { |
| "epoch": 0.17389729191196449, |
| "grad_norm": 0.32983776926994324, |
| "learning_rate": 0.0001992560593505148, |
| "loss": 0.2263, |
| "step": 3840 |
| }, |
| { |
| "epoch": 0.17435014944298524, |
| "grad_norm": 0.5339885354042053, |
| "learning_rate": 0.00019924691475351608, |
| "loss": 0.271, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.17480300697400597, |
| "grad_norm": 0.5582554936408997, |
| "learning_rate": 0.00019923771450855213, |
| "loss": 0.2433, |
| "step": 3860 |
| }, |
| { |
| "epoch": 0.17525586450502673, |
| "grad_norm": 0.5500646233558655, |
| "learning_rate": 0.0001992284586207815, |
| "loss": 0.2539, |
| "step": 3870 |
| }, |
| { |
| "epoch": 0.17570872203604745, |
| "grad_norm": 0.40870654582977295, |
| "learning_rate": 0.00019921914709539404, |
| "loss": 0.2474, |
| "step": 3880 |
| }, |
| { |
| "epoch": 0.1761615795670682, |
| "grad_norm": 0.5105723142623901, |
| "learning_rate": 0.00019920977993761072, |
| "loss": 0.2718, |
| "step": 3890 |
| }, |
| { |
| "epoch": 0.17661443709808894, |
| "grad_norm": 0.5596238970756531, |
| "learning_rate": 0.0001992003571526837, |
| "loss": 0.2524, |
| "step": 3900 |
| }, |
| { |
| "epoch": 0.1770672946291097, |
| "grad_norm": 0.4464496374130249, |
| "learning_rate": 0.00019919087874589638, |
| "loss": 0.2168, |
| "step": 3910 |
| }, |
| { |
| "epoch": 0.17752015216013042, |
| "grad_norm": 0.5252407789230347, |
| "learning_rate": 0.0001991813447225633, |
| "loss": 0.2328, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.17797300969115115, |
| "grad_norm": 0.45272043347358704, |
| "learning_rate": 0.00019917175508803026, |
| "loss": 0.2425, |
| "step": 3930 |
| }, |
| { |
| "epoch": 0.1784258672221719, |
| "grad_norm": 0.4741479158401489, |
| "learning_rate": 0.00019916210984767413, |
| "loss": 0.2529, |
| "step": 3940 |
| }, |
| { |
| "epoch": 0.17887872475319264, |
| "grad_norm": 0.5444139838218689, |
| "learning_rate": 0.00019915240900690307, |
| "loss": 0.2566, |
| "step": 3950 |
| }, |
| { |
| "epoch": 0.1793315822842134, |
| "grad_norm": 0.5768086314201355, |
| "learning_rate": 0.00019914265257115634, |
| "loss": 0.251, |
| "step": 3960 |
| }, |
| { |
| "epoch": 0.17978443981523412, |
| "grad_norm": 0.5692720413208008, |
| "learning_rate": 0.00019913284054590434, |
| "loss": 0.271, |
| "step": 3970 |
| }, |
| { |
| "epoch": 0.18023729734625488, |
| "grad_norm": 0.5857642292976379, |
| "learning_rate": 0.0001991229729366488, |
| "loss": 0.2225, |
| "step": 3980 |
| }, |
| { |
| "epoch": 0.1806901548772756, |
| "grad_norm": 0.3476020395755768, |
| "learning_rate": 0.00019911304974892243, |
| "loss": 0.2377, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.18114301240829636, |
| "grad_norm": 0.4105261564254761, |
| "learning_rate": 0.00019910307098828923, |
| "loss": 0.2168, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.18114301240829636, |
| "eval_chrf": 76.70440143862535, |
| "eval_loss": 0.1951996386051178, |
| "eval_runtime": 13.6389, |
| "eval_samples_per_second": 0.733, |
| "eval_steps_per_second": 0.073, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.1815958699393171, |
| "grad_norm": 0.44274958968162537, |
| "learning_rate": 0.0001990930366603443, |
| "loss": 0.2442, |
| "step": 4010 |
| }, |
| { |
| "epoch": 0.18204872747033785, |
| "grad_norm": 0.4935576617717743, |
| "learning_rate": 0.00019908294677071386, |
| "loss": 0.2184, |
| "step": 4020 |
| }, |
| { |
| "epoch": 0.18250158500135857, |
| "grad_norm": 0.4292125403881073, |
| "learning_rate": 0.00019907280132505542, |
| "loss": 0.2155, |
| "step": 4030 |
| }, |
| { |
| "epoch": 0.1829544425323793, |
| "grad_norm": 0.49460700154304504, |
| "learning_rate": 0.00019906260032905747, |
| "loss": 0.2028, |
| "step": 4040 |
| }, |
| { |
| "epoch": 0.18340730006340006, |
| "grad_norm": 0.4970226585865021, |
| "learning_rate": 0.00019905234378843982, |
| "loss": 0.2134, |
| "step": 4050 |
| }, |
| { |
| "epoch": 0.1838601575944208, |
| "grad_norm": 0.4835842251777649, |
| "learning_rate": 0.00019904203170895325, |
| "loss": 0.2337, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.18431301512544154, |
| "grad_norm": 0.47777554392814636, |
| "learning_rate": 0.00019903166409637982, |
| "loss": 0.2006, |
| "step": 4070 |
| }, |
| { |
| "epoch": 0.18476587265646227, |
| "grad_norm": 0.39269423484802246, |
| "learning_rate": 0.00019902124095653268, |
| "loss": 0.2084, |
| "step": 4080 |
| }, |
| { |
| "epoch": 0.18521873018748303, |
| "grad_norm": 0.4256626069545746, |
| "learning_rate": 0.00019901076229525603, |
| "loss": 0.227, |
| "step": 4090 |
| }, |
| { |
| "epoch": 0.18567158771850376, |
| "grad_norm": 0.4103119373321533, |
| "learning_rate": 0.00019900022811842537, |
| "loss": 0.2563, |
| "step": 4100 |
| }, |
| { |
| "epoch": 0.1861244452495245, |
| "grad_norm": 0.4316677749156952, |
| "learning_rate": 0.00019898963843194718, |
| "loss": 0.2257, |
| "step": 4110 |
| }, |
| { |
| "epoch": 0.18657730278054524, |
| "grad_norm": 0.41959917545318604, |
| "learning_rate": 0.00019897899324175915, |
| "loss": 0.2369, |
| "step": 4120 |
| }, |
| { |
| "epoch": 0.18703016031156597, |
| "grad_norm": 0.5151909589767456, |
| "learning_rate": 0.00019896829255383006, |
| "loss": 0.2132, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.18748301784258672, |
| "grad_norm": 0.5552547574043274, |
| "learning_rate": 0.00019895753637415978, |
| "loss": 0.2321, |
| "step": 4140 |
| }, |
| { |
| "epoch": 0.18793587537360745, |
| "grad_norm": 0.6295343637466431, |
| "learning_rate": 0.00019894672470877934, |
| "loss": 0.2465, |
| "step": 4150 |
| }, |
| { |
| "epoch": 0.1883887329046282, |
| "grad_norm": 0.4910910427570343, |
| "learning_rate": 0.0001989358575637509, |
| "loss": 0.2113, |
| "step": 4160 |
| }, |
| { |
| "epoch": 0.18884159043564894, |
| "grad_norm": 0.6068474054336548, |
| "learning_rate": 0.00019892493494516765, |
| "loss": 0.2362, |
| "step": 4170 |
| }, |
| { |
| "epoch": 0.1892944479666697, |
| "grad_norm": 0.49513566493988037, |
| "learning_rate": 0.0001989139568591539, |
| "loss": 0.2156, |
| "step": 4180 |
| }, |
| { |
| "epoch": 0.18974730549769042, |
| "grad_norm": 0.5288516879081726, |
| "learning_rate": 0.00019890292331186518, |
| "loss": 0.2071, |
| "step": 4190 |
| }, |
| { |
| "epoch": 0.19020016302871118, |
| "grad_norm": 0.4332873225212097, |
| "learning_rate": 0.00019889183430948795, |
| "loss": 0.24, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.1906530205597319, |
| "grad_norm": 0.4727018177509308, |
| "learning_rate": 0.00019888068985823986, |
| "loss": 0.2216, |
| "step": 4210 |
| }, |
| { |
| "epoch": 0.19110587809075266, |
| "grad_norm": 0.4223858118057251, |
| "learning_rate": 0.00019886948996436962, |
| "loss": 0.2167, |
| "step": 4220 |
| }, |
| { |
| "epoch": 0.1915587356217734, |
| "grad_norm": 0.5059617161750793, |
| "learning_rate": 0.00019885823463415702, |
| "loss": 0.218, |
| "step": 4230 |
| }, |
| { |
| "epoch": 0.19201159315279412, |
| "grad_norm": 0.38767218589782715, |
| "learning_rate": 0.00019884692387391303, |
| "loss": 0.2405, |
| "step": 4240 |
| }, |
| { |
| "epoch": 0.19246445068381487, |
| "grad_norm": 0.4692607522010803, |
| "learning_rate": 0.0001988355576899795, |
| "loss": 0.214, |
| "step": 4250 |
| }, |
| { |
| "epoch": 0.1929173082148356, |
| "grad_norm": 0.5011858344078064, |
| "learning_rate": 0.00019882413608872957, |
| "loss": 0.2359, |
| "step": 4260 |
| }, |
| { |
| "epoch": 0.19337016574585636, |
| "grad_norm": 0.3452933132648468, |
| "learning_rate": 0.0001988126590765673, |
| "loss": 0.2068, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.1938230232768771, |
| "grad_norm": 0.5500070452690125, |
| "learning_rate": 0.0001988011266599279, |
| "loss": 0.2388, |
| "step": 4280 |
| }, |
| { |
| "epoch": 0.19427588080789784, |
| "grad_norm": 0.4085341989994049, |
| "learning_rate": 0.0001987895388452776, |
| "loss": 0.2222, |
| "step": 4290 |
| }, |
| { |
| "epoch": 0.19472873833891857, |
| "grad_norm": 0.41355833411216736, |
| "learning_rate": 0.0001987778956391138, |
| "loss": 0.2117, |
| "step": 4300 |
| }, |
| { |
| "epoch": 0.19518159586993933, |
| "grad_norm": 0.4159574806690216, |
| "learning_rate": 0.00019876619704796474, |
| "loss": 0.2261, |
| "step": 4310 |
| }, |
| { |
| "epoch": 0.19563445340096006, |
| "grad_norm": 0.5774794220924377, |
| "learning_rate": 0.0001987544430783899, |
| "loss": 0.2134, |
| "step": 4320 |
| }, |
| { |
| "epoch": 0.19608731093198078, |
| "grad_norm": 0.5641900300979614, |
| "learning_rate": 0.00019874263373697982, |
| "loss": 0.2382, |
| "step": 4330 |
| }, |
| { |
| "epoch": 0.19654016846300154, |
| "grad_norm": 0.4016419053077698, |
| "learning_rate": 0.00019873076903035593, |
| "loss": 0.2298, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.19699302599402227, |
| "grad_norm": 0.3750913739204407, |
| "learning_rate": 0.00019871884896517086, |
| "loss": 0.2013, |
| "step": 4350 |
| }, |
| { |
| "epoch": 0.19744588352504303, |
| "grad_norm": 0.5473160743713379, |
| "learning_rate": 0.00019870687354810816, |
| "loss": 0.2275, |
| "step": 4360 |
| }, |
| { |
| "epoch": 0.19789874105606375, |
| "grad_norm": 0.4866637885570526, |
| "learning_rate": 0.00019869484278588255, |
| "loss": 0.2438, |
| "step": 4370 |
| }, |
| { |
| "epoch": 0.1983515985870845, |
| "grad_norm": 0.43724608421325684, |
| "learning_rate": 0.00019868275668523963, |
| "loss": 0.2374, |
| "step": 4380 |
| }, |
| { |
| "epoch": 0.19880445611810524, |
| "grad_norm": 0.4235040843486786, |
| "learning_rate": 0.00019867061525295616, |
| "loss": 0.2229, |
| "step": 4390 |
| }, |
| { |
| "epoch": 0.199257313649126, |
| "grad_norm": 0.5863602757453918, |
| "learning_rate": 0.0001986584184958398, |
| "loss": 0.2257, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.19971017118014672, |
| "grad_norm": 0.4537600576877594, |
| "learning_rate": 0.0001986461664207294, |
| "loss": 0.2389, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.20016302871116748, |
| "grad_norm": 0.4225648045539856, |
| "learning_rate": 0.00019863385903449464, |
| "loss": 0.2107, |
| "step": 4420 |
| }, |
| { |
| "epoch": 0.2006158862421882, |
| "grad_norm": 0.49402421712875366, |
| "learning_rate": 0.00019862149634403635, |
| "loss": 0.1887, |
| "step": 4430 |
| }, |
| { |
| "epoch": 0.20106874377320894, |
| "grad_norm": 0.5091655850410461, |
| "learning_rate": 0.00019860907835628626, |
| "loss": 0.2358, |
| "step": 4440 |
| }, |
| { |
| "epoch": 0.2015216013042297, |
| "grad_norm": 0.5644922852516174, |
| "learning_rate": 0.0001985966050782072, |
| "loss": 0.2566, |
| "step": 4450 |
| }, |
| { |
| "epoch": 0.20197445883525042, |
| "grad_norm": 0.4026089608669281, |
| "learning_rate": 0.00019858407651679298, |
| "loss": 0.239, |
| "step": 4460 |
| }, |
| { |
| "epoch": 0.20242731636627118, |
| "grad_norm": 0.46105530858039856, |
| "learning_rate": 0.00019857149267906837, |
| "loss": 0.2266, |
| "step": 4470 |
| }, |
| { |
| "epoch": 0.2028801738972919, |
| "grad_norm": 0.3332107663154602, |
| "learning_rate": 0.00019855885357208917, |
| "loss": 0.2049, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.20333303142831266, |
| "grad_norm": 0.491138219833374, |
| "learning_rate": 0.00019854615920294214, |
| "loss": 0.2296, |
| "step": 4490 |
| }, |
| { |
| "epoch": 0.2037858889593334, |
| "grad_norm": 0.4568822979927063, |
| "learning_rate": 0.00019853340957874506, |
| "loss": 0.225, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.20423874649035414, |
| "grad_norm": 0.5473847985267639, |
| "learning_rate": 0.00019852060470664662, |
| "loss": 0.2509, |
| "step": 4510 |
| }, |
| { |
| "epoch": 0.20469160402137487, |
| "grad_norm": 0.40623512864112854, |
| "learning_rate": 0.0001985077445938266, |
| "loss": 0.2033, |
| "step": 4520 |
| }, |
| { |
| "epoch": 0.20514446155239563, |
| "grad_norm": 0.44612765312194824, |
| "learning_rate": 0.0001984948292474957, |
| "loss": 0.2458, |
| "step": 4530 |
| }, |
| { |
| "epoch": 0.20559731908341636, |
| "grad_norm": 0.4710107147693634, |
| "learning_rate": 0.0001984818586748955, |
| "loss": 0.2106, |
| "step": 4540 |
| }, |
| { |
| "epoch": 0.20605017661443709, |
| "grad_norm": 0.4278642237186432, |
| "learning_rate": 0.00019846883288329874, |
| "loss": 0.2121, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.20650303414545784, |
| "grad_norm": 0.48352622985839844, |
| "learning_rate": 0.00019845575188000893, |
| "loss": 0.2213, |
| "step": 4560 |
| }, |
| { |
| "epoch": 0.20695589167647857, |
| "grad_norm": 0.5382823944091797, |
| "learning_rate": 0.00019844261567236065, |
| "loss": 0.233, |
| "step": 4570 |
| }, |
| { |
| "epoch": 0.20740874920749933, |
| "grad_norm": 0.38672780990600586, |
| "learning_rate": 0.00019842942426771937, |
| "loss": 0.2258, |
| "step": 4580 |
| }, |
| { |
| "epoch": 0.20786160673852005, |
| "grad_norm": 0.45074501633644104, |
| "learning_rate": 0.0001984161776734816, |
| "loss": 0.2112, |
| "step": 4590 |
| }, |
| { |
| "epoch": 0.2083144642695408, |
| "grad_norm": 0.37882986664772034, |
| "learning_rate": 0.00019840287589707468, |
| "loss": 0.219, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.20876732180056154, |
| "grad_norm": 0.4186610281467438, |
| "learning_rate": 0.000198389518945957, |
| "loss": 0.2079, |
| "step": 4610 |
| }, |
| { |
| "epoch": 0.2092201793315823, |
| "grad_norm": 0.35181114077568054, |
| "learning_rate": 0.00019837610682761777, |
| "loss": 0.2062, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.20967303686260302, |
| "grad_norm": 0.38587722182273865, |
| "learning_rate": 0.0001983626395495772, |
| "loss": 0.2467, |
| "step": 4630 |
| }, |
| { |
| "epoch": 0.21012589439362375, |
| "grad_norm": 0.7229722738265991, |
| "learning_rate": 0.00019834911711938644, |
| "loss": 0.2219, |
| "step": 4640 |
| }, |
| { |
| "epoch": 0.2105787519246445, |
| "grad_norm": 0.5380842685699463, |
| "learning_rate": 0.00019833553954462757, |
| "loss": 0.2421, |
| "step": 4650 |
| }, |
| { |
| "epoch": 0.21103160945566524, |
| "grad_norm": 0.4756489396095276, |
| "learning_rate": 0.00019832190683291356, |
| "loss": 0.2093, |
| "step": 4660 |
| }, |
| { |
| "epoch": 0.211484466986686, |
| "grad_norm": 0.5300980806350708, |
| "learning_rate": 0.0001983082189918883, |
| "loss": 0.2367, |
| "step": 4670 |
| }, |
| { |
| "epoch": 0.21193732451770672, |
| "grad_norm": 0.49144670367240906, |
| "learning_rate": 0.00019829447602922654, |
| "loss": 0.2001, |
| "step": 4680 |
| }, |
| { |
| "epoch": 0.21239018204872748, |
| "grad_norm": 0.45265018939971924, |
| "learning_rate": 0.00019828067795263406, |
| "loss": 0.23, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.2128430395797482, |
| "grad_norm": 0.4063917398452759, |
| "learning_rate": 0.00019826682476984742, |
| "loss": 0.21, |
| "step": 4700 |
| }, |
| { |
| "epoch": 0.21329589711076896, |
| "grad_norm": 0.36459800601005554, |
| "learning_rate": 0.00019825291648863414, |
| "loss": 0.2116, |
| "step": 4710 |
| }, |
| { |
| "epoch": 0.2137487546417897, |
| "grad_norm": 0.4962182641029358, |
| "learning_rate": 0.00019823895311679268, |
| "loss": 0.2137, |
| "step": 4720 |
| }, |
| { |
| "epoch": 0.21420161217281045, |
| "grad_norm": 0.5544801354408264, |
| "learning_rate": 0.00019822493466215227, |
| "loss": 0.2377, |
| "step": 4730 |
| }, |
| { |
| "epoch": 0.21465446970383117, |
| "grad_norm": 0.4859352111816406, |
| "learning_rate": 0.00019821086113257311, |
| "loss": 0.2308, |
| "step": 4740 |
| }, |
| { |
| "epoch": 0.2151073272348519, |
| "grad_norm": 0.8023104667663574, |
| "learning_rate": 0.00019819673253594627, |
| "loss": 0.2164, |
| "step": 4750 |
| }, |
| { |
| "epoch": 0.21556018476587266, |
| "grad_norm": 0.37945619225502014, |
| "learning_rate": 0.00019818254888019366, |
| "loss": 0.2158, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.2160130422968934, |
| "grad_norm": 0.375491738319397, |
| "learning_rate": 0.00019816831017326812, |
| "loss": 0.2258, |
| "step": 4770 |
| }, |
| { |
| "epoch": 0.21646589982791414, |
| "grad_norm": 0.66087806224823, |
| "learning_rate": 0.00019815401642315328, |
| "loss": 0.2385, |
| "step": 4780 |
| }, |
| { |
| "epoch": 0.21691875735893487, |
| "grad_norm": 0.5332721471786499, |
| "learning_rate": 0.0001981396676378637, |
| "loss": 0.1996, |
| "step": 4790 |
| }, |
| { |
| "epoch": 0.21737161488995563, |
| "grad_norm": 0.37998682260513306, |
| "learning_rate": 0.0001981252638254448, |
| "loss": 0.1871, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.21782447242097636, |
| "grad_norm": 0.3764188587665558, |
| "learning_rate": 0.00019811080499397283, |
| "loss": 0.2142, |
| "step": 4810 |
| }, |
| { |
| "epoch": 0.2182773299519971, |
| "grad_norm": 0.48812979459762573, |
| "learning_rate": 0.00019809629115155483, |
| "loss": 0.2152, |
| "step": 4820 |
| }, |
| { |
| "epoch": 0.21873018748301784, |
| "grad_norm": 0.4503139853477478, |
| "learning_rate": 0.0001980817223063288, |
| "loss": 0.241, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.2191830450140386, |
| "grad_norm": 0.4879043400287628, |
| "learning_rate": 0.00019806709846646348, |
| "loss": 0.2272, |
| "step": 4840 |
| }, |
| { |
| "epoch": 0.21963590254505932, |
| "grad_norm": 0.4850088655948639, |
| "learning_rate": 0.00019805241964015853, |
| "loss": 0.2142, |
| "step": 4850 |
| }, |
| { |
| "epoch": 0.22008876007608005, |
| "grad_norm": 0.44978779554367065, |
| "learning_rate": 0.00019803768583564438, |
| "loss": 0.2161, |
| "step": 4860 |
| }, |
| { |
| "epoch": 0.2205416176071008, |
| "grad_norm": 0.4910193383693695, |
| "learning_rate": 0.00019802289706118234, |
| "loss": 0.2432, |
| "step": 4870 |
| }, |
| { |
| "epoch": 0.22099447513812154, |
| "grad_norm": 0.40853068232536316, |
| "learning_rate": 0.00019800805332506448, |
| "loss": 0.2263, |
| "step": 4880 |
| }, |
| { |
| "epoch": 0.2214473326691423, |
| "grad_norm": 0.4698120951652527, |
| "learning_rate": 0.00019799315463561368, |
| "loss": 0.2118, |
| "step": 4890 |
| }, |
| { |
| "epoch": 0.22190019020016302, |
| "grad_norm": 0.5427727103233337, |
| "learning_rate": 0.0001979782010011837, |
| "loss": 0.1948, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.22235304773118378, |
| "grad_norm": 0.3942580223083496, |
| "learning_rate": 0.00019796319243015913, |
| "loss": 0.203, |
| "step": 4910 |
| }, |
| { |
| "epoch": 0.2228059052622045, |
| "grad_norm": 0.4781436622142792, |
| "learning_rate": 0.00019794812893095522, |
| "loss": 0.2003, |
| "step": 4920 |
| }, |
| { |
| "epoch": 0.22325876279322526, |
| "grad_norm": 0.4551750719547272, |
| "learning_rate": 0.00019793301051201817, |
| "loss": 0.2366, |
| "step": 4930 |
| }, |
| { |
| "epoch": 0.223711620324246, |
| "grad_norm": 0.3415755033493042, |
| "learning_rate": 0.00019791783718182485, |
| "loss": 0.2003, |
| "step": 4940 |
| }, |
| { |
| "epoch": 0.22416447785526672, |
| "grad_norm": 0.47352227568626404, |
| "learning_rate": 0.00019790260894888306, |
| "loss": 0.217, |
| "step": 4950 |
| }, |
| { |
| "epoch": 0.22461733538628748, |
| "grad_norm": 0.6360595226287842, |
| "learning_rate": 0.00019788732582173127, |
| "loss": 0.2358, |
| "step": 4960 |
| }, |
| { |
| "epoch": 0.2250701929173082, |
| "grad_norm": 0.48768046498298645, |
| "learning_rate": 0.0001978719878089387, |
| "loss": 0.2213, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.22552305044832896, |
| "grad_norm": 0.4766073226928711, |
| "learning_rate": 0.00019785659491910549, |
| "loss": 0.208, |
| "step": 4980 |
| }, |
| { |
| "epoch": 0.2259759079793497, |
| "grad_norm": 0.44524306058883667, |
| "learning_rate": 0.00019784114716086247, |
| "loss": 0.2219, |
| "step": 4990 |
| }, |
| { |
| "epoch": 0.22642876551037044, |
| "grad_norm": 0.44435834884643555, |
| "learning_rate": 0.00019782564454287117, |
| "loss": 0.2297, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.22642876551037044, |
| "eval_chrf": 72.84821817627648, |
| "eval_loss": 0.17898276448249817, |
| "eval_runtime": 26.7337, |
| "eval_samples_per_second": 0.374, |
| "eval_steps_per_second": 0.037, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.22688162304139117, |
| "grad_norm": 0.3903729319572449, |
| "learning_rate": 0.00019781008707382399, |
| "loss": 0.2442, |
| "step": 5010 |
| }, |
| { |
| "epoch": 0.22733448057241193, |
| "grad_norm": 0.3812226355075836, |
| "learning_rate": 0.000197794474762444, |
| "loss": 0.2457, |
| "step": 5020 |
| }, |
| { |
| "epoch": 0.22778733810343266, |
| "grad_norm": 0.47868451476097107, |
| "learning_rate": 0.00019777880761748508, |
| "loss": 0.235, |
| "step": 5030 |
| }, |
| { |
| "epoch": 0.2282401956344534, |
| "grad_norm": 0.49971717596054077, |
| "learning_rate": 0.00019776308564773188, |
| "loss": 0.2117, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.22869305316547414, |
| "grad_norm": 0.30920127034187317, |
| "learning_rate": 0.00019774730886199966, |
| "loss": 0.2015, |
| "step": 5050 |
| }, |
| { |
| "epoch": 0.22914591069649487, |
| "grad_norm": 0.40997618436813354, |
| "learning_rate": 0.00019773147726913454, |
| "loss": 0.2125, |
| "step": 5060 |
| }, |
| { |
| "epoch": 0.22959876822751563, |
| "grad_norm": 0.5081835985183716, |
| "learning_rate": 0.00019771559087801332, |
| "loss": 0.2154, |
| "step": 5070 |
| }, |
| { |
| "epoch": 0.23005162575853635, |
| "grad_norm": 0.5434297919273376, |
| "learning_rate": 0.00019769964969754353, |
| "loss": 0.2285, |
| "step": 5080 |
| }, |
| { |
| "epoch": 0.2305044832895571, |
| "grad_norm": 0.5638498663902283, |
| "learning_rate": 0.00019768365373666347, |
| "loss": 0.2377, |
| "step": 5090 |
| }, |
| { |
| "epoch": 0.23095734082057784, |
| "grad_norm": 0.4750683307647705, |
| "learning_rate": 0.00019766760300434207, |
| "loss": 0.2125, |
| "step": 5100 |
| }, |
| { |
| "epoch": 0.2314101983515986, |
| "grad_norm": 0.6645839810371399, |
| "learning_rate": 0.000197651497509579, |
| "loss": 0.2027, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.23186305588261932, |
| "grad_norm": 0.6112692952156067, |
| "learning_rate": 0.00019763533726140464, |
| "loss": 0.2238, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.23231591341364008, |
| "grad_norm": 0.8351989984512329, |
| "learning_rate": 0.00019761912226888014, |
| "loss": 0.2138, |
| "step": 5130 |
| }, |
| { |
| "epoch": 0.2327687709446608, |
| "grad_norm": 0.5615606904029846, |
| "learning_rate": 0.00019760285254109722, |
| "loss": 0.2152, |
| "step": 5140 |
| }, |
| { |
| "epoch": 0.23322162847568156, |
| "grad_norm": 0.5717998743057251, |
| "learning_rate": 0.00019758652808717837, |
| "loss": 0.2377, |
| "step": 5150 |
| }, |
| { |
| "epoch": 0.2336744860067023, |
| "grad_norm": 0.7914870381355286, |
| "learning_rate": 0.00019757014891627676, |
| "loss": 0.2319, |
| "step": 5160 |
| }, |
| { |
| "epoch": 0.23412734353772302, |
| "grad_norm": 0.43396610021591187, |
| "learning_rate": 0.00019755371503757624, |
| "loss": 0.2221, |
| "step": 5170 |
| }, |
| { |
| "epoch": 0.23458020106874378, |
| "grad_norm": 0.42183613777160645, |
| "learning_rate": 0.00019753722646029126, |
| "loss": 0.19, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.2350330585997645, |
| "grad_norm": 0.373030424118042, |
| "learning_rate": 0.00019752068319366708, |
| "loss": 0.2196, |
| "step": 5190 |
| }, |
| { |
| "epoch": 0.23548591613078526, |
| "grad_norm": 0.4341379702091217, |
| "learning_rate": 0.00019750408524697946, |
| "loss": 0.2207, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.235938773661806, |
| "grad_norm": 0.3972753882408142, |
| "learning_rate": 0.00019748743262953502, |
| "loss": 0.2316, |
| "step": 5210 |
| }, |
| { |
| "epoch": 0.23639163119282675, |
| "grad_norm": 0.3169364631175995, |
| "learning_rate": 0.00019747072535067082, |
| "loss": 0.2222, |
| "step": 5220 |
| }, |
| { |
| "epoch": 0.23684448872384747, |
| "grad_norm": 0.4205416440963745, |
| "learning_rate": 0.00019745396341975472, |
| "loss": 0.2303, |
| "step": 5230 |
| }, |
| { |
| "epoch": 0.23729734625486823, |
| "grad_norm": 0.4518096148967743, |
| "learning_rate": 0.00019743714684618516, |
| "loss": 0.257, |
| "step": 5240 |
| }, |
| { |
| "epoch": 0.23775020378588896, |
| "grad_norm": 0.5455726385116577, |
| "learning_rate": 0.0001974202756393912, |
| "loss": 0.2335, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.2382030613169097, |
| "grad_norm": 0.5465617179870605, |
| "learning_rate": 0.0001974033498088326, |
| "loss": 0.2136, |
| "step": 5260 |
| }, |
| { |
| "epoch": 0.23865591884793044, |
| "grad_norm": 0.4863675832748413, |
| "learning_rate": 0.0001973863693639997, |
| "loss": 0.2272, |
| "step": 5270 |
| }, |
| { |
| "epoch": 0.23910877637895117, |
| "grad_norm": 0.6372243165969849, |
| "learning_rate": 0.0001973693343144135, |
| "loss": 0.221, |
| "step": 5280 |
| }, |
| { |
| "epoch": 0.23956163390997193, |
| "grad_norm": 0.4402875304222107, |
| "learning_rate": 0.00019735224466962552, |
| "loss": 0.2056, |
| "step": 5290 |
| }, |
| { |
| "epoch": 0.24001449144099266, |
| "grad_norm": 0.4618695080280304, |
| "learning_rate": 0.000197335100439218, |
| "loss": 0.2339, |
| "step": 5300 |
| }, |
| { |
| "epoch": 0.2404673489720134, |
| "grad_norm": 0.5031725764274597, |
| "learning_rate": 0.00019731790163280376, |
| "loss": 0.2137, |
| "step": 5310 |
| }, |
| { |
| "epoch": 0.24092020650303414, |
| "grad_norm": 0.34283286333084106, |
| "learning_rate": 0.00019730064826002622, |
| "loss": 0.2295, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.2413730640340549, |
| "grad_norm": 0.5465356707572937, |
| "learning_rate": 0.00019728334033055936, |
| "loss": 0.2232, |
| "step": 5330 |
| }, |
| { |
| "epoch": 0.24182592156507562, |
| "grad_norm": 0.34146648645401, |
| "learning_rate": 0.00019726597785410782, |
| "loss": 0.2163, |
| "step": 5340 |
| }, |
| { |
| "epoch": 0.24227877909609638, |
| "grad_norm": 0.41163069009780884, |
| "learning_rate": 0.00019724856084040666, |
| "loss": 0.2093, |
| "step": 5350 |
| }, |
| { |
| "epoch": 0.2427316366271171, |
| "grad_norm": 0.30183130502700806, |
| "learning_rate": 0.00019723108929922177, |
| "loss": 0.2015, |
| "step": 5360 |
| }, |
| { |
| "epoch": 0.24318449415813784, |
| "grad_norm": 0.5386610627174377, |
| "learning_rate": 0.00019721356324034942, |
| "loss": 0.208, |
| "step": 5370 |
| }, |
| { |
| "epoch": 0.2436373516891586, |
| "grad_norm": 0.4824809730052948, |
| "learning_rate": 0.0001971959826736165, |
| "loss": 0.1962, |
| "step": 5380 |
| }, |
| { |
| "epoch": 0.24409020922017932, |
| "grad_norm": 0.40670323371887207, |
| "learning_rate": 0.0001971783476088805, |
| "loss": 0.2423, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.24454306675120008, |
| "grad_norm": 0.8714826703071594, |
| "learning_rate": 0.00019716065805602947, |
| "loss": 0.2203, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.2449959242822208, |
| "grad_norm": 0.43796902894973755, |
| "learning_rate": 0.00019714291402498187, |
| "loss": 0.2068, |
| "step": 5410 |
| }, |
| { |
| "epoch": 0.24544878181324156, |
| "grad_norm": 0.4488067924976349, |
| "learning_rate": 0.00019712511552568692, |
| "loss": 0.1948, |
| "step": 5420 |
| }, |
| { |
| "epoch": 0.2459016393442623, |
| "grad_norm": 0.3524841070175171, |
| "learning_rate": 0.00019710726256812427, |
| "loss": 0.1947, |
| "step": 5430 |
| }, |
| { |
| "epoch": 0.24635449687528305, |
| "grad_norm": 0.4532184898853302, |
| "learning_rate": 0.0001970893551623041, |
| "loss": 0.2044, |
| "step": 5440 |
| }, |
| { |
| "epoch": 0.24680735440630377, |
| "grad_norm": 0.49360191822052, |
| "learning_rate": 0.0001970713933182671, |
| "loss": 0.2134, |
| "step": 5450 |
| }, |
| { |
| "epoch": 0.24726021193732453, |
| "grad_norm": 0.3581199645996094, |
| "learning_rate": 0.00019705337704608455, |
| "loss": 0.2654, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.24771306946834526, |
| "grad_norm": 0.47226762771606445, |
| "learning_rate": 0.0001970353063558582, |
| "loss": 0.2431, |
| "step": 5470 |
| }, |
| { |
| "epoch": 0.248165926999366, |
| "grad_norm": 0.5614981055259705, |
| "learning_rate": 0.00019701718125772033, |
| "loss": 0.2333, |
| "step": 5480 |
| }, |
| { |
| "epoch": 0.24861878453038674, |
| "grad_norm": 0.3719375729560852, |
| "learning_rate": 0.00019699900176183374, |
| "loss": 0.213, |
| "step": 5490 |
| }, |
| { |
| "epoch": 0.24907164206140747, |
| "grad_norm": 0.4844624698162079, |
| "learning_rate": 0.00019698076787839165, |
| "loss": 0.2224, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.24952449959242823, |
| "grad_norm": 0.44172951579093933, |
| "learning_rate": 0.0001969624796176179, |
| "loss": 0.2274, |
| "step": 5510 |
| }, |
| { |
| "epoch": 0.24997735712344896, |
| "grad_norm": 0.5217725038528442, |
| "learning_rate": 0.00019694413698976678, |
| "loss": 0.2247, |
| "step": 5520 |
| }, |
| { |
| "epoch": 0.2504302146544697, |
| "grad_norm": 0.47444963455200195, |
| "learning_rate": 0.000196925740005123, |
| "loss": 0.2103, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.25088307218549044, |
| "grad_norm": 0.5168370008468628, |
| "learning_rate": 0.00019690728867400177, |
| "loss": 0.1976, |
| "step": 5540 |
| }, |
| { |
| "epoch": 0.2513359297165112, |
| "grad_norm": 0.4875032603740692, |
| "learning_rate": 0.0001968887830067488, |
| "loss": 0.2089, |
| "step": 5550 |
| }, |
| { |
| "epoch": 0.25178878724753195, |
| "grad_norm": 0.486665278673172, |
| "learning_rate": 0.0001968702230137403, |
| "loss": 0.2067, |
| "step": 5560 |
| }, |
| { |
| "epoch": 0.25224164477855265, |
| "grad_norm": 0.5231538414955139, |
| "learning_rate": 0.0001968516087053829, |
| "loss": 0.2264, |
| "step": 5570 |
| }, |
| { |
| "epoch": 0.2526945023095734, |
| "grad_norm": 0.5574133992195129, |
| "learning_rate": 0.0001968329400921136, |
| "loss": 0.2344, |
| "step": 5580 |
| }, |
| { |
| "epoch": 0.25314735984059417, |
| "grad_norm": 0.322848379611969, |
| "learning_rate": 0.00019681421718440004, |
| "loss": 0.2059, |
| "step": 5590 |
| }, |
| { |
| "epoch": 0.25360021737161487, |
| "grad_norm": 0.31106603145599365, |
| "learning_rate": 0.00019679543999274015, |
| "loss": 0.2006, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.2540530749026356, |
| "grad_norm": 0.453117311000824, |
| "learning_rate": 0.00019677660852766233, |
| "loss": 0.2292, |
| "step": 5610 |
| }, |
| { |
| "epoch": 0.2545059324336564, |
| "grad_norm": 0.4585261940956116, |
| "learning_rate": 0.00019675772279972544, |
| "loss": 0.2288, |
| "step": 5620 |
| }, |
| { |
| "epoch": 0.25495878996467713, |
| "grad_norm": 0.4391018748283386, |
| "learning_rate": 0.00019673878281951875, |
| "loss": 0.2196, |
| "step": 5630 |
| }, |
| { |
| "epoch": 0.25541164749569784, |
| "grad_norm": 0.5377073884010315, |
| "learning_rate": 0.00019671978859766193, |
| "loss": 0.2516, |
| "step": 5640 |
| }, |
| { |
| "epoch": 0.2558645050267186, |
| "grad_norm": 0.4374406635761261, |
| "learning_rate": 0.0001967007401448051, |
| "loss": 0.2289, |
| "step": 5650 |
| }, |
| { |
| "epoch": 0.25631736255773935, |
| "grad_norm": 0.46719449758529663, |
| "learning_rate": 0.00019668163747162874, |
| "loss": 0.2231, |
| "step": 5660 |
| }, |
| { |
| "epoch": 0.25677022008876005, |
| "grad_norm": 0.38420817255973816, |
| "learning_rate": 0.00019666248058884385, |
| "loss": 0.2164, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.2572230776197808, |
| "grad_norm": 0.4629984498023987, |
| "learning_rate": 0.00019664326950719164, |
| "loss": 0.214, |
| "step": 5680 |
| }, |
| { |
| "epoch": 0.25767593515080156, |
| "grad_norm": 0.4738280773162842, |
| "learning_rate": 0.00019662400423744382, |
| "loss": 0.1882, |
| "step": 5690 |
| }, |
| { |
| "epoch": 0.2581287926818223, |
| "grad_norm": 0.5236591696739197, |
| "learning_rate": 0.00019660468479040251, |
| "loss": 0.2007, |
| "step": 5700 |
| }, |
| { |
| "epoch": 0.258581650212843, |
| "grad_norm": 0.449022114276886, |
| "learning_rate": 0.00019658531117690018, |
| "loss": 0.2009, |
| "step": 5710 |
| }, |
| { |
| "epoch": 0.2590345077438638, |
| "grad_norm": 0.4621582329273224, |
| "learning_rate": 0.00019656588340779958, |
| "loss": 0.191, |
| "step": 5720 |
| }, |
| { |
| "epoch": 0.25948736527488453, |
| "grad_norm": 0.3730362057685852, |
| "learning_rate": 0.00019654640149399397, |
| "loss": 0.2061, |
| "step": 5730 |
| }, |
| { |
| "epoch": 0.2599402228059053, |
| "grad_norm": 0.4392858147621155, |
| "learning_rate": 0.00019652686544640685, |
| "loss": 0.2019, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.260393080336926, |
| "grad_norm": 0.41935303807258606, |
| "learning_rate": 0.0001965072752759922, |
| "loss": 0.225, |
| "step": 5750 |
| }, |
| { |
| "epoch": 0.26084593786794674, |
| "grad_norm": 0.39728057384490967, |
| "learning_rate": 0.0001964876309937342, |
| "loss": 0.2143, |
| "step": 5760 |
| }, |
| { |
| "epoch": 0.2612987953989675, |
| "grad_norm": 0.38051551580429077, |
| "learning_rate": 0.00019646793261064746, |
| "loss": 0.2104, |
| "step": 5770 |
| }, |
| { |
| "epoch": 0.2617516529299882, |
| "grad_norm": 0.45690861344337463, |
| "learning_rate": 0.00019644818013777693, |
| "loss": 0.2265, |
| "step": 5780 |
| }, |
| { |
| "epoch": 0.26220451046100895, |
| "grad_norm": 2.40419864654541, |
| "learning_rate": 0.00019642837358619785, |
| "loss": 0.191, |
| "step": 5790 |
| }, |
| { |
| "epoch": 0.2626573679920297, |
| "grad_norm": 0.42658358812332153, |
| "learning_rate": 0.0001964085129670158, |
| "loss": 0.2098, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.26311022552305047, |
| "grad_norm": 0.3833671808242798, |
| "learning_rate": 0.00019638859829136668, |
| "loss": 0.2034, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.26356308305407117, |
| "grad_norm": 0.4116675853729248, |
| "learning_rate": 0.0001963686295704167, |
| "loss": 0.2212, |
| "step": 5820 |
| }, |
| { |
| "epoch": 0.2640159405850919, |
| "grad_norm": 0.39625871181488037, |
| "learning_rate": 0.00019634860681536233, |
| "loss": 0.2281, |
| "step": 5830 |
| }, |
| { |
| "epoch": 0.2644687981161127, |
| "grad_norm": 0.4207947552204132, |
| "learning_rate": 0.0001963285300374304, |
| "loss": 0.2353, |
| "step": 5840 |
| }, |
| { |
| "epoch": 0.26492165564713344, |
| "grad_norm": 0.3432312607765198, |
| "learning_rate": 0.000196308399247878, |
| "loss": 0.2049, |
| "step": 5850 |
| }, |
| { |
| "epoch": 0.26537451317815414, |
| "grad_norm": 0.43867117166519165, |
| "learning_rate": 0.0001962882144579925, |
| "loss": 0.2138, |
| "step": 5860 |
| }, |
| { |
| "epoch": 0.2658273707091749, |
| "grad_norm": 0.39972978830337524, |
| "learning_rate": 0.00019626797567909158, |
| "loss": 0.2073, |
| "step": 5870 |
| }, |
| { |
| "epoch": 0.26628022824019565, |
| "grad_norm": 0.46662867069244385, |
| "learning_rate": 0.00019624768292252314, |
| "loss": 0.1944, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.26673308577121635, |
| "grad_norm": 0.47923582792282104, |
| "learning_rate": 0.0001962273361996654, |
| "loss": 0.2099, |
| "step": 5890 |
| }, |
| { |
| "epoch": 0.2671859433022371, |
| "grad_norm": 0.45202603936195374, |
| "learning_rate": 0.00019620693552192678, |
| "loss": 0.2042, |
| "step": 5900 |
| }, |
| { |
| "epoch": 0.26763880083325786, |
| "grad_norm": 0.36669155955314636, |
| "learning_rate": 0.00019618648090074603, |
| "loss": 0.1883, |
| "step": 5910 |
| }, |
| { |
| "epoch": 0.2680916583642786, |
| "grad_norm": 0.3664119243621826, |
| "learning_rate": 0.00019616597234759205, |
| "loss": 0.2061, |
| "step": 5920 |
| }, |
| { |
| "epoch": 0.2685445158952993, |
| "grad_norm": 0.2816719114780426, |
| "learning_rate": 0.0001961454098739641, |
| "loss": 0.2334, |
| "step": 5930 |
| }, |
| { |
| "epoch": 0.2689973734263201, |
| "grad_norm": 0.38767072558403015, |
| "learning_rate": 0.0001961247934913915, |
| "loss": 0.1829, |
| "step": 5940 |
| }, |
| { |
| "epoch": 0.26945023095734083, |
| "grad_norm": 0.45452436804771423, |
| "learning_rate": 0.00019610412321143398, |
| "loss": 0.256, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.2699030884883616, |
| "grad_norm": 0.448044091463089, |
| "learning_rate": 0.0001960833990456814, |
| "loss": 0.2153, |
| "step": 5960 |
| }, |
| { |
| "epoch": 0.2703559460193823, |
| "grad_norm": 0.4571818709373474, |
| "learning_rate": 0.00019606262100575387, |
| "loss": 0.2267, |
| "step": 5970 |
| }, |
| { |
| "epoch": 0.27080880355040304, |
| "grad_norm": 0.5397394895553589, |
| "learning_rate": 0.0001960417891033016, |
| "loss": 0.2056, |
| "step": 5980 |
| }, |
| { |
| "epoch": 0.2712616610814238, |
| "grad_norm": 1.0641297101974487, |
| "learning_rate": 0.00019602090335000516, |
| "loss": 0.2006, |
| "step": 5990 |
| }, |
| { |
| "epoch": 0.2717145186124445, |
| "grad_norm": 0.3743475079536438, |
| "learning_rate": 0.00019599996375757522, |
| "loss": 0.2114, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.2717145186124445, |
| "eval_chrf": 85.75058405488659, |
| "eval_loss": 0.19780972599983215, |
| "eval_runtime": 8.1051, |
| "eval_samples_per_second": 1.234, |
| "eval_steps_per_second": 0.123, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.27216737614346526, |
| "grad_norm": 0.40974077582359314, |
| "learning_rate": 0.00019597897033775267, |
| "loss": 0.2187, |
| "step": 6010 |
| }, |
| { |
| "epoch": 0.272620233674486, |
| "grad_norm": 0.4024125933647156, |
| "learning_rate": 0.0001959579231023085, |
| "loss": 0.2057, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.27307309120550677, |
| "grad_norm": 0.3904462456703186, |
| "learning_rate": 0.00019593682206304404, |
| "loss": 0.2154, |
| "step": 6030 |
| }, |
| { |
| "epoch": 0.27352594873652747, |
| "grad_norm": 0.4456275403499603, |
| "learning_rate": 0.0001959156672317906, |
| "loss": 0.1945, |
| "step": 6040 |
| }, |
| { |
| "epoch": 0.2739788062675482, |
| "grad_norm": 0.47315770387649536, |
| "learning_rate": 0.0001958944586204098, |
| "loss": 0.2118, |
| "step": 6050 |
| }, |
| { |
| "epoch": 0.274431663798569, |
| "grad_norm": 0.3774961829185486, |
| "learning_rate": 0.00019587319624079334, |
| "loss": 0.1844, |
| "step": 6060 |
| }, |
| { |
| "epoch": 0.27488452132958974, |
| "grad_norm": 0.38651296496391296, |
| "learning_rate": 0.00019585188010486307, |
| "loss": 0.1957, |
| "step": 6070 |
| }, |
| { |
| "epoch": 0.27533737886061044, |
| "grad_norm": 0.41263914108276367, |
| "learning_rate": 0.000195830510224571, |
| "loss": 0.1949, |
| "step": 6080 |
| }, |
| { |
| "epoch": 0.2757902363916312, |
| "grad_norm": 0.3857577443122864, |
| "learning_rate": 0.0001958090866118993, |
| "loss": 0.2078, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.27624309392265195, |
| "grad_norm": 0.6576055288314819, |
| "learning_rate": 0.00019578760927886017, |
| "loss": 0.2255, |
| "step": 6100 |
| }, |
| { |
| "epoch": 0.27669595145367265, |
| "grad_norm": 0.472508043050766, |
| "learning_rate": 0.00019576607823749607, |
| "loss": 0.2043, |
| "step": 6110 |
| }, |
| { |
| "epoch": 0.2771488089846934, |
| "grad_norm": 0.39050430059432983, |
| "learning_rate": 0.00019574449349987947, |
| "loss": 0.2083, |
| "step": 6120 |
| }, |
| { |
| "epoch": 0.27760166651571416, |
| "grad_norm": 0.4212459921836853, |
| "learning_rate": 0.00019572285507811295, |
| "loss": 0.2012, |
| "step": 6130 |
| }, |
| { |
| "epoch": 0.2780545240467349, |
| "grad_norm": 0.44424203038215637, |
| "learning_rate": 0.00019570116298432927, |
| "loss": 0.1881, |
| "step": 6140 |
| }, |
| { |
| "epoch": 0.2785073815777556, |
| "grad_norm": 0.43899261951446533, |
| "learning_rate": 0.00019567941723069122, |
| "loss": 0.1974, |
| "step": 6150 |
| }, |
| { |
| "epoch": 0.2789602391087764, |
| "grad_norm": 0.40080156922340393, |
| "learning_rate": 0.0001956576178293917, |
| "loss": 0.1923, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.27941309663979713, |
| "grad_norm": 0.49086445569992065, |
| "learning_rate": 0.00019563576479265365, |
| "loss": 0.2112, |
| "step": 6170 |
| }, |
| { |
| "epoch": 0.2798659541708179, |
| "grad_norm": 0.4632461667060852, |
| "learning_rate": 0.00019561385813273016, |
| "loss": 0.2087, |
| "step": 6180 |
| }, |
| { |
| "epoch": 0.2803188117018386, |
| "grad_norm": 0.3927869200706482, |
| "learning_rate": 0.00019559189786190432, |
| "loss": 0.1968, |
| "step": 6190 |
| }, |
| { |
| "epoch": 0.28077166923285934, |
| "grad_norm": 0.513033926486969, |
| "learning_rate": 0.00019556988399248927, |
| "loss": 0.2206, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.2812245267638801, |
| "grad_norm": 0.4554494321346283, |
| "learning_rate": 0.00019554781653682832, |
| "loss": 0.2253, |
| "step": 6210 |
| }, |
| { |
| "epoch": 0.2816773842949008, |
| "grad_norm": 0.3585559129714966, |
| "learning_rate": 0.00019552569550729468, |
| "loss": 0.1924, |
| "step": 6220 |
| }, |
| { |
| "epoch": 0.28213024182592156, |
| "grad_norm": 0.48740342259407043, |
| "learning_rate": 0.00019550352091629166, |
| "loss": 0.1948, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.2825830993569423, |
| "grad_norm": 0.4652673006057739, |
| "learning_rate": 0.0001954812927762526, |
| "loss": 0.2103, |
| "step": 6240 |
| }, |
| { |
| "epoch": 0.28303595688796307, |
| "grad_norm": 0.4237286150455475, |
| "learning_rate": 0.0001954590110996409, |
| "loss": 0.2297, |
| "step": 6250 |
| }, |
| { |
| "epoch": 0.28348881441898377, |
| "grad_norm": 0.6610928177833557, |
| "learning_rate": 0.0001954366758989499, |
| "loss": 0.2187, |
| "step": 6260 |
| }, |
| { |
| "epoch": 0.2839416719500045, |
| "grad_norm": 0.4797596037387848, |
| "learning_rate": 0.00019541428718670306, |
| "loss": 0.2045, |
| "step": 6270 |
| }, |
| { |
| "epoch": 0.2843945294810253, |
| "grad_norm": 0.3981485962867737, |
| "learning_rate": 0.0001953918449754537, |
| "loss": 0.1904, |
| "step": 6280 |
| }, |
| { |
| "epoch": 0.284847387012046, |
| "grad_norm": 0.7103779315948486, |
| "learning_rate": 0.00019536934927778533, |
| "loss": 0.1914, |
| "step": 6290 |
| }, |
| { |
| "epoch": 0.28530024454306674, |
| "grad_norm": 0.3921561539173126, |
| "learning_rate": 0.0001953468001063112, |
| "loss": 0.2028, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.2857531020740875, |
| "grad_norm": 0.539473295211792, |
| "learning_rate": 0.00019532419747367478, |
| "loss": 0.2178, |
| "step": 6310 |
| }, |
| { |
| "epoch": 0.28620595960510825, |
| "grad_norm": 0.5404443144798279, |
| "learning_rate": 0.00019530154139254938, |
| "loss": 0.223, |
| "step": 6320 |
| }, |
| { |
| "epoch": 0.28665881713612895, |
| "grad_norm": 0.4464082717895508, |
| "learning_rate": 0.00019527883187563832, |
| "loss": 0.213, |
| "step": 6330 |
| }, |
| { |
| "epoch": 0.2871116746671497, |
| "grad_norm": 0.4774690270423889, |
| "learning_rate": 0.00019525606893567487, |
| "loss": 0.2224, |
| "step": 6340 |
| }, |
| { |
| "epoch": 0.28756453219817046, |
| "grad_norm": 0.5096843838691711, |
| "learning_rate": 0.00019523325258542228, |
| "loss": 0.2159, |
| "step": 6350 |
| }, |
| { |
| "epoch": 0.2880173897291912, |
| "grad_norm": 0.48254311084747314, |
| "learning_rate": 0.00019521038283767372, |
| "loss": 0.1867, |
| "step": 6360 |
| }, |
| { |
| "epoch": 0.2884702472602119, |
| "grad_norm": 0.5749514698982239, |
| "learning_rate": 0.0001951874597052523, |
| "loss": 0.2121, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.2889231047912327, |
| "grad_norm": 0.5011195540428162, |
| "learning_rate": 0.00019516448320101105, |
| "loss": 0.2435, |
| "step": 6380 |
| }, |
| { |
| "epoch": 0.28937596232225343, |
| "grad_norm": 0.5004845857620239, |
| "learning_rate": 0.00019514145333783303, |
| "loss": 0.2068, |
| "step": 6390 |
| }, |
| { |
| "epoch": 0.28982881985327413, |
| "grad_norm": 0.29733824729919434, |
| "learning_rate": 0.00019511837012863104, |
| "loss": 0.1963, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.2902816773842949, |
| "grad_norm": 0.5419613122940063, |
| "learning_rate": 0.00019509523358634794, |
| "loss": 0.217, |
| "step": 6410 |
| }, |
| { |
| "epoch": 0.29073453491531565, |
| "grad_norm": 0.36856991052627563, |
| "learning_rate": 0.00019507204372395639, |
| "loss": 0.2054, |
| "step": 6420 |
| }, |
| { |
| "epoch": 0.2911873924463364, |
| "grad_norm": 0.5289191007614136, |
| "learning_rate": 0.00019504880055445906, |
| "loss": 0.2295, |
| "step": 6430 |
| }, |
| { |
| "epoch": 0.2916402499773571, |
| "grad_norm": 0.3745858669281006, |
| "learning_rate": 0.00019502550409088843, |
| "loss": 0.1861, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.29209310750837786, |
| "grad_norm": 0.6232544183731079, |
| "learning_rate": 0.00019500215434630684, |
| "loss": 0.203, |
| "step": 6450 |
| }, |
| { |
| "epoch": 0.2925459650393986, |
| "grad_norm": 0.45420029759407043, |
| "learning_rate": 0.0001949787513338066, |
| "loss": 0.1683, |
| "step": 6460 |
| }, |
| { |
| "epoch": 0.29299882257041937, |
| "grad_norm": 0.5476380586624146, |
| "learning_rate": 0.0001949552950665098, |
| "loss": 0.2168, |
| "step": 6470 |
| }, |
| { |
| "epoch": 0.29345168010144007, |
| "grad_norm": 0.4741978347301483, |
| "learning_rate": 0.0001949317855575684, |
| "loss": 0.2132, |
| "step": 6480 |
| }, |
| { |
| "epoch": 0.2939045376324608, |
| "grad_norm": 0.5809348821640015, |
| "learning_rate": 0.00019490822282016427, |
| "loss": 0.2454, |
| "step": 6490 |
| }, |
| { |
| "epoch": 0.2943573951634816, |
| "grad_norm": 0.5414433479309082, |
| "learning_rate": 0.00019488460686750906, |
| "loss": 0.1964, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.2948102526945023, |
| "grad_norm": 0.36005979776382446, |
| "learning_rate": 0.00019486093771284432, |
| "loss": 0.2165, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.29526311022552304, |
| "grad_norm": 0.385918527841568, |
| "learning_rate": 0.00019483721536944136, |
| "loss": 0.2145, |
| "step": 6520 |
| }, |
| { |
| "epoch": 0.2957159677565438, |
| "grad_norm": 0.49405747652053833, |
| "learning_rate": 0.0001948134398506014, |
| "loss": 0.2389, |
| "step": 6530 |
| }, |
| { |
| "epoch": 0.29616882528756455, |
| "grad_norm": 0.4467755854129791, |
| "learning_rate": 0.0001947896111696554, |
| "loss": 0.2311, |
| "step": 6540 |
| }, |
| { |
| "epoch": 0.29662168281858525, |
| "grad_norm": 0.6420004367828369, |
| "learning_rate": 0.00019476572933996416, |
| "loss": 0.2129, |
| "step": 6550 |
| }, |
| { |
| "epoch": 0.297074540349606, |
| "grad_norm": 0.3505796492099762, |
| "learning_rate": 0.0001947417943749182, |
| "loss": 0.2156, |
| "step": 6560 |
| }, |
| { |
| "epoch": 0.29752739788062676, |
| "grad_norm": 0.3485499620437622, |
| "learning_rate": 0.00019471780628793807, |
| "loss": 0.2009, |
| "step": 6570 |
| }, |
| { |
| "epoch": 0.2979802554116475, |
| "grad_norm": 0.5055803060531616, |
| "learning_rate": 0.0001946937650924738, |
| "loss": 0.2169, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.2984331129426682, |
| "grad_norm": 0.5099272727966309, |
| "learning_rate": 0.00019466967080200538, |
| "loss": 0.2276, |
| "step": 6590 |
| }, |
| { |
| "epoch": 0.298885970473689, |
| "grad_norm": 0.38270193338394165, |
| "learning_rate": 0.00019464552343004257, |
| "loss": 0.2193, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.29933882800470973, |
| "grad_norm": 0.4830116927623749, |
| "learning_rate": 0.00019462132299012482, |
| "loss": 0.1965, |
| "step": 6610 |
| }, |
| { |
| "epoch": 0.29979168553573043, |
| "grad_norm": 0.4647078812122345, |
| "learning_rate": 0.00019459706949582134, |
| "loss": 0.1906, |
| "step": 6620 |
| }, |
| { |
| "epoch": 0.3002445430667512, |
| "grad_norm": 0.6252137422561646, |
| "learning_rate": 0.0001945727629607312, |
| "loss": 0.209, |
| "step": 6630 |
| }, |
| { |
| "epoch": 0.30069740059777195, |
| "grad_norm": 0.4478525221347809, |
| "learning_rate": 0.00019454840339848306, |
| "loss": 0.225, |
| "step": 6640 |
| }, |
| { |
| "epoch": 0.3011502581287927, |
| "grad_norm": 0.4418219029903412, |
| "learning_rate": 0.00019452399082273543, |
| "loss": 0.2185, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.3016031156598134, |
| "grad_norm": 0.4830974042415619, |
| "learning_rate": 0.00019449952524717645, |
| "loss": 0.2212, |
| "step": 6660 |
| }, |
| { |
| "epoch": 0.30205597319083416, |
| "grad_norm": 0.4741996228694916, |
| "learning_rate": 0.00019447500668552404, |
| "loss": 0.196, |
| "step": 6670 |
| }, |
| { |
| "epoch": 0.3025088307218549, |
| "grad_norm": 0.36549901962280273, |
| "learning_rate": 0.0001944504351515258, |
| "loss": 0.2089, |
| "step": 6680 |
| }, |
| { |
| "epoch": 0.30296168825287567, |
| "grad_norm": 0.37369242310523987, |
| "learning_rate": 0.0001944258106589591, |
| "loss": 0.225, |
| "step": 6690 |
| }, |
| { |
| "epoch": 0.30341454578389637, |
| "grad_norm": 0.32829204201698303, |
| "learning_rate": 0.00019440113322163088, |
| "loss": 0.191, |
| "step": 6700 |
| }, |
| { |
| "epoch": 0.30386740331491713, |
| "grad_norm": 0.359739750623703, |
| "learning_rate": 0.00019437640285337786, |
| "loss": 0.175, |
| "step": 6710 |
| }, |
| { |
| "epoch": 0.3043202608459379, |
| "grad_norm": 0.48949897289276123, |
| "learning_rate": 0.0001943516195680664, |
| "loss": 0.2158, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.3047731183769586, |
| "grad_norm": 0.32513946294784546, |
| "learning_rate": 0.00019432678337959257, |
| "loss": 0.1945, |
| "step": 6730 |
| }, |
| { |
| "epoch": 0.30522597590797934, |
| "grad_norm": 0.5650992393493652, |
| "learning_rate": 0.00019430189430188208, |
| "loss": 0.2059, |
| "step": 6740 |
| }, |
| { |
| "epoch": 0.3056788334390001, |
| "grad_norm": 0.487543523311615, |
| "learning_rate": 0.00019427695234889024, |
| "loss": 0.1989, |
| "step": 6750 |
| }, |
| { |
| "epoch": 0.30613169097002085, |
| "grad_norm": 0.521586000919342, |
| "learning_rate": 0.0001942519575346021, |
| "loss": 0.2127, |
| "step": 6760 |
| }, |
| { |
| "epoch": 0.30658454850104155, |
| "grad_norm": 0.5865873694419861, |
| "learning_rate": 0.0001942269098730323, |
| "loss": 0.2336, |
| "step": 6770 |
| }, |
| { |
| "epoch": 0.3070374060320623, |
| "grad_norm": 0.49344274401664734, |
| "learning_rate": 0.0001942018093782251, |
| "loss": 0.2272, |
| "step": 6780 |
| }, |
| { |
| "epoch": 0.30749026356308307, |
| "grad_norm": 0.5003451704978943, |
| "learning_rate": 0.00019417665606425447, |
| "loss": 0.2008, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.3079431210941038, |
| "grad_norm": 0.4872739613056183, |
| "learning_rate": 0.00019415144994522384, |
| "loss": 0.1807, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.3083959786251245, |
| "grad_norm": 0.40256235003471375, |
| "learning_rate": 0.00019412619103526636, |
| "loss": 0.2155, |
| "step": 6810 |
| }, |
| { |
| "epoch": 0.3088488361561453, |
| "grad_norm": 0.4837823212146759, |
| "learning_rate": 0.00019410087934854483, |
| "loss": 0.1899, |
| "step": 6820 |
| }, |
| { |
| "epoch": 0.30930169368716603, |
| "grad_norm": 0.3010696470737457, |
| "learning_rate": 0.00019407551489925145, |
| "loss": 0.2095, |
| "step": 6830 |
| }, |
| { |
| "epoch": 0.30975455121818674, |
| "grad_norm": 0.4583839476108551, |
| "learning_rate": 0.0001940500977016082, |
| "loss": 0.1954, |
| "step": 6840 |
| }, |
| { |
| "epoch": 0.3102074087492075, |
| "grad_norm": 0.3842408061027527, |
| "learning_rate": 0.00019402462776986655, |
| "loss": 0.1968, |
| "step": 6850 |
| }, |
| { |
| "epoch": 0.31066026628022825, |
| "grad_norm": 0.4319639503955841, |
| "learning_rate": 0.00019399910511830757, |
| "loss": 0.2249, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.311113123811249, |
| "grad_norm": 0.4340662956237793, |
| "learning_rate": 0.0001939735297612418, |
| "loss": 0.1955, |
| "step": 6870 |
| }, |
| { |
| "epoch": 0.3115659813422697, |
| "grad_norm": 0.561085045337677, |
| "learning_rate": 0.00019394790171300947, |
| "loss": 0.1808, |
| "step": 6880 |
| }, |
| { |
| "epoch": 0.31201883887329046, |
| "grad_norm": 0.5028398633003235, |
| "learning_rate": 0.00019392222098798024, |
| "loss": 0.2016, |
| "step": 6890 |
| }, |
| { |
| "epoch": 0.3124716964043112, |
| "grad_norm": 0.37517672777175903, |
| "learning_rate": 0.00019389648760055335, |
| "loss": 0.2457, |
| "step": 6900 |
| }, |
| { |
| "epoch": 0.3129245539353319, |
| "grad_norm": 0.5868512988090515, |
| "learning_rate": 0.0001938707015651576, |
| "loss": 0.2184, |
| "step": 6910 |
| }, |
| { |
| "epoch": 0.3133774114663527, |
| "grad_norm": 0.5316442847251892, |
| "learning_rate": 0.00019384486289625123, |
| "loss": 0.1874, |
| "step": 6920 |
| }, |
| { |
| "epoch": 0.31383026899737343, |
| "grad_norm": 0.33138179779052734, |
| "learning_rate": 0.00019381897160832207, |
| "loss": 0.208, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.3142831265283942, |
| "grad_norm": 0.49291670322418213, |
| "learning_rate": 0.00019379302771588744, |
| "loss": 0.2225, |
| "step": 6940 |
| }, |
| { |
| "epoch": 0.3147359840594149, |
| "grad_norm": 0.41960468888282776, |
| "learning_rate": 0.00019376703123349408, |
| "loss": 0.2071, |
| "step": 6950 |
| }, |
| { |
| "epoch": 0.31518884159043564, |
| "grad_norm": 0.4372851252555847, |
| "learning_rate": 0.00019374098217571833, |
| "loss": 0.197, |
| "step": 6960 |
| }, |
| { |
| "epoch": 0.3156416991214564, |
| "grad_norm": 0.479920357465744, |
| "learning_rate": 0.00019371488055716594, |
| "loss": 0.2175, |
| "step": 6970 |
| }, |
| { |
| "epoch": 0.31609455665247715, |
| "grad_norm": 0.585425615310669, |
| "learning_rate": 0.00019368872639247213, |
| "loss": 0.2128, |
| "step": 6980 |
| }, |
| { |
| "epoch": 0.31654741418349785, |
| "grad_norm": 0.5114132165908813, |
| "learning_rate": 0.00019366251969630164, |
| "loss": 0.2282, |
| "step": 6990 |
| }, |
| { |
| "epoch": 0.3170002717145186, |
| "grad_norm": 0.3805500864982605, |
| "learning_rate": 0.00019363626048334856, |
| "loss": 0.2257, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.3170002717145186, |
| "eval_chrf": 82.99971755536923, |
| "eval_loss": 0.1683083176612854, |
| "eval_runtime": 8.3569, |
| "eval_samples_per_second": 1.197, |
| "eval_steps_per_second": 0.12, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.31745312924553937, |
| "grad_norm": 0.40227165818214417, |
| "learning_rate": 0.00019360994876833651, |
| "loss": 0.2096, |
| "step": 7010 |
| }, |
| { |
| "epoch": 0.31790598677656007, |
| "grad_norm": 0.4601973295211792, |
| "learning_rate": 0.00019358358456601855, |
| "loss": 0.1969, |
| "step": 7020 |
| }, |
| { |
| "epoch": 0.3183588443075808, |
| "grad_norm": 0.42821553349494934, |
| "learning_rate": 0.00019355716789117716, |
| "loss": 0.1847, |
| "step": 7030 |
| }, |
| { |
| "epoch": 0.3188117018386016, |
| "grad_norm": 0.5266753435134888, |
| "learning_rate": 0.00019353069875862415, |
| "loss": 0.1914, |
| "step": 7040 |
| }, |
| { |
| "epoch": 0.31926455936962234, |
| "grad_norm": 0.3582618236541748, |
| "learning_rate": 0.00019350417718320091, |
| "loss": 0.2137, |
| "step": 7050 |
| }, |
| { |
| "epoch": 0.31971741690064304, |
| "grad_norm": 0.38131266832351685, |
| "learning_rate": 0.00019347760317977813, |
| "loss": 0.2159, |
| "step": 7060 |
| }, |
| { |
| "epoch": 0.3201702744316638, |
| "grad_norm": 0.4729970395565033, |
| "learning_rate": 0.00019345097676325582, |
| "loss": 0.2093, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.32062313196268455, |
| "grad_norm": 0.5034728050231934, |
| "learning_rate": 0.0001934242979485636, |
| "loss": 0.2536, |
| "step": 7080 |
| }, |
| { |
| "epoch": 0.3210759894937053, |
| "grad_norm": 0.3000277280807495, |
| "learning_rate": 0.00019339756675066028, |
| "loss": 0.1886, |
| "step": 7090 |
| }, |
| { |
| "epoch": 0.321528847024726, |
| "grad_norm": 0.45927393436431885, |
| "learning_rate": 0.0001933707831845341, |
| "loss": 0.2117, |
| "step": 7100 |
| }, |
| { |
| "epoch": 0.32198170455574676, |
| "grad_norm": 0.4469738304615021, |
| "learning_rate": 0.00019334394726520267, |
| "loss": 0.2165, |
| "step": 7110 |
| }, |
| { |
| "epoch": 0.3224345620867675, |
| "grad_norm": 0.44823434948921204, |
| "learning_rate": 0.00019331705900771295, |
| "loss": 0.2168, |
| "step": 7120 |
| }, |
| { |
| "epoch": 0.3228874196177882, |
| "grad_norm": 0.4014623165130615, |
| "learning_rate": 0.00019329011842714124, |
| "loss": 0.183, |
| "step": 7130 |
| }, |
| { |
| "epoch": 0.323340277148809, |
| "grad_norm": 0.46763408184051514, |
| "learning_rate": 0.0001932631255385932, |
| "loss": 0.1927, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.32379313467982973, |
| "grad_norm": 0.375232070684433, |
| "learning_rate": 0.00019323608035720378, |
| "loss": 0.2059, |
| "step": 7150 |
| }, |
| { |
| "epoch": 0.3242459922108505, |
| "grad_norm": 0.5370981097221375, |
| "learning_rate": 0.00019320898289813728, |
| "loss": 0.1961, |
| "step": 7160 |
| }, |
| { |
| "epoch": 0.3246988497418712, |
| "grad_norm": 1.2785454988479614, |
| "learning_rate": 0.00019318183317658733, |
| "loss": 0.1853, |
| "step": 7170 |
| }, |
| { |
| "epoch": 0.32515170727289194, |
| "grad_norm": 0.5735570788383484, |
| "learning_rate": 0.00019315463120777682, |
| "loss": 0.2038, |
| "step": 7180 |
| }, |
| { |
| "epoch": 0.3256045648039127, |
| "grad_norm": 0.6372143030166626, |
| "learning_rate": 0.00019312737700695793, |
| "loss": 0.2146, |
| "step": 7190 |
| }, |
| { |
| "epoch": 0.32605742233493346, |
| "grad_norm": 0.5218742489814758, |
| "learning_rate": 0.00019310007058941217, |
| "loss": 0.2102, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.32651027986595416, |
| "grad_norm": 0.40725770592689514, |
| "learning_rate": 0.00019307271197045034, |
| "loss": 0.2103, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.3269631373969749, |
| "grad_norm": 0.4945192039012909, |
| "learning_rate": 0.00019304530116541244, |
| "loss": 0.2056, |
| "step": 7220 |
| }, |
| { |
| "epoch": 0.32741599492799567, |
| "grad_norm": 0.48213791847229004, |
| "learning_rate": 0.0001930178381896678, |
| "loss": 0.2302, |
| "step": 7230 |
| }, |
| { |
| "epoch": 0.32786885245901637, |
| "grad_norm": 0.3266017436981201, |
| "learning_rate": 0.00019299032305861494, |
| "loss": 0.1773, |
| "step": 7240 |
| }, |
| { |
| "epoch": 0.3283217099900371, |
| "grad_norm": 0.466962069272995, |
| "learning_rate": 0.00019296275578768163, |
| "loss": 0.2195, |
| "step": 7250 |
| }, |
| { |
| "epoch": 0.3287745675210579, |
| "grad_norm": 0.5370128750801086, |
| "learning_rate": 0.000192935136392325, |
| "loss": 0.2086, |
| "step": 7260 |
| }, |
| { |
| "epoch": 0.32922742505207864, |
| "grad_norm": 0.4896990954875946, |
| "learning_rate": 0.00019290746488803118, |
| "loss": 0.228, |
| "step": 7270 |
| }, |
| { |
| "epoch": 0.32968028258309934, |
| "grad_norm": 0.37790390849113464, |
| "learning_rate": 0.00019287974129031575, |
| "loss": 0.1924, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.3301331401141201, |
| "grad_norm": 0.42240995168685913, |
| "learning_rate": 0.00019285196561472334, |
| "loss": 0.1959, |
| "step": 7290 |
| }, |
| { |
| "epoch": 0.33058599764514085, |
| "grad_norm": 0.48036807775497437, |
| "learning_rate": 0.00019282413787682784, |
| "loss": 0.2039, |
| "step": 7300 |
| }, |
| { |
| "epoch": 0.3310388551761616, |
| "grad_norm": 0.4346061944961548, |
| "learning_rate": 0.0001927962580922323, |
| "loss": 0.2166, |
| "step": 7310 |
| }, |
| { |
| "epoch": 0.3314917127071823, |
| "grad_norm": 0.36262384057044983, |
| "learning_rate": 0.00019276832627656906, |
| "loss": 0.2028, |
| "step": 7320 |
| }, |
| { |
| "epoch": 0.33194457023820306, |
| "grad_norm": 0.36481937766075134, |
| "learning_rate": 0.00019274034244549948, |
| "loss": 0.2198, |
| "step": 7330 |
| }, |
| { |
| "epoch": 0.3323974277692238, |
| "grad_norm": 0.5148301124572754, |
| "learning_rate": 0.00019271230661471416, |
| "loss": 0.1921, |
| "step": 7340 |
| }, |
| { |
| "epoch": 0.3328502853002445, |
| "grad_norm": 0.6357602477073669, |
| "learning_rate": 0.00019268421879993286, |
| "loss": 0.2142, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.3333031428312653, |
| "grad_norm": 0.42315781116485596, |
| "learning_rate": 0.0001926560790169045, |
| "loss": 0.2087, |
| "step": 7360 |
| }, |
| { |
| "epoch": 0.33375600036228603, |
| "grad_norm": 0.36354297399520874, |
| "learning_rate": 0.00019262788728140708, |
| "loss": 0.2045, |
| "step": 7370 |
| }, |
| { |
| "epoch": 0.3342088578933068, |
| "grad_norm": 0.4218559265136719, |
| "learning_rate": 0.00019259964360924777, |
| "loss": 0.2149, |
| "step": 7380 |
| }, |
| { |
| "epoch": 0.3346617154243275, |
| "grad_norm": 0.389312744140625, |
| "learning_rate": 0.00019257134801626294, |
| "loss": 0.1924, |
| "step": 7390 |
| }, |
| { |
| "epoch": 0.33511457295534824, |
| "grad_norm": 0.4661923348903656, |
| "learning_rate": 0.0001925430005183179, |
| "loss": 0.2324, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.335567430486369, |
| "grad_norm": 0.6705445647239685, |
| "learning_rate": 0.00019251460113130721, |
| "loss": 0.2186, |
| "step": 7410 |
| }, |
| { |
| "epoch": 0.33602028801738976, |
| "grad_norm": 0.34467417001724243, |
| "learning_rate": 0.0001924861498711544, |
| "loss": 0.2131, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.33647314554841046, |
| "grad_norm": 0.5210064053535461, |
| "learning_rate": 0.00019245764675381225, |
| "loss": 0.1881, |
| "step": 7430 |
| }, |
| { |
| "epoch": 0.3369260030794312, |
| "grad_norm": 0.5148203372955322, |
| "learning_rate": 0.00019242909179526248, |
| "loss": 0.2057, |
| "step": 7440 |
| }, |
| { |
| "epoch": 0.33737886061045197, |
| "grad_norm": 0.4705396592617035, |
| "learning_rate": 0.00019240048501151588, |
| "loss": 0.2316, |
| "step": 7450 |
| }, |
| { |
| "epoch": 0.33783171814147267, |
| "grad_norm": 0.4516450762748718, |
| "learning_rate": 0.00019237182641861238, |
| "loss": 0.1875, |
| "step": 7460 |
| }, |
| { |
| "epoch": 0.3382845756724934, |
| "grad_norm": 0.36377689242362976, |
| "learning_rate": 0.00019234311603262086, |
| "loss": 0.2088, |
| "step": 7470 |
| }, |
| { |
| "epoch": 0.3387374332035142, |
| "grad_norm": 0.775227963924408, |
| "learning_rate": 0.00019231435386963942, |
| "loss": 0.1892, |
| "step": 7480 |
| }, |
| { |
| "epoch": 0.33919029073453494, |
| "grad_norm": 0.45915624499320984, |
| "learning_rate": 0.00019228553994579494, |
| "loss": 0.2033, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.33964314826555564, |
| "grad_norm": 0.5621431469917297, |
| "learning_rate": 0.00019225667427724352, |
| "loss": 0.1782, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.3400960057965764, |
| "grad_norm": 4.9242682456970215, |
| "learning_rate": 0.0001922277568801702, |
| "loss": 0.1944, |
| "step": 7510 |
| }, |
| { |
| "epoch": 0.34054886332759715, |
| "grad_norm": 0.41579318046569824, |
| "learning_rate": 0.00019219878777078896, |
| "loss": 0.196, |
| "step": 7520 |
| }, |
| { |
| "epoch": 0.34100172085861785, |
| "grad_norm": 0.5250774025917053, |
| "learning_rate": 0.00019216976696534297, |
| "loss": 0.2063, |
| "step": 7530 |
| }, |
| { |
| "epoch": 0.3414545783896386, |
| "grad_norm": 0.5255563855171204, |
| "learning_rate": 0.00019214069448010413, |
| "loss": 0.2061, |
| "step": 7540 |
| }, |
| { |
| "epoch": 0.34190743592065936, |
| "grad_norm": 0.41017070412635803, |
| "learning_rate": 0.00019211157033137354, |
| "loss": 0.1757, |
| "step": 7550 |
| }, |
| { |
| "epoch": 0.3423602934516801, |
| "grad_norm": 0.41834238171577454, |
| "learning_rate": 0.00019208239453548113, |
| "loss": 0.2022, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.3428131509827008, |
| "grad_norm": 0.4785829782485962, |
| "learning_rate": 0.00019205316710878587, |
| "loss": 0.224, |
| "step": 7570 |
| }, |
| { |
| "epoch": 0.3432660085137216, |
| "grad_norm": 0.3674987554550171, |
| "learning_rate": 0.0001920238880676756, |
| "loss": 0.2083, |
| "step": 7580 |
| }, |
| { |
| "epoch": 0.34371886604474233, |
| "grad_norm": 0.421902596950531, |
| "learning_rate": 0.00019199455742856714, |
| "loss": 0.2169, |
| "step": 7590 |
| }, |
| { |
| "epoch": 0.3441717235757631, |
| "grad_norm": 0.7003633379936218, |
| "learning_rate": 0.00019196517520790626, |
| "loss": 0.2069, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.3446245811067838, |
| "grad_norm": 0.40210533142089844, |
| "learning_rate": 0.00019193574142216768, |
| "loss": 0.1985, |
| "step": 7610 |
| }, |
| { |
| "epoch": 0.34507743863780455, |
| "grad_norm": 0.44373050332069397, |
| "learning_rate": 0.00019190625608785493, |
| "loss": 0.1962, |
| "step": 7620 |
| }, |
| { |
| "epoch": 0.3455302961688253, |
| "grad_norm": 0.49582087993621826, |
| "learning_rate": 0.00019187671922150053, |
| "loss": 0.2132, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.345983153699846, |
| "grad_norm": 0.5912686586380005, |
| "learning_rate": 0.00019184713083966582, |
| "loss": 0.2022, |
| "step": 7640 |
| }, |
| { |
| "epoch": 0.34643601123086676, |
| "grad_norm": 0.4800059199333191, |
| "learning_rate": 0.00019181749095894114, |
| "loss": 0.1992, |
| "step": 7650 |
| }, |
| { |
| "epoch": 0.3468888687618875, |
| "grad_norm": 0.45192936062812805, |
| "learning_rate": 0.00019178779959594562, |
| "loss": 0.2036, |
| "step": 7660 |
| }, |
| { |
| "epoch": 0.34734172629290827, |
| "grad_norm": 0.37126028537750244, |
| "learning_rate": 0.00019175805676732726, |
| "loss": 0.1606, |
| "step": 7670 |
| }, |
| { |
| "epoch": 0.34779458382392897, |
| "grad_norm": 0.4179832637310028, |
| "learning_rate": 0.0001917282624897629, |
| "loss": 0.194, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.3482474413549497, |
| "grad_norm": 0.46051672101020813, |
| "learning_rate": 0.00019169841677995833, |
| "loss": 0.2031, |
| "step": 7690 |
| }, |
| { |
| "epoch": 0.3487002988859705, |
| "grad_norm": 0.5491907596588135, |
| "learning_rate": 0.00019166851965464802, |
| "loss": 0.206, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.34915315641699124, |
| "grad_norm": 0.32158762216567993, |
| "learning_rate": 0.00019163857113059542, |
| "loss": 0.2098, |
| "step": 7710 |
| }, |
| { |
| "epoch": 0.34960601394801194, |
| "grad_norm": 0.4981878101825714, |
| "learning_rate": 0.0001916085712245927, |
| "loss": 0.1966, |
| "step": 7720 |
| }, |
| { |
| "epoch": 0.3500588714790327, |
| "grad_norm": 0.4609077274799347, |
| "learning_rate": 0.0001915785199534609, |
| "loss": 0.1932, |
| "step": 7730 |
| }, |
| { |
| "epoch": 0.35051172901005345, |
| "grad_norm": 0.4244095981121063, |
| "learning_rate": 0.00019154841733404982, |
| "loss": 0.1958, |
| "step": 7740 |
| }, |
| { |
| "epoch": 0.35096458654107415, |
| "grad_norm": 0.40494847297668457, |
| "learning_rate": 0.00019151826338323805, |
| "loss": 0.1977, |
| "step": 7750 |
| }, |
| { |
| "epoch": 0.3514174440720949, |
| "grad_norm": 0.3514610528945923, |
| "learning_rate": 0.000191488058117933, |
| "loss": 0.1984, |
| "step": 7760 |
| }, |
| { |
| "epoch": 0.35187030160311566, |
| "grad_norm": 0.4453369975090027, |
| "learning_rate": 0.00019145780155507085, |
| "loss": 0.2102, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.3523231591341364, |
| "grad_norm": 0.5334694385528564, |
| "learning_rate": 0.00019142749371161647, |
| "loss": 0.1982, |
| "step": 7780 |
| }, |
| { |
| "epoch": 0.3527760166651571, |
| "grad_norm": 0.39791110157966614, |
| "learning_rate": 0.00019139713460456355, |
| "loss": 0.2014, |
| "step": 7790 |
| }, |
| { |
| "epoch": 0.3532288741961779, |
| "grad_norm": 0.4057285785675049, |
| "learning_rate": 0.00019136672425093453, |
| "loss": 0.213, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.35368173172719863, |
| "grad_norm": 0.45790570974349976, |
| "learning_rate": 0.00019133626266778055, |
| "loss": 0.2204, |
| "step": 7810 |
| }, |
| { |
| "epoch": 0.3541345892582194, |
| "grad_norm": 0.4071456789970398, |
| "learning_rate": 0.00019130574987218148, |
| "loss": 0.2166, |
| "step": 7820 |
| }, |
| { |
| "epoch": 0.3545874467892401, |
| "grad_norm": 0.4177386164665222, |
| "learning_rate": 0.00019127518588124592, |
| "loss": 0.198, |
| "step": 7830 |
| }, |
| { |
| "epoch": 0.35504030432026085, |
| "grad_norm": 0.3865101635456085, |
| "learning_rate": 0.00019124457071211117, |
| "loss": 0.1916, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.3554931618512816, |
| "grad_norm": 0.41384661197662354, |
| "learning_rate": 0.0001912139043819432, |
| "loss": 0.2182, |
| "step": 7850 |
| }, |
| { |
| "epoch": 0.3559460193823023, |
| "grad_norm": 0.34812772274017334, |
| "learning_rate": 0.00019118318690793676, |
| "loss": 0.2093, |
| "step": 7860 |
| }, |
| { |
| "epoch": 0.35639887691332306, |
| "grad_norm": 0.31512027978897095, |
| "learning_rate": 0.0001911524183073151, |
| "loss": 0.2207, |
| "step": 7870 |
| }, |
| { |
| "epoch": 0.3568517344443438, |
| "grad_norm": 0.3776790201663971, |
| "learning_rate": 0.00019112159859733033, |
| "loss": 0.1826, |
| "step": 7880 |
| }, |
| { |
| "epoch": 0.35730459197536457, |
| "grad_norm": 0.3415994346141815, |
| "learning_rate": 0.0001910907277952631, |
| "loss": 0.1881, |
| "step": 7890 |
| }, |
| { |
| "epoch": 0.35775744950638527, |
| "grad_norm": 0.4983496367931366, |
| "learning_rate": 0.00019105980591842272, |
| "loss": 0.1851, |
| "step": 7900 |
| }, |
| { |
| "epoch": 0.35821030703740603, |
| "grad_norm": 0.48957979679107666, |
| "learning_rate": 0.00019102883298414717, |
| "loss": 0.2, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.3586631645684268, |
| "grad_norm": 0.3085726499557495, |
| "learning_rate": 0.00019099780900980307, |
| "loss": 0.1909, |
| "step": 7920 |
| }, |
| { |
| "epoch": 0.35911602209944754, |
| "grad_norm": 0.41775602102279663, |
| "learning_rate": 0.00019096673401278557, |
| "loss": 0.1929, |
| "step": 7930 |
| }, |
| { |
| "epoch": 0.35956887963046824, |
| "grad_norm": 0.4147847890853882, |
| "learning_rate": 0.00019093560801051855, |
| "loss": 0.172, |
| "step": 7940 |
| }, |
| { |
| "epoch": 0.360021737161489, |
| "grad_norm": 0.3972959518432617, |
| "learning_rate": 0.00019090443102045437, |
| "loss": 0.2018, |
| "step": 7950 |
| }, |
| { |
| "epoch": 0.36047459469250975, |
| "grad_norm": 0.3908996880054474, |
| "learning_rate": 0.0001908732030600741, |
| "loss": 0.1776, |
| "step": 7960 |
| }, |
| { |
| "epoch": 0.36092745222353045, |
| "grad_norm": 0.48856571316719055, |
| "learning_rate": 0.0001908419241468873, |
| "loss": 0.2169, |
| "step": 7970 |
| }, |
| { |
| "epoch": 0.3613803097545512, |
| "grad_norm": 0.5449142456054688, |
| "learning_rate": 0.00019081059429843208, |
| "loss": 0.2059, |
| "step": 7980 |
| }, |
| { |
| "epoch": 0.36183316728557197, |
| "grad_norm": 0.40314939618110657, |
| "learning_rate": 0.00019077921353227525, |
| "loss": 0.2215, |
| "step": 7990 |
| }, |
| { |
| "epoch": 0.3622860248165927, |
| "grad_norm": 0.4393227994441986, |
| "learning_rate": 0.000190747781866012, |
| "loss": 0.2043, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.3622860248165927, |
| "eval_chrf": 86.15658047675439, |
| "eval_loss": 0.17065152525901794, |
| "eval_runtime": 7.5439, |
| "eval_samples_per_second": 1.326, |
| "eval_steps_per_second": 0.133, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.3627388823476134, |
| "grad_norm": 0.4438968598842621, |
| "learning_rate": 0.00019071629931726616, |
| "loss": 0.201, |
| "step": 8010 |
| }, |
| { |
| "epoch": 0.3631917398786342, |
| "grad_norm": 0.4053437411785126, |
| "learning_rate": 0.00019068476590369007, |
| "loss": 0.1967, |
| "step": 8020 |
| }, |
| { |
| "epoch": 0.36364459740965493, |
| "grad_norm": 0.4435258209705353, |
| "learning_rate": 0.00019065318164296455, |
| "loss": 0.2073, |
| "step": 8030 |
| }, |
| { |
| "epoch": 0.3640974549406757, |
| "grad_norm": 0.44856691360473633, |
| "learning_rate": 0.000190621546552799, |
| "loss": 0.2382, |
| "step": 8040 |
| }, |
| { |
| "epoch": 0.3645503124716964, |
| "grad_norm": 0.42579466104507446, |
| "learning_rate": 0.00019058986065093131, |
| "loss": 0.1648, |
| "step": 8050 |
| }, |
| { |
| "epoch": 0.36500317000271715, |
| "grad_norm": 0.41863974928855896, |
| "learning_rate": 0.00019055812395512778, |
| "loss": 0.2206, |
| "step": 8060 |
| }, |
| { |
| "epoch": 0.3654560275337379, |
| "grad_norm": 0.5849347114562988, |
| "learning_rate": 0.00019052633648318327, |
| "loss": 0.2036, |
| "step": 8070 |
| }, |
| { |
| "epoch": 0.3659088850647586, |
| "grad_norm": 0.4172811806201935, |
| "learning_rate": 0.00019049449825292108, |
| "loss": 0.2059, |
| "step": 8080 |
| }, |
| { |
| "epoch": 0.36636174259577936, |
| "grad_norm": 0.42305058240890503, |
| "learning_rate": 0.00019046260928219302, |
| "loss": 0.2533, |
| "step": 8090 |
| }, |
| { |
| "epoch": 0.3668146001268001, |
| "grad_norm": 0.3714189827442169, |
| "learning_rate": 0.00019043066958887925, |
| "loss": 0.1827, |
| "step": 8100 |
| }, |
| { |
| "epoch": 0.3672674576578209, |
| "grad_norm": 0.40903255343437195, |
| "learning_rate": 0.00019039867919088844, |
| "loss": 0.1811, |
| "step": 8110 |
| }, |
| { |
| "epoch": 0.3677203151888416, |
| "grad_norm": 0.3701893389225006, |
| "learning_rate": 0.00019036663810615768, |
| "loss": 0.1738, |
| "step": 8120 |
| }, |
| { |
| "epoch": 0.36817317271986233, |
| "grad_norm": 0.5263366103172302, |
| "learning_rate": 0.00019033454635265248, |
| "loss": 0.1978, |
| "step": 8130 |
| }, |
| { |
| "epoch": 0.3686260302508831, |
| "grad_norm": 0.42517194151878357, |
| "learning_rate": 0.00019030240394836675, |
| "loss": 0.1896, |
| "step": 8140 |
| }, |
| { |
| "epoch": 0.3690788877819038, |
| "grad_norm": 0.5339916944503784, |
| "learning_rate": 0.0001902702109113228, |
| "loss": 0.223, |
| "step": 8150 |
| }, |
| { |
| "epoch": 0.36953174531292454, |
| "grad_norm": 0.49596965312957764, |
| "learning_rate": 0.0001902379672595714, |
| "loss": 0.2212, |
| "step": 8160 |
| }, |
| { |
| "epoch": 0.3699846028439453, |
| "grad_norm": 0.47744324803352356, |
| "learning_rate": 0.00019020567301119155, |
| "loss": 0.1985, |
| "step": 8170 |
| }, |
| { |
| "epoch": 0.37043746037496605, |
| "grad_norm": 0.543395459651947, |
| "learning_rate": 0.00019017332818429078, |
| "loss": 0.2011, |
| "step": 8180 |
| }, |
| { |
| "epoch": 0.37089031790598675, |
| "grad_norm": 0.48390671610832214, |
| "learning_rate": 0.00019014093279700483, |
| "loss": 0.2018, |
| "step": 8190 |
| }, |
| { |
| "epoch": 0.3713431754370075, |
| "grad_norm": 0.4345707297325134, |
| "learning_rate": 0.00019010848686749793, |
| "loss": 0.2122, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.37179603296802827, |
| "grad_norm": 0.3869522213935852, |
| "learning_rate": 0.00019007599041396257, |
| "loss": 0.1941, |
| "step": 8210 |
| }, |
| { |
| "epoch": 0.372248890499049, |
| "grad_norm": 0.5442917346954346, |
| "learning_rate": 0.00019004344345461958, |
| "loss": 0.2046, |
| "step": 8220 |
| }, |
| { |
| "epoch": 0.3727017480300697, |
| "grad_norm": 0.41071373224258423, |
| "learning_rate": 0.00019001084600771807, |
| "loss": 0.2044, |
| "step": 8230 |
| }, |
| { |
| "epoch": 0.3731546055610905, |
| "grad_norm": 0.4944802522659302, |
| "learning_rate": 0.00018997819809153557, |
| "loss": 0.1961, |
| "step": 8240 |
| }, |
| { |
| "epoch": 0.37360746309211124, |
| "grad_norm": 0.45529964566230774, |
| "learning_rate": 0.0001899454997243778, |
| "loss": 0.2038, |
| "step": 8250 |
| }, |
| { |
| "epoch": 0.37406032062313194, |
| "grad_norm": 0.3635946214199066, |
| "learning_rate": 0.00018991275092457882, |
| "loss": 0.2083, |
| "step": 8260 |
| }, |
| { |
| "epoch": 0.3745131781541527, |
| "grad_norm": 0.3817839026451111, |
| "learning_rate": 0.00018987995171050094, |
| "loss": 0.1883, |
| "step": 8270 |
| }, |
| { |
| "epoch": 0.37496603568517345, |
| "grad_norm": 0.38155823945999146, |
| "learning_rate": 0.00018984710210053477, |
| "loss": 0.1938, |
| "step": 8280 |
| }, |
| { |
| "epoch": 0.3754188932161942, |
| "grad_norm": 0.557369589805603, |
| "learning_rate": 0.00018981420211309916, |
| "loss": 0.217, |
| "step": 8290 |
| }, |
| { |
| "epoch": 0.3758717507472149, |
| "grad_norm": 0.389067143201828, |
| "learning_rate": 0.00018978125176664118, |
| "loss": 0.1791, |
| "step": 8300 |
| }, |
| { |
| "epoch": 0.37632460827823566, |
| "grad_norm": 0.5227335691452026, |
| "learning_rate": 0.0001897482510796362, |
| "loss": 0.1886, |
| "step": 8310 |
| }, |
| { |
| "epoch": 0.3767774658092564, |
| "grad_norm": 0.7860269546508789, |
| "learning_rate": 0.00018971520007058775, |
| "loss": 0.2142, |
| "step": 8320 |
| }, |
| { |
| "epoch": 0.3772303233402772, |
| "grad_norm": 0.6494739055633545, |
| "learning_rate": 0.00018968209875802757, |
| "loss": 0.2166, |
| "step": 8330 |
| }, |
| { |
| "epoch": 0.3776831808712979, |
| "grad_norm": 0.37337711453437805, |
| "learning_rate": 0.0001896489471605157, |
| "loss": 0.1673, |
| "step": 8340 |
| }, |
| { |
| "epoch": 0.37813603840231863, |
| "grad_norm": 0.7072011828422546, |
| "learning_rate": 0.00018961574529664031, |
| "loss": 0.1797, |
| "step": 8350 |
| }, |
| { |
| "epoch": 0.3785888959333394, |
| "grad_norm": 0.3572746813297272, |
| "learning_rate": 0.00018958249318501777, |
| "loss": 0.2033, |
| "step": 8360 |
| }, |
| { |
| "epoch": 0.3790417534643601, |
| "grad_norm": 0.40508168935775757, |
| "learning_rate": 0.00018954919084429256, |
| "loss": 0.2167, |
| "step": 8370 |
| }, |
| { |
| "epoch": 0.37949461099538084, |
| "grad_norm": 0.4418649673461914, |
| "learning_rate": 0.00018951583829313742, |
| "loss": 0.2197, |
| "step": 8380 |
| }, |
| { |
| "epoch": 0.3799474685264016, |
| "grad_norm": 0.40917450189590454, |
| "learning_rate": 0.00018948243555025313, |
| "loss": 0.183, |
| "step": 8390 |
| }, |
| { |
| "epoch": 0.38040032605742236, |
| "grad_norm": 0.5492053031921387, |
| "learning_rate": 0.00018944898263436878, |
| "loss": 0.2283, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.38085318358844306, |
| "grad_norm": 0.34713467955589294, |
| "learning_rate": 0.00018941547956424144, |
| "loss": 0.1689, |
| "step": 8410 |
| }, |
| { |
| "epoch": 0.3813060411194638, |
| "grad_norm": 0.6434493660926819, |
| "learning_rate": 0.00018938192635865634, |
| "loss": 0.2149, |
| "step": 8420 |
| }, |
| { |
| "epoch": 0.38175889865048457, |
| "grad_norm": 0.4369926154613495, |
| "learning_rate": 0.00018934832303642692, |
| "loss": 0.1864, |
| "step": 8430 |
| }, |
| { |
| "epoch": 0.3822117561815053, |
| "grad_norm": 0.436750590801239, |
| "learning_rate": 0.00018931466961639456, |
| "loss": 0.1991, |
| "step": 8440 |
| }, |
| { |
| "epoch": 0.382664613712526, |
| "grad_norm": 0.5193954706192017, |
| "learning_rate": 0.0001892809661174288, |
| "loss": 0.1964, |
| "step": 8450 |
| }, |
| { |
| "epoch": 0.3831174712435468, |
| "grad_norm": 0.4910030961036682, |
| "learning_rate": 0.00018924721255842727, |
| "loss": 0.2151, |
| "step": 8460 |
| }, |
| { |
| "epoch": 0.38357032877456754, |
| "grad_norm": 0.47311288118362427, |
| "learning_rate": 0.00018921340895831573, |
| "loss": 0.2132, |
| "step": 8470 |
| }, |
| { |
| "epoch": 0.38402318630558824, |
| "grad_norm": 0.45840010046958923, |
| "learning_rate": 0.00018917955533604788, |
| "loss": 0.1868, |
| "step": 8480 |
| }, |
| { |
| "epoch": 0.384476043836609, |
| "grad_norm": 0.5156082510948181, |
| "learning_rate": 0.00018914565171060553, |
| "loss": 0.2066, |
| "step": 8490 |
| }, |
| { |
| "epoch": 0.38492890136762975, |
| "grad_norm": 0.3248344659805298, |
| "learning_rate": 0.0001891116981009985, |
| "loss": 0.1923, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.3853817588986505, |
| "grad_norm": 0.3991844058036804, |
| "learning_rate": 0.0001890776945262647, |
| "loss": 0.1988, |
| "step": 8510 |
| }, |
| { |
| "epoch": 0.3858346164296712, |
| "grad_norm": 0.39894551038742065, |
| "learning_rate": 0.00018904364100547, |
| "loss": 0.1933, |
| "step": 8520 |
| }, |
| { |
| "epoch": 0.38628747396069196, |
| "grad_norm": 0.5236116647720337, |
| "learning_rate": 0.00018900953755770825, |
| "loss": 0.2022, |
| "step": 8530 |
| }, |
| { |
| "epoch": 0.3867403314917127, |
| "grad_norm": 0.45928722620010376, |
| "learning_rate": 0.00018897538420210134, |
| "loss": 0.2107, |
| "step": 8540 |
| }, |
| { |
| "epoch": 0.3871931890227335, |
| "grad_norm": 0.5236533284187317, |
| "learning_rate": 0.00018894118095779915, |
| "loss": 0.207, |
| "step": 8550 |
| }, |
| { |
| "epoch": 0.3876460465537542, |
| "grad_norm": 0.48914825916290283, |
| "learning_rate": 0.0001889069278439795, |
| "loss": 0.1956, |
| "step": 8560 |
| }, |
| { |
| "epoch": 0.38809890408477493, |
| "grad_norm": 0.4775371253490448, |
| "learning_rate": 0.0001888726248798482, |
| "loss": 0.2077, |
| "step": 8570 |
| }, |
| { |
| "epoch": 0.3885517616157957, |
| "grad_norm": 0.4409538209438324, |
| "learning_rate": 0.00018883827208463898, |
| "loss": 0.2194, |
| "step": 8580 |
| }, |
| { |
| "epoch": 0.3890046191468164, |
| "grad_norm": 0.35968494415283203, |
| "learning_rate": 0.00018880386947761355, |
| "loss": 0.185, |
| "step": 8590 |
| }, |
| { |
| "epoch": 0.38945747667783714, |
| "grad_norm": 0.4133264422416687, |
| "learning_rate": 0.00018876941707806152, |
| "loss": 0.2145, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.3899103342088579, |
| "grad_norm": 0.3894209563732147, |
| "learning_rate": 0.00018873491490530042, |
| "loss": 0.1673, |
| "step": 8610 |
| }, |
| { |
| "epoch": 0.39036319173987866, |
| "grad_norm": 0.43155336380004883, |
| "learning_rate": 0.00018870036297867566, |
| "loss": 0.223, |
| "step": 8620 |
| }, |
| { |
| "epoch": 0.39081604927089936, |
| "grad_norm": 0.4406642019748688, |
| "learning_rate": 0.0001886657613175607, |
| "loss": 0.1923, |
| "step": 8630 |
| }, |
| { |
| "epoch": 0.3912689068019201, |
| "grad_norm": 0.404340922832489, |
| "learning_rate": 0.0001886311099413566, |
| "loss": 0.21, |
| "step": 8640 |
| }, |
| { |
| "epoch": 0.39172176433294087, |
| "grad_norm": 0.3692395091056824, |
| "learning_rate": 0.00018859640886949262, |
| "loss": 0.2009, |
| "step": 8650 |
| }, |
| { |
| "epoch": 0.39217462186396157, |
| "grad_norm": 0.3608049750328064, |
| "learning_rate": 0.00018856165812142561, |
| "loss": 0.1878, |
| "step": 8660 |
| }, |
| { |
| "epoch": 0.3926274793949823, |
| "grad_norm": 0.3195638656616211, |
| "learning_rate": 0.00018852685771664047, |
| "loss": 0.1795, |
| "step": 8670 |
| }, |
| { |
| "epoch": 0.3930803369260031, |
| "grad_norm": 0.4515351951122284, |
| "learning_rate": 0.00018849200767464983, |
| "loss": 0.1884, |
| "step": 8680 |
| }, |
| { |
| "epoch": 0.39353319445702384, |
| "grad_norm": 0.44284364581108093, |
| "learning_rate": 0.0001884571080149942, |
| "loss": 0.1938, |
| "step": 8690 |
| }, |
| { |
| "epoch": 0.39398605198804454, |
| "grad_norm": 0.49626269936561584, |
| "learning_rate": 0.00018842215875724188, |
| "loss": 0.1971, |
| "step": 8700 |
| }, |
| { |
| "epoch": 0.3944389095190653, |
| "grad_norm": 0.5126382112503052, |
| "learning_rate": 0.00018838715992098898, |
| "loss": 0.1893, |
| "step": 8710 |
| }, |
| { |
| "epoch": 0.39489176705008605, |
| "grad_norm": 0.4352719187736511, |
| "learning_rate": 0.00018835211152585949, |
| "loss": 0.1821, |
| "step": 8720 |
| }, |
| { |
| "epoch": 0.3953446245811068, |
| "grad_norm": 0.4839010238647461, |
| "learning_rate": 0.00018831701359150506, |
| "loss": 0.2254, |
| "step": 8730 |
| }, |
| { |
| "epoch": 0.3957974821121275, |
| "grad_norm": 0.3657974302768707, |
| "learning_rate": 0.0001882818661376052, |
| "loss": 0.1896, |
| "step": 8740 |
| }, |
| { |
| "epoch": 0.39625033964314826, |
| "grad_norm": 0.3730844259262085, |
| "learning_rate": 0.0001882466691838672, |
| "loss": 0.1738, |
| "step": 8750 |
| }, |
| { |
| "epoch": 0.396703197174169, |
| "grad_norm": 0.5131645202636719, |
| "learning_rate": 0.00018821142275002596, |
| "loss": 0.2009, |
| "step": 8760 |
| }, |
| { |
| "epoch": 0.3971560547051897, |
| "grad_norm": 0.5660125017166138, |
| "learning_rate": 0.00018817612685584437, |
| "loss": 0.2141, |
| "step": 8770 |
| }, |
| { |
| "epoch": 0.3976089122362105, |
| "grad_norm": 0.5042424201965332, |
| "learning_rate": 0.00018814078152111288, |
| "loss": 0.2224, |
| "step": 8780 |
| }, |
| { |
| "epoch": 0.39806176976723123, |
| "grad_norm": 0.4598681628704071, |
| "learning_rate": 0.0001881053867656496, |
| "loss": 0.2008, |
| "step": 8790 |
| }, |
| { |
| "epoch": 0.398514627298252, |
| "grad_norm": 0.49776187539100647, |
| "learning_rate": 0.00018806994260930058, |
| "loss": 0.226, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.3989674848292727, |
| "grad_norm": 0.34232836961746216, |
| "learning_rate": 0.00018803444907193937, |
| "loss": 0.1985, |
| "step": 8810 |
| }, |
| { |
| "epoch": 0.39942034236029345, |
| "grad_norm": 0.5097756385803223, |
| "learning_rate": 0.00018799890617346728, |
| "loss": 0.2321, |
| "step": 8820 |
| }, |
| { |
| "epoch": 0.3998731998913142, |
| "grad_norm": 0.3807264566421509, |
| "learning_rate": 0.0001879633139338133, |
| "loss": 0.1831, |
| "step": 8830 |
| }, |
| { |
| "epoch": 0.40032605742233496, |
| "grad_norm": 0.4587901830673218, |
| "learning_rate": 0.00018792767237293408, |
| "loss": 0.1976, |
| "step": 8840 |
| }, |
| { |
| "epoch": 0.40077891495335566, |
| "grad_norm": 0.6649393439292908, |
| "learning_rate": 0.0001878919815108139, |
| "loss": 0.1715, |
| "step": 8850 |
| }, |
| { |
| "epoch": 0.4012317724843764, |
| "grad_norm": 0.426932156085968, |
| "learning_rate": 0.00018785624136746472, |
| "loss": 0.1879, |
| "step": 8860 |
| }, |
| { |
| "epoch": 0.40168463001539717, |
| "grad_norm": 0.38488268852233887, |
| "learning_rate": 0.00018782045196292612, |
| "loss": 0.1992, |
| "step": 8870 |
| }, |
| { |
| "epoch": 0.40213748754641787, |
| "grad_norm": 0.5162119269371033, |
| "learning_rate": 0.00018778461331726533, |
| "loss": 0.2185, |
| "step": 8880 |
| }, |
| { |
| "epoch": 0.4025903450774386, |
| "grad_norm": 0.4168258011341095, |
| "learning_rate": 0.0001877487254505771, |
| "loss": 0.191, |
| "step": 8890 |
| }, |
| { |
| "epoch": 0.4030432026084594, |
| "grad_norm": 0.3788599669933319, |
| "learning_rate": 0.00018771278838298388, |
| "loss": 0.1824, |
| "step": 8900 |
| }, |
| { |
| "epoch": 0.40349606013948014, |
| "grad_norm": 0.38353031873703003, |
| "learning_rate": 0.0001876768021346356, |
| "loss": 0.1922, |
| "step": 8910 |
| }, |
| { |
| "epoch": 0.40394891767050084, |
| "grad_norm": 0.42297571897506714, |
| "learning_rate": 0.00018764076672570993, |
| "loss": 0.1845, |
| "step": 8920 |
| }, |
| { |
| "epoch": 0.4044017752015216, |
| "grad_norm": 0.5288705825805664, |
| "learning_rate": 0.00018760468217641195, |
| "loss": 0.2182, |
| "step": 8930 |
| }, |
| { |
| "epoch": 0.40485463273254235, |
| "grad_norm": 0.40291109681129456, |
| "learning_rate": 0.00018756854850697431, |
| "loss": 0.2134, |
| "step": 8940 |
| }, |
| { |
| "epoch": 0.4053074902635631, |
| "grad_norm": 0.37385135889053345, |
| "learning_rate": 0.0001875323657376573, |
| "loss": 0.1942, |
| "step": 8950 |
| }, |
| { |
| "epoch": 0.4057603477945838, |
| "grad_norm": 0.36549118161201477, |
| "learning_rate": 0.00018749613388874866, |
| "loss": 0.2172, |
| "step": 8960 |
| }, |
| { |
| "epoch": 0.40621320532560456, |
| "grad_norm": 0.4215755760669708, |
| "learning_rate": 0.00018745985298056363, |
| "loss": 0.1837, |
| "step": 8970 |
| }, |
| { |
| "epoch": 0.4066660628566253, |
| "grad_norm": 0.32497653365135193, |
| "learning_rate": 0.00018742352303344504, |
| "loss": 0.1762, |
| "step": 8980 |
| }, |
| { |
| "epoch": 0.407118920387646, |
| "grad_norm": 0.5675212740898132, |
| "learning_rate": 0.00018738714406776316, |
| "loss": 0.1878, |
| "step": 8990 |
| }, |
| { |
| "epoch": 0.4075717779186668, |
| "grad_norm": 0.5199867486953735, |
| "learning_rate": 0.00018735071610391578, |
| "loss": 0.2148, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.4075717779186668, |
| "eval_chrf": 78.88053316914994, |
| "eval_loss": 0.16049259901046753, |
| "eval_runtime": 26.3188, |
| "eval_samples_per_second": 0.38, |
| "eval_steps_per_second": 0.038, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.40802463544968753, |
| "grad_norm": 0.4164709150791168, |
| "learning_rate": 0.0001873142391623281, |
| "loss": 0.16, |
| "step": 9010 |
| }, |
| { |
| "epoch": 0.4084774929807083, |
| "grad_norm": 0.6372416615486145, |
| "learning_rate": 0.00018727771326345282, |
| "loss": 0.1991, |
| "step": 9020 |
| }, |
| { |
| "epoch": 0.408930350511729, |
| "grad_norm": 0.378200888633728, |
| "learning_rate": 0.00018724113842777013, |
| "loss": 0.2072, |
| "step": 9030 |
| }, |
| { |
| "epoch": 0.40938320804274975, |
| "grad_norm": 0.4380868375301361, |
| "learning_rate": 0.00018720451467578762, |
| "loss": 0.2091, |
| "step": 9040 |
| }, |
| { |
| "epoch": 0.4098360655737705, |
| "grad_norm": 0.5316906571388245, |
| "learning_rate": 0.00018716784202804028, |
| "loss": 0.1966, |
| "step": 9050 |
| }, |
| { |
| "epoch": 0.41028892310479126, |
| "grad_norm": 0.4007183611392975, |
| "learning_rate": 0.00018713112050509055, |
| "loss": 0.1774, |
| "step": 9060 |
| }, |
| { |
| "epoch": 0.41074178063581196, |
| "grad_norm": 0.45667392015457153, |
| "learning_rate": 0.00018709435012752827, |
| "loss": 0.2131, |
| "step": 9070 |
| }, |
| { |
| "epoch": 0.4111946381668327, |
| "grad_norm": 0.5240411758422852, |
| "learning_rate": 0.0001870575309159707, |
| "loss": 0.2055, |
| "step": 9080 |
| }, |
| { |
| "epoch": 0.41164749569785347, |
| "grad_norm": 0.3834088444709778, |
| "learning_rate": 0.0001870206628910624, |
| "loss": 0.204, |
| "step": 9090 |
| }, |
| { |
| "epoch": 0.41210035322887417, |
| "grad_norm": 0.428633451461792, |
| "learning_rate": 0.0001869837460734754, |
| "loss": 0.2158, |
| "step": 9100 |
| }, |
| { |
| "epoch": 0.41255321075989493, |
| "grad_norm": 0.5074917078018188, |
| "learning_rate": 0.00018694678048390904, |
| "loss": 0.2004, |
| "step": 9110 |
| }, |
| { |
| "epoch": 0.4130060682909157, |
| "grad_norm": 0.42477914690971375, |
| "learning_rate": 0.00018690976614308996, |
| "loss": 0.2096, |
| "step": 9120 |
| }, |
| { |
| "epoch": 0.41345892582193644, |
| "grad_norm": 0.3913014233112335, |
| "learning_rate": 0.00018687270307177225, |
| "loss": 0.1851, |
| "step": 9130 |
| }, |
| { |
| "epoch": 0.41391178335295714, |
| "grad_norm": 0.517967700958252, |
| "learning_rate": 0.00018683559129073723, |
| "loss": 0.1933, |
| "step": 9140 |
| }, |
| { |
| "epoch": 0.4143646408839779, |
| "grad_norm": 0.5146768093109131, |
| "learning_rate": 0.00018679843082079352, |
| "loss": 0.2077, |
| "step": 9150 |
| }, |
| { |
| "epoch": 0.41481749841499865, |
| "grad_norm": 0.43314865231513977, |
| "learning_rate": 0.0001867612216827771, |
| "loss": 0.1918, |
| "step": 9160 |
| }, |
| { |
| "epoch": 0.4152703559460194, |
| "grad_norm": 0.4207741618156433, |
| "learning_rate": 0.00018672396389755126, |
| "loss": 0.1985, |
| "step": 9170 |
| }, |
| { |
| "epoch": 0.4157232134770401, |
| "grad_norm": 0.3719622790813446, |
| "learning_rate": 0.00018668665748600648, |
| "loss": 0.1795, |
| "step": 9180 |
| }, |
| { |
| "epoch": 0.41617607100806087, |
| "grad_norm": 0.5668209791183472, |
| "learning_rate": 0.0001866493024690606, |
| "loss": 0.2179, |
| "step": 9190 |
| }, |
| { |
| "epoch": 0.4166289285390816, |
| "grad_norm": 0.42645466327667236, |
| "learning_rate": 0.00018661189886765855, |
| "loss": 0.1831, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.4170817860701023, |
| "grad_norm": 0.5149989724159241, |
| "learning_rate": 0.0001865744467027727, |
| "loss": 0.1948, |
| "step": 9210 |
| }, |
| { |
| "epoch": 0.4175346436011231, |
| "grad_norm": 0.4551143944263458, |
| "learning_rate": 0.00018653694599540254, |
| "loss": 0.1969, |
| "step": 9220 |
| }, |
| { |
| "epoch": 0.41798750113214383, |
| "grad_norm": 0.32381895184516907, |
| "learning_rate": 0.00018649939676657483, |
| "loss": 0.2078, |
| "step": 9230 |
| }, |
| { |
| "epoch": 0.4184403586631646, |
| "grad_norm": 0.537377119064331, |
| "learning_rate": 0.00018646179903734344, |
| "loss": 0.1839, |
| "step": 9240 |
| }, |
| { |
| "epoch": 0.4188932161941853, |
| "grad_norm": 0.45596960186958313, |
| "learning_rate": 0.00018642415282878958, |
| "loss": 0.1687, |
| "step": 9250 |
| }, |
| { |
| "epoch": 0.41934607372520605, |
| "grad_norm": 0.3386632204055786, |
| "learning_rate": 0.00018638645816202148, |
| "loss": 0.1995, |
| "step": 9260 |
| }, |
| { |
| "epoch": 0.4197989312562268, |
| "grad_norm": 0.5089104771614075, |
| "learning_rate": 0.00018634871505817467, |
| "loss": 0.1969, |
| "step": 9270 |
| }, |
| { |
| "epoch": 0.4202517887872475, |
| "grad_norm": 0.34959906339645386, |
| "learning_rate": 0.0001863109235384118, |
| "loss": 0.1791, |
| "step": 9280 |
| }, |
| { |
| "epoch": 0.42070464631826826, |
| "grad_norm": 0.42316970229148865, |
| "learning_rate": 0.00018627308362392263, |
| "loss": 0.1821, |
| "step": 9290 |
| }, |
| { |
| "epoch": 0.421157503849289, |
| "grad_norm": 0.5111418962478638, |
| "learning_rate": 0.00018623519533592412, |
| "loss": 0.2211, |
| "step": 9300 |
| }, |
| { |
| "epoch": 0.4216103613803098, |
| "grad_norm": 0.3825642764568329, |
| "learning_rate": 0.0001861972586956603, |
| "loss": 0.1862, |
| "step": 9310 |
| }, |
| { |
| "epoch": 0.4220632189113305, |
| "grad_norm": 0.5130252838134766, |
| "learning_rate": 0.0001861592737244023, |
| "loss": 0.2011, |
| "step": 9320 |
| }, |
| { |
| "epoch": 0.42251607644235123, |
| "grad_norm": 0.435468465089798, |
| "learning_rate": 0.0001861212404434484, |
| "loss": 0.2097, |
| "step": 9330 |
| }, |
| { |
| "epoch": 0.422968933973372, |
| "grad_norm": 0.5458192825317383, |
| "learning_rate": 0.00018608315887412395, |
| "loss": 0.2038, |
| "step": 9340 |
| }, |
| { |
| "epoch": 0.42342179150439274, |
| "grad_norm": 0.3725493550300598, |
| "learning_rate": 0.0001860450290377814, |
| "loss": 0.1823, |
| "step": 9350 |
| }, |
| { |
| "epoch": 0.42387464903541344, |
| "grad_norm": 0.4673443138599396, |
| "learning_rate": 0.00018600685095580016, |
| "loss": 0.1862, |
| "step": 9360 |
| }, |
| { |
| "epoch": 0.4243275065664342, |
| "grad_norm": 0.40920791029930115, |
| "learning_rate": 0.0001859686246495868, |
| "loss": 0.2102, |
| "step": 9370 |
| }, |
| { |
| "epoch": 0.42478036409745495, |
| "grad_norm": 0.5569631457328796, |
| "learning_rate": 0.00018593035014057493, |
| "loss": 0.1948, |
| "step": 9380 |
| }, |
| { |
| "epoch": 0.42523322162847565, |
| "grad_norm": 0.4123372435569763, |
| "learning_rate": 0.00018589202745022512, |
| "loss": 0.1904, |
| "step": 9390 |
| }, |
| { |
| "epoch": 0.4256860791594964, |
| "grad_norm": 0.5805758237838745, |
| "learning_rate": 0.00018585365660002499, |
| "loss": 0.222, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.42613893669051717, |
| "grad_norm": 0.38345110416412354, |
| "learning_rate": 0.00018581523761148917, |
| "loss": 0.1959, |
| "step": 9410 |
| }, |
| { |
| "epoch": 0.4265917942215379, |
| "grad_norm": 0.4751555025577545, |
| "learning_rate": 0.00018577677050615922, |
| "loss": 0.1794, |
| "step": 9420 |
| }, |
| { |
| "epoch": 0.4270446517525586, |
| "grad_norm": 0.32770711183547974, |
| "learning_rate": 0.00018573825530560382, |
| "loss": 0.1994, |
| "step": 9430 |
| }, |
| { |
| "epoch": 0.4274975092835794, |
| "grad_norm": 0.3502885699272156, |
| "learning_rate": 0.00018569969203141847, |
| "loss": 0.2047, |
| "step": 9440 |
| }, |
| { |
| "epoch": 0.42795036681460014, |
| "grad_norm": 0.4478504955768585, |
| "learning_rate": 0.0001856610807052257, |
| "loss": 0.2055, |
| "step": 9450 |
| }, |
| { |
| "epoch": 0.4284032243456209, |
| "grad_norm": 0.31034862995147705, |
| "learning_rate": 0.00018562242134867492, |
| "loss": 0.1664, |
| "step": 9460 |
| }, |
| { |
| "epoch": 0.4288560818766416, |
| "grad_norm": 0.4220707416534424, |
| "learning_rate": 0.00018558371398344264, |
| "loss": 0.1919, |
| "step": 9470 |
| }, |
| { |
| "epoch": 0.42930893940766235, |
| "grad_norm": 0.46567854285240173, |
| "learning_rate": 0.00018554495863123204, |
| "loss": 0.211, |
| "step": 9480 |
| }, |
| { |
| "epoch": 0.4297617969386831, |
| "grad_norm": 0.47741076350212097, |
| "learning_rate": 0.00018550615531377337, |
| "loss": 0.2074, |
| "step": 9490 |
| }, |
| { |
| "epoch": 0.4302146544697038, |
| "grad_norm": 0.5314249396324158, |
| "learning_rate": 0.00018546730405282372, |
| "loss": 0.1915, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.43066751200072456, |
| "grad_norm": 0.40051180124282837, |
| "learning_rate": 0.00018542840487016714, |
| "loss": 0.1915, |
| "step": 9510 |
| }, |
| { |
| "epoch": 0.4311203695317453, |
| "grad_norm": 0.39250391721725464, |
| "learning_rate": 0.00018538945778761448, |
| "loss": 0.2155, |
| "step": 9520 |
| }, |
| { |
| "epoch": 0.4315732270627661, |
| "grad_norm": 0.35448992252349854, |
| "learning_rate": 0.00018535046282700337, |
| "loss": 0.2018, |
| "step": 9530 |
| }, |
| { |
| "epoch": 0.4320260845937868, |
| "grad_norm": 0.7133687734603882, |
| "learning_rate": 0.00018531142001019846, |
| "loss": 0.1711, |
| "step": 9540 |
| }, |
| { |
| "epoch": 0.43247894212480753, |
| "grad_norm": 0.5354053974151611, |
| "learning_rate": 0.00018527232935909108, |
| "loss": 0.1958, |
| "step": 9550 |
| }, |
| { |
| "epoch": 0.4329317996558283, |
| "grad_norm": 0.4301791191101074, |
| "learning_rate": 0.0001852331908955995, |
| "loss": 0.2147, |
| "step": 9560 |
| }, |
| { |
| "epoch": 0.43338465718684904, |
| "grad_norm": 0.2932838499546051, |
| "learning_rate": 0.00018519400464166868, |
| "loss": 0.2106, |
| "step": 9570 |
| }, |
| { |
| "epoch": 0.43383751471786974, |
| "grad_norm": 0.384493887424469, |
| "learning_rate": 0.0001851547706192705, |
| "loss": 0.1827, |
| "step": 9580 |
| }, |
| { |
| "epoch": 0.4342903722488905, |
| "grad_norm": 0.4205734431743622, |
| "learning_rate": 0.00018511548885040356, |
| "loss": 0.1799, |
| "step": 9590 |
| }, |
| { |
| "epoch": 0.43474322977991126, |
| "grad_norm": 0.3769165575504303, |
| "learning_rate": 0.0001850761593570932, |
| "loss": 0.1914, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.43519608731093196, |
| "grad_norm": 0.5843045711517334, |
| "learning_rate": 0.00018503678216139159, |
| "loss": 0.2098, |
| "step": 9610 |
| }, |
| { |
| "epoch": 0.4356489448419527, |
| "grad_norm": 0.591116726398468, |
| "learning_rate": 0.00018499735728537756, |
| "loss": 0.1849, |
| "step": 9620 |
| }, |
| { |
| "epoch": 0.43610180237297347, |
| "grad_norm": 0.39489904046058655, |
| "learning_rate": 0.00018495788475115678, |
| "loss": 0.1955, |
| "step": 9630 |
| }, |
| { |
| "epoch": 0.4365546599039942, |
| "grad_norm": 0.38326331973075867, |
| "learning_rate": 0.00018491836458086155, |
| "loss": 0.194, |
| "step": 9640 |
| }, |
| { |
| "epoch": 0.4370075174350149, |
| "grad_norm": 0.49937698245048523, |
| "learning_rate": 0.00018487879679665093, |
| "loss": 0.1861, |
| "step": 9650 |
| }, |
| { |
| "epoch": 0.4374603749660357, |
| "grad_norm": 0.46309980750083923, |
| "learning_rate": 0.00018483918142071066, |
| "loss": 0.1935, |
| "step": 9660 |
| }, |
| { |
| "epoch": 0.43791323249705644, |
| "grad_norm": 0.4150969386100769, |
| "learning_rate": 0.00018479951847525319, |
| "loss": 0.2049, |
| "step": 9670 |
| }, |
| { |
| "epoch": 0.4383660900280772, |
| "grad_norm": 0.41933974623680115, |
| "learning_rate": 0.0001847598079825176, |
| "loss": 0.1918, |
| "step": 9680 |
| }, |
| { |
| "epoch": 0.4388189475590979, |
| "grad_norm": 0.4882836639881134, |
| "learning_rate": 0.00018472004996476966, |
| "loss": 0.2102, |
| "step": 9690 |
| }, |
| { |
| "epoch": 0.43927180509011865, |
| "grad_norm": 0.5054790377616882, |
| "learning_rate": 0.0001846802444443018, |
| "loss": 0.2131, |
| "step": 9700 |
| }, |
| { |
| "epoch": 0.4397246626211394, |
| "grad_norm": 0.45476034283638, |
| "learning_rate": 0.00018464039144343297, |
| "loss": 0.1929, |
| "step": 9710 |
| }, |
| { |
| "epoch": 0.4401775201521601, |
| "grad_norm": 0.4609181582927704, |
| "learning_rate": 0.00018460049098450898, |
| "loss": 0.2106, |
| "step": 9720 |
| }, |
| { |
| "epoch": 0.44063037768318086, |
| "grad_norm": 0.4659000337123871, |
| "learning_rate": 0.00018456054308990197, |
| "loss": 0.212, |
| "step": 9730 |
| }, |
| { |
| "epoch": 0.4410832352142016, |
| "grad_norm": 0.6165845394134521, |
| "learning_rate": 0.00018452054778201094, |
| "loss": 0.2109, |
| "step": 9740 |
| }, |
| { |
| "epoch": 0.4415360927452224, |
| "grad_norm": 0.4262254536151886, |
| "learning_rate": 0.00018448050508326124, |
| "loss": 0.2149, |
| "step": 9750 |
| }, |
| { |
| "epoch": 0.4419889502762431, |
| "grad_norm": 0.39887556433677673, |
| "learning_rate": 0.00018444041501610494, |
| "loss": 0.2128, |
| "step": 9760 |
| }, |
| { |
| "epoch": 0.44244180780726383, |
| "grad_norm": 0.45499277114868164, |
| "learning_rate": 0.00018440027760302066, |
| "loss": 0.1714, |
| "step": 9770 |
| }, |
| { |
| "epoch": 0.4428946653382846, |
| "grad_norm": 0.3185460865497589, |
| "learning_rate": 0.00018436009286651347, |
| "loss": 0.1933, |
| "step": 9780 |
| }, |
| { |
| "epoch": 0.44334752286930534, |
| "grad_norm": 0.4237852394580841, |
| "learning_rate": 0.0001843198608291151, |
| "loss": 0.2142, |
| "step": 9790 |
| }, |
| { |
| "epoch": 0.44380038040032604, |
| "grad_norm": 0.5233890414237976, |
| "learning_rate": 0.00018427958151338373, |
| "loss": 0.2019, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.4442532379313468, |
| "grad_norm": 0.4304122030735016, |
| "learning_rate": 0.00018423925494190406, |
| "loss": 0.2197, |
| "step": 9810 |
| }, |
| { |
| "epoch": 0.44470609546236756, |
| "grad_norm": 0.33954623341560364, |
| "learning_rate": 0.00018419888113728727, |
| "loss": 0.1886, |
| "step": 9820 |
| }, |
| { |
| "epoch": 0.44515895299338826, |
| "grad_norm": 0.47928377985954285, |
| "learning_rate": 0.00018415846012217104, |
| "loss": 0.1713, |
| "step": 9830 |
| }, |
| { |
| "epoch": 0.445611810524409, |
| "grad_norm": 0.4934302270412445, |
| "learning_rate": 0.00018411799191921956, |
| "loss": 0.2239, |
| "step": 9840 |
| }, |
| { |
| "epoch": 0.44606466805542977, |
| "grad_norm": 0.4325009882450104, |
| "learning_rate": 0.00018407747655112343, |
| "loss": 0.1843, |
| "step": 9850 |
| }, |
| { |
| "epoch": 0.4465175255864505, |
| "grad_norm": 0.5995849967002869, |
| "learning_rate": 0.00018403691404059966, |
| "loss": 0.2361, |
| "step": 9860 |
| }, |
| { |
| "epoch": 0.4469703831174712, |
| "grad_norm": 0.38609248399734497, |
| "learning_rate": 0.0001839963044103918, |
| "loss": 0.1931, |
| "step": 9870 |
| }, |
| { |
| "epoch": 0.447423240648492, |
| "grad_norm": 0.48337194323539734, |
| "learning_rate": 0.00018395564768326972, |
| "loss": 0.19, |
| "step": 9880 |
| }, |
| { |
| "epoch": 0.44787609817951274, |
| "grad_norm": 0.45147621631622314, |
| "learning_rate": 0.00018391494388202975, |
| "loss": 0.2236, |
| "step": 9890 |
| }, |
| { |
| "epoch": 0.44832895571053344, |
| "grad_norm": 0.47226592898368835, |
| "learning_rate": 0.0001838741930294946, |
| "loss": 0.2025, |
| "step": 9900 |
| }, |
| { |
| "epoch": 0.4487818132415542, |
| "grad_norm": 0.47736138105392456, |
| "learning_rate": 0.00018383339514851338, |
| "loss": 0.1986, |
| "step": 9910 |
| }, |
| { |
| "epoch": 0.44923467077257495, |
| "grad_norm": 0.49481838941574097, |
| "learning_rate": 0.00018379255026196152, |
| "loss": 0.2069, |
| "step": 9920 |
| }, |
| { |
| "epoch": 0.4496875283035957, |
| "grad_norm": 0.41190800070762634, |
| "learning_rate": 0.00018375165839274086, |
| "loss": 0.1831, |
| "step": 9930 |
| }, |
| { |
| "epoch": 0.4501403858346164, |
| "grad_norm": 0.433465838432312, |
| "learning_rate": 0.0001837107195637796, |
| "loss": 0.2219, |
| "step": 9940 |
| }, |
| { |
| "epoch": 0.45059324336563716, |
| "grad_norm": 0.44359904527664185, |
| "learning_rate": 0.00018366973379803215, |
| "loss": 0.1933, |
| "step": 9950 |
| }, |
| { |
| "epoch": 0.4510461008966579, |
| "grad_norm": 0.5206522941589355, |
| "learning_rate": 0.00018362870111847935, |
| "loss": 0.1892, |
| "step": 9960 |
| }, |
| { |
| "epoch": 0.4514989584276787, |
| "grad_norm": 0.3605899512767792, |
| "learning_rate": 0.00018358762154812834, |
| "loss": 0.1936, |
| "step": 9970 |
| }, |
| { |
| "epoch": 0.4519518159586994, |
| "grad_norm": 0.34897580742836, |
| "learning_rate": 0.00018354649511001254, |
| "loss": 0.2013, |
| "step": 9980 |
| }, |
| { |
| "epoch": 0.45240467348972013, |
| "grad_norm": 0.36002519726753235, |
| "learning_rate": 0.0001835053218271916, |
| "loss": 0.1795, |
| "step": 9990 |
| }, |
| { |
| "epoch": 0.4528575310207409, |
| "grad_norm": 0.4565688371658325, |
| "learning_rate": 0.00018346410172275144, |
| "loss": 0.1725, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.4528575310207409, |
| "eval_chrf": 76.88655918784474, |
| "eval_loss": 0.15447305142879486, |
| "eval_runtime": 12.1525, |
| "eval_samples_per_second": 0.823, |
| "eval_steps_per_second": 0.082, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.4533103885517616, |
| "grad_norm": 0.4288187325000763, |
| "learning_rate": 0.0001834228348198043, |
| "loss": 0.1634, |
| "step": 10010 |
| }, |
| { |
| "epoch": 0.45376324608278235, |
| "grad_norm": 0.41219136118888855, |
| "learning_rate": 0.00018338152114148864, |
| "loss": 0.1954, |
| "step": 10020 |
| }, |
| { |
| "epoch": 0.4542161036138031, |
| "grad_norm": 0.32993972301483154, |
| "learning_rate": 0.00018334016071096915, |
| "loss": 0.1897, |
| "step": 10030 |
| }, |
| { |
| "epoch": 0.45466896114482386, |
| "grad_norm": 0.4461534917354584, |
| "learning_rate": 0.00018329875355143667, |
| "loss": 0.1948, |
| "step": 10040 |
| }, |
| { |
| "epoch": 0.45512181867584456, |
| "grad_norm": 0.38866010308265686, |
| "learning_rate": 0.00018325729968610824, |
| "loss": 0.1903, |
| "step": 10050 |
| }, |
| { |
| "epoch": 0.4555746762068653, |
| "grad_norm": 0.4280669689178467, |
| "learning_rate": 0.00018321579913822727, |
| "loss": 0.1966, |
| "step": 10060 |
| }, |
| { |
| "epoch": 0.45602753373788607, |
| "grad_norm": 0.5818290710449219, |
| "learning_rate": 0.00018317425193106307, |
| "loss": 0.1909, |
| "step": 10070 |
| }, |
| { |
| "epoch": 0.4564803912689068, |
| "grad_norm": 0.5169594287872314, |
| "learning_rate": 0.00018313265808791136, |
| "loss": 0.2037, |
| "step": 10080 |
| }, |
| { |
| "epoch": 0.4569332487999275, |
| "grad_norm": 0.4597228467464447, |
| "learning_rate": 0.00018309101763209384, |
| "loss": 0.1969, |
| "step": 10090 |
| }, |
| { |
| "epoch": 0.4573861063309483, |
| "grad_norm": 0.4891718327999115, |
| "learning_rate": 0.0001830493305869584, |
| "loss": 0.1946, |
| "step": 10100 |
| }, |
| { |
| "epoch": 0.45783896386196904, |
| "grad_norm": 0.49224698543548584, |
| "learning_rate": 0.0001830075969758791, |
| "loss": 0.1781, |
| "step": 10110 |
| }, |
| { |
| "epoch": 0.45829182139298974, |
| "grad_norm": 0.5037763118743896, |
| "learning_rate": 0.000182965816822256, |
| "loss": 0.1981, |
| "step": 10120 |
| }, |
| { |
| "epoch": 0.4587446789240105, |
| "grad_norm": 0.44621360301971436, |
| "learning_rate": 0.00018292399014951545, |
| "loss": 0.1762, |
| "step": 10130 |
| }, |
| { |
| "epoch": 0.45919753645503125, |
| "grad_norm": 0.38351958990097046, |
| "learning_rate": 0.00018288211698110963, |
| "loss": 0.1818, |
| "step": 10140 |
| }, |
| { |
| "epoch": 0.459650393986052, |
| "grad_norm": 0.4175410270690918, |
| "learning_rate": 0.00018284019734051695, |
| "loss": 0.1524, |
| "step": 10150 |
| }, |
| { |
| "epoch": 0.4601032515170727, |
| "grad_norm": 0.6031002402305603, |
| "learning_rate": 0.00018279823125124192, |
| "loss": 0.1992, |
| "step": 10160 |
| }, |
| { |
| "epoch": 0.46055610904809346, |
| "grad_norm": 0.3932390809059143, |
| "learning_rate": 0.0001827562187368149, |
| "loss": 0.1872, |
| "step": 10170 |
| }, |
| { |
| "epoch": 0.4610089665791142, |
| "grad_norm": 0.3468590974807739, |
| "learning_rate": 0.0001827141598207925, |
| "loss": 0.1883, |
| "step": 10180 |
| }, |
| { |
| "epoch": 0.461461824110135, |
| "grad_norm": 0.5273679494857788, |
| "learning_rate": 0.0001826720545267572, |
| "loss": 0.1855, |
| "step": 10190 |
| }, |
| { |
| "epoch": 0.4619146816411557, |
| "grad_norm": 0.31331586837768555, |
| "learning_rate": 0.0001826299028783175, |
| "loss": 0.1936, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.46236753917217643, |
| "grad_norm": 0.4715229272842407, |
| "learning_rate": 0.00018258770489910803, |
| "loss": 0.205, |
| "step": 10210 |
| }, |
| { |
| "epoch": 0.4628203967031972, |
| "grad_norm": 0.6142929196357727, |
| "learning_rate": 0.0001825454606127892, |
| "loss": 0.2015, |
| "step": 10220 |
| }, |
| { |
| "epoch": 0.4632732542342179, |
| "grad_norm": 0.4637449383735657, |
| "learning_rate": 0.0001825031700430475, |
| "loss": 0.201, |
| "step": 10230 |
| }, |
| { |
| "epoch": 0.46372611176523865, |
| "grad_norm": 0.37245267629623413, |
| "learning_rate": 0.00018246083321359535, |
| "loss": 0.1939, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.4641789692962594, |
| "grad_norm": 0.4161212146282196, |
| "learning_rate": 0.0001824184501481711, |
| "loss": 0.1987, |
| "step": 10250 |
| }, |
| { |
| "epoch": 0.46463182682728016, |
| "grad_norm": 0.5343114137649536, |
| "learning_rate": 0.000182376020870539, |
| "loss": 0.1745, |
| "step": 10260 |
| }, |
| { |
| "epoch": 0.46508468435830086, |
| "grad_norm": 0.39276260137557983, |
| "learning_rate": 0.0001823335454044893, |
| "loss": 0.205, |
| "step": 10270 |
| }, |
| { |
| "epoch": 0.4655375418893216, |
| "grad_norm": 0.36505231261253357, |
| "learning_rate": 0.00018229102377383805, |
| "loss": 0.1628, |
| "step": 10280 |
| }, |
| { |
| "epoch": 0.46599039942034237, |
| "grad_norm": 0.4490553140640259, |
| "learning_rate": 0.0001822484560024272, |
| "loss": 0.1943, |
| "step": 10290 |
| }, |
| { |
| "epoch": 0.4664432569513631, |
| "grad_norm": 0.42624643445014954, |
| "learning_rate": 0.00018220584211412467, |
| "loss": 0.2038, |
| "step": 10300 |
| }, |
| { |
| "epoch": 0.46689611448238383, |
| "grad_norm": 0.4004163146018982, |
| "learning_rate": 0.00018216318213282412, |
| "loss": 0.1981, |
| "step": 10310 |
| }, |
| { |
| "epoch": 0.4673489720134046, |
| "grad_norm": 0.39912936091423035, |
| "learning_rate": 0.00018212047608244506, |
| "loss": 0.1918, |
| "step": 10320 |
| }, |
| { |
| "epoch": 0.46780182954442534, |
| "grad_norm": 0.5702713131904602, |
| "learning_rate": 0.0001820777239869329, |
| "loss": 0.2164, |
| "step": 10330 |
| }, |
| { |
| "epoch": 0.46825468707544604, |
| "grad_norm": 0.3886665403842926, |
| "learning_rate": 0.00018203492587025885, |
| "loss": 0.2134, |
| "step": 10340 |
| }, |
| { |
| "epoch": 0.4687075446064668, |
| "grad_norm": 0.6027638912200928, |
| "learning_rate": 0.00018199208175641987, |
| "loss": 0.196, |
| "step": 10350 |
| }, |
| { |
| "epoch": 0.46916040213748755, |
| "grad_norm": 0.4351930022239685, |
| "learning_rate": 0.00018194919166943877, |
| "loss": 0.1891, |
| "step": 10360 |
| }, |
| { |
| "epoch": 0.4696132596685083, |
| "grad_norm": 0.29797980189323425, |
| "learning_rate": 0.00018190625563336416, |
| "loss": 0.1905, |
| "step": 10370 |
| }, |
| { |
| "epoch": 0.470066117199529, |
| "grad_norm": 0.5475037097930908, |
| "learning_rate": 0.0001818632736722703, |
| "loss": 0.1911, |
| "step": 10380 |
| }, |
| { |
| "epoch": 0.47051897473054977, |
| "grad_norm": 0.4412976801395416, |
| "learning_rate": 0.0001818202458102573, |
| "loss": 0.184, |
| "step": 10390 |
| }, |
| { |
| "epoch": 0.4709718322615705, |
| "grad_norm": 0.4919610619544983, |
| "learning_rate": 0.000181777172071451, |
| "loss": 0.1872, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.4714246897925913, |
| "grad_norm": 0.32885703444480896, |
| "learning_rate": 0.0001817340524800029, |
| "loss": 0.1685, |
| "step": 10410 |
| }, |
| { |
| "epoch": 0.471877547323612, |
| "grad_norm": 0.43138033151626587, |
| "learning_rate": 0.00018169088706009026, |
| "loss": 0.1606, |
| "step": 10420 |
| }, |
| { |
| "epoch": 0.47233040485463273, |
| "grad_norm": 0.36871063709259033, |
| "learning_rate": 0.00018164767583591604, |
| "loss": 0.2123, |
| "step": 10430 |
| }, |
| { |
| "epoch": 0.4727832623856535, |
| "grad_norm": 0.41590359807014465, |
| "learning_rate": 0.00018160441883170888, |
| "loss": 0.1947, |
| "step": 10440 |
| }, |
| { |
| "epoch": 0.4732361199166742, |
| "grad_norm": 0.47074031829833984, |
| "learning_rate": 0.00018156111607172304, |
| "loss": 0.1742, |
| "step": 10450 |
| }, |
| { |
| "epoch": 0.47368897744769495, |
| "grad_norm": 0.386139452457428, |
| "learning_rate": 0.0001815177675802385, |
| "loss": 0.1847, |
| "step": 10460 |
| }, |
| { |
| "epoch": 0.4741418349787157, |
| "grad_norm": 0.5042863488197327, |
| "learning_rate": 0.00018147437338156088, |
| "loss": 0.1685, |
| "step": 10470 |
| }, |
| { |
| "epoch": 0.47459469250973646, |
| "grad_norm": 0.4570608139038086, |
| "learning_rate": 0.00018143093350002137, |
| "loss": 0.187, |
| "step": 10480 |
| }, |
| { |
| "epoch": 0.47504755004075716, |
| "grad_norm": 0.4626786708831787, |
| "learning_rate": 0.00018138744795997681, |
| "loss": 0.2201, |
| "step": 10490 |
| }, |
| { |
| "epoch": 0.4755004075717779, |
| "grad_norm": 0.5135970711708069, |
| "learning_rate": 0.00018134391678580964, |
| "loss": 0.1854, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.4759532651027987, |
| "grad_norm": 0.34417203068733215, |
| "learning_rate": 0.0001813003400019279, |
| "loss": 0.1852, |
| "step": 10510 |
| }, |
| { |
| "epoch": 0.4764061226338194, |
| "grad_norm": 0.3838786780834198, |
| "learning_rate": 0.00018125671763276517, |
| "loss": 0.1947, |
| "step": 10520 |
| }, |
| { |
| "epoch": 0.47685898016484013, |
| "grad_norm": 0.5423650145530701, |
| "learning_rate": 0.0001812130497027806, |
| "loss": 0.194, |
| "step": 10530 |
| }, |
| { |
| "epoch": 0.4773118376958609, |
| "grad_norm": 0.3431115746498108, |
| "learning_rate": 0.0001811693362364589, |
| "loss": 0.2074, |
| "step": 10540 |
| }, |
| { |
| "epoch": 0.47776469522688164, |
| "grad_norm": 0.38044998049736023, |
| "learning_rate": 0.00018112557725831035, |
| "loss": 0.1829, |
| "step": 10550 |
| }, |
| { |
| "epoch": 0.47821755275790234, |
| "grad_norm": 0.4877161383628845, |
| "learning_rate": 0.0001810817727928707, |
| "loss": 0.2014, |
| "step": 10560 |
| }, |
| { |
| "epoch": 0.4786704102889231, |
| "grad_norm": 0.553992748260498, |
| "learning_rate": 0.00018103792286470113, |
| "loss": 0.1937, |
| "step": 10570 |
| }, |
| { |
| "epoch": 0.47912326781994385, |
| "grad_norm": 0.3857057988643646, |
| "learning_rate": 0.0001809940274983885, |
| "loss": 0.1928, |
| "step": 10580 |
| }, |
| { |
| "epoch": 0.4795761253509646, |
| "grad_norm": 0.7242947816848755, |
| "learning_rate": 0.00018095008671854498, |
| "loss": 0.1709, |
| "step": 10590 |
| }, |
| { |
| "epoch": 0.4800289828819853, |
| "grad_norm": 0.47653719782829285, |
| "learning_rate": 0.00018090610054980824, |
| "loss": 0.1756, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.48048184041300607, |
| "grad_norm": 0.38235634565353394, |
| "learning_rate": 0.00018086206901684148, |
| "loss": 0.181, |
| "step": 10610 |
| }, |
| { |
| "epoch": 0.4809346979440268, |
| "grad_norm": 0.5229091048240662, |
| "learning_rate": 0.00018081799214433324, |
| "loss": 0.2015, |
| "step": 10620 |
| }, |
| { |
| "epoch": 0.4813875554750475, |
| "grad_norm": 0.46029898524284363, |
| "learning_rate": 0.0001807738699569975, |
| "loss": 0.1974, |
| "step": 10630 |
| }, |
| { |
| "epoch": 0.4818404130060683, |
| "grad_norm": 0.4247789978981018, |
| "learning_rate": 0.00018072970247957378, |
| "loss": 0.1822, |
| "step": 10640 |
| }, |
| { |
| "epoch": 0.48229327053708904, |
| "grad_norm": 0.32685187458992004, |
| "learning_rate": 0.00018068548973682673, |
| "loss": 0.199, |
| "step": 10650 |
| }, |
| { |
| "epoch": 0.4827461280681098, |
| "grad_norm": 0.5261408686637878, |
| "learning_rate": 0.00018064123175354663, |
| "loss": 0.1965, |
| "step": 10660 |
| }, |
| { |
| "epoch": 0.4831989855991305, |
| "grad_norm": 0.33152681589126587, |
| "learning_rate": 0.00018059692855454902, |
| "loss": 0.1649, |
| "step": 10670 |
| }, |
| { |
| "epoch": 0.48365184313015125, |
| "grad_norm": 0.31257107853889465, |
| "learning_rate": 0.00018055258016467477, |
| "loss": 0.1896, |
| "step": 10680 |
| }, |
| { |
| "epoch": 0.484104700661172, |
| "grad_norm": 0.3735596835613251, |
| "learning_rate": 0.00018050818660879015, |
| "loss": 0.1835, |
| "step": 10690 |
| }, |
| { |
| "epoch": 0.48455755819219276, |
| "grad_norm": 0.45948630571365356, |
| "learning_rate": 0.0001804637479117867, |
| "loss": 0.1785, |
| "step": 10700 |
| }, |
| { |
| "epoch": 0.48501041572321346, |
| "grad_norm": 0.34303784370422363, |
| "learning_rate": 0.00018041926409858132, |
| "loss": 0.1989, |
| "step": 10710 |
| }, |
| { |
| "epoch": 0.4854632732542342, |
| "grad_norm": 0.5830017328262329, |
| "learning_rate": 0.00018037473519411616, |
| "loss": 0.2055, |
| "step": 10720 |
| }, |
| { |
| "epoch": 0.485916130785255, |
| "grad_norm": 0.4883866608142853, |
| "learning_rate": 0.0001803301612233587, |
| "loss": 0.2011, |
| "step": 10730 |
| }, |
| { |
| "epoch": 0.4863689883162757, |
| "grad_norm": 0.5013921856880188, |
| "learning_rate": 0.00018028554221130163, |
| "loss": 0.1803, |
| "step": 10740 |
| }, |
| { |
| "epoch": 0.48682184584729643, |
| "grad_norm": 0.6551631689071655, |
| "learning_rate": 0.00018024087818296298, |
| "loss": 0.2131, |
| "step": 10750 |
| }, |
| { |
| "epoch": 0.4872747033783172, |
| "grad_norm": 0.35842567682266235, |
| "learning_rate": 0.00018019616916338594, |
| "loss": 0.2094, |
| "step": 10760 |
| }, |
| { |
| "epoch": 0.48772756090933794, |
| "grad_norm": 0.4328503906726837, |
| "learning_rate": 0.00018015141517763894, |
| "loss": 0.1568, |
| "step": 10770 |
| }, |
| { |
| "epoch": 0.48818041844035864, |
| "grad_norm": 0.4990096092224121, |
| "learning_rate": 0.00018010661625081565, |
| "loss": 0.1851, |
| "step": 10780 |
| }, |
| { |
| "epoch": 0.4886332759713794, |
| "grad_norm": 0.36216849088668823, |
| "learning_rate": 0.00018006177240803494, |
| "loss": 0.2044, |
| "step": 10790 |
| }, |
| { |
| "epoch": 0.48908613350240016, |
| "grad_norm": 0.45498183369636536, |
| "learning_rate": 0.00018001688367444082, |
| "loss": 0.1827, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.4895389910334209, |
| "grad_norm": 0.3285827040672302, |
| "learning_rate": 0.00017997195007520258, |
| "loss": 0.1884, |
| "step": 10810 |
| }, |
| { |
| "epoch": 0.4899918485644416, |
| "grad_norm": 0.3710488975048065, |
| "learning_rate": 0.00017992697163551452, |
| "loss": 0.1743, |
| "step": 10820 |
| }, |
| { |
| "epoch": 0.49044470609546237, |
| "grad_norm": 0.4138637185096741, |
| "learning_rate": 0.00017988194838059615, |
| "loss": 0.177, |
| "step": 10830 |
| }, |
| { |
| "epoch": 0.4908975636264831, |
| "grad_norm": 0.34731653332710266, |
| "learning_rate": 0.00017983688033569212, |
| "loss": 0.1972, |
| "step": 10840 |
| }, |
| { |
| "epoch": 0.4913504211575038, |
| "grad_norm": 0.3208175003528595, |
| "learning_rate": 0.0001797917675260722, |
| "loss": 0.1735, |
| "step": 10850 |
| }, |
| { |
| "epoch": 0.4918032786885246, |
| "grad_norm": 0.354033887386322, |
| "learning_rate": 0.00017974660997703126, |
| "loss": 0.1965, |
| "step": 10860 |
| }, |
| { |
| "epoch": 0.49225613621954534, |
| "grad_norm": 0.4648807942867279, |
| "learning_rate": 0.00017970140771388917, |
| "loss": 0.2096, |
| "step": 10870 |
| }, |
| { |
| "epoch": 0.4927089937505661, |
| "grad_norm": 0.4400973618030548, |
| "learning_rate": 0.000179656160761991, |
| "loss": 0.1879, |
| "step": 10880 |
| }, |
| { |
| "epoch": 0.4931618512815868, |
| "grad_norm": 0.4962785840034485, |
| "learning_rate": 0.0001796108691467068, |
| "loss": 0.1716, |
| "step": 10890 |
| }, |
| { |
| "epoch": 0.49361470881260755, |
| "grad_norm": 0.4156644940376282, |
| "learning_rate": 0.00017956553289343166, |
| "loss": 0.1932, |
| "step": 10900 |
| }, |
| { |
| "epoch": 0.4940675663436283, |
| "grad_norm": 0.4051489233970642, |
| "learning_rate": 0.00017952015202758574, |
| "loss": 0.1956, |
| "step": 10910 |
| }, |
| { |
| "epoch": 0.49452042387464906, |
| "grad_norm": 0.4099380671977997, |
| "learning_rate": 0.00017947472657461416, |
| "loss": 0.1833, |
| "step": 10920 |
| }, |
| { |
| "epoch": 0.49497328140566976, |
| "grad_norm": 0.39209482073783875, |
| "learning_rate": 0.00017942925655998711, |
| "loss": 0.2349, |
| "step": 10930 |
| }, |
| { |
| "epoch": 0.4954261389366905, |
| "grad_norm": 0.37333500385284424, |
| "learning_rate": 0.00017938374200919974, |
| "loss": 0.1655, |
| "step": 10940 |
| }, |
| { |
| "epoch": 0.4958789964677113, |
| "grad_norm": 0.38342049717903137, |
| "learning_rate": 0.0001793381829477721, |
| "loss": 0.1969, |
| "step": 10950 |
| }, |
| { |
| "epoch": 0.496331853998732, |
| "grad_norm": 0.3624749779701233, |
| "learning_rate": 0.00017929257940124934, |
| "loss": 0.1828, |
| "step": 10960 |
| }, |
| { |
| "epoch": 0.49678471152975273, |
| "grad_norm": 0.36027488112449646, |
| "learning_rate": 0.00017924693139520137, |
| "loss": 0.2038, |
| "step": 10970 |
| }, |
| { |
| "epoch": 0.4972375690607735, |
| "grad_norm": 0.34053680300712585, |
| "learning_rate": 0.0001792012389552232, |
| "loss": 0.1776, |
| "step": 10980 |
| }, |
| { |
| "epoch": 0.49769042659179424, |
| "grad_norm": 0.2984725832939148, |
| "learning_rate": 0.00017915550210693466, |
| "loss": 0.2036, |
| "step": 10990 |
| }, |
| { |
| "epoch": 0.49814328412281494, |
| "grad_norm": 0.3361246883869171, |
| "learning_rate": 0.00017910972087598051, |
| "loss": 0.1645, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.49814328412281494, |
| "eval_chrf": 75.57746990852579, |
| "eval_loss": 0.14612668752670288, |
| "eval_runtime": 26.7541, |
| "eval_samples_per_second": 0.374, |
| "eval_steps_per_second": 0.037, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.4985961416538357, |
| "grad_norm": 0.41162702441215515, |
| "learning_rate": 0.00017906389528803035, |
| "loss": 0.2009, |
| "step": 11010 |
| }, |
| { |
| "epoch": 0.49904899918485646, |
| "grad_norm": 0.3144682049751282, |
| "learning_rate": 0.00017901802536877877, |
| "loss": 0.1899, |
| "step": 11020 |
| }, |
| { |
| "epoch": 0.4995018567158772, |
| "grad_norm": 0.47382402420043945, |
| "learning_rate": 0.00017897211114394502, |
| "loss": 0.2115, |
| "step": 11030 |
| }, |
| { |
| "epoch": 0.4999547142468979, |
| "grad_norm": 0.2976755201816559, |
| "learning_rate": 0.00017892615263927344, |
| "loss": 0.1869, |
| "step": 11040 |
| }, |
| { |
| "epoch": 0.5004075717779186, |
| "grad_norm": 0.35733887553215027, |
| "learning_rate": 0.00017888014988053297, |
| "loss": 0.1705, |
| "step": 11050 |
| }, |
| { |
| "epoch": 0.5008604293089394, |
| "grad_norm": 0.36196479201316833, |
| "learning_rate": 0.00017883410289351747, |
| "loss": 0.1966, |
| "step": 11060 |
| }, |
| { |
| "epoch": 0.5013132868399601, |
| "grad_norm": 0.4164937734603882, |
| "learning_rate": 0.00017878801170404565, |
| "loss": 0.1725, |
| "step": 11070 |
| }, |
| { |
| "epoch": 0.5017661443709809, |
| "grad_norm": 0.3591492772102356, |
| "learning_rate": 0.00017874187633796086, |
| "loss": 0.1992, |
| "step": 11080 |
| }, |
| { |
| "epoch": 0.5022190019020016, |
| "grad_norm": 0.33890897035598755, |
| "learning_rate": 0.00017869569682113135, |
| "loss": 0.1709, |
| "step": 11090 |
| }, |
| { |
| "epoch": 0.5026718594330224, |
| "grad_norm": 0.36221665143966675, |
| "learning_rate": 0.00017864947317945007, |
| "loss": 0.1734, |
| "step": 11100 |
| }, |
| { |
| "epoch": 0.5031247169640432, |
| "grad_norm": 0.49572357535362244, |
| "learning_rate": 0.0001786032054388347, |
| "loss": 0.1893, |
| "step": 11110 |
| }, |
| { |
| "epoch": 0.5035775744950639, |
| "grad_norm": 0.40498650074005127, |
| "learning_rate": 0.0001785568936252277, |
| "loss": 0.1887, |
| "step": 11120 |
| }, |
| { |
| "epoch": 0.5040304320260846, |
| "grad_norm": 0.3324230909347534, |
| "learning_rate": 0.0001785105377645962, |
| "loss": 0.1993, |
| "step": 11130 |
| }, |
| { |
| "epoch": 0.5044832895571053, |
| "grad_norm": 0.35598695278167725, |
| "learning_rate": 0.000178464137882932, |
| "loss": 0.1983, |
| "step": 11140 |
| }, |
| { |
| "epoch": 0.5049361470881261, |
| "grad_norm": 0.3882586658000946, |
| "learning_rate": 0.00017841769400625163, |
| "loss": 0.1666, |
| "step": 11150 |
| }, |
| { |
| "epoch": 0.5053890046191468, |
| "grad_norm": 0.42131948471069336, |
| "learning_rate": 0.0001783712061605963, |
| "loss": 0.2014, |
| "step": 11160 |
| }, |
| { |
| "epoch": 0.5058418621501676, |
| "grad_norm": 0.40716439485549927, |
| "learning_rate": 0.0001783246743720318, |
| "loss": 0.2202, |
| "step": 11170 |
| }, |
| { |
| "epoch": 0.5062947196811883, |
| "grad_norm": 0.30183571577072144, |
| "learning_rate": 0.00017827809866664867, |
| "loss": 0.1717, |
| "step": 11180 |
| }, |
| { |
| "epoch": 0.5067475772122091, |
| "grad_norm": 0.4751286208629608, |
| "learning_rate": 0.00017823147907056197, |
| "loss": 0.1711, |
| "step": 11190 |
| }, |
| { |
| "epoch": 0.5072004347432297, |
| "grad_norm": 0.37397274374961853, |
| "learning_rate": 0.00017818481560991144, |
| "loss": 0.1883, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.5076532922742505, |
| "grad_norm": 0.4874507486820221, |
| "learning_rate": 0.00017813810831086133, |
| "loss": 0.206, |
| "step": 11210 |
| }, |
| { |
| "epoch": 0.5081061498052712, |
| "grad_norm": 0.31040722131729126, |
| "learning_rate": 0.00017809135719960056, |
| "loss": 0.1902, |
| "step": 11220 |
| }, |
| { |
| "epoch": 0.508559007336292, |
| "grad_norm": 0.49066388607025146, |
| "learning_rate": 0.0001780445623023426, |
| "loss": 0.2025, |
| "step": 11230 |
| }, |
| { |
| "epoch": 0.5090118648673128, |
| "grad_norm": 0.4019545614719391, |
| "learning_rate": 0.00017799772364532546, |
| "loss": 0.175, |
| "step": 11240 |
| }, |
| { |
| "epoch": 0.5094647223983335, |
| "grad_norm": 0.33892253041267395, |
| "learning_rate": 0.00017795084125481162, |
| "loss": 0.1881, |
| "step": 11250 |
| }, |
| { |
| "epoch": 0.5099175799293543, |
| "grad_norm": 0.43992340564727783, |
| "learning_rate": 0.0001779039151570882, |
| "loss": 0.1854, |
| "step": 11260 |
| }, |
| { |
| "epoch": 0.5103704374603749, |
| "grad_norm": 0.4423786401748657, |
| "learning_rate": 0.00017785694537846673, |
| "loss": 0.2154, |
| "step": 11270 |
| }, |
| { |
| "epoch": 0.5108232949913957, |
| "grad_norm": 0.54453444480896, |
| "learning_rate": 0.00017780993194528328, |
| "loss": 0.186, |
| "step": 11280 |
| }, |
| { |
| "epoch": 0.5112761525224164, |
| "grad_norm": 0.3905886709690094, |
| "learning_rate": 0.00017776287488389838, |
| "loss": 0.202, |
| "step": 11290 |
| }, |
| { |
| "epoch": 0.5117290100534372, |
| "grad_norm": 0.3799689710140228, |
| "learning_rate": 0.00017771577422069705, |
| "loss": 0.1575, |
| "step": 11300 |
| }, |
| { |
| "epoch": 0.5121818675844579, |
| "grad_norm": 0.5496554374694824, |
| "learning_rate": 0.00017766862998208873, |
| "loss": 0.1695, |
| "step": 11310 |
| }, |
| { |
| "epoch": 0.5126347251154787, |
| "grad_norm": 0.34472495317459106, |
| "learning_rate": 0.00017762144219450726, |
| "loss": 0.185, |
| "step": 11320 |
| }, |
| { |
| "epoch": 0.5130875826464995, |
| "grad_norm": 0.4271334111690521, |
| "learning_rate": 0.000177574210884411, |
| "loss": 0.2074, |
| "step": 11330 |
| }, |
| { |
| "epoch": 0.5135404401775201, |
| "grad_norm": 0.37587055563926697, |
| "learning_rate": 0.0001775269360782826, |
| "loss": 0.1664, |
| "step": 11340 |
| }, |
| { |
| "epoch": 0.5139932977085409, |
| "grad_norm": 0.31624364852905273, |
| "learning_rate": 0.0001774796178026292, |
| "loss": 0.186, |
| "step": 11350 |
| }, |
| { |
| "epoch": 0.5144461552395616, |
| "grad_norm": 0.5031947493553162, |
| "learning_rate": 0.0001774322560839822, |
| "loss": 0.1635, |
| "step": 11360 |
| }, |
| { |
| "epoch": 0.5148990127705824, |
| "grad_norm": 0.39671581983566284, |
| "learning_rate": 0.00017738485094889747, |
| "loss": 0.1945, |
| "step": 11370 |
| }, |
| { |
| "epoch": 0.5153518703016031, |
| "grad_norm": 0.3413325548171997, |
| "learning_rate": 0.00017733740242395515, |
| "loss": 0.1745, |
| "step": 11380 |
| }, |
| { |
| "epoch": 0.5158047278326239, |
| "grad_norm": 0.3880815804004669, |
| "learning_rate": 0.00017728991053575974, |
| "loss": 0.189, |
| "step": 11390 |
| }, |
| { |
| "epoch": 0.5162575853636446, |
| "grad_norm": 0.40092733502388, |
| "learning_rate": 0.00017724237531094002, |
| "loss": 0.1689, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.5167104428946654, |
| "grad_norm": 0.3720043897628784, |
| "learning_rate": 0.00017719479677614917, |
| "loss": 0.194, |
| "step": 11410 |
| }, |
| { |
| "epoch": 0.517163300425686, |
| "grad_norm": 0.3282621502876282, |
| "learning_rate": 0.0001771471749580645, |
| "loss": 0.1609, |
| "step": 11420 |
| }, |
| { |
| "epoch": 0.5176161579567068, |
| "grad_norm": 0.4283618927001953, |
| "learning_rate": 0.00017709950988338772, |
| "loss": 0.1654, |
| "step": 11430 |
| }, |
| { |
| "epoch": 0.5180690154877275, |
| "grad_norm": 0.480474591255188, |
| "learning_rate": 0.0001770518015788447, |
| "loss": 0.1965, |
| "step": 11440 |
| }, |
| { |
| "epoch": 0.5185218730187483, |
| "grad_norm": 0.4488031566143036, |
| "learning_rate": 0.00017700405007118564, |
| "loss": 0.1829, |
| "step": 11450 |
| }, |
| { |
| "epoch": 0.5189747305497691, |
| "grad_norm": 0.4382071793079376, |
| "learning_rate": 0.00017695625538718485, |
| "loss": 0.1626, |
| "step": 11460 |
| }, |
| { |
| "epoch": 0.5194275880807898, |
| "grad_norm": 0.38167351484298706, |
| "learning_rate": 0.000176908417553641, |
| "loss": 0.1858, |
| "step": 11470 |
| }, |
| { |
| "epoch": 0.5198804456118106, |
| "grad_norm": 0.320625364780426, |
| "learning_rate": 0.0001768605365973768, |
| "loss": 0.1794, |
| "step": 11480 |
| }, |
| { |
| "epoch": 0.5203333031428312, |
| "grad_norm": 0.39186790585517883, |
| "learning_rate": 0.00017681261254523926, |
| "loss": 0.1769, |
| "step": 11490 |
| }, |
| { |
| "epoch": 0.520786160673852, |
| "grad_norm": 0.34316763281822205, |
| "learning_rate": 0.00017676464542409946, |
| "loss": 0.1764, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.5212390182048727, |
| "grad_norm": 0.39774084091186523, |
| "learning_rate": 0.0001767166352608527, |
| "loss": 0.1957, |
| "step": 11510 |
| }, |
| { |
| "epoch": 0.5216918757358935, |
| "grad_norm": 0.4764063358306885, |
| "learning_rate": 0.0001766685820824184, |
| "loss": 0.1894, |
| "step": 11520 |
| }, |
| { |
| "epoch": 0.5221447332669142, |
| "grad_norm": 0.41005027294158936, |
| "learning_rate": 0.00017662048591574002, |
| "loss": 0.1895, |
| "step": 11530 |
| }, |
| { |
| "epoch": 0.522597590797935, |
| "grad_norm": 0.4711003601551056, |
| "learning_rate": 0.00017657234678778523, |
| "loss": 0.2117, |
| "step": 11540 |
| }, |
| { |
| "epoch": 0.5230504483289558, |
| "grad_norm": 0.3926088511943817, |
| "learning_rate": 0.00017652416472554574, |
| "loss": 0.1927, |
| "step": 11550 |
| }, |
| { |
| "epoch": 0.5235033058599764, |
| "grad_norm": 0.2955372631549835, |
| "learning_rate": 0.00017647593975603736, |
| "loss": 0.1925, |
| "step": 11560 |
| }, |
| { |
| "epoch": 0.5239561633909972, |
| "grad_norm": 0.3705880045890808, |
| "learning_rate": 0.0001764276719062999, |
| "loss": 0.1835, |
| "step": 11570 |
| }, |
| { |
| "epoch": 0.5244090209220179, |
| "grad_norm": 0.37469375133514404, |
| "learning_rate": 0.00017637936120339727, |
| "loss": 0.1819, |
| "step": 11580 |
| }, |
| { |
| "epoch": 0.5248618784530387, |
| "grad_norm": 0.4436321258544922, |
| "learning_rate": 0.0001763310076744174, |
| "loss": 0.2017, |
| "step": 11590 |
| }, |
| { |
| "epoch": 0.5253147359840594, |
| "grad_norm": 0.5750246644020081, |
| "learning_rate": 0.00017628261134647216, |
| "loss": 0.1968, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.5257675935150802, |
| "grad_norm": 0.3641575872898102, |
| "learning_rate": 0.00017623417224669758, |
| "loss": 0.2034, |
| "step": 11610 |
| }, |
| { |
| "epoch": 0.5262204510461009, |
| "grad_norm": 0.4717412292957306, |
| "learning_rate": 0.0001761856904022535, |
| "loss": 0.2, |
| "step": 11620 |
| }, |
| { |
| "epoch": 0.5266733085771217, |
| "grad_norm": 0.447290301322937, |
| "learning_rate": 0.00017613716584032383, |
| "loss": 0.1757, |
| "step": 11630 |
| }, |
| { |
| "epoch": 0.5271261661081423, |
| "grad_norm": 0.5311118960380554, |
| "learning_rate": 0.00017608859858811636, |
| "loss": 0.1872, |
| "step": 11640 |
| }, |
| { |
| "epoch": 0.5275790236391631, |
| "grad_norm": 0.508751630783081, |
| "learning_rate": 0.00017603998867286286, |
| "loss": 0.1916, |
| "step": 11650 |
| }, |
| { |
| "epoch": 0.5280318811701838, |
| "grad_norm": 0.3642581105232239, |
| "learning_rate": 0.00017599133612181906, |
| "loss": 0.1885, |
| "step": 11660 |
| }, |
| { |
| "epoch": 0.5284847387012046, |
| "grad_norm": 0.415244460105896, |
| "learning_rate": 0.00017594264096226455, |
| "loss": 0.1916, |
| "step": 11670 |
| }, |
| { |
| "epoch": 0.5289375962322254, |
| "grad_norm": 0.3395320475101471, |
| "learning_rate": 0.00017589390322150276, |
| "loss": 0.1935, |
| "step": 11680 |
| }, |
| { |
| "epoch": 0.5293904537632461, |
| "grad_norm": 0.3394661843776703, |
| "learning_rate": 0.00017584512292686112, |
| "loss": 0.2148, |
| "step": 11690 |
| }, |
| { |
| "epoch": 0.5298433112942669, |
| "grad_norm": 0.3732159733772278, |
| "learning_rate": 0.00017579630010569077, |
| "loss": 0.1955, |
| "step": 11700 |
| }, |
| { |
| "epoch": 0.5302961688252875, |
| "grad_norm": 0.3915143311023712, |
| "learning_rate": 0.00017574743478536686, |
| "loss": 0.1916, |
| "step": 11710 |
| }, |
| { |
| "epoch": 0.5307490263563083, |
| "grad_norm": 0.44478586316108704, |
| "learning_rate": 0.0001756985269932882, |
| "loss": 0.2156, |
| "step": 11720 |
| }, |
| { |
| "epoch": 0.531201883887329, |
| "grad_norm": 0.394619345664978, |
| "learning_rate": 0.00017564957675687758, |
| "loss": 0.178, |
| "step": 11730 |
| }, |
| { |
| "epoch": 0.5316547414183498, |
| "grad_norm": 0.4088374674320221, |
| "learning_rate": 0.00017560058410358143, |
| "loss": 0.1767, |
| "step": 11740 |
| }, |
| { |
| "epoch": 0.5321075989493705, |
| "grad_norm": 0.46319580078125, |
| "learning_rate": 0.0001755515490608701, |
| "loss": 0.2001, |
| "step": 11750 |
| }, |
| { |
| "epoch": 0.5325604564803913, |
| "grad_norm": 0.5034335255622864, |
| "learning_rate": 0.0001755024716562376, |
| "loss": 0.1801, |
| "step": 11760 |
| }, |
| { |
| "epoch": 0.533013314011412, |
| "grad_norm": 0.4826143682003021, |
| "learning_rate": 0.0001754533519172018, |
| "loss": 0.1964, |
| "step": 11770 |
| }, |
| { |
| "epoch": 0.5334661715424327, |
| "grad_norm": 0.3931207060813904, |
| "learning_rate": 0.00017540418987130413, |
| "loss": 0.173, |
| "step": 11780 |
| }, |
| { |
| "epoch": 0.5339190290734535, |
| "grad_norm": 0.4404812157154083, |
| "learning_rate": 0.00017535498554611, |
| "loss": 0.2032, |
| "step": 11790 |
| }, |
| { |
| "epoch": 0.5343718866044742, |
| "grad_norm": 0.47916218638420105, |
| "learning_rate": 0.0001753057389692083, |
| "loss": 0.1845, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.534824744135495, |
| "grad_norm": 0.3846713900566101, |
| "learning_rate": 0.00017525645016821173, |
| "loss": 0.1753, |
| "step": 11810 |
| }, |
| { |
| "epoch": 0.5352776016665157, |
| "grad_norm": 0.4517151415348053, |
| "learning_rate": 0.00017520711917075657, |
| "loss": 0.1724, |
| "step": 11820 |
| }, |
| { |
| "epoch": 0.5357304591975365, |
| "grad_norm": 0.3133130669593811, |
| "learning_rate": 0.00017515774600450289, |
| "loss": 0.2, |
| "step": 11830 |
| }, |
| { |
| "epoch": 0.5361833167285572, |
| "grad_norm": 0.4651758670806885, |
| "learning_rate": 0.00017510833069713435, |
| "loss": 0.1812, |
| "step": 11840 |
| }, |
| { |
| "epoch": 0.536636174259578, |
| "grad_norm": 0.38983821868896484, |
| "learning_rate": 0.00017505887327635812, |
| "loss": 0.1898, |
| "step": 11850 |
| }, |
| { |
| "epoch": 0.5370890317905986, |
| "grad_norm": 0.49756869673728943, |
| "learning_rate": 0.0001750093737699052, |
| "loss": 0.1697, |
| "step": 11860 |
| }, |
| { |
| "epoch": 0.5375418893216194, |
| "grad_norm": 0.22499801218509674, |
| "learning_rate": 0.00017495983220552997, |
| "loss": 0.1763, |
| "step": 11870 |
| }, |
| { |
| "epoch": 0.5379947468526401, |
| "grad_norm": 0.4487616717815399, |
| "learning_rate": 0.00017491024861101054, |
| "loss": 0.1887, |
| "step": 11880 |
| }, |
| { |
| "epoch": 0.5384476043836609, |
| "grad_norm": 0.4188464879989624, |
| "learning_rate": 0.00017486062301414862, |
| "loss": 0.1763, |
| "step": 11890 |
| }, |
| { |
| "epoch": 0.5389004619146817, |
| "grad_norm": 0.33243581652641296, |
| "learning_rate": 0.00017481095544276921, |
| "loss": 0.2061, |
| "step": 11900 |
| }, |
| { |
| "epoch": 0.5393533194457024, |
| "grad_norm": 0.380401074886322, |
| "learning_rate": 0.0001747612459247212, |
| "loss": 0.1743, |
| "step": 11910 |
| }, |
| { |
| "epoch": 0.5398061769767232, |
| "grad_norm": 0.4662674367427826, |
| "learning_rate": 0.00017471149448787675, |
| "loss": 0.1879, |
| "step": 11920 |
| }, |
| { |
| "epoch": 0.5402590345077438, |
| "grad_norm": 0.3613015115261078, |
| "learning_rate": 0.00017466170116013166, |
| "loss": 0.1565, |
| "step": 11930 |
| }, |
| { |
| "epoch": 0.5407118920387646, |
| "grad_norm": 0.29406189918518066, |
| "learning_rate": 0.00017461186596940507, |
| "loss": 0.1927, |
| "step": 11940 |
| }, |
| { |
| "epoch": 0.5411647495697853, |
| "grad_norm": 0.6028956174850464, |
| "learning_rate": 0.0001745619889436397, |
| "loss": 0.1967, |
| "step": 11950 |
| }, |
| { |
| "epoch": 0.5416176071008061, |
| "grad_norm": 0.4816293716430664, |
| "learning_rate": 0.00017451207011080178, |
| "loss": 0.1964, |
| "step": 11960 |
| }, |
| { |
| "epoch": 0.5420704646318268, |
| "grad_norm": 0.41138580441474915, |
| "learning_rate": 0.00017446210949888084, |
| "loss": 0.2273, |
| "step": 11970 |
| }, |
| { |
| "epoch": 0.5425233221628476, |
| "grad_norm": 0.418501615524292, |
| "learning_rate": 0.0001744121071358899, |
| "loss": 0.1986, |
| "step": 11980 |
| }, |
| { |
| "epoch": 0.5429761796938684, |
| "grad_norm": 0.5979639887809753, |
| "learning_rate": 0.0001743620630498655, |
| "loss": 0.2006, |
| "step": 11990 |
| }, |
| { |
| "epoch": 0.543429037224889, |
| "grad_norm": 0.4319825768470764, |
| "learning_rate": 0.00017431197726886733, |
| "loss": 0.1867, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.543429037224889, |
| "eval_chrf": 75.72498462085531, |
| "eval_loss": 0.1541338711977005, |
| "eval_runtime": 27.1149, |
| "eval_samples_per_second": 0.369, |
| "eval_steps_per_second": 0.037, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.5438818947559098, |
| "grad_norm": 0.378240168094635, |
| "learning_rate": 0.00017426184982097872, |
| "loss": 0.1939, |
| "step": 12010 |
| }, |
| { |
| "epoch": 0.5443347522869305, |
| "grad_norm": 0.45126333832740784, |
| "learning_rate": 0.00017421168073430613, |
| "loss": 0.1846, |
| "step": 12020 |
| }, |
| { |
| "epoch": 0.5447876098179513, |
| "grad_norm": 0.3531319797039032, |
| "learning_rate": 0.00017416147003697957, |
| "loss": 0.1472, |
| "step": 12030 |
| }, |
| { |
| "epoch": 0.545240467348972, |
| "grad_norm": 0.5536248087882996, |
| "learning_rate": 0.00017411121775715222, |
| "loss": 0.1972, |
| "step": 12040 |
| }, |
| { |
| "epoch": 0.5456933248799928, |
| "grad_norm": 0.5452578067779541, |
| "learning_rate": 0.0001740609239230007, |
| "loss": 0.1789, |
| "step": 12050 |
| }, |
| { |
| "epoch": 0.5461461824110135, |
| "grad_norm": 0.5128986835479736, |
| "learning_rate": 0.00017401058856272487, |
| "loss": 0.2148, |
| "step": 12060 |
| }, |
| { |
| "epoch": 0.5465990399420342, |
| "grad_norm": 0.43081146478652954, |
| "learning_rate": 0.00017396021170454785, |
| "loss": 0.2226, |
| "step": 12070 |
| }, |
| { |
| "epoch": 0.5470518974730549, |
| "grad_norm": 0.298042893409729, |
| "learning_rate": 0.00017390979337671608, |
| "loss": 0.1724, |
| "step": 12080 |
| }, |
| { |
| "epoch": 0.5475047550040757, |
| "grad_norm": 0.42853906750679016, |
| "learning_rate": 0.0001738593336074992, |
| "loss": 0.1845, |
| "step": 12090 |
| }, |
| { |
| "epoch": 0.5479576125350964, |
| "grad_norm": 0.45352569222450256, |
| "learning_rate": 0.00017380883242519007, |
| "loss": 0.1822, |
| "step": 12100 |
| }, |
| { |
| "epoch": 0.5484104700661172, |
| "grad_norm": 0.49138566851615906, |
| "learning_rate": 0.00017375828985810495, |
| "loss": 0.1735, |
| "step": 12110 |
| }, |
| { |
| "epoch": 0.548863327597138, |
| "grad_norm": 0.4722447395324707, |
| "learning_rate": 0.00017370770593458308, |
| "loss": 0.1804, |
| "step": 12120 |
| }, |
| { |
| "epoch": 0.5493161851281587, |
| "grad_norm": 0.3948294520378113, |
| "learning_rate": 0.00017365708068298695, |
| "loss": 0.1744, |
| "step": 12130 |
| }, |
| { |
| "epoch": 0.5497690426591795, |
| "grad_norm": 0.35122090578079224, |
| "learning_rate": 0.00017360641413170232, |
| "loss": 0.1713, |
| "step": 12140 |
| }, |
| { |
| "epoch": 0.5502219001902001, |
| "grad_norm": 0.4121938645839691, |
| "learning_rate": 0.00017355570630913804, |
| "loss": 0.1915, |
| "step": 12150 |
| }, |
| { |
| "epoch": 0.5506747577212209, |
| "grad_norm": 0.4639427959918976, |
| "learning_rate": 0.00017350495724372604, |
| "loss": 0.1607, |
| "step": 12160 |
| }, |
| { |
| "epoch": 0.5511276152522416, |
| "grad_norm": 0.39819326996803284, |
| "learning_rate": 0.0001734541669639215, |
| "loss": 0.1722, |
| "step": 12170 |
| }, |
| { |
| "epoch": 0.5515804727832624, |
| "grad_norm": 0.6044564843177795, |
| "learning_rate": 0.00017340333549820255, |
| "loss": 0.1663, |
| "step": 12180 |
| }, |
| { |
| "epoch": 0.5520333303142831, |
| "grad_norm": 0.37291619181632996, |
| "learning_rate": 0.0001733524628750706, |
| "loss": 0.1584, |
| "step": 12190 |
| }, |
| { |
| "epoch": 0.5524861878453039, |
| "grad_norm": 0.42959266901016235, |
| "learning_rate": 0.00017330154912304998, |
| "loss": 0.2168, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.5529390453763247, |
| "grad_norm": 0.35410502552986145, |
| "learning_rate": 0.00017325059427068817, |
| "loss": 0.1891, |
| "step": 12210 |
| }, |
| { |
| "epoch": 0.5533919029073453, |
| "grad_norm": 0.4390549063682556, |
| "learning_rate": 0.00017319959834655565, |
| "loss": 0.1763, |
| "step": 12220 |
| }, |
| { |
| "epoch": 0.5538447604383661, |
| "grad_norm": 0.5032073855400085, |
| "learning_rate": 0.00017314856137924602, |
| "loss": 0.1728, |
| "step": 12230 |
| }, |
| { |
| "epoch": 0.5542976179693868, |
| "grad_norm": 0.49394169449806213, |
| "learning_rate": 0.00017309748339737572, |
| "loss": 0.2135, |
| "step": 12240 |
| }, |
| { |
| "epoch": 0.5547504755004076, |
| "grad_norm": 0.4199583828449249, |
| "learning_rate": 0.00017304636442958432, |
| "loss": 0.2059, |
| "step": 12250 |
| }, |
| { |
| "epoch": 0.5552033330314283, |
| "grad_norm": 0.521976888179779, |
| "learning_rate": 0.00017299520450453438, |
| "loss": 0.1986, |
| "step": 12260 |
| }, |
| { |
| "epoch": 0.5556561905624491, |
| "grad_norm": 0.45586416125297546, |
| "learning_rate": 0.00017294400365091135, |
| "loss": 0.1876, |
| "step": 12270 |
| }, |
| { |
| "epoch": 0.5561090480934698, |
| "grad_norm": 0.31665554642677307, |
| "learning_rate": 0.00017289276189742366, |
| "loss": 0.1909, |
| "step": 12280 |
| }, |
| { |
| "epoch": 0.5565619056244905, |
| "grad_norm": 0.25845375657081604, |
| "learning_rate": 0.00017284147927280267, |
| "loss": 0.2069, |
| "step": 12290 |
| }, |
| { |
| "epoch": 0.5570147631555112, |
| "grad_norm": 0.39719051122665405, |
| "learning_rate": 0.0001727901558058027, |
| "loss": 0.1956, |
| "step": 12300 |
| }, |
| { |
| "epoch": 0.557467620686532, |
| "grad_norm": 0.41454482078552246, |
| "learning_rate": 0.0001727387915252009, |
| "loss": 0.1884, |
| "step": 12310 |
| }, |
| { |
| "epoch": 0.5579204782175528, |
| "grad_norm": 0.36447224020957947, |
| "learning_rate": 0.0001726873864597973, |
| "loss": 0.1692, |
| "step": 12320 |
| }, |
| { |
| "epoch": 0.5583733357485735, |
| "grad_norm": 0.40834981203079224, |
| "learning_rate": 0.00017263594063841493, |
| "loss": 0.2028, |
| "step": 12330 |
| }, |
| { |
| "epoch": 0.5588261932795943, |
| "grad_norm": 0.43426138162612915, |
| "learning_rate": 0.00017258445408989948, |
| "loss": 0.1887, |
| "step": 12340 |
| }, |
| { |
| "epoch": 0.559279050810615, |
| "grad_norm": 0.415056049823761, |
| "learning_rate": 0.00017253292684311965, |
| "loss": 0.1886, |
| "step": 12350 |
| }, |
| { |
| "epoch": 0.5597319083416358, |
| "grad_norm": 0.3616989850997925, |
| "learning_rate": 0.0001724813589269668, |
| "loss": 0.2022, |
| "step": 12360 |
| }, |
| { |
| "epoch": 0.5601847658726564, |
| "grad_norm": 0.454166442155838, |
| "learning_rate": 0.00017242975037035527, |
| "loss": 0.1972, |
| "step": 12370 |
| }, |
| { |
| "epoch": 0.5606376234036772, |
| "grad_norm": 0.29079487919807434, |
| "learning_rate": 0.000172378101202222, |
| "loss": 0.1778, |
| "step": 12380 |
| }, |
| { |
| "epoch": 0.5610904809346979, |
| "grad_norm": 0.4357644319534302, |
| "learning_rate": 0.00017232641145152686, |
| "loss": 0.197, |
| "step": 12390 |
| }, |
| { |
| "epoch": 0.5615433384657187, |
| "grad_norm": 0.34741294384002686, |
| "learning_rate": 0.00017227468114725238, |
| "loss": 0.1714, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.5619961959967394, |
| "grad_norm": 0.4908086061477661, |
| "learning_rate": 0.00017222291031840383, |
| "loss": 0.1976, |
| "step": 12410 |
| }, |
| { |
| "epoch": 0.5624490535277602, |
| "grad_norm": 0.49520352482795715, |
| "learning_rate": 0.0001721710989940093, |
| "loss": 0.178, |
| "step": 12420 |
| }, |
| { |
| "epoch": 0.562901911058781, |
| "grad_norm": 0.47621747851371765, |
| "learning_rate": 0.00017211924720311945, |
| "loss": 0.1798, |
| "step": 12430 |
| }, |
| { |
| "epoch": 0.5633547685898016, |
| "grad_norm": 0.41770318150520325, |
| "learning_rate": 0.0001720673549748077, |
| "loss": 0.1894, |
| "step": 12440 |
| }, |
| { |
| "epoch": 0.5638076261208224, |
| "grad_norm": 0.39249005913734436, |
| "learning_rate": 0.00017201542233817016, |
| "loss": 0.1858, |
| "step": 12450 |
| }, |
| { |
| "epoch": 0.5642604836518431, |
| "grad_norm": 0.43662548065185547, |
| "learning_rate": 0.0001719634493223256, |
| "loss": 0.1754, |
| "step": 12460 |
| }, |
| { |
| "epoch": 0.5647133411828639, |
| "grad_norm": 0.3656141459941864, |
| "learning_rate": 0.00017191143595641535, |
| "loss": 0.184, |
| "step": 12470 |
| }, |
| { |
| "epoch": 0.5651661987138846, |
| "grad_norm": 0.3354056477546692, |
| "learning_rate": 0.00017185938226960346, |
| "loss": 0.2049, |
| "step": 12480 |
| }, |
| { |
| "epoch": 0.5656190562449054, |
| "grad_norm": 0.329375296831131, |
| "learning_rate": 0.0001718072882910765, |
| "loss": 0.1919, |
| "step": 12490 |
| }, |
| { |
| "epoch": 0.5660719137759261, |
| "grad_norm": 0.5593803524971008, |
| "learning_rate": 0.00017175515405004372, |
| "loss": 0.2334, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.5665247713069468, |
| "grad_norm": 0.3757700026035309, |
| "learning_rate": 0.0001717029795757369, |
| "loss": 0.2013, |
| "step": 12510 |
| }, |
| { |
| "epoch": 0.5669776288379675, |
| "grad_norm": 0.3514690101146698, |
| "learning_rate": 0.00017165076489741038, |
| "loss": 0.1819, |
| "step": 12520 |
| }, |
| { |
| "epoch": 0.5674304863689883, |
| "grad_norm": 0.513963520526886, |
| "learning_rate": 0.00017159851004434104, |
| "loss": 0.2205, |
| "step": 12530 |
| }, |
| { |
| "epoch": 0.567883343900009, |
| "grad_norm": 0.3796728849411011, |
| "learning_rate": 0.00017154621504582833, |
| "loss": 0.2084, |
| "step": 12540 |
| }, |
| { |
| "epoch": 0.5683362014310298, |
| "grad_norm": 0.3902125656604767, |
| "learning_rate": 0.0001714938799311941, |
| "loss": 0.2021, |
| "step": 12550 |
| }, |
| { |
| "epoch": 0.5687890589620506, |
| "grad_norm": 0.48895615339279175, |
| "learning_rate": 0.00017144150472978283, |
| "loss": 0.1698, |
| "step": 12560 |
| }, |
| { |
| "epoch": 0.5692419164930713, |
| "grad_norm": 0.4550088346004486, |
| "learning_rate": 0.00017138908947096133, |
| "loss": 0.1822, |
| "step": 12570 |
| }, |
| { |
| "epoch": 0.569694774024092, |
| "grad_norm": 0.36123690009117126, |
| "learning_rate": 0.00017133663418411908, |
| "loss": 0.1781, |
| "step": 12580 |
| }, |
| { |
| "epoch": 0.5701476315551127, |
| "grad_norm": 0.3133276104927063, |
| "learning_rate": 0.00017128413889866772, |
| "loss": 0.1918, |
| "step": 12590 |
| }, |
| { |
| "epoch": 0.5706004890861335, |
| "grad_norm": 0.3202573359012604, |
| "learning_rate": 0.00017123160364404161, |
| "loss": 0.1895, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.5710533466171542, |
| "grad_norm": 0.31997188925743103, |
| "learning_rate": 0.00017117902844969733, |
| "loss": 0.1823, |
| "step": 12610 |
| }, |
| { |
| "epoch": 0.571506204148175, |
| "grad_norm": 0.4120926558971405, |
| "learning_rate": 0.0001711264133451139, |
| "loss": 0.1867, |
| "step": 12620 |
| }, |
| { |
| "epoch": 0.5719590616791957, |
| "grad_norm": 0.47499316930770874, |
| "learning_rate": 0.0001710737583597927, |
| "loss": 0.1824, |
| "step": 12630 |
| }, |
| { |
| "epoch": 0.5724119192102165, |
| "grad_norm": 0.363459974527359, |
| "learning_rate": 0.00017102106352325758, |
| "loss": 0.1869, |
| "step": 12640 |
| }, |
| { |
| "epoch": 0.5728647767412373, |
| "grad_norm": 0.3851202726364136, |
| "learning_rate": 0.0001709683288650546, |
| "loss": 0.1846, |
| "step": 12650 |
| }, |
| { |
| "epoch": 0.5733176342722579, |
| "grad_norm": 0.5470035672187805, |
| "learning_rate": 0.00017091555441475214, |
| "loss": 0.1697, |
| "step": 12660 |
| }, |
| { |
| "epoch": 0.5737704918032787, |
| "grad_norm": 0.4892798066139221, |
| "learning_rate": 0.00017086274020194106, |
| "loss": 0.1849, |
| "step": 12670 |
| }, |
| { |
| "epoch": 0.5742233493342994, |
| "grad_norm": 0.38861966133117676, |
| "learning_rate": 0.00017080988625623435, |
| "loss": 0.1936, |
| "step": 12680 |
| }, |
| { |
| "epoch": 0.5746762068653202, |
| "grad_norm": 0.43364793062210083, |
| "learning_rate": 0.00017075699260726737, |
| "loss": 0.1713, |
| "step": 12690 |
| }, |
| { |
| "epoch": 0.5751290643963409, |
| "grad_norm": 0.42354050278663635, |
| "learning_rate": 0.00017070405928469763, |
| "loss": 0.1698, |
| "step": 12700 |
| }, |
| { |
| "epoch": 0.5755819219273617, |
| "grad_norm": 0.37953200936317444, |
| "learning_rate": 0.00017065108631820507, |
| "loss": 0.199, |
| "step": 12710 |
| }, |
| { |
| "epoch": 0.5760347794583824, |
| "grad_norm": 0.46639448404312134, |
| "learning_rate": 0.0001705980737374916, |
| "loss": 0.1974, |
| "step": 12720 |
| }, |
| { |
| "epoch": 0.5764876369894031, |
| "grad_norm": 0.3444575369358063, |
| "learning_rate": 0.00017054502157228164, |
| "loss": 0.1922, |
| "step": 12730 |
| }, |
| { |
| "epoch": 0.5769404945204238, |
| "grad_norm": 0.429169625043869, |
| "learning_rate": 0.00017049192985232162, |
| "loss": 0.1849, |
| "step": 12740 |
| }, |
| { |
| "epoch": 0.5773933520514446, |
| "grad_norm": 0.4769784212112427, |
| "learning_rate": 0.00017043879860738015, |
| "loss": 0.1912, |
| "step": 12750 |
| }, |
| { |
| "epoch": 0.5778462095824654, |
| "grad_norm": 0.33860528469085693, |
| "learning_rate": 0.00017038562786724802, |
| "loss": 0.161, |
| "step": 12760 |
| }, |
| { |
| "epoch": 0.5782990671134861, |
| "grad_norm": 0.2882649898529053, |
| "learning_rate": 0.00017033241766173826, |
| "loss": 0.1619, |
| "step": 12770 |
| }, |
| { |
| "epoch": 0.5787519246445069, |
| "grad_norm": 0.4699576497077942, |
| "learning_rate": 0.0001702791680206859, |
| "loss": 0.2025, |
| "step": 12780 |
| }, |
| { |
| "epoch": 0.5792047821755276, |
| "grad_norm": 0.4863574504852295, |
| "learning_rate": 0.0001702258789739481, |
| "loss": 0.1661, |
| "step": 12790 |
| }, |
| { |
| "epoch": 0.5796576397065483, |
| "grad_norm": 0.32957029342651367, |
| "learning_rate": 0.0001701725505514042, |
| "loss": 0.2129, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.580110497237569, |
| "grad_norm": 0.5269854664802551, |
| "learning_rate": 0.00017011918278295553, |
| "loss": 0.1843, |
| "step": 12810 |
| }, |
| { |
| "epoch": 0.5805633547685898, |
| "grad_norm": 0.36829832196235657, |
| "learning_rate": 0.00017006577569852555, |
| "loss": 0.1828, |
| "step": 12820 |
| }, |
| { |
| "epoch": 0.5810162122996105, |
| "grad_norm": 0.40970858931541443, |
| "learning_rate": 0.00017001232932805973, |
| "loss": 0.1959, |
| "step": 12830 |
| }, |
| { |
| "epoch": 0.5814690698306313, |
| "grad_norm": 0.33761903643608093, |
| "learning_rate": 0.00016995884370152556, |
| "loss": 0.1903, |
| "step": 12840 |
| }, |
| { |
| "epoch": 0.581921927361652, |
| "grad_norm": 0.4831819534301758, |
| "learning_rate": 0.0001699053188489125, |
| "loss": 0.1869, |
| "step": 12850 |
| }, |
| { |
| "epoch": 0.5823747848926728, |
| "grad_norm": 0.41735371947288513, |
| "learning_rate": 0.00016985175480023213, |
| "loss": 0.1593, |
| "step": 12860 |
| }, |
| { |
| "epoch": 0.5828276424236936, |
| "grad_norm": 0.5282895565032959, |
| "learning_rate": 0.00016979815158551785, |
| "loss": 0.1907, |
| "step": 12870 |
| }, |
| { |
| "epoch": 0.5832804999547142, |
| "grad_norm": 0.387832909822464, |
| "learning_rate": 0.0001697445092348252, |
| "loss": 0.1765, |
| "step": 12880 |
| }, |
| { |
| "epoch": 0.583733357485735, |
| "grad_norm": 0.4405990242958069, |
| "learning_rate": 0.0001696908277782315, |
| "loss": 0.188, |
| "step": 12890 |
| }, |
| { |
| "epoch": 0.5841862150167557, |
| "grad_norm": 0.45045939087867737, |
| "learning_rate": 0.00016963710724583606, |
| "loss": 0.2001, |
| "step": 12900 |
| }, |
| { |
| "epoch": 0.5846390725477765, |
| "grad_norm": 0.42628213763237, |
| "learning_rate": 0.0001695833476677601, |
| "loss": 0.1865, |
| "step": 12910 |
| }, |
| { |
| "epoch": 0.5850919300787972, |
| "grad_norm": 0.49529409408569336, |
| "learning_rate": 0.00016952954907414677, |
| "loss": 0.16, |
| "step": 12920 |
| }, |
| { |
| "epoch": 0.585544787609818, |
| "grad_norm": 0.40902742743492126, |
| "learning_rate": 0.00016947571149516106, |
| "loss": 0.163, |
| "step": 12930 |
| }, |
| { |
| "epoch": 0.5859976451408387, |
| "grad_norm": 0.42194312810897827, |
| "learning_rate": 0.00016942183496098978, |
| "loss": 0.1794, |
| "step": 12940 |
| }, |
| { |
| "epoch": 0.5864505026718594, |
| "grad_norm": 0.5004866123199463, |
| "learning_rate": 0.00016936791950184166, |
| "loss": 0.1669, |
| "step": 12950 |
| }, |
| { |
| "epoch": 0.5869033602028801, |
| "grad_norm": 0.4472070038318634, |
| "learning_rate": 0.00016931396514794717, |
| "loss": 0.2021, |
| "step": 12960 |
| }, |
| { |
| "epoch": 0.5873562177339009, |
| "grad_norm": 0.3932659924030304, |
| "learning_rate": 0.00016925997192955873, |
| "loss": 0.1887, |
| "step": 12970 |
| }, |
| { |
| "epoch": 0.5878090752649217, |
| "grad_norm": 0.3474467098712921, |
| "learning_rate": 0.00016920593987695032, |
| "loss": 0.1698, |
| "step": 12980 |
| }, |
| { |
| "epoch": 0.5882619327959424, |
| "grad_norm": 0.4554367661476135, |
| "learning_rate": 0.00016915186902041794, |
| "loss": 0.1921, |
| "step": 12990 |
| }, |
| { |
| "epoch": 0.5887147903269632, |
| "grad_norm": 0.33510255813598633, |
| "learning_rate": 0.0001690977593902792, |
| "loss": 0.1704, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.5887147903269632, |
| "eval_chrf": 80.09006919939789, |
| "eval_loss": 0.1506444215774536, |
| "eval_runtime": 9.1339, |
| "eval_samples_per_second": 1.095, |
| "eval_steps_per_second": 0.109, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.5891676478579839, |
| "grad_norm": 0.43937554955482483, |
| "learning_rate": 0.0001690436110168735, |
| "loss": 0.17, |
| "step": 13010 |
| }, |
| { |
| "epoch": 0.5896205053890046, |
| "grad_norm": 0.3773411512374878, |
| "learning_rate": 0.00016898942393056196, |
| "loss": 0.1759, |
| "step": 13020 |
| }, |
| { |
| "epoch": 0.5900733629200253, |
| "grad_norm": 0.3168924152851105, |
| "learning_rate": 0.00016893519816172736, |
| "loss": 0.1591, |
| "step": 13030 |
| }, |
| { |
| "epoch": 0.5905262204510461, |
| "grad_norm": 0.4434441328048706, |
| "learning_rate": 0.00016888093374077429, |
| "loss": 0.1874, |
| "step": 13040 |
| }, |
| { |
| "epoch": 0.5909790779820668, |
| "grad_norm": 0.45755648612976074, |
| "learning_rate": 0.0001688266306981288, |
| "loss": 0.1773, |
| "step": 13050 |
| }, |
| { |
| "epoch": 0.5914319355130876, |
| "grad_norm": 0.3861162066459656, |
| "learning_rate": 0.00016877228906423888, |
| "loss": 0.1668, |
| "step": 13060 |
| }, |
| { |
| "epoch": 0.5918847930441083, |
| "grad_norm": 0.4664711058139801, |
| "learning_rate": 0.00016871790886957387, |
| "loss": 0.1837, |
| "step": 13070 |
| }, |
| { |
| "epoch": 0.5923376505751291, |
| "grad_norm": 0.3919123709201813, |
| "learning_rate": 0.00016866349014462494, |
| "loss": 0.1925, |
| "step": 13080 |
| }, |
| { |
| "epoch": 0.5927905081061499, |
| "grad_norm": 0.4407183825969696, |
| "learning_rate": 0.0001686090329199048, |
| "loss": 0.1594, |
| "step": 13090 |
| }, |
| { |
| "epoch": 0.5932433656371705, |
| "grad_norm": 0.44760841131210327, |
| "learning_rate": 0.0001685545372259477, |
| "loss": 0.2046, |
| "step": 13100 |
| }, |
| { |
| "epoch": 0.5936962231681913, |
| "grad_norm": 0.3986581861972809, |
| "learning_rate": 0.0001685000030933095, |
| "loss": 0.1637, |
| "step": 13110 |
| }, |
| { |
| "epoch": 0.594149080699212, |
| "grad_norm": 0.38675206899642944, |
| "learning_rate": 0.00016844543055256762, |
| "loss": 0.1896, |
| "step": 13120 |
| }, |
| { |
| "epoch": 0.5946019382302328, |
| "grad_norm": 0.4637945890426636, |
| "learning_rate": 0.000168390819634321, |
| "loss": 0.1778, |
| "step": 13130 |
| }, |
| { |
| "epoch": 0.5950547957612535, |
| "grad_norm": 0.41805940866470337, |
| "learning_rate": 0.0001683361703691901, |
| "loss": 0.1874, |
| "step": 13140 |
| }, |
| { |
| "epoch": 0.5955076532922743, |
| "grad_norm": 0.3749958574771881, |
| "learning_rate": 0.00016828148278781688, |
| "loss": 0.1922, |
| "step": 13150 |
| }, |
| { |
| "epoch": 0.595960510823295, |
| "grad_norm": 0.6044149994850159, |
| "learning_rate": 0.00016822675692086482, |
| "loss": 0.1661, |
| "step": 13160 |
| }, |
| { |
| "epoch": 0.5964133683543157, |
| "grad_norm": 0.38552409410476685, |
| "learning_rate": 0.0001681719927990188, |
| "loss": 0.1799, |
| "step": 13170 |
| }, |
| { |
| "epoch": 0.5968662258853364, |
| "grad_norm": 0.34303176403045654, |
| "learning_rate": 0.0001681171904529852, |
| "loss": 0.1908, |
| "step": 13180 |
| }, |
| { |
| "epoch": 0.5973190834163572, |
| "grad_norm": 0.4143158197402954, |
| "learning_rate": 0.0001680623499134918, |
| "loss": 0.2099, |
| "step": 13190 |
| }, |
| { |
| "epoch": 0.597771940947378, |
| "grad_norm": 0.46413254737854004, |
| "learning_rate": 0.00016800747121128784, |
| "loss": 0.188, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.5982247984783987, |
| "grad_norm": 0.47489702701568604, |
| "learning_rate": 0.00016795255437714396, |
| "loss": 0.1704, |
| "step": 13210 |
| }, |
| { |
| "epoch": 0.5986776560094195, |
| "grad_norm": 0.36207646131515503, |
| "learning_rate": 0.00016789759944185203, |
| "loss": 0.1859, |
| "step": 13220 |
| }, |
| { |
| "epoch": 0.5991305135404402, |
| "grad_norm": 0.4927097260951996, |
| "learning_rate": 0.0001678426064362255, |
| "loss": 0.1767, |
| "step": 13230 |
| }, |
| { |
| "epoch": 0.5995833710714609, |
| "grad_norm": 0.42713093757629395, |
| "learning_rate": 0.00016778757539109908, |
| "loss": 0.1907, |
| "step": 13240 |
| }, |
| { |
| "epoch": 0.6000362286024816, |
| "grad_norm": 0.4796130657196045, |
| "learning_rate": 0.00016773250633732875, |
| "loss": 0.1916, |
| "step": 13250 |
| }, |
| { |
| "epoch": 0.6004890861335024, |
| "grad_norm": 0.5012636184692383, |
| "learning_rate": 0.00016767739930579188, |
| "loss": 0.1873, |
| "step": 13260 |
| }, |
| { |
| "epoch": 0.6009419436645231, |
| "grad_norm": 0.45663386583328247, |
| "learning_rate": 0.0001676222543273871, |
| "loss": 0.1987, |
| "step": 13270 |
| }, |
| { |
| "epoch": 0.6013948011955439, |
| "grad_norm": 0.4900738298892975, |
| "learning_rate": 0.00016756707143303427, |
| "loss": 0.1848, |
| "step": 13280 |
| }, |
| { |
| "epoch": 0.6018476587265646, |
| "grad_norm": 0.3995307683944702, |
| "learning_rate": 0.00016751185065367466, |
| "loss": 0.1692, |
| "step": 13290 |
| }, |
| { |
| "epoch": 0.6023005162575854, |
| "grad_norm": 0.3223266899585724, |
| "learning_rate": 0.0001674565920202706, |
| "loss": 0.1792, |
| "step": 13300 |
| }, |
| { |
| "epoch": 0.602753373788606, |
| "grad_norm": 0.3668299615383148, |
| "learning_rate": 0.0001674012955638058, |
| "loss": 0.2157, |
| "step": 13310 |
| }, |
| { |
| "epoch": 0.6032062313196268, |
| "grad_norm": 0.2830699384212494, |
| "learning_rate": 0.000167345961315285, |
| "loss": 0.1748, |
| "step": 13320 |
| }, |
| { |
| "epoch": 0.6036590888506476, |
| "grad_norm": 0.493255615234375, |
| "learning_rate": 0.00016729058930573437, |
| "loss": 0.184, |
| "step": 13330 |
| }, |
| { |
| "epoch": 0.6041119463816683, |
| "grad_norm": 0.41153353452682495, |
| "learning_rate": 0.000167235179566201, |
| "loss": 0.1774, |
| "step": 13340 |
| }, |
| { |
| "epoch": 0.6045648039126891, |
| "grad_norm": 0.4964105784893036, |
| "learning_rate": 0.00016717973212775333, |
| "loss": 0.1733, |
| "step": 13350 |
| }, |
| { |
| "epoch": 0.6050176614437098, |
| "grad_norm": 0.3415859043598175, |
| "learning_rate": 0.00016712424702148085, |
| "loss": 0.1691, |
| "step": 13360 |
| }, |
| { |
| "epoch": 0.6054705189747306, |
| "grad_norm": 0.41315367817878723, |
| "learning_rate": 0.00016706872427849415, |
| "loss": 0.1889, |
| "step": 13370 |
| }, |
| { |
| "epoch": 0.6059233765057513, |
| "grad_norm": 0.36277538537979126, |
| "learning_rate": 0.00016701316392992495, |
| "loss": 0.17, |
| "step": 13380 |
| }, |
| { |
| "epoch": 0.606376234036772, |
| "grad_norm": 0.4528109133243561, |
| "learning_rate": 0.0001669575660069261, |
| "loss": 0.1808, |
| "step": 13390 |
| }, |
| { |
| "epoch": 0.6068290915677927, |
| "grad_norm": 0.36657941341400146, |
| "learning_rate": 0.0001669019305406715, |
| "loss": 0.2003, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.6072819490988135, |
| "grad_norm": 0.4129428267478943, |
| "learning_rate": 0.000166846257562356, |
| "loss": 0.1891, |
| "step": 13410 |
| }, |
| { |
| "epoch": 0.6077348066298343, |
| "grad_norm": 0.4617420434951782, |
| "learning_rate": 0.00016679054710319564, |
| "loss": 0.1801, |
| "step": 13420 |
| }, |
| { |
| "epoch": 0.608187664160855, |
| "grad_norm": 0.3708018362522125, |
| "learning_rate": 0.00016673479919442733, |
| "loss": 0.1781, |
| "step": 13430 |
| }, |
| { |
| "epoch": 0.6086405216918758, |
| "grad_norm": 0.4533080458641052, |
| "learning_rate": 0.00016667901386730908, |
| "loss": 0.1928, |
| "step": 13440 |
| }, |
| { |
| "epoch": 0.6090933792228965, |
| "grad_norm": 0.29118624329566956, |
| "learning_rate": 0.00016662319115311986, |
| "loss": 0.1776, |
| "step": 13450 |
| }, |
| { |
| "epoch": 0.6095462367539172, |
| "grad_norm": 0.5299730896949768, |
| "learning_rate": 0.00016656733108315957, |
| "loss": 0.1915, |
| "step": 13460 |
| }, |
| { |
| "epoch": 0.6099990942849379, |
| "grad_norm": 0.46744877099990845, |
| "learning_rate": 0.00016651143368874908, |
| "loss": 0.1553, |
| "step": 13470 |
| }, |
| { |
| "epoch": 0.6104519518159587, |
| "grad_norm": 0.3915580213069916, |
| "learning_rate": 0.00016645549900123018, |
| "loss": 0.2021, |
| "step": 13480 |
| }, |
| { |
| "epoch": 0.6109048093469794, |
| "grad_norm": 0.32643139362335205, |
| "learning_rate": 0.00016639952705196556, |
| "loss": 0.1998, |
| "step": 13490 |
| }, |
| { |
| "epoch": 0.6113576668780002, |
| "grad_norm": 0.37357577681541443, |
| "learning_rate": 0.00016634351787233887, |
| "loss": 0.1631, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.611810524409021, |
| "grad_norm": 0.288700670003891, |
| "learning_rate": 0.0001662874714937545, |
| "loss": 0.2021, |
| "step": 13510 |
| }, |
| { |
| "epoch": 0.6122633819400417, |
| "grad_norm": 0.3211473822593689, |
| "learning_rate": 0.00016623138794763786, |
| "loss": 0.1686, |
| "step": 13520 |
| }, |
| { |
| "epoch": 0.6127162394710624, |
| "grad_norm": 0.4806463420391083, |
| "learning_rate": 0.00016617526726543508, |
| "loss": 0.1893, |
| "step": 13530 |
| }, |
| { |
| "epoch": 0.6131690970020831, |
| "grad_norm": 0.3648808002471924, |
| "learning_rate": 0.00016611910947861316, |
| "loss": 0.156, |
| "step": 13540 |
| }, |
| { |
| "epoch": 0.6136219545331039, |
| "grad_norm": 0.4847327768802643, |
| "learning_rate": 0.00016606291461865986, |
| "loss": 0.2123, |
| "step": 13550 |
| }, |
| { |
| "epoch": 0.6140748120641246, |
| "grad_norm": 0.4586341083049774, |
| "learning_rate": 0.0001660066827170838, |
| "loss": 0.1851, |
| "step": 13560 |
| }, |
| { |
| "epoch": 0.6145276695951454, |
| "grad_norm": 0.46938562393188477, |
| "learning_rate": 0.0001659504138054143, |
| "loss": 0.1598, |
| "step": 13570 |
| }, |
| { |
| "epoch": 0.6149805271261661, |
| "grad_norm": 0.43038567900657654, |
| "learning_rate": 0.00016589410791520152, |
| "loss": 0.1872, |
| "step": 13580 |
| }, |
| { |
| "epoch": 0.6154333846571869, |
| "grad_norm": 0.4367424249649048, |
| "learning_rate": 0.00016583776507801624, |
| "loss": 0.1878, |
| "step": 13590 |
| }, |
| { |
| "epoch": 0.6158862421882076, |
| "grad_norm": 0.5961470603942871, |
| "learning_rate": 0.00016578138532545003, |
| "loss": 0.2002, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.6163390997192283, |
| "grad_norm": 0.4500272572040558, |
| "learning_rate": 0.00016572496868911518, |
| "loss": 0.2165, |
| "step": 13610 |
| }, |
| { |
| "epoch": 0.616791957250249, |
| "grad_norm": 0.4046367406845093, |
| "learning_rate": 0.00016566851520064462, |
| "loss": 0.164, |
| "step": 13620 |
| }, |
| { |
| "epoch": 0.6172448147812698, |
| "grad_norm": 0.4983348250389099, |
| "learning_rate": 0.00016561202489169186, |
| "loss": 0.1654, |
| "step": 13630 |
| }, |
| { |
| "epoch": 0.6176976723122906, |
| "grad_norm": 0.43825361132621765, |
| "learning_rate": 0.00016555549779393122, |
| "loss": 0.1803, |
| "step": 13640 |
| }, |
| { |
| "epoch": 0.6181505298433113, |
| "grad_norm": 0.39608263969421387, |
| "learning_rate": 0.00016549893393905755, |
| "loss": 0.1712, |
| "step": 13650 |
| }, |
| { |
| "epoch": 0.6186033873743321, |
| "grad_norm": 0.3408968448638916, |
| "learning_rate": 0.00016544233335878634, |
| "loss": 0.1707, |
| "step": 13660 |
| }, |
| { |
| "epoch": 0.6190562449053528, |
| "grad_norm": 0.48537546396255493, |
| "learning_rate": 0.00016538569608485365, |
| "loss": 0.1879, |
| "step": 13670 |
| }, |
| { |
| "epoch": 0.6195091024363735, |
| "grad_norm": 0.4846585690975189, |
| "learning_rate": 0.0001653290221490161, |
| "loss": 0.1791, |
| "step": 13680 |
| }, |
| { |
| "epoch": 0.6199619599673942, |
| "grad_norm": 0.4026714861392975, |
| "learning_rate": 0.00016527231158305092, |
| "loss": 0.1861, |
| "step": 13690 |
| }, |
| { |
| "epoch": 0.620414817498415, |
| "grad_norm": 0.5842049717903137, |
| "learning_rate": 0.00016521556441875584, |
| "loss": 0.1913, |
| "step": 13700 |
| }, |
| { |
| "epoch": 0.6208676750294357, |
| "grad_norm": 0.417464941740036, |
| "learning_rate": 0.00016515878068794918, |
| "loss": 0.1638, |
| "step": 13710 |
| }, |
| { |
| "epoch": 0.6213205325604565, |
| "grad_norm": 0.32036682963371277, |
| "learning_rate": 0.00016510196042246963, |
| "loss": 0.1441, |
| "step": 13720 |
| }, |
| { |
| "epoch": 0.6217733900914773, |
| "grad_norm": 0.4793328046798706, |
| "learning_rate": 0.00016504510365417642, |
| "loss": 0.1806, |
| "step": 13730 |
| }, |
| { |
| "epoch": 0.622226247622498, |
| "grad_norm": 0.3982367515563965, |
| "learning_rate": 0.00016498821041494935, |
| "loss": 0.1668, |
| "step": 13740 |
| }, |
| { |
| "epoch": 0.6226791051535187, |
| "grad_norm": 0.2917279005050659, |
| "learning_rate": 0.00016493128073668853, |
| "loss": 0.1669, |
| "step": 13750 |
| }, |
| { |
| "epoch": 0.6231319626845394, |
| "grad_norm": 0.38130876421928406, |
| "learning_rate": 0.00016487431465131455, |
| "loss": 0.1964, |
| "step": 13760 |
| }, |
| { |
| "epoch": 0.6235848202155602, |
| "grad_norm": 0.24941422045230865, |
| "learning_rate": 0.00016481731219076843, |
| "loss": 0.1919, |
| "step": 13770 |
| }, |
| { |
| "epoch": 0.6240376777465809, |
| "grad_norm": 0.35797667503356934, |
| "learning_rate": 0.00016476027338701164, |
| "loss": 0.1757, |
| "step": 13780 |
| }, |
| { |
| "epoch": 0.6244905352776017, |
| "grad_norm": 0.3741672933101654, |
| "learning_rate": 0.00016470319827202587, |
| "loss": 0.1733, |
| "step": 13790 |
| }, |
| { |
| "epoch": 0.6249433928086224, |
| "grad_norm": 0.36127394437789917, |
| "learning_rate": 0.00016464608687781327, |
| "loss": 0.1693, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.6253962503396432, |
| "grad_norm": 0.3201135993003845, |
| "learning_rate": 0.00016458893923639644, |
| "loss": 0.1658, |
| "step": 13810 |
| }, |
| { |
| "epoch": 0.6258491078706638, |
| "grad_norm": 0.4127233922481537, |
| "learning_rate": 0.00016453175537981806, |
| "loss": 0.1792, |
| "step": 13820 |
| }, |
| { |
| "epoch": 0.6263019654016846, |
| "grad_norm": 0.43056994676589966, |
| "learning_rate": 0.0001644745353401413, |
| "loss": 0.1803, |
| "step": 13830 |
| }, |
| { |
| "epoch": 0.6267548229327053, |
| "grad_norm": 0.3311462700366974, |
| "learning_rate": 0.00016441727914944955, |
| "loss": 0.1862, |
| "step": 13840 |
| }, |
| { |
| "epoch": 0.6272076804637261, |
| "grad_norm": 0.4950120151042938, |
| "learning_rate": 0.00016435998683984648, |
| "loss": 0.1782, |
| "step": 13850 |
| }, |
| { |
| "epoch": 0.6276605379947469, |
| "grad_norm": 0.4820355176925659, |
| "learning_rate": 0.00016430265844345602, |
| "loss": 0.194, |
| "step": 13860 |
| }, |
| { |
| "epoch": 0.6281133955257676, |
| "grad_norm": 0.30587446689605713, |
| "learning_rate": 0.00016424529399242235, |
| "loss": 0.2033, |
| "step": 13870 |
| }, |
| { |
| "epoch": 0.6285662530567884, |
| "grad_norm": 0.42574718594551086, |
| "learning_rate": 0.00016418789351890984, |
| "loss": 0.1769, |
| "step": 13880 |
| }, |
| { |
| "epoch": 0.6290191105878091, |
| "grad_norm": 0.3411298394203186, |
| "learning_rate": 0.00016413045705510304, |
| "loss": 0.1806, |
| "step": 13890 |
| }, |
| { |
| "epoch": 0.6294719681188298, |
| "grad_norm": 0.43649446964263916, |
| "learning_rate": 0.0001640729846332067, |
| "loss": 0.1939, |
| "step": 13900 |
| }, |
| { |
| "epoch": 0.6299248256498505, |
| "grad_norm": 0.4125385582447052, |
| "learning_rate": 0.00016401547628544573, |
| "loss": 0.1664, |
| "step": 13910 |
| }, |
| { |
| "epoch": 0.6303776831808713, |
| "grad_norm": 0.3264622986316681, |
| "learning_rate": 0.00016395793204406527, |
| "loss": 0.1784, |
| "step": 13920 |
| }, |
| { |
| "epoch": 0.630830540711892, |
| "grad_norm": 0.4634540379047394, |
| "learning_rate": 0.00016390035194133038, |
| "loss": 0.1744, |
| "step": 13930 |
| }, |
| { |
| "epoch": 0.6312833982429128, |
| "grad_norm": 0.2661249339580536, |
| "learning_rate": 0.00016384273600952645, |
| "loss": 0.1654, |
| "step": 13940 |
| }, |
| { |
| "epoch": 0.6317362557739336, |
| "grad_norm": 0.3725392818450928, |
| "learning_rate": 0.0001637850842809588, |
| "loss": 0.1874, |
| "step": 13950 |
| }, |
| { |
| "epoch": 0.6321891133049543, |
| "grad_norm": 0.38668251037597656, |
| "learning_rate": 0.00016372739678795288, |
| "loss": 0.1789, |
| "step": 13960 |
| }, |
| { |
| "epoch": 0.632641970835975, |
| "grad_norm": 0.3284059464931488, |
| "learning_rate": 0.00016366967356285422, |
| "loss": 0.1799, |
| "step": 13970 |
| }, |
| { |
| "epoch": 0.6330948283669957, |
| "grad_norm": 0.2830691933631897, |
| "learning_rate": 0.00016361191463802843, |
| "loss": 0.1689, |
| "step": 13980 |
| }, |
| { |
| "epoch": 0.6335476858980165, |
| "grad_norm": 0.40212684869766235, |
| "learning_rate": 0.00016355412004586092, |
| "loss": 0.1841, |
| "step": 13990 |
| }, |
| { |
| "epoch": 0.6340005434290372, |
| "grad_norm": 0.36592209339141846, |
| "learning_rate": 0.00016349628981875738, |
| "loss": 0.197, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.6340005434290372, |
| "eval_chrf": 56.30788920587864, |
| "eval_loss": 0.14252474904060364, |
| "eval_runtime": 26.7741, |
| "eval_samples_per_second": 0.373, |
| "eval_steps_per_second": 0.037, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.634453400960058, |
| "grad_norm": 0.48045670986175537, |
| "learning_rate": 0.00016343842398914324, |
| "loss": 0.1697, |
| "step": 14010 |
| }, |
| { |
| "epoch": 0.6349062584910787, |
| "grad_norm": 0.34194836020469666, |
| "learning_rate": 0.0001633805225894641, |
| "loss": 0.1616, |
| "step": 14020 |
| }, |
| { |
| "epoch": 0.6353591160220995, |
| "grad_norm": 0.40786996483802795, |
| "learning_rate": 0.0001633225856521853, |
| "loss": 0.1897, |
| "step": 14030 |
| }, |
| { |
| "epoch": 0.6358119735531201, |
| "grad_norm": 0.5029662251472473, |
| "learning_rate": 0.0001632646132097923, |
| "loss": 0.2056, |
| "step": 14040 |
| }, |
| { |
| "epoch": 0.6362648310841409, |
| "grad_norm": 0.419687956571579, |
| "learning_rate": 0.00016320660529479033, |
| "loss": 0.1913, |
| "step": 14050 |
| }, |
| { |
| "epoch": 0.6367176886151616, |
| "grad_norm": 0.4122810661792755, |
| "learning_rate": 0.00016314856193970454, |
| "loss": 0.1918, |
| "step": 14060 |
| }, |
| { |
| "epoch": 0.6371705461461824, |
| "grad_norm": 0.3298819959163666, |
| "learning_rate": 0.00016309048317708, |
| "loss": 0.1654, |
| "step": 14070 |
| }, |
| { |
| "epoch": 0.6376234036772032, |
| "grad_norm": 0.6079479455947876, |
| "learning_rate": 0.0001630323690394816, |
| "loss": 0.1895, |
| "step": 14080 |
| }, |
| { |
| "epoch": 0.6380762612082239, |
| "grad_norm": 0.3452903628349304, |
| "learning_rate": 0.00016297421955949407, |
| "loss": 0.1766, |
| "step": 14090 |
| }, |
| { |
| "epoch": 0.6385291187392447, |
| "grad_norm": 0.3545880615711212, |
| "learning_rate": 0.00016291603476972192, |
| "loss": 0.1755, |
| "step": 14100 |
| }, |
| { |
| "epoch": 0.6389819762702654, |
| "grad_norm": 0.46532562375068665, |
| "learning_rate": 0.00016285781470278953, |
| "loss": 0.1937, |
| "step": 14110 |
| }, |
| { |
| "epoch": 0.6394348338012861, |
| "grad_norm": 0.32557931542396545, |
| "learning_rate": 0.000162799559391341, |
| "loss": 0.178, |
| "step": 14120 |
| }, |
| { |
| "epoch": 0.6398876913323068, |
| "grad_norm": 0.44921159744262695, |
| "learning_rate": 0.0001627412688680402, |
| "loss": 0.1896, |
| "step": 14130 |
| }, |
| { |
| "epoch": 0.6403405488633276, |
| "grad_norm": 0.5234042406082153, |
| "learning_rate": 0.00016268294316557083, |
| "loss": 0.1662, |
| "step": 14140 |
| }, |
| { |
| "epoch": 0.6407934063943483, |
| "grad_norm": 0.46567443013191223, |
| "learning_rate": 0.00016262458231663612, |
| "loss": 0.1814, |
| "step": 14150 |
| }, |
| { |
| "epoch": 0.6412462639253691, |
| "grad_norm": 0.446133017539978, |
| "learning_rate": 0.0001625661863539592, |
| "loss": 0.1998, |
| "step": 14160 |
| }, |
| { |
| "epoch": 0.6416991214563899, |
| "grad_norm": 0.4532266855239868, |
| "learning_rate": 0.00016250775531028282, |
| "loss": 0.1725, |
| "step": 14170 |
| }, |
| { |
| "epoch": 0.6421519789874106, |
| "grad_norm": 0.4015495181083679, |
| "learning_rate": 0.00016244928921836936, |
| "loss": 0.1588, |
| "step": 14180 |
| }, |
| { |
| "epoch": 0.6426048365184313, |
| "grad_norm": 0.3836342990398407, |
| "learning_rate": 0.00016239078811100093, |
| "loss": 0.1604, |
| "step": 14190 |
| }, |
| { |
| "epoch": 0.643057694049452, |
| "grad_norm": 0.252178817987442, |
| "learning_rate": 0.0001623322520209792, |
| "loss": 0.1878, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.6435105515804728, |
| "grad_norm": 0.3731275796890259, |
| "learning_rate": 0.00016227368098112552, |
| "loss": 0.1735, |
| "step": 14210 |
| }, |
| { |
| "epoch": 0.6439634091114935, |
| "grad_norm": 0.42285943031311035, |
| "learning_rate": 0.00016221507502428074, |
| "loss": 0.1809, |
| "step": 14220 |
| }, |
| { |
| "epoch": 0.6444162666425143, |
| "grad_norm": 0.4480184018611908, |
| "learning_rate": 0.0001621564341833054, |
| "loss": 0.1863, |
| "step": 14230 |
| }, |
| { |
| "epoch": 0.644869124173535, |
| "grad_norm": 0.3354874849319458, |
| "learning_rate": 0.00016209775849107953, |
| "loss": 0.168, |
| "step": 14240 |
| }, |
| { |
| "epoch": 0.6453219817045558, |
| "grad_norm": 0.44964078068733215, |
| "learning_rate": 0.00016203904798050273, |
| "loss": 0.1726, |
| "step": 14250 |
| }, |
| { |
| "epoch": 0.6457748392355764, |
| "grad_norm": 0.5185778141021729, |
| "learning_rate": 0.0001619803026844941, |
| "loss": 0.1824, |
| "step": 14260 |
| }, |
| { |
| "epoch": 0.6462276967665972, |
| "grad_norm": 0.40909233689308167, |
| "learning_rate": 0.0001619215226359923, |
| "loss": 0.1797, |
| "step": 14270 |
| }, |
| { |
| "epoch": 0.646680554297618, |
| "grad_norm": 0.4697762727737427, |
| "learning_rate": 0.00016186270786795536, |
| "loss": 0.1731, |
| "step": 14280 |
| }, |
| { |
| "epoch": 0.6471334118286387, |
| "grad_norm": 0.48620936274528503, |
| "learning_rate": 0.00016180385841336095, |
| "loss": 0.1896, |
| "step": 14290 |
| }, |
| { |
| "epoch": 0.6475862693596595, |
| "grad_norm": 0.37995168566703796, |
| "learning_rate": 0.00016174497430520597, |
| "loss": 0.1731, |
| "step": 14300 |
| }, |
| { |
| "epoch": 0.6480391268906802, |
| "grad_norm": 0.4603124260902405, |
| "learning_rate": 0.000161686055576507, |
| "loss": 0.1811, |
| "step": 14310 |
| }, |
| { |
| "epoch": 0.648491984421701, |
| "grad_norm": 0.28444549441337585, |
| "learning_rate": 0.00016162710226029975, |
| "loss": 0.1875, |
| "step": 14320 |
| }, |
| { |
| "epoch": 0.6489448419527216, |
| "grad_norm": 0.6212883591651917, |
| "learning_rate": 0.00016156811438963964, |
| "loss": 0.1638, |
| "step": 14330 |
| }, |
| { |
| "epoch": 0.6493976994837424, |
| "grad_norm": 0.5507875680923462, |
| "learning_rate": 0.0001615090919976012, |
| "loss": 0.1671, |
| "step": 14340 |
| }, |
| { |
| "epoch": 0.6498505570147631, |
| "grad_norm": 0.3705752491950989, |
| "learning_rate": 0.00016145003511727838, |
| "loss": 0.1837, |
| "step": 14350 |
| }, |
| { |
| "epoch": 0.6503034145457839, |
| "grad_norm": 0.32453569769859314, |
| "learning_rate": 0.0001613909437817846, |
| "loss": 0.1866, |
| "step": 14360 |
| }, |
| { |
| "epoch": 0.6507562720768046, |
| "grad_norm": 0.5403971672058105, |
| "learning_rate": 0.00016133181802425247, |
| "loss": 0.1729, |
| "step": 14370 |
| }, |
| { |
| "epoch": 0.6512091296078254, |
| "grad_norm": 0.36369985342025757, |
| "learning_rate": 0.00016127265787783393, |
| "loss": 0.1754, |
| "step": 14380 |
| }, |
| { |
| "epoch": 0.6516619871388462, |
| "grad_norm": 0.44516465067863464, |
| "learning_rate": 0.00016121346337570017, |
| "loss": 0.1771, |
| "step": 14390 |
| }, |
| { |
| "epoch": 0.6521148446698669, |
| "grad_norm": 0.40263697504997253, |
| "learning_rate": 0.00016115423455104172, |
| "loss": 0.1536, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.6525677022008876, |
| "grad_norm": 0.5482146143913269, |
| "learning_rate": 0.00016109497143706832, |
| "loss": 0.1701, |
| "step": 14410 |
| }, |
| { |
| "epoch": 0.6530205597319083, |
| "grad_norm": 0.48209378123283386, |
| "learning_rate": 0.00016103567406700894, |
| "loss": 0.1801, |
| "step": 14420 |
| }, |
| { |
| "epoch": 0.6534734172629291, |
| "grad_norm": 0.5684372782707214, |
| "learning_rate": 0.00016097634247411166, |
| "loss": 0.1688, |
| "step": 14430 |
| }, |
| { |
| "epoch": 0.6539262747939498, |
| "grad_norm": 0.45816928148269653, |
| "learning_rate": 0.0001609169766916439, |
| "loss": 0.1962, |
| "step": 14440 |
| }, |
| { |
| "epoch": 0.6543791323249706, |
| "grad_norm": 0.3339681923389435, |
| "learning_rate": 0.0001608575767528922, |
| "loss": 0.1811, |
| "step": 14450 |
| }, |
| { |
| "epoch": 0.6548319898559913, |
| "grad_norm": 0.5153873562812805, |
| "learning_rate": 0.00016079814269116222, |
| "loss": 0.1774, |
| "step": 14460 |
| }, |
| { |
| "epoch": 0.6552848473870121, |
| "grad_norm": 0.35115787386894226, |
| "learning_rate": 0.00016073867453977877, |
| "loss": 0.1919, |
| "step": 14470 |
| }, |
| { |
| "epoch": 0.6557377049180327, |
| "grad_norm": 0.3883000314235687, |
| "learning_rate": 0.00016067917233208577, |
| "loss": 0.1746, |
| "step": 14480 |
| }, |
| { |
| "epoch": 0.6561905624490535, |
| "grad_norm": 0.6237671375274658, |
| "learning_rate": 0.00016061963610144626, |
| "loss": 0.1955, |
| "step": 14490 |
| }, |
| { |
| "epoch": 0.6566434199800742, |
| "grad_norm": 0.3687939941883087, |
| "learning_rate": 0.0001605600658812423, |
| "loss": 0.1543, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.657096277511095, |
| "grad_norm": 0.3738263249397278, |
| "learning_rate": 0.00016050046170487507, |
| "loss": 0.1709, |
| "step": 14510 |
| }, |
| { |
| "epoch": 0.6575491350421158, |
| "grad_norm": 0.5553914308547974, |
| "learning_rate": 0.00016044082360576475, |
| "loss": 0.1835, |
| "step": 14520 |
| }, |
| { |
| "epoch": 0.6580019925731365, |
| "grad_norm": 0.31449252367019653, |
| "learning_rate": 0.00016038115161735056, |
| "loss": 0.1818, |
| "step": 14530 |
| }, |
| { |
| "epoch": 0.6584548501041573, |
| "grad_norm": 0.36781567335128784, |
| "learning_rate": 0.00016032144577309075, |
| "loss": 0.183, |
| "step": 14540 |
| }, |
| { |
| "epoch": 0.6589077076351779, |
| "grad_norm": 0.35801130533218384, |
| "learning_rate": 0.00016026170610646245, |
| "loss": 0.1771, |
| "step": 14550 |
| }, |
| { |
| "epoch": 0.6593605651661987, |
| "grad_norm": 0.3693104386329651, |
| "learning_rate": 0.0001602019326509619, |
| "loss": 0.1763, |
| "step": 14560 |
| }, |
| { |
| "epoch": 0.6598134226972194, |
| "grad_norm": 0.3711768090724945, |
| "learning_rate": 0.00016014212544010413, |
| "loss": 0.1707, |
| "step": 14570 |
| }, |
| { |
| "epoch": 0.6602662802282402, |
| "grad_norm": 0.42132726311683655, |
| "learning_rate": 0.00016008228450742324, |
| "loss": 0.1832, |
| "step": 14580 |
| }, |
| { |
| "epoch": 0.6607191377592609, |
| "grad_norm": 0.3945630192756653, |
| "learning_rate": 0.00016002240988647213, |
| "loss": 0.152, |
| "step": 14590 |
| }, |
| { |
| "epoch": 0.6611719952902817, |
| "grad_norm": 0.4569098949432373, |
| "learning_rate": 0.0001599625016108227, |
| "loss": 0.2033, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.6616248528213025, |
| "grad_norm": 0.42940595746040344, |
| "learning_rate": 0.00015990255971406556, |
| "loss": 0.1824, |
| "step": 14610 |
| }, |
| { |
| "epoch": 0.6620777103523232, |
| "grad_norm": 0.4168236553668976, |
| "learning_rate": 0.00015984258422981033, |
| "loss": 0.1647, |
| "step": 14620 |
| }, |
| { |
| "epoch": 0.6625305678833439, |
| "grad_norm": 0.43679308891296387, |
| "learning_rate": 0.00015978257519168535, |
| "loss": 0.1796, |
| "step": 14630 |
| }, |
| { |
| "epoch": 0.6629834254143646, |
| "grad_norm": 0.4291830062866211, |
| "learning_rate": 0.0001597225326333379, |
| "loss": 0.1682, |
| "step": 14640 |
| }, |
| { |
| "epoch": 0.6634362829453854, |
| "grad_norm": 0.46512332558631897, |
| "learning_rate": 0.0001596624565884339, |
| "loss": 0.1599, |
| "step": 14650 |
| }, |
| { |
| "epoch": 0.6638891404764061, |
| "grad_norm": 0.441057950258255, |
| "learning_rate": 0.00015960234709065812, |
| "loss": 0.1759, |
| "step": 14660 |
| }, |
| { |
| "epoch": 0.6643419980074269, |
| "grad_norm": 0.3182176947593689, |
| "learning_rate": 0.00015954220417371416, |
| "loss": 0.1502, |
| "step": 14670 |
| }, |
| { |
| "epoch": 0.6647948555384476, |
| "grad_norm": 0.364551305770874, |
| "learning_rate": 0.00015948202787132421, |
| "loss": 0.181, |
| "step": 14680 |
| }, |
| { |
| "epoch": 0.6652477130694684, |
| "grad_norm": 0.4026900827884674, |
| "learning_rate": 0.0001594218182172293, |
| "loss": 0.188, |
| "step": 14690 |
| }, |
| { |
| "epoch": 0.665700570600489, |
| "grad_norm": 0.5440531969070435, |
| "learning_rate": 0.00015936157524518908, |
| "loss": 0.1913, |
| "step": 14700 |
| }, |
| { |
| "epoch": 0.6661534281315098, |
| "grad_norm": 0.3313021659851074, |
| "learning_rate": 0.000159301298988982, |
| "loss": 0.1691, |
| "step": 14710 |
| }, |
| { |
| "epoch": 0.6666062856625306, |
| "grad_norm": 0.3782554268836975, |
| "learning_rate": 0.00015924098948240497, |
| "loss": 0.2081, |
| "step": 14720 |
| }, |
| { |
| "epoch": 0.6670591431935513, |
| "grad_norm": 0.41381847858428955, |
| "learning_rate": 0.00015918064675927375, |
| "loss": 0.17, |
| "step": 14730 |
| }, |
| { |
| "epoch": 0.6675120007245721, |
| "grad_norm": 0.3430550694465637, |
| "learning_rate": 0.0001591202708534226, |
| "loss": 0.1712, |
| "step": 14740 |
| }, |
| { |
| "epoch": 0.6679648582555928, |
| "grad_norm": 0.43353360891342163, |
| "learning_rate": 0.00015905986179870447, |
| "loss": 0.187, |
| "step": 14750 |
| }, |
| { |
| "epoch": 0.6684177157866136, |
| "grad_norm": 0.38335633277893066, |
| "learning_rate": 0.00015899941962899084, |
| "loss": 0.1688, |
| "step": 14760 |
| }, |
| { |
| "epoch": 0.6688705733176342, |
| "grad_norm": 0.3117828369140625, |
| "learning_rate": 0.0001589389443781717, |
| "loss": 0.1886, |
| "step": 14770 |
| }, |
| { |
| "epoch": 0.669323430848655, |
| "grad_norm": 0.492303729057312, |
| "learning_rate": 0.00015887843608015573, |
| "loss": 0.1917, |
| "step": 14780 |
| }, |
| { |
| "epoch": 0.6697762883796757, |
| "grad_norm": 0.412275105714798, |
| "learning_rate": 0.00015881789476887006, |
| "loss": 0.2031, |
| "step": 14790 |
| }, |
| { |
| "epoch": 0.6702291459106965, |
| "grad_norm": 0.5670540928840637, |
| "learning_rate": 0.0001587573204782603, |
| "loss": 0.1911, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.6706820034417172, |
| "grad_norm": 0.7483437061309814, |
| "learning_rate": 0.00015869671324229061, |
| "loss": 0.1807, |
| "step": 14810 |
| }, |
| { |
| "epoch": 0.671134860972738, |
| "grad_norm": 0.4051755964756012, |
| "learning_rate": 0.00015863607309494362, |
| "loss": 0.1651, |
| "step": 14820 |
| }, |
| { |
| "epoch": 0.6715877185037588, |
| "grad_norm": 0.38806095719337463, |
| "learning_rate": 0.00015857540007022033, |
| "loss": 0.1711, |
| "step": 14830 |
| }, |
| { |
| "epoch": 0.6720405760347795, |
| "grad_norm": 0.45691871643066406, |
| "learning_rate": 0.00015851469420214033, |
| "loss": 0.1823, |
| "step": 14840 |
| }, |
| { |
| "epoch": 0.6724934335658002, |
| "grad_norm": 0.41940930485725403, |
| "learning_rate": 0.00015845395552474147, |
| "loss": 0.1643, |
| "step": 14850 |
| }, |
| { |
| "epoch": 0.6729462910968209, |
| "grad_norm": 0.41174575686454773, |
| "learning_rate": 0.00015839318407208011, |
| "loss": 0.1715, |
| "step": 14860 |
| }, |
| { |
| "epoch": 0.6733991486278417, |
| "grad_norm": 0.36043575406074524, |
| "learning_rate": 0.00015833237987823088, |
| "loss": 0.1951, |
| "step": 14870 |
| }, |
| { |
| "epoch": 0.6738520061588624, |
| "grad_norm": 0.3869519531726837, |
| "learning_rate": 0.0001582715429772869, |
| "loss": 0.1683, |
| "step": 14880 |
| }, |
| { |
| "epoch": 0.6743048636898832, |
| "grad_norm": 0.503814697265625, |
| "learning_rate": 0.0001582106734033595, |
| "loss": 0.158, |
| "step": 14890 |
| }, |
| { |
| "epoch": 0.6747577212209039, |
| "grad_norm": 0.5015385746955872, |
| "learning_rate": 0.00015814977119057836, |
| "loss": 0.1788, |
| "step": 14900 |
| }, |
| { |
| "epoch": 0.6752105787519247, |
| "grad_norm": 0.6002166271209717, |
| "learning_rate": 0.00015808883637309155, |
| "loss": 0.1733, |
| "step": 14910 |
| }, |
| { |
| "epoch": 0.6756634362829453, |
| "grad_norm": 0.4434871971607208, |
| "learning_rate": 0.00015802786898506534, |
| "loss": 0.2155, |
| "step": 14920 |
| }, |
| { |
| "epoch": 0.6761162938139661, |
| "grad_norm": 0.36393222212791443, |
| "learning_rate": 0.00015796686906068425, |
| "loss": 0.149, |
| "step": 14930 |
| }, |
| { |
| "epoch": 0.6765691513449869, |
| "grad_norm": 0.5228678584098816, |
| "learning_rate": 0.0001579058366341511, |
| "loss": 0.1782, |
| "step": 14940 |
| }, |
| { |
| "epoch": 0.6770220088760076, |
| "grad_norm": 0.37390783429145813, |
| "learning_rate": 0.00015784477173968691, |
| "loss": 0.1822, |
| "step": 14950 |
| }, |
| { |
| "epoch": 0.6774748664070284, |
| "grad_norm": 0.4303264319896698, |
| "learning_rate": 0.0001577836744115309, |
| "loss": 0.1965, |
| "step": 14960 |
| }, |
| { |
| "epoch": 0.6779277239380491, |
| "grad_norm": 0.38123011589050293, |
| "learning_rate": 0.00015772254468394045, |
| "loss": 0.1838, |
| "step": 14970 |
| }, |
| { |
| "epoch": 0.6783805814690699, |
| "grad_norm": 0.4034271240234375, |
| "learning_rate": 0.0001576613825911912, |
| "loss": 0.2019, |
| "step": 14980 |
| }, |
| { |
| "epoch": 0.6788334390000905, |
| "grad_norm": 0.47110286355018616, |
| "learning_rate": 0.00015760018816757674, |
| "loss": 0.189, |
| "step": 14990 |
| }, |
| { |
| "epoch": 0.6792862965311113, |
| "grad_norm": 0.3800531029701233, |
| "learning_rate": 0.00015753896144740908, |
| "loss": 0.1712, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.6792862965311113, |
| "eval_chrf": 79.72314247218827, |
| "eval_loss": 0.16112208366394043, |
| "eval_runtime": 10.9363, |
| "eval_samples_per_second": 0.914, |
| "eval_steps_per_second": 0.091, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.679739154062132, |
| "grad_norm": 0.43706080317497253, |
| "learning_rate": 0.00015747770246501806, |
| "loss": 0.1939, |
| "step": 15010 |
| }, |
| { |
| "epoch": 0.6801920115931528, |
| "grad_norm": 0.4327431619167328, |
| "learning_rate": 0.00015741641125475178, |
| "loss": 0.19, |
| "step": 15020 |
| }, |
| { |
| "epoch": 0.6806448691241735, |
| "grad_norm": 0.34704381227493286, |
| "learning_rate": 0.00015735508785097636, |
| "loss": 0.1595, |
| "step": 15030 |
| }, |
| { |
| "epoch": 0.6810977266551943, |
| "grad_norm": 0.45255932211875916, |
| "learning_rate": 0.00015729373228807593, |
| "loss": 0.2069, |
| "step": 15040 |
| }, |
| { |
| "epoch": 0.6815505841862151, |
| "grad_norm": 0.4494575560092926, |
| "learning_rate": 0.00015723234460045273, |
| "loss": 0.1864, |
| "step": 15050 |
| }, |
| { |
| "epoch": 0.6820034417172357, |
| "grad_norm": 0.43747246265411377, |
| "learning_rate": 0.00015717092482252695, |
| "loss": 0.175, |
| "step": 15060 |
| }, |
| { |
| "epoch": 0.6824562992482565, |
| "grad_norm": 0.30457955598831177, |
| "learning_rate": 0.00015710947298873683, |
| "loss": 0.1757, |
| "step": 15070 |
| }, |
| { |
| "epoch": 0.6829091567792772, |
| "grad_norm": 0.3637182414531708, |
| "learning_rate": 0.0001570479891335385, |
| "loss": 0.2001, |
| "step": 15080 |
| }, |
| { |
| "epoch": 0.683362014310298, |
| "grad_norm": 0.47846782207489014, |
| "learning_rate": 0.00015698647329140614, |
| "loss": 0.1855, |
| "step": 15090 |
| }, |
| { |
| "epoch": 0.6838148718413187, |
| "grad_norm": 0.3498048484325409, |
| "learning_rate": 0.00015692492549683178, |
| "loss": 0.1709, |
| "step": 15100 |
| }, |
| { |
| "epoch": 0.6842677293723395, |
| "grad_norm": 0.49282166361808777, |
| "learning_rate": 0.00015686334578432541, |
| "loss": 0.1888, |
| "step": 15110 |
| }, |
| { |
| "epoch": 0.6847205869033602, |
| "grad_norm": 0.35319894552230835, |
| "learning_rate": 0.00015680173418841493, |
| "loss": 0.1775, |
| "step": 15120 |
| }, |
| { |
| "epoch": 0.685173444434381, |
| "grad_norm": 0.4341161251068115, |
| "learning_rate": 0.00015674009074364607, |
| "loss": 0.2043, |
| "step": 15130 |
| }, |
| { |
| "epoch": 0.6856263019654016, |
| "grad_norm": 0.3676917552947998, |
| "learning_rate": 0.00015667841548458252, |
| "loss": 0.1753, |
| "step": 15140 |
| }, |
| { |
| "epoch": 0.6860791594964224, |
| "grad_norm": 0.364055335521698, |
| "learning_rate": 0.00015661670844580567, |
| "loss": 0.1563, |
| "step": 15150 |
| }, |
| { |
| "epoch": 0.6865320170274432, |
| "grad_norm": 0.5195023417472839, |
| "learning_rate": 0.00015655496966191477, |
| "loss": 0.1876, |
| "step": 15160 |
| }, |
| { |
| "epoch": 0.6869848745584639, |
| "grad_norm": 0.4854353070259094, |
| "learning_rate": 0.00015649319916752696, |
| "loss": 0.1817, |
| "step": 15170 |
| }, |
| { |
| "epoch": 0.6874377320894847, |
| "grad_norm": 0.47016629576683044, |
| "learning_rate": 0.00015643139699727705, |
| "loss": 0.1921, |
| "step": 15180 |
| }, |
| { |
| "epoch": 0.6878905896205054, |
| "grad_norm": 0.367403507232666, |
| "learning_rate": 0.00015636956318581765, |
| "loss": 0.1758, |
| "step": 15190 |
| }, |
| { |
| "epoch": 0.6883434471515262, |
| "grad_norm": 0.3292784094810486, |
| "learning_rate": 0.00015630769776781914, |
| "loss": 0.1727, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.6887963046825468, |
| "grad_norm": 0.3884672224521637, |
| "learning_rate": 0.0001562458007779696, |
| "loss": 0.1761, |
| "step": 15210 |
| }, |
| { |
| "epoch": 0.6892491622135676, |
| "grad_norm": 0.3499252200126648, |
| "learning_rate": 0.0001561838722509748, |
| "loss": 0.1887, |
| "step": 15220 |
| }, |
| { |
| "epoch": 0.6897020197445883, |
| "grad_norm": 1.7124887704849243, |
| "learning_rate": 0.0001561219122215582, |
| "loss": 0.1469, |
| "step": 15230 |
| }, |
| { |
| "epoch": 0.6901548772756091, |
| "grad_norm": 0.4366299510002136, |
| "learning_rate": 0.00015605992072446092, |
| "loss": 0.1706, |
| "step": 15240 |
| }, |
| { |
| "epoch": 0.6906077348066298, |
| "grad_norm": 0.29201626777648926, |
| "learning_rate": 0.00015599789779444174, |
| "loss": 0.1959, |
| "step": 15250 |
| }, |
| { |
| "epoch": 0.6910605923376506, |
| "grad_norm": 0.3731488883495331, |
| "learning_rate": 0.00015593584346627702, |
| "loss": 0.194, |
| "step": 15260 |
| }, |
| { |
| "epoch": 0.6915134498686714, |
| "grad_norm": 0.43735894560813904, |
| "learning_rate": 0.00015587375777476083, |
| "loss": 0.1679, |
| "step": 15270 |
| }, |
| { |
| "epoch": 0.691966307399692, |
| "grad_norm": 0.5219751596450806, |
| "learning_rate": 0.00015581164075470472, |
| "loss": 0.1818, |
| "step": 15280 |
| }, |
| { |
| "epoch": 0.6924191649307128, |
| "grad_norm": 0.38412991166114807, |
| "learning_rate": 0.00015574949244093783, |
| "loss": 0.1735, |
| "step": 15290 |
| }, |
| { |
| "epoch": 0.6928720224617335, |
| "grad_norm": 0.4602055251598358, |
| "learning_rate": 0.0001556873128683068, |
| "loss": 0.1802, |
| "step": 15300 |
| }, |
| { |
| "epoch": 0.6933248799927543, |
| "grad_norm": 0.5174024701118469, |
| "learning_rate": 0.000155625102071676, |
| "loss": 0.1584, |
| "step": 15310 |
| }, |
| { |
| "epoch": 0.693777737523775, |
| "grad_norm": 0.47438669204711914, |
| "learning_rate": 0.00015556286008592705, |
| "loss": 0.1802, |
| "step": 15320 |
| }, |
| { |
| "epoch": 0.6942305950547958, |
| "grad_norm": 0.3923074007034302, |
| "learning_rate": 0.0001555005869459592, |
| "loss": 0.1661, |
| "step": 15330 |
| }, |
| { |
| "epoch": 0.6946834525858165, |
| "grad_norm": 0.4011114537715912, |
| "learning_rate": 0.00015543828268668915, |
| "loss": 0.166, |
| "step": 15340 |
| }, |
| { |
| "epoch": 0.6951363101168373, |
| "grad_norm": 0.5458866357803345, |
| "learning_rate": 0.00015537594734305098, |
| "loss": 0.1884, |
| "step": 15350 |
| }, |
| { |
| "epoch": 0.6955891676478579, |
| "grad_norm": 0.5292759537696838, |
| "learning_rate": 0.00015531358094999633, |
| "loss": 0.1781, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.6960420251788787, |
| "grad_norm": 0.46571633219718933, |
| "learning_rate": 0.00015525118354249413, |
| "loss": 0.1807, |
| "step": 15370 |
| }, |
| { |
| "epoch": 0.6964948827098995, |
| "grad_norm": 0.4802543520927429, |
| "learning_rate": 0.00015518875515553075, |
| "loss": 0.1805, |
| "step": 15380 |
| }, |
| { |
| "epoch": 0.6969477402409202, |
| "grad_norm": 0.37716320157051086, |
| "learning_rate": 0.00015512629582410998, |
| "loss": 0.1844, |
| "step": 15390 |
| }, |
| { |
| "epoch": 0.697400597771941, |
| "grad_norm": 0.39970386028289795, |
| "learning_rate": 0.0001550638055832528, |
| "loss": 0.1482, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.6978534553029617, |
| "grad_norm": 0.4422559142112732, |
| "learning_rate": 0.0001550012844679977, |
| "loss": 0.199, |
| "step": 15410 |
| }, |
| { |
| "epoch": 0.6983063128339825, |
| "grad_norm": 0.3976079523563385, |
| "learning_rate": 0.00015493873251340041, |
| "loss": 0.1693, |
| "step": 15420 |
| }, |
| { |
| "epoch": 0.6987591703650031, |
| "grad_norm": 0.42097848653793335, |
| "learning_rate": 0.00015487614975453394, |
| "loss": 0.1636, |
| "step": 15430 |
| }, |
| { |
| "epoch": 0.6992120278960239, |
| "grad_norm": 0.3382587432861328, |
| "learning_rate": 0.00015481353622648854, |
| "loss": 0.1626, |
| "step": 15440 |
| }, |
| { |
| "epoch": 0.6996648854270446, |
| "grad_norm": 0.6046080589294434, |
| "learning_rate": 0.00015475089196437182, |
| "loss": 0.2045, |
| "step": 15450 |
| }, |
| { |
| "epoch": 0.7001177429580654, |
| "grad_norm": 0.36402109265327454, |
| "learning_rate": 0.00015468821700330855, |
| "loss": 0.1881, |
| "step": 15460 |
| }, |
| { |
| "epoch": 0.7005706004890861, |
| "grad_norm": 0.4470110237598419, |
| "learning_rate": 0.00015462551137844063, |
| "loss": 0.1714, |
| "step": 15470 |
| }, |
| { |
| "epoch": 0.7010234580201069, |
| "grad_norm": 0.369165301322937, |
| "learning_rate": 0.00015456277512492737, |
| "loss": 0.1648, |
| "step": 15480 |
| }, |
| { |
| "epoch": 0.7014763155511277, |
| "grad_norm": 0.26493504643440247, |
| "learning_rate": 0.00015450000827794505, |
| "loss": 0.1724, |
| "step": 15490 |
| }, |
| { |
| "epoch": 0.7019291730821483, |
| "grad_norm": 0.44206854701042175, |
| "learning_rate": 0.00015443721087268715, |
| "loss": 0.1534, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.7023820306131691, |
| "grad_norm": 0.4720167815685272, |
| "learning_rate": 0.00015437438294436438, |
| "loss": 0.1945, |
| "step": 15510 |
| }, |
| { |
| "epoch": 0.7028348881441898, |
| "grad_norm": 0.42412590980529785, |
| "learning_rate": 0.0001543115245282045, |
| "loss": 0.1993, |
| "step": 15520 |
| }, |
| { |
| "epoch": 0.7032877456752106, |
| "grad_norm": 0.3611489534378052, |
| "learning_rate": 0.0001542486356594523, |
| "loss": 0.1725, |
| "step": 15530 |
| }, |
| { |
| "epoch": 0.7037406032062313, |
| "grad_norm": 0.467637836933136, |
| "learning_rate": 0.0001541857163733698, |
| "loss": 0.1662, |
| "step": 15540 |
| }, |
| { |
| "epoch": 0.7041934607372521, |
| "grad_norm": 0.44230884313583374, |
| "learning_rate": 0.00015412276670523592, |
| "loss": 0.1812, |
| "step": 15550 |
| }, |
| { |
| "epoch": 0.7046463182682728, |
| "grad_norm": 0.40242454409599304, |
| "learning_rate": 0.0001540597866903467, |
| "loss": 0.1596, |
| "step": 15560 |
| }, |
| { |
| "epoch": 0.7050991757992935, |
| "grad_norm": 0.4991750121116638, |
| "learning_rate": 0.00015399677636401514, |
| "loss": 0.2045, |
| "step": 15570 |
| }, |
| { |
| "epoch": 0.7055520333303142, |
| "grad_norm": 0.38457340002059937, |
| "learning_rate": 0.0001539337357615713, |
| "loss": 0.158, |
| "step": 15580 |
| }, |
| { |
| "epoch": 0.706004890861335, |
| "grad_norm": 0.5183397531509399, |
| "learning_rate": 0.00015387066491836217, |
| "loss": 0.1964, |
| "step": 15590 |
| }, |
| { |
| "epoch": 0.7064577483923558, |
| "grad_norm": 0.37787193059921265, |
| "learning_rate": 0.00015380756386975178, |
| "loss": 0.1738, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.7069106059233765, |
| "grad_norm": 0.41050177812576294, |
| "learning_rate": 0.00015374443265112091, |
| "loss": 0.171, |
| "step": 15610 |
| }, |
| { |
| "epoch": 0.7073634634543973, |
| "grad_norm": 0.45450904965400696, |
| "learning_rate": 0.00015368127129786744, |
| "loss": 0.1597, |
| "step": 15620 |
| }, |
| { |
| "epoch": 0.707816320985418, |
| "grad_norm": 0.3817354440689087, |
| "learning_rate": 0.00015361807984540612, |
| "loss": 0.1904, |
| "step": 15630 |
| }, |
| { |
| "epoch": 0.7082691785164388, |
| "grad_norm": 0.47394871711730957, |
| "learning_rate": 0.00015355485832916846, |
| "loss": 0.199, |
| "step": 15640 |
| }, |
| { |
| "epoch": 0.7087220360474594, |
| "grad_norm": 0.41219305992126465, |
| "learning_rate": 0.00015349160678460294, |
| "loss": 0.1688, |
| "step": 15650 |
| }, |
| { |
| "epoch": 0.7091748935784802, |
| "grad_norm": 0.4790438413619995, |
| "learning_rate": 0.00015342832524717484, |
| "loss": 0.1725, |
| "step": 15660 |
| }, |
| { |
| "epoch": 0.7096277511095009, |
| "grad_norm": 0.37097597122192383, |
| "learning_rate": 0.00015336501375236625, |
| "loss": 0.179, |
| "step": 15670 |
| }, |
| { |
| "epoch": 0.7100806086405217, |
| "grad_norm": 0.4001186192035675, |
| "learning_rate": 0.00015330167233567607, |
| "loss": 0.1846, |
| "step": 15680 |
| }, |
| { |
| "epoch": 0.7105334661715424, |
| "grad_norm": 0.2776915431022644, |
| "learning_rate": 0.00015323830103262, |
| "loss": 0.1459, |
| "step": 15690 |
| }, |
| { |
| "epoch": 0.7109863237025632, |
| "grad_norm": 0.45224013924598694, |
| "learning_rate": 0.00015317489987873043, |
| "loss": 0.1904, |
| "step": 15700 |
| }, |
| { |
| "epoch": 0.711439181233584, |
| "grad_norm": 0.5740782618522644, |
| "learning_rate": 0.00015311146890955655, |
| "loss": 0.1683, |
| "step": 15710 |
| }, |
| { |
| "epoch": 0.7118920387646046, |
| "grad_norm": 0.4492044448852539, |
| "learning_rate": 0.00015304800816066426, |
| "loss": 0.1652, |
| "step": 15720 |
| }, |
| { |
| "epoch": 0.7123448962956254, |
| "grad_norm": 0.36929264664649963, |
| "learning_rate": 0.00015298451766763608, |
| "loss": 0.1718, |
| "step": 15730 |
| }, |
| { |
| "epoch": 0.7127977538266461, |
| "grad_norm": 0.4240851402282715, |
| "learning_rate": 0.00015292099746607135, |
| "loss": 0.2047, |
| "step": 15740 |
| }, |
| { |
| "epoch": 0.7132506113576669, |
| "grad_norm": 0.42248156666755676, |
| "learning_rate": 0.00015285744759158592, |
| "loss": 0.205, |
| "step": 15750 |
| }, |
| { |
| "epoch": 0.7137034688886876, |
| "grad_norm": 0.35184019804000854, |
| "learning_rate": 0.0001527938680798124, |
| "loss": 0.1894, |
| "step": 15760 |
| }, |
| { |
| "epoch": 0.7141563264197084, |
| "grad_norm": 0.45526647567749023, |
| "learning_rate": 0.00015273025896639993, |
| "loss": 0.1753, |
| "step": 15770 |
| }, |
| { |
| "epoch": 0.7146091839507291, |
| "grad_norm": 0.3351684510707855, |
| "learning_rate": 0.00015266662028701425, |
| "loss": 0.1756, |
| "step": 15780 |
| }, |
| { |
| "epoch": 0.7150620414817498, |
| "grad_norm": 0.36095982789993286, |
| "learning_rate": 0.0001526029520773378, |
| "loss": 0.1781, |
| "step": 15790 |
| }, |
| { |
| "epoch": 0.7155148990127705, |
| "grad_norm": 0.40348491072654724, |
| "learning_rate": 0.00015253925437306939, |
| "loss": 0.2053, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.7159677565437913, |
| "grad_norm": 0.6563317775726318, |
| "learning_rate": 0.00015247552720992454, |
| "loss": 0.2023, |
| "step": 15810 |
| }, |
| { |
| "epoch": 0.7164206140748121, |
| "grad_norm": 0.5342344641685486, |
| "learning_rate": 0.00015241177062363522, |
| "loss": 0.1682, |
| "step": 15820 |
| }, |
| { |
| "epoch": 0.7168734716058328, |
| "grad_norm": 0.4724293053150177, |
| "learning_rate": 0.00015234798464994976, |
| "loss": 0.1865, |
| "step": 15830 |
| }, |
| { |
| "epoch": 0.7173263291368536, |
| "grad_norm": 0.3945305347442627, |
| "learning_rate": 0.00015228416932463326, |
| "loss": 0.1545, |
| "step": 15840 |
| }, |
| { |
| "epoch": 0.7177791866678743, |
| "grad_norm": 0.4521254003047943, |
| "learning_rate": 0.00015222032468346702, |
| "loss": 0.1825, |
| "step": 15850 |
| }, |
| { |
| "epoch": 0.7182320441988951, |
| "grad_norm": 0.38483917713165283, |
| "learning_rate": 0.0001521564507622489, |
| "loss": 0.1686, |
| "step": 15860 |
| }, |
| { |
| "epoch": 0.7186849017299157, |
| "grad_norm": 0.40425315499305725, |
| "learning_rate": 0.00015209254759679313, |
| "loss": 0.1505, |
| "step": 15870 |
| }, |
| { |
| "epoch": 0.7191377592609365, |
| "grad_norm": 0.34470799565315247, |
| "learning_rate": 0.00015202861522293034, |
| "loss": 0.1855, |
| "step": 15880 |
| }, |
| { |
| "epoch": 0.7195906167919572, |
| "grad_norm": 0.3882667124271393, |
| "learning_rate": 0.00015196465367650763, |
| "loss": 0.1721, |
| "step": 15890 |
| }, |
| { |
| "epoch": 0.720043474322978, |
| "grad_norm": 0.46202096343040466, |
| "learning_rate": 0.00015190066299338833, |
| "loss": 0.1869, |
| "step": 15900 |
| }, |
| { |
| "epoch": 0.7204963318539988, |
| "grad_norm": 0.444326788187027, |
| "learning_rate": 0.00015183664320945215, |
| "loss": 0.1734, |
| "step": 15910 |
| }, |
| { |
| "epoch": 0.7209491893850195, |
| "grad_norm": 0.3723192811012268, |
| "learning_rate": 0.00015177259436059513, |
| "loss": 0.172, |
| "step": 15920 |
| }, |
| { |
| "epoch": 0.7214020469160403, |
| "grad_norm": 0.4908646047115326, |
| "learning_rate": 0.00015170851648272962, |
| "loss": 0.1834, |
| "step": 15930 |
| }, |
| { |
| "epoch": 0.7218549044470609, |
| "grad_norm": 0.6243703961372375, |
| "learning_rate": 0.0001516444096117842, |
| "loss": 0.2002, |
| "step": 15940 |
| }, |
| { |
| "epoch": 0.7223077619780817, |
| "grad_norm": 0.3855245113372803, |
| "learning_rate": 0.0001515802737837038, |
| "loss": 0.1922, |
| "step": 15950 |
| }, |
| { |
| "epoch": 0.7227606195091024, |
| "grad_norm": 0.3904649019241333, |
| "learning_rate": 0.00015151610903444942, |
| "loss": 0.185, |
| "step": 15960 |
| }, |
| { |
| "epoch": 0.7232134770401232, |
| "grad_norm": 0.28631916642189026, |
| "learning_rate": 0.00015145191539999846, |
| "loss": 0.1685, |
| "step": 15970 |
| }, |
| { |
| "epoch": 0.7236663345711439, |
| "grad_norm": 0.38608241081237793, |
| "learning_rate": 0.00015138769291634437, |
| "loss": 0.2114, |
| "step": 15980 |
| }, |
| { |
| "epoch": 0.7241191921021647, |
| "grad_norm": 0.4355546534061432, |
| "learning_rate": 0.00015132344161949689, |
| "loss": 0.1499, |
| "step": 15990 |
| }, |
| { |
| "epoch": 0.7245720496331854, |
| "grad_norm": 0.4393002986907959, |
| "learning_rate": 0.00015125916154548185, |
| "loss": 0.1834, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.7245720496331854, |
| "eval_chrf": 80.97540239216912, |
| "eval_loss": 0.16372597217559814, |
| "eval_runtime": 16.3583, |
| "eval_samples_per_second": 0.611, |
| "eval_steps_per_second": 0.061, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.7250249071642061, |
| "grad_norm": 0.413165807723999, |
| "learning_rate": 0.00015119485273034123, |
| "loss": 0.1591, |
| "step": 16010 |
| }, |
| { |
| "epoch": 0.7254777646952268, |
| "grad_norm": 0.5554599761962891, |
| "learning_rate": 0.00015113051521013313, |
| "loss": 0.1754, |
| "step": 16020 |
| }, |
| { |
| "epoch": 0.7259306222262476, |
| "grad_norm": 0.41952240467071533, |
| "learning_rate": 0.00015106614902093174, |
| "loss": 0.1743, |
| "step": 16030 |
| }, |
| { |
| "epoch": 0.7263834797572684, |
| "grad_norm": 0.5459387302398682, |
| "learning_rate": 0.00015100175419882728, |
| "loss": 0.2083, |
| "step": 16040 |
| }, |
| { |
| "epoch": 0.7268363372882891, |
| "grad_norm": 0.5478861331939697, |
| "learning_rate": 0.00015093733077992612, |
| "loss": 0.1837, |
| "step": 16050 |
| }, |
| { |
| "epoch": 0.7272891948193099, |
| "grad_norm": 0.3802858889102936, |
| "learning_rate": 0.0001508728788003506, |
| "loss": 0.1839, |
| "step": 16060 |
| }, |
| { |
| "epoch": 0.7277420523503306, |
| "grad_norm": 0.39728957414627075, |
| "learning_rate": 0.00015080839829623909, |
| "loss": 0.1603, |
| "step": 16070 |
| }, |
| { |
| "epoch": 0.7281949098813514, |
| "grad_norm": 0.4951670467853546, |
| "learning_rate": 0.00015074388930374596, |
| "loss": 0.1769, |
| "step": 16080 |
| }, |
| { |
| "epoch": 0.728647767412372, |
| "grad_norm": 0.37879490852355957, |
| "learning_rate": 0.00015067935185904152, |
| "loss": 0.1603, |
| "step": 16090 |
| }, |
| { |
| "epoch": 0.7291006249433928, |
| "grad_norm": 0.33771833777427673, |
| "learning_rate": 0.00015061478599831212, |
| "loss": 0.1876, |
| "step": 16100 |
| }, |
| { |
| "epoch": 0.7295534824744135, |
| "grad_norm": 0.5644478797912598, |
| "learning_rate": 0.0001505501917577599, |
| "loss": 0.177, |
| "step": 16110 |
| }, |
| { |
| "epoch": 0.7300063400054343, |
| "grad_norm": 0.4218476116657257, |
| "learning_rate": 0.000150485569173603, |
| "loss": 0.1801, |
| "step": 16120 |
| }, |
| { |
| "epoch": 0.730459197536455, |
| "grad_norm": 0.2640566825866699, |
| "learning_rate": 0.00015042091828207553, |
| "loss": 0.1885, |
| "step": 16130 |
| }, |
| { |
| "epoch": 0.7309120550674758, |
| "grad_norm": 0.5227727293968201, |
| "learning_rate": 0.0001503562391194273, |
| "loss": 0.1689, |
| "step": 16140 |
| }, |
| { |
| "epoch": 0.7313649125984966, |
| "grad_norm": 0.5304290056228638, |
| "learning_rate": 0.00015029153172192413, |
| "loss": 0.203, |
| "step": 16150 |
| }, |
| { |
| "epoch": 0.7318177701295172, |
| "grad_norm": 0.35998350381851196, |
| "learning_rate": 0.00015022679612584753, |
| "loss": 0.1614, |
| "step": 16160 |
| }, |
| { |
| "epoch": 0.732270627660538, |
| "grad_norm": 0.3472217321395874, |
| "learning_rate": 0.0001501620323674949, |
| "loss": 0.1797, |
| "step": 16170 |
| }, |
| { |
| "epoch": 0.7327234851915587, |
| "grad_norm": 0.30541810393333435, |
| "learning_rate": 0.00015009724048317954, |
| "loss": 0.1527, |
| "step": 16180 |
| }, |
| { |
| "epoch": 0.7331763427225795, |
| "grad_norm": 0.4038911759853363, |
| "learning_rate": 0.00015003242050923022, |
| "loss": 0.2105, |
| "step": 16190 |
| }, |
| { |
| "epoch": 0.7336292002536002, |
| "grad_norm": 0.4016546308994293, |
| "learning_rate": 0.0001499675724819918, |
| "loss": 0.1821, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.734082057784621, |
| "grad_norm": 0.37570664286613464, |
| "learning_rate": 0.0001499026964378246, |
| "loss": 0.2005, |
| "step": 16210 |
| }, |
| { |
| "epoch": 0.7345349153156417, |
| "grad_norm": 0.35822850465774536, |
| "learning_rate": 0.00014983779241310488, |
| "loss": 0.2041, |
| "step": 16220 |
| }, |
| { |
| "epoch": 0.7349877728466624, |
| "grad_norm": 0.5933873653411865, |
| "learning_rate": 0.00014977286044422436, |
| "loss": 0.1714, |
| "step": 16230 |
| }, |
| { |
| "epoch": 0.7354406303776831, |
| "grad_norm": 0.35240015387535095, |
| "learning_rate": 0.0001497079005675906, |
| "loss": 0.1685, |
| "step": 16240 |
| }, |
| { |
| "epoch": 0.7358934879087039, |
| "grad_norm": 0.5472621321678162, |
| "learning_rate": 0.0001496429128196267, |
| "loss": 0.1909, |
| "step": 16250 |
| }, |
| { |
| "epoch": 0.7363463454397247, |
| "grad_norm": 0.3498932421207428, |
| "learning_rate": 0.00014957789723677147, |
| "loss": 0.1505, |
| "step": 16260 |
| }, |
| { |
| "epoch": 0.7367992029707454, |
| "grad_norm": 0.4525830149650574, |
| "learning_rate": 0.0001495128538554793, |
| "loss": 0.1731, |
| "step": 16270 |
| }, |
| { |
| "epoch": 0.7372520605017662, |
| "grad_norm": 0.4489855170249939, |
| "learning_rate": 0.00014944778271222013, |
| "loss": 0.1724, |
| "step": 16280 |
| }, |
| { |
| "epoch": 0.7377049180327869, |
| "grad_norm": 0.401915967464447, |
| "learning_rate": 0.0001493826838434795, |
| "loss": 0.1678, |
| "step": 16290 |
| }, |
| { |
| "epoch": 0.7381577755638076, |
| "grad_norm": 0.30955928564071655, |
| "learning_rate": 0.00014931755728575852, |
| "loss": 0.1657, |
| "step": 16300 |
| }, |
| { |
| "epoch": 0.7386106330948283, |
| "grad_norm": 0.43423953652381897, |
| "learning_rate": 0.00014925240307557376, |
| "loss": 0.1949, |
| "step": 16310 |
| }, |
| { |
| "epoch": 0.7390634906258491, |
| "grad_norm": 0.3125792443752289, |
| "learning_rate": 0.00014918722124945735, |
| "loss": 0.1707, |
| "step": 16320 |
| }, |
| { |
| "epoch": 0.7395163481568698, |
| "grad_norm": 0.6532090306282043, |
| "learning_rate": 0.00014912201184395685, |
| "loss": 0.1801, |
| "step": 16330 |
| }, |
| { |
| "epoch": 0.7399692056878906, |
| "grad_norm": 0.3583991825580597, |
| "learning_rate": 0.00014905677489563537, |
| "loss": 0.1661, |
| "step": 16340 |
| }, |
| { |
| "epoch": 0.7404220632189114, |
| "grad_norm": 0.3103122413158417, |
| "learning_rate": 0.0001489915104410714, |
| "loss": 0.1585, |
| "step": 16350 |
| }, |
| { |
| "epoch": 0.7408749207499321, |
| "grad_norm": 0.5210757851600647, |
| "learning_rate": 0.00014892621851685879, |
| "loss": 0.167, |
| "step": 16360 |
| }, |
| { |
| "epoch": 0.7413277782809529, |
| "grad_norm": 0.3990892767906189, |
| "learning_rate": 0.000148860899159607, |
| "loss": 0.1672, |
| "step": 16370 |
| }, |
| { |
| "epoch": 0.7417806358119735, |
| "grad_norm": 0.3420082926750183, |
| "learning_rate": 0.00014879555240594064, |
| "loss": 0.163, |
| "step": 16380 |
| }, |
| { |
| "epoch": 0.7422334933429943, |
| "grad_norm": 0.39129629731178284, |
| "learning_rate": 0.0001487301782924998, |
| "loss": 0.1637, |
| "step": 16390 |
| }, |
| { |
| "epoch": 0.742686350874015, |
| "grad_norm": 0.4083895981311798, |
| "learning_rate": 0.00014866477685593989, |
| "loss": 0.1748, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.7431392084050358, |
| "grad_norm": 0.5733197927474976, |
| "learning_rate": 0.00014859934813293165, |
| "loss": 0.167, |
| "step": 16410 |
| }, |
| { |
| "epoch": 0.7435920659360565, |
| "grad_norm": 0.45333850383758545, |
| "learning_rate": 0.0001485338921601611, |
| "loss": 0.1763, |
| "step": 16420 |
| }, |
| { |
| "epoch": 0.7440449234670773, |
| "grad_norm": 0.4153973162174225, |
| "learning_rate": 0.0001484684089743296, |
| "loss": 0.1784, |
| "step": 16430 |
| }, |
| { |
| "epoch": 0.744497780998098, |
| "grad_norm": 0.4648512899875641, |
| "learning_rate": 0.00014840289861215363, |
| "loss": 0.1816, |
| "step": 16440 |
| }, |
| { |
| "epoch": 0.7449506385291187, |
| "grad_norm": 0.3321559727191925, |
| "learning_rate": 0.00014833736111036507, |
| "loss": 0.179, |
| "step": 16450 |
| }, |
| { |
| "epoch": 0.7454034960601394, |
| "grad_norm": 0.4481167793273926, |
| "learning_rate": 0.0001482717965057109, |
| "loss": 0.1517, |
| "step": 16460 |
| }, |
| { |
| "epoch": 0.7458563535911602, |
| "grad_norm": 0.3410244584083557, |
| "learning_rate": 0.00014820620483495332, |
| "loss": 0.1651, |
| "step": 16470 |
| }, |
| { |
| "epoch": 0.746309211122181, |
| "grad_norm": 0.40615367889404297, |
| "learning_rate": 0.00014814058613486978, |
| "loss": 0.1855, |
| "step": 16480 |
| }, |
| { |
| "epoch": 0.7467620686532017, |
| "grad_norm": 0.5035942792892456, |
| "learning_rate": 0.00014807494044225282, |
| "loss": 0.1768, |
| "step": 16490 |
| }, |
| { |
| "epoch": 0.7472149261842225, |
| "grad_norm": 0.6195945739746094, |
| "learning_rate": 0.00014800926779391012, |
| "loss": 0.1635, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.7476677837152432, |
| "grad_norm": 0.43557047843933105, |
| "learning_rate": 0.00014794356822666444, |
| "loss": 0.1511, |
| "step": 16510 |
| }, |
| { |
| "epoch": 0.7481206412462639, |
| "grad_norm": 0.3755180537700653, |
| "learning_rate": 0.00014787784177735372, |
| "loss": 0.184, |
| "step": 16520 |
| }, |
| { |
| "epoch": 0.7485734987772846, |
| "grad_norm": 0.3500405251979828, |
| "learning_rate": 0.0001478120884828309, |
| "loss": 0.1753, |
| "step": 16530 |
| }, |
| { |
| "epoch": 0.7490263563083054, |
| "grad_norm": 0.43246424198150635, |
| "learning_rate": 0.00014774630837996404, |
| "loss": 0.1919, |
| "step": 16540 |
| }, |
| { |
| "epoch": 0.7494792138393261, |
| "grad_norm": 0.3369980752468109, |
| "learning_rate": 0.00014768050150563611, |
| "loss": 0.1831, |
| "step": 16550 |
| }, |
| { |
| "epoch": 0.7499320713703469, |
| "grad_norm": 0.3230205774307251, |
| "learning_rate": 0.0001476146678967453, |
| "loss": 0.1981, |
| "step": 16560 |
| }, |
| { |
| "epoch": 0.7503849289013677, |
| "grad_norm": 0.5379915237426758, |
| "learning_rate": 0.0001475488075902045, |
| "loss": 0.1676, |
| "step": 16570 |
| }, |
| { |
| "epoch": 0.7508377864323884, |
| "grad_norm": 0.3465295433998108, |
| "learning_rate": 0.00014748292062294183, |
| "loss": 0.1706, |
| "step": 16580 |
| }, |
| { |
| "epoch": 0.7512906439634092, |
| "grad_norm": 0.39468634128570557, |
| "learning_rate": 0.00014741700703190026, |
| "loss": 0.1481, |
| "step": 16590 |
| }, |
| { |
| "epoch": 0.7517435014944298, |
| "grad_norm": 0.40046387910842896, |
| "learning_rate": 0.00014735106685403768, |
| "loss": 0.1889, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.7521963590254506, |
| "grad_norm": 0.3478832542896271, |
| "learning_rate": 0.00014728510012632686, |
| "loss": 0.197, |
| "step": 16610 |
| }, |
| { |
| "epoch": 0.7526492165564713, |
| "grad_norm": 0.4016125202178955, |
| "learning_rate": 0.00014721910688575548, |
| "loss": 0.1549, |
| "step": 16620 |
| }, |
| { |
| "epoch": 0.7531020740874921, |
| "grad_norm": 0.415632039308548, |
| "learning_rate": 0.00014715308716932617, |
| "loss": 0.1722, |
| "step": 16630 |
| }, |
| { |
| "epoch": 0.7535549316185128, |
| "grad_norm": 0.4447801411151886, |
| "learning_rate": 0.00014708704101405626, |
| "loss": 0.1649, |
| "step": 16640 |
| }, |
| { |
| "epoch": 0.7540077891495336, |
| "grad_norm": 0.3012316823005676, |
| "learning_rate": 0.000147020968456978, |
| "loss": 0.1813, |
| "step": 16650 |
| }, |
| { |
| "epoch": 0.7544606466805543, |
| "grad_norm": 0.3829558789730072, |
| "learning_rate": 0.00014695486953513845, |
| "loss": 0.1678, |
| "step": 16660 |
| }, |
| { |
| "epoch": 0.754913504211575, |
| "grad_norm": 0.4798243045806885, |
| "learning_rate": 0.00014688874428559937, |
| "loss": 0.1805, |
| "step": 16670 |
| }, |
| { |
| "epoch": 0.7553663617425957, |
| "grad_norm": 0.40611493587493896, |
| "learning_rate": 0.00014682259274543738, |
| "loss": 0.1735, |
| "step": 16680 |
| }, |
| { |
| "epoch": 0.7558192192736165, |
| "grad_norm": 0.43578040599823, |
| "learning_rate": 0.00014675641495174376, |
| "loss": 0.1868, |
| "step": 16690 |
| }, |
| { |
| "epoch": 0.7562720768046373, |
| "grad_norm": 0.325115442276001, |
| "learning_rate": 0.00014669021094162457, |
| "loss": 0.1977, |
| "step": 16700 |
| }, |
| { |
| "epoch": 0.756724934335658, |
| "grad_norm": 0.32389017939567566, |
| "learning_rate": 0.00014662398075220053, |
| "loss": 0.1472, |
| "step": 16710 |
| }, |
| { |
| "epoch": 0.7571777918666788, |
| "grad_norm": 0.46646058559417725, |
| "learning_rate": 0.00014655772442060706, |
| "loss": 0.1864, |
| "step": 16720 |
| }, |
| { |
| "epoch": 0.7576306493976995, |
| "grad_norm": 0.3650246560573578, |
| "learning_rate": 0.00014649144198399422, |
| "loss": 0.1564, |
| "step": 16730 |
| }, |
| { |
| "epoch": 0.7580835069287202, |
| "grad_norm": 0.3701794147491455, |
| "learning_rate": 0.00014642513347952674, |
| "loss": 0.1871, |
| "step": 16740 |
| }, |
| { |
| "epoch": 0.7585363644597409, |
| "grad_norm": 0.2885875999927521, |
| "learning_rate": 0.00014635879894438395, |
| "loss": 0.1663, |
| "step": 16750 |
| }, |
| { |
| "epoch": 0.7589892219907617, |
| "grad_norm": 0.5447561144828796, |
| "learning_rate": 0.00014629243841575974, |
| "loss": 0.2039, |
| "step": 16760 |
| }, |
| { |
| "epoch": 0.7594420795217824, |
| "grad_norm": 0.36518388986587524, |
| "learning_rate": 0.00014622605193086264, |
| "loss": 0.1666, |
| "step": 16770 |
| }, |
| { |
| "epoch": 0.7598949370528032, |
| "grad_norm": 0.4391559958457947, |
| "learning_rate": 0.00014615963952691567, |
| "loss": 0.176, |
| "step": 16780 |
| }, |
| { |
| "epoch": 0.760347794583824, |
| "grad_norm": 0.3832821846008301, |
| "learning_rate": 0.00014609320124115641, |
| "loss": 0.1987, |
| "step": 16790 |
| }, |
| { |
| "epoch": 0.7608006521148447, |
| "grad_norm": 0.3192236125469208, |
| "learning_rate": 0.000146026737110837, |
| "loss": 0.1716, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.7612535096458654, |
| "grad_norm": 0.4159684479236603, |
| "learning_rate": 0.000145960247173224, |
| "loss": 0.1624, |
| "step": 16810 |
| }, |
| { |
| "epoch": 0.7617063671768861, |
| "grad_norm": 0.35425105690956116, |
| "learning_rate": 0.00014589373146559843, |
| "loss": 0.1704, |
| "step": 16820 |
| }, |
| { |
| "epoch": 0.7621592247079069, |
| "grad_norm": 0.3028491139411926, |
| "learning_rate": 0.00014582719002525582, |
| "loss": 0.1564, |
| "step": 16830 |
| }, |
| { |
| "epoch": 0.7626120822389276, |
| "grad_norm": 0.5623031258583069, |
| "learning_rate": 0.00014576062288950613, |
| "loss": 0.146, |
| "step": 16840 |
| }, |
| { |
| "epoch": 0.7630649397699484, |
| "grad_norm": 0.49645712971687317, |
| "learning_rate": 0.00014569403009567365, |
| "loss": 0.1825, |
| "step": 16850 |
| }, |
| { |
| "epoch": 0.7635177973009691, |
| "grad_norm": 0.36377304792404175, |
| "learning_rate": 0.00014562741168109715, |
| "loss": 0.1858, |
| "step": 16860 |
| }, |
| { |
| "epoch": 0.7639706548319899, |
| "grad_norm": 0.4291059672832489, |
| "learning_rate": 0.00014556076768312975, |
| "loss": 0.1638, |
| "step": 16870 |
| }, |
| { |
| "epoch": 0.7644235123630106, |
| "grad_norm": 0.5143736004829407, |
| "learning_rate": 0.0001454940981391388, |
| "loss": 0.2007, |
| "step": 16880 |
| }, |
| { |
| "epoch": 0.7648763698940313, |
| "grad_norm": 0.36746224761009216, |
| "learning_rate": 0.0001454274030865061, |
| "loss": 0.1696, |
| "step": 16890 |
| }, |
| { |
| "epoch": 0.765329227425052, |
| "grad_norm": 0.33395445346832275, |
| "learning_rate": 0.00014536068256262773, |
| "loss": 0.1762, |
| "step": 16900 |
| }, |
| { |
| "epoch": 0.7657820849560728, |
| "grad_norm": 0.24888546764850616, |
| "learning_rate": 0.00014529393660491405, |
| "loss": 0.1755, |
| "step": 16910 |
| }, |
| { |
| "epoch": 0.7662349424870936, |
| "grad_norm": 0.3185586929321289, |
| "learning_rate": 0.00014522716525078964, |
| "loss": 0.1797, |
| "step": 16920 |
| }, |
| { |
| "epoch": 0.7666878000181143, |
| "grad_norm": 0.5242403745651245, |
| "learning_rate": 0.00014516036853769334, |
| "loss": 0.1675, |
| "step": 16930 |
| }, |
| { |
| "epoch": 0.7671406575491351, |
| "grad_norm": 0.555658221244812, |
| "learning_rate": 0.00014509354650307817, |
| "loss": 0.1782, |
| "step": 16940 |
| }, |
| { |
| "epoch": 0.7675935150801558, |
| "grad_norm": 0.4107116162776947, |
| "learning_rate": 0.00014502669918441152, |
| "loss": 0.1678, |
| "step": 16950 |
| }, |
| { |
| "epoch": 0.7680463726111765, |
| "grad_norm": 0.27505549788475037, |
| "learning_rate": 0.00014495982661917473, |
| "loss": 0.1538, |
| "step": 16960 |
| }, |
| { |
| "epoch": 0.7684992301421972, |
| "grad_norm": 0.5204653143882751, |
| "learning_rate": 0.00014489292884486342, |
| "loss": 0.1879, |
| "step": 16970 |
| }, |
| { |
| "epoch": 0.768952087673218, |
| "grad_norm": 0.35485538840293884, |
| "learning_rate": 0.00014482600589898732, |
| "loss": 0.1701, |
| "step": 16980 |
| }, |
| { |
| "epoch": 0.7694049452042387, |
| "grad_norm": 0.3793945014476776, |
| "learning_rate": 0.00014475905781907033, |
| "loss": 0.1726, |
| "step": 16990 |
| }, |
| { |
| "epoch": 0.7698578027352595, |
| "grad_norm": 0.4990425109863281, |
| "learning_rate": 0.0001446920846426503, |
| "loss": 0.1692, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.7698578027352595, |
| "eval_chrf": 81.35576111494969, |
| "eval_loss": 0.13622412085533142, |
| "eval_runtime": 7.5338, |
| "eval_samples_per_second": 1.327, |
| "eval_steps_per_second": 0.133, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.7703106602662803, |
| "grad_norm": 0.47874826192855835, |
| "learning_rate": 0.00014462508640727926, |
| "loss": 0.1742, |
| "step": 17010 |
| }, |
| { |
| "epoch": 0.770763517797301, |
| "grad_norm": 0.34694787859916687, |
| "learning_rate": 0.00014455806315052328, |
| "loss": 0.1803, |
| "step": 17020 |
| }, |
| { |
| "epoch": 0.7712163753283217, |
| "grad_norm": 0.3238365650177002, |
| "learning_rate": 0.0001444910149099625, |
| "loss": 0.1822, |
| "step": 17030 |
| }, |
| { |
| "epoch": 0.7716692328593424, |
| "grad_norm": 0.4790160357952118, |
| "learning_rate": 0.0001444239417231909, |
| "loss": 0.1539, |
| "step": 17040 |
| }, |
| { |
| "epoch": 0.7721220903903632, |
| "grad_norm": 0.5026448369026184, |
| "learning_rate": 0.00014435684362781666, |
| "loss": 0.1889, |
| "step": 17050 |
| }, |
| { |
| "epoch": 0.7725749479213839, |
| "grad_norm": 0.3439713418483734, |
| "learning_rate": 0.00014428972066146185, |
| "loss": 0.1631, |
| "step": 17060 |
| }, |
| { |
| "epoch": 0.7730278054524047, |
| "grad_norm": 0.42995843291282654, |
| "learning_rate": 0.00014422257286176237, |
| "loss": 0.1802, |
| "step": 17070 |
| }, |
| { |
| "epoch": 0.7734806629834254, |
| "grad_norm": 0.33130544424057007, |
| "learning_rate": 0.0001441554002663682, |
| "loss": 0.1446, |
| "step": 17080 |
| }, |
| { |
| "epoch": 0.7739335205144462, |
| "grad_norm": 0.40167292952537537, |
| "learning_rate": 0.00014408820291294316, |
| "loss": 0.1611, |
| "step": 17090 |
| }, |
| { |
| "epoch": 0.774386378045467, |
| "grad_norm": 0.3448004126548767, |
| "learning_rate": 0.00014402098083916493, |
| "loss": 0.1491, |
| "step": 17100 |
| }, |
| { |
| "epoch": 0.7748392355764876, |
| "grad_norm": 0.29201722145080566, |
| "learning_rate": 0.00014395373408272516, |
| "loss": 0.1777, |
| "step": 17110 |
| }, |
| { |
| "epoch": 0.7752920931075084, |
| "grad_norm": 0.4543125033378601, |
| "learning_rate": 0.0001438864626813291, |
| "loss": 0.1725, |
| "step": 17120 |
| }, |
| { |
| "epoch": 0.7757449506385291, |
| "grad_norm": 0.37582921981811523, |
| "learning_rate": 0.0001438191666726961, |
| "loss": 0.1894, |
| "step": 17130 |
| }, |
| { |
| "epoch": 0.7761978081695499, |
| "grad_norm": 0.3258911371231079, |
| "learning_rate": 0.00014375184609455917, |
| "loss": 0.1624, |
| "step": 17140 |
| }, |
| { |
| "epoch": 0.7766506657005706, |
| "grad_norm": 0.508508563041687, |
| "learning_rate": 0.00014368450098466506, |
| "loss": 0.1739, |
| "step": 17150 |
| }, |
| { |
| "epoch": 0.7771035232315914, |
| "grad_norm": 0.35970497131347656, |
| "learning_rate": 0.00014361713138077433, |
| "loss": 0.1926, |
| "step": 17160 |
| }, |
| { |
| "epoch": 0.7775563807626121, |
| "grad_norm": 0.3001771867275238, |
| "learning_rate": 0.00014354973732066132, |
| "loss": 0.1668, |
| "step": 17170 |
| }, |
| { |
| "epoch": 0.7780092382936328, |
| "grad_norm": 0.38364094495773315, |
| "learning_rate": 0.00014348231884211399, |
| "loss": 0.1876, |
| "step": 17180 |
| }, |
| { |
| "epoch": 0.7784620958246535, |
| "grad_norm": 0.34611520171165466, |
| "learning_rate": 0.000143414875982934, |
| "loss": 0.185, |
| "step": 17190 |
| }, |
| { |
| "epoch": 0.7789149533556743, |
| "grad_norm": 0.3437662720680237, |
| "learning_rate": 0.00014334740878093675, |
| "loss": 0.1668, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.779367810886695, |
| "grad_norm": 0.34300467371940613, |
| "learning_rate": 0.00014327991727395125, |
| "loss": 0.1841, |
| "step": 17210 |
| }, |
| { |
| "epoch": 0.7798206684177158, |
| "grad_norm": 0.48151400685310364, |
| "learning_rate": 0.0001432124014998201, |
| "loss": 0.1598, |
| "step": 17220 |
| }, |
| { |
| "epoch": 0.7802735259487366, |
| "grad_norm": 0.41797640919685364, |
| "learning_rate": 0.00014314486149639963, |
| "loss": 0.1672, |
| "step": 17230 |
| }, |
| { |
| "epoch": 0.7807263834797573, |
| "grad_norm": 0.34387126564979553, |
| "learning_rate": 0.00014307729730155956, |
| "loss": 0.1826, |
| "step": 17240 |
| }, |
| { |
| "epoch": 0.781179241010778, |
| "grad_norm": 0.47516635060310364, |
| "learning_rate": 0.00014300970895318336, |
| "loss": 0.1627, |
| "step": 17250 |
| }, |
| { |
| "epoch": 0.7816320985417987, |
| "grad_norm": 0.4279870390892029, |
| "learning_rate": 0.000142942096489168, |
| "loss": 0.1616, |
| "step": 17260 |
| }, |
| { |
| "epoch": 0.7820849560728195, |
| "grad_norm": 0.31233519315719604, |
| "learning_rate": 0.00014287445994742382, |
| "loss": 0.1484, |
| "step": 17270 |
| }, |
| { |
| "epoch": 0.7825378136038402, |
| "grad_norm": 0.4351353645324707, |
| "learning_rate": 0.00014280679936587483, |
| "loss": 0.1739, |
| "step": 17280 |
| }, |
| { |
| "epoch": 0.782990671134861, |
| "grad_norm": 0.3079448640346527, |
| "learning_rate": 0.0001427391147824585, |
| "loss": 0.1645, |
| "step": 17290 |
| }, |
| { |
| "epoch": 0.7834435286658817, |
| "grad_norm": 0.542190670967102, |
| "learning_rate": 0.00014267140623512573, |
| "loss": 0.18, |
| "step": 17300 |
| }, |
| { |
| "epoch": 0.7838963861969025, |
| "grad_norm": 0.41900062561035156, |
| "learning_rate": 0.0001426036737618408, |
| "loss": 0.1575, |
| "step": 17310 |
| }, |
| { |
| "epoch": 0.7843492437279231, |
| "grad_norm": 0.4176177680492401, |
| "learning_rate": 0.00014253591740058148, |
| "loss": 0.147, |
| "step": 17320 |
| }, |
| { |
| "epoch": 0.7848021012589439, |
| "grad_norm": 0.3914748430252075, |
| "learning_rate": 0.00014246813718933897, |
| "loss": 0.1871, |
| "step": 17330 |
| }, |
| { |
| "epoch": 0.7852549587899647, |
| "grad_norm": 0.3752351403236389, |
| "learning_rate": 0.00014240033316611768, |
| "loss": 0.153, |
| "step": 17340 |
| }, |
| { |
| "epoch": 0.7857078163209854, |
| "grad_norm": 0.5136403441429138, |
| "learning_rate": 0.00014233250536893553, |
| "loss": 0.1738, |
| "step": 17350 |
| }, |
| { |
| "epoch": 0.7861606738520062, |
| "grad_norm": 0.6337378621101379, |
| "learning_rate": 0.00014226465383582375, |
| "loss": 0.1708, |
| "step": 17360 |
| }, |
| { |
| "epoch": 0.7866135313830269, |
| "grad_norm": 0.42952653765678406, |
| "learning_rate": 0.0001421967786048268, |
| "loss": 0.1901, |
| "step": 17370 |
| }, |
| { |
| "epoch": 0.7870663889140477, |
| "grad_norm": 0.352598637342453, |
| "learning_rate": 0.00014212887971400248, |
| "loss": 0.1735, |
| "step": 17380 |
| }, |
| { |
| "epoch": 0.7875192464450684, |
| "grad_norm": 0.4647071659564972, |
| "learning_rate": 0.00014206095720142186, |
| "loss": 0.17, |
| "step": 17390 |
| }, |
| { |
| "epoch": 0.7879721039760891, |
| "grad_norm": 0.5100772976875305, |
| "learning_rate": 0.00014199301110516923, |
| "loss": 0.1549, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.7884249615071098, |
| "grad_norm": 0.3351843059062958, |
| "learning_rate": 0.0001419250414633421, |
| "loss": 0.1597, |
| "step": 17410 |
| }, |
| { |
| "epoch": 0.7888778190381306, |
| "grad_norm": 0.45948684215545654, |
| "learning_rate": 0.00014185704831405125, |
| "loss": 0.211, |
| "step": 17420 |
| }, |
| { |
| "epoch": 0.7893306765691513, |
| "grad_norm": 0.4675339162349701, |
| "learning_rate": 0.00014178903169542056, |
| "loss": 0.1643, |
| "step": 17430 |
| }, |
| { |
| "epoch": 0.7897835341001721, |
| "grad_norm": 0.3438480496406555, |
| "learning_rate": 0.0001417209916455871, |
| "loss": 0.1509, |
| "step": 17440 |
| }, |
| { |
| "epoch": 0.7902363916311929, |
| "grad_norm": 0.399120569229126, |
| "learning_rate": 0.0001416529282027011, |
| "loss": 0.1437, |
| "step": 17450 |
| }, |
| { |
| "epoch": 0.7906892491622136, |
| "grad_norm": 0.4113789200782776, |
| "learning_rate": 0.00014158484140492584, |
| "loss": 0.1588, |
| "step": 17460 |
| }, |
| { |
| "epoch": 0.7911421066932343, |
| "grad_norm": 0.26559188961982727, |
| "learning_rate": 0.00014151673129043774, |
| "loss": 0.1866, |
| "step": 17470 |
| }, |
| { |
| "epoch": 0.791594964224255, |
| "grad_norm": 0.5179800391197205, |
| "learning_rate": 0.00014144859789742633, |
| "loss": 0.1728, |
| "step": 17480 |
| }, |
| { |
| "epoch": 0.7920478217552758, |
| "grad_norm": 0.3756614625453949, |
| "learning_rate": 0.00014138044126409414, |
| "loss": 0.1769, |
| "step": 17490 |
| }, |
| { |
| "epoch": 0.7925006792862965, |
| "grad_norm": 0.5483672618865967, |
| "learning_rate": 0.00014131226142865674, |
| "loss": 0.1763, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.7929535368173173, |
| "grad_norm": 0.36379554867744446, |
| "learning_rate": 0.0001412440584293427, |
| "loss": 0.1943, |
| "step": 17510 |
| }, |
| { |
| "epoch": 0.793406394348338, |
| "grad_norm": 0.39273983240127563, |
| "learning_rate": 0.00014117583230439365, |
| "loss": 0.1749, |
| "step": 17520 |
| }, |
| { |
| "epoch": 0.7938592518793588, |
| "grad_norm": 0.5314661264419556, |
| "learning_rate": 0.00014110758309206404, |
| "loss": 0.1744, |
| "step": 17530 |
| }, |
| { |
| "epoch": 0.7943121094103794, |
| "grad_norm": 0.4262920022010803, |
| "learning_rate": 0.00014103931083062142, |
| "loss": 0.1979, |
| "step": 17540 |
| }, |
| { |
| "epoch": 0.7947649669414002, |
| "grad_norm": 0.7184832096099854, |
| "learning_rate": 0.00014097101555834619, |
| "loss": 0.1747, |
| "step": 17550 |
| }, |
| { |
| "epoch": 0.795217824472421, |
| "grad_norm": 0.33851149678230286, |
| "learning_rate": 0.00014090269731353166, |
| "loss": 0.1597, |
| "step": 17560 |
| }, |
| { |
| "epoch": 0.7956706820034417, |
| "grad_norm": 0.42809876799583435, |
| "learning_rate": 0.00014083435613448402, |
| "loss": 0.1591, |
| "step": 17570 |
| }, |
| { |
| "epoch": 0.7961235395344625, |
| "grad_norm": 0.4343106150627136, |
| "learning_rate": 0.0001407659920595223, |
| "loss": 0.1664, |
| "step": 17580 |
| }, |
| { |
| "epoch": 0.7965763970654832, |
| "grad_norm": 0.2742310166358948, |
| "learning_rate": 0.0001406976051269784, |
| "loss": 0.1845, |
| "step": 17590 |
| }, |
| { |
| "epoch": 0.797029254596504, |
| "grad_norm": 0.3528534471988678, |
| "learning_rate": 0.00014062919537519703, |
| "loss": 0.1606, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.7974821121275247, |
| "grad_norm": 0.34193626046180725, |
| "learning_rate": 0.0001405607628425357, |
| "loss": 0.1507, |
| "step": 17610 |
| }, |
| { |
| "epoch": 0.7979349696585454, |
| "grad_norm": 0.39214470982551575, |
| "learning_rate": 0.00014049230756736468, |
| "loss": 0.2263, |
| "step": 17620 |
| }, |
| { |
| "epoch": 0.7983878271895661, |
| "grad_norm": 0.39479154348373413, |
| "learning_rate": 0.00014042382958806695, |
| "loss": 0.1968, |
| "step": 17630 |
| }, |
| { |
| "epoch": 0.7988406847205869, |
| "grad_norm": 0.4590633511543274, |
| "learning_rate": 0.00014035532894303833, |
| "loss": 0.1643, |
| "step": 17640 |
| }, |
| { |
| "epoch": 0.7992935422516076, |
| "grad_norm": 0.5160406827926636, |
| "learning_rate": 0.00014028680567068725, |
| "loss": 0.1696, |
| "step": 17650 |
| }, |
| { |
| "epoch": 0.7997463997826284, |
| "grad_norm": 0.3378937840461731, |
| "learning_rate": 0.00014021825980943485, |
| "loss": 0.1803, |
| "step": 17660 |
| }, |
| { |
| "epoch": 0.8001992573136492, |
| "grad_norm": 0.4125441014766693, |
| "learning_rate": 0.000140149691397715, |
| "loss": 0.1462, |
| "step": 17670 |
| }, |
| { |
| "epoch": 0.8006521148446699, |
| "grad_norm": 0.4796926975250244, |
| "learning_rate": 0.0001400811004739741, |
| "loss": 0.1605, |
| "step": 17680 |
| }, |
| { |
| "epoch": 0.8011049723756906, |
| "grad_norm": 0.31294503808021545, |
| "learning_rate": 0.00014001248707667122, |
| "loss": 0.1583, |
| "step": 17690 |
| }, |
| { |
| "epoch": 0.8015578299067113, |
| "grad_norm": 0.4358683228492737, |
| "learning_rate": 0.00013994385124427812, |
| "loss": 0.1648, |
| "step": 17700 |
| }, |
| { |
| "epoch": 0.8020106874377321, |
| "grad_norm": 0.4065668284893036, |
| "learning_rate": 0.000139875193015279, |
| "loss": 0.153, |
| "step": 17710 |
| }, |
| { |
| "epoch": 0.8024635449687528, |
| "grad_norm": 0.4194456934928894, |
| "learning_rate": 0.00013980651242817072, |
| "loss": 0.1767, |
| "step": 17720 |
| }, |
| { |
| "epoch": 0.8029164024997736, |
| "grad_norm": 0.343515008687973, |
| "learning_rate": 0.00013973780952146263, |
| "loss": 0.173, |
| "step": 17730 |
| }, |
| { |
| "epoch": 0.8033692600307943, |
| "grad_norm": 0.4212777614593506, |
| "learning_rate": 0.00013966908433367655, |
| "loss": 0.1804, |
| "step": 17740 |
| }, |
| { |
| "epoch": 0.8038221175618151, |
| "grad_norm": 0.5466294288635254, |
| "learning_rate": 0.00013960033690334694, |
| "loss": 0.1559, |
| "step": 17750 |
| }, |
| { |
| "epoch": 0.8042749750928357, |
| "grad_norm": 0.6396472454071045, |
| "learning_rate": 0.00013953156726902058, |
| "loss": 0.1683, |
| "step": 17760 |
| }, |
| { |
| "epoch": 0.8047278326238565, |
| "grad_norm": 0.3665732741355896, |
| "learning_rate": 0.00013946277546925673, |
| "loss": 0.1843, |
| "step": 17770 |
| }, |
| { |
| "epoch": 0.8051806901548773, |
| "grad_norm": 0.2784450054168701, |
| "learning_rate": 0.00013939396154262715, |
| "loss": 0.1835, |
| "step": 17780 |
| }, |
| { |
| "epoch": 0.805633547685898, |
| "grad_norm": 0.3693857789039612, |
| "learning_rate": 0.00013932512552771597, |
| "loss": 0.1727, |
| "step": 17790 |
| }, |
| { |
| "epoch": 0.8060864052169188, |
| "grad_norm": 0.3366953134536743, |
| "learning_rate": 0.00013925626746311967, |
| "loss": 0.1716, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.8065392627479395, |
| "grad_norm": 0.3310844898223877, |
| "learning_rate": 0.0001391873873874471, |
| "loss": 0.1738, |
| "step": 17810 |
| }, |
| { |
| "epoch": 0.8069921202789603, |
| "grad_norm": 0.31167569756507874, |
| "learning_rate": 0.0001391184853393195, |
| "loss": 0.1516, |
| "step": 17820 |
| }, |
| { |
| "epoch": 0.807444977809981, |
| "grad_norm": 0.3066498041152954, |
| "learning_rate": 0.00013904956135737042, |
| "loss": 0.1602, |
| "step": 17830 |
| }, |
| { |
| "epoch": 0.8078978353410017, |
| "grad_norm": 0.3827773928642273, |
| "learning_rate": 0.00013898061548024563, |
| "loss": 0.179, |
| "step": 17840 |
| }, |
| { |
| "epoch": 0.8083506928720224, |
| "grad_norm": 0.33546361327171326, |
| "learning_rate": 0.0001389116477466033, |
| "loss": 0.191, |
| "step": 17850 |
| }, |
| { |
| "epoch": 0.8088035504030432, |
| "grad_norm": 0.3183661699295044, |
| "learning_rate": 0.00013884265819511375, |
| "loss": 0.1717, |
| "step": 17860 |
| }, |
| { |
| "epoch": 0.809256407934064, |
| "grad_norm": 0.40386420488357544, |
| "learning_rate": 0.00013877364686445961, |
| "loss": 0.166, |
| "step": 17870 |
| }, |
| { |
| "epoch": 0.8097092654650847, |
| "grad_norm": 0.43872517347335815, |
| "learning_rate": 0.00013870461379333565, |
| "loss": 0.1755, |
| "step": 17880 |
| }, |
| { |
| "epoch": 0.8101621229961055, |
| "grad_norm": 0.41907742619514465, |
| "learning_rate": 0.00013863555902044884, |
| "loss": 0.1637, |
| "step": 17890 |
| }, |
| { |
| "epoch": 0.8106149805271262, |
| "grad_norm": 0.3590562343597412, |
| "learning_rate": 0.00013856648258451842, |
| "loss": 0.164, |
| "step": 17900 |
| }, |
| { |
| "epoch": 0.8110678380581469, |
| "grad_norm": 0.7209745049476624, |
| "learning_rate": 0.0001384973845242757, |
| "loss": 0.1882, |
| "step": 17910 |
| }, |
| { |
| "epoch": 0.8115206955891676, |
| "grad_norm": 0.4454377591609955, |
| "learning_rate": 0.00013842826487846406, |
| "loss": 0.2026, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.8119735531201884, |
| "grad_norm": 0.36102646589279175, |
| "learning_rate": 0.00013835912368583903, |
| "loss": 0.1707, |
| "step": 17930 |
| }, |
| { |
| "epoch": 0.8124264106512091, |
| "grad_norm": 0.42702364921569824, |
| "learning_rate": 0.00013828996098516828, |
| "loss": 0.1748, |
| "step": 17940 |
| }, |
| { |
| "epoch": 0.8128792681822299, |
| "grad_norm": 0.6162272691726685, |
| "learning_rate": 0.00013822077681523149, |
| "loss": 0.164, |
| "step": 17950 |
| }, |
| { |
| "epoch": 0.8133321257132506, |
| "grad_norm": 0.5108441114425659, |
| "learning_rate": 0.00013815157121482033, |
| "loss": 0.1623, |
| "step": 17960 |
| }, |
| { |
| "epoch": 0.8137849832442714, |
| "grad_norm": 0.84194016456604, |
| "learning_rate": 0.00013808234422273857, |
| "loss": 0.1859, |
| "step": 17970 |
| }, |
| { |
| "epoch": 0.814237840775292, |
| "grad_norm": 0.577024519443512, |
| "learning_rate": 0.0001380130958778019, |
| "loss": 0.1862, |
| "step": 17980 |
| }, |
| { |
| "epoch": 0.8146906983063128, |
| "grad_norm": 0.3528417944908142, |
| "learning_rate": 0.00013794382621883807, |
| "loss": 0.178, |
| "step": 17990 |
| }, |
| { |
| "epoch": 0.8151435558373336, |
| "grad_norm": 0.46698030829429626, |
| "learning_rate": 0.00013787453528468674, |
| "loss": 0.1615, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.8151435558373336, |
| "eval_chrf": 87.79281231152616, |
| "eval_loss": 0.14898350834846497, |
| "eval_runtime": 6.5857, |
| "eval_samples_per_second": 1.518, |
| "eval_steps_per_second": 0.152, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.8155964133683543, |
| "grad_norm": 0.3077142536640167, |
| "learning_rate": 0.00013780522311419944, |
| "loss": 0.1648, |
| "step": 18010 |
| }, |
| { |
| "epoch": 0.8160492708993751, |
| "grad_norm": 0.3573880195617676, |
| "learning_rate": 0.00013773588974623968, |
| "loss": 0.1724, |
| "step": 18020 |
| }, |
| { |
| "epoch": 0.8165021284303958, |
| "grad_norm": 0.27870693802833557, |
| "learning_rate": 0.00013766653521968283, |
| "loss": 0.1714, |
| "step": 18030 |
| }, |
| { |
| "epoch": 0.8169549859614166, |
| "grad_norm": 0.49699628353118896, |
| "learning_rate": 0.00013759715957341613, |
| "loss": 0.1805, |
| "step": 18040 |
| }, |
| { |
| "epoch": 0.8174078434924372, |
| "grad_norm": 0.281811386346817, |
| "learning_rate": 0.00013752776284633867, |
| "loss": 0.1554, |
| "step": 18050 |
| }, |
| { |
| "epoch": 0.817860701023458, |
| "grad_norm": 0.4690953195095062, |
| "learning_rate": 0.0001374583450773613, |
| "loss": 0.1842, |
| "step": 18060 |
| }, |
| { |
| "epoch": 0.8183135585544787, |
| "grad_norm": 0.3380923271179199, |
| "learning_rate": 0.0001373889063054068, |
| "loss": 0.1734, |
| "step": 18070 |
| }, |
| { |
| "epoch": 0.8187664160854995, |
| "grad_norm": 0.4177154302597046, |
| "learning_rate": 0.00013731944656940955, |
| "loss": 0.1864, |
| "step": 18080 |
| }, |
| { |
| "epoch": 0.8192192736165202, |
| "grad_norm": 0.3774815499782562, |
| "learning_rate": 0.00013724996590831586, |
| "loss": 0.1843, |
| "step": 18090 |
| }, |
| { |
| "epoch": 0.819672131147541, |
| "grad_norm": 0.4200029969215393, |
| "learning_rate": 0.00013718046436108364, |
| "loss": 0.1555, |
| "step": 18100 |
| }, |
| { |
| "epoch": 0.8201249886785618, |
| "grad_norm": 0.4777076840400696, |
| "learning_rate": 0.0001371109419666826, |
| "loss": 0.1599, |
| "step": 18110 |
| }, |
| { |
| "epoch": 0.8205778462095825, |
| "grad_norm": 0.3393424451351166, |
| "learning_rate": 0.00013704139876409406, |
| "loss": 0.1785, |
| "step": 18120 |
| }, |
| { |
| "epoch": 0.8210307037406032, |
| "grad_norm": 0.4316195249557495, |
| "learning_rate": 0.00013697183479231107, |
| "loss": 0.1823, |
| "step": 18130 |
| }, |
| { |
| "epoch": 0.8214835612716239, |
| "grad_norm": 0.3540068566799164, |
| "learning_rate": 0.0001369022500903383, |
| "loss": 0.1555, |
| "step": 18140 |
| }, |
| { |
| "epoch": 0.8219364188026447, |
| "grad_norm": 0.5195394158363342, |
| "learning_rate": 0.00013683264469719207, |
| "loss": 0.1789, |
| "step": 18150 |
| }, |
| { |
| "epoch": 0.8223892763336654, |
| "grad_norm": 0.5439714193344116, |
| "learning_rate": 0.00013676301865190023, |
| "loss": 0.1614, |
| "step": 18160 |
| }, |
| { |
| "epoch": 0.8228421338646862, |
| "grad_norm": 0.3994079530239105, |
| "learning_rate": 0.00013669337199350226, |
| "loss": 0.1764, |
| "step": 18170 |
| }, |
| { |
| "epoch": 0.8232949913957069, |
| "grad_norm": 0.40646135807037354, |
| "learning_rate": 0.00013662370476104924, |
| "loss": 0.1548, |
| "step": 18180 |
| }, |
| { |
| "epoch": 0.8237478489267277, |
| "grad_norm": 0.5411052703857422, |
| "learning_rate": 0.00013655401699360373, |
| "loss": 0.1716, |
| "step": 18190 |
| }, |
| { |
| "epoch": 0.8242007064577483, |
| "grad_norm": 0.35534271597862244, |
| "learning_rate": 0.0001364843087302398, |
| "loss": 0.1652, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.8246535639887691, |
| "grad_norm": 0.4019007384777069, |
| "learning_rate": 0.00013641458001004304, |
| "loss": 0.1776, |
| "step": 18210 |
| }, |
| { |
| "epoch": 0.8251064215197899, |
| "grad_norm": 0.6988927721977234, |
| "learning_rate": 0.0001363448308721105, |
| "loss": 0.1747, |
| "step": 18220 |
| }, |
| { |
| "epoch": 0.8255592790508106, |
| "grad_norm": 0.4218527674674988, |
| "learning_rate": 0.00013627506135555065, |
| "loss": 0.1903, |
| "step": 18230 |
| }, |
| { |
| "epoch": 0.8260121365818314, |
| "grad_norm": 0.321307510137558, |
| "learning_rate": 0.00013620527149948343, |
| "loss": 0.174, |
| "step": 18240 |
| }, |
| { |
| "epoch": 0.8264649941128521, |
| "grad_norm": 0.38911622762680054, |
| "learning_rate": 0.00013613546134304017, |
| "loss": 0.185, |
| "step": 18250 |
| }, |
| { |
| "epoch": 0.8269178516438729, |
| "grad_norm": 0.3723275363445282, |
| "learning_rate": 0.00013606563092536362, |
| "loss": 0.1894, |
| "step": 18260 |
| }, |
| { |
| "epoch": 0.8273707091748935, |
| "grad_norm": 0.4374692738056183, |
| "learning_rate": 0.00013599578028560778, |
| "loss": 0.1571, |
| "step": 18270 |
| }, |
| { |
| "epoch": 0.8278235667059143, |
| "grad_norm": 0.5195155143737793, |
| "learning_rate": 0.00013592590946293805, |
| "loss": 0.1629, |
| "step": 18280 |
| }, |
| { |
| "epoch": 0.828276424236935, |
| "grad_norm": 0.38242098689079285, |
| "learning_rate": 0.00013585601849653125, |
| "loss": 0.1831, |
| "step": 18290 |
| }, |
| { |
| "epoch": 0.8287292817679558, |
| "grad_norm": 0.43245652318000793, |
| "learning_rate": 0.0001357861074255753, |
| "loss": 0.1784, |
| "step": 18300 |
| }, |
| { |
| "epoch": 0.8291821392989766, |
| "grad_norm": 0.40913039445877075, |
| "learning_rate": 0.00013571617628926956, |
| "loss": 0.1589, |
| "step": 18310 |
| }, |
| { |
| "epoch": 0.8296349968299973, |
| "grad_norm": 0.3442098796367645, |
| "learning_rate": 0.00013564622512682453, |
| "loss": 0.1806, |
| "step": 18320 |
| }, |
| { |
| "epoch": 0.8300878543610181, |
| "grad_norm": 0.549633264541626, |
| "learning_rate": 0.00013557625397746202, |
| "loss": 0.1751, |
| "step": 18330 |
| }, |
| { |
| "epoch": 0.8305407118920388, |
| "grad_norm": 0.23728685081005096, |
| "learning_rate": 0.00013550626288041497, |
| "loss": 0.1571, |
| "step": 18340 |
| }, |
| { |
| "epoch": 0.8309935694230595, |
| "grad_norm": 0.4054383933544159, |
| "learning_rate": 0.00013543625187492755, |
| "loss": 0.1529, |
| "step": 18350 |
| }, |
| { |
| "epoch": 0.8314464269540802, |
| "grad_norm": 0.3019578754901886, |
| "learning_rate": 0.00013536622100025514, |
| "loss": 0.1614, |
| "step": 18360 |
| }, |
| { |
| "epoch": 0.831899284485101, |
| "grad_norm": 0.5014357566833496, |
| "learning_rate": 0.00013529617029566415, |
| "loss": 0.1703, |
| "step": 18370 |
| }, |
| { |
| "epoch": 0.8323521420161217, |
| "grad_norm": 0.4359058737754822, |
| "learning_rate": 0.00013522609980043214, |
| "loss": 0.2027, |
| "step": 18380 |
| }, |
| { |
| "epoch": 0.8328049995471425, |
| "grad_norm": 0.3514244556427002, |
| "learning_rate": 0.00013515600955384786, |
| "loss": 0.1515, |
| "step": 18390 |
| }, |
| { |
| "epoch": 0.8332578570781632, |
| "grad_norm": 0.4225936830043793, |
| "learning_rate": 0.00013508589959521105, |
| "loss": 0.1937, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.833710714609184, |
| "grad_norm": 0.5653186440467834, |
| "learning_rate": 0.0001350157699638325, |
| "loss": 0.1969, |
| "step": 18410 |
| }, |
| { |
| "epoch": 0.8341635721402046, |
| "grad_norm": 0.3789112865924835, |
| "learning_rate": 0.00013494562069903408, |
| "loss": 0.1673, |
| "step": 18420 |
| }, |
| { |
| "epoch": 0.8346164296712254, |
| "grad_norm": 0.3935513496398926, |
| "learning_rate": 0.0001348754518401486, |
| "loss": 0.1875, |
| "step": 18430 |
| }, |
| { |
| "epoch": 0.8350692872022462, |
| "grad_norm": 0.34713736176490784, |
| "learning_rate": 0.0001348052634265199, |
| "loss": 0.1584, |
| "step": 18440 |
| }, |
| { |
| "epoch": 0.8355221447332669, |
| "grad_norm": 0.395840048789978, |
| "learning_rate": 0.00013473505549750284, |
| "loss": 0.1761, |
| "step": 18450 |
| }, |
| { |
| "epoch": 0.8359750022642877, |
| "grad_norm": 0.47905436158180237, |
| "learning_rate": 0.00013466482809246303, |
| "loss": 0.1687, |
| "step": 18460 |
| }, |
| { |
| "epoch": 0.8364278597953084, |
| "grad_norm": 0.46361273527145386, |
| "learning_rate": 0.00013459458125077725, |
| "loss": 0.1912, |
| "step": 18470 |
| }, |
| { |
| "epoch": 0.8368807173263292, |
| "grad_norm": 0.37687766551971436, |
| "learning_rate": 0.00013452431501183304, |
| "loss": 0.1664, |
| "step": 18480 |
| }, |
| { |
| "epoch": 0.8373335748573498, |
| "grad_norm": 0.515882670879364, |
| "learning_rate": 0.00013445402941502881, |
| "loss": 0.1706, |
| "step": 18490 |
| }, |
| { |
| "epoch": 0.8377864323883706, |
| "grad_norm": 0.5330377817153931, |
| "learning_rate": 0.00013438372449977386, |
| "loss": 0.1863, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.8382392899193913, |
| "grad_norm": 0.2746626138687134, |
| "learning_rate": 0.0001343134003054883, |
| "loss": 0.1675, |
| "step": 18510 |
| }, |
| { |
| "epoch": 0.8386921474504121, |
| "grad_norm": 0.32701629400253296, |
| "learning_rate": 0.00013424305687160308, |
| "loss": 0.1727, |
| "step": 18520 |
| }, |
| { |
| "epoch": 0.8391450049814329, |
| "grad_norm": 0.3433604836463928, |
| "learning_rate": 0.00013417269423755994, |
| "loss": 0.1805, |
| "step": 18530 |
| }, |
| { |
| "epoch": 0.8395978625124536, |
| "grad_norm": 0.44374939799308777, |
| "learning_rate": 0.0001341023124428113, |
| "loss": 0.1723, |
| "step": 18540 |
| }, |
| { |
| "epoch": 0.8400507200434744, |
| "grad_norm": 0.2972099184989929, |
| "learning_rate": 0.00013403191152682045, |
| "loss": 0.1763, |
| "step": 18550 |
| }, |
| { |
| "epoch": 0.840503577574495, |
| "grad_norm": 0.4678293764591217, |
| "learning_rate": 0.0001339614915290613, |
| "loss": 0.1534, |
| "step": 18560 |
| }, |
| { |
| "epoch": 0.8409564351055158, |
| "grad_norm": 0.5384738445281982, |
| "learning_rate": 0.00013389105248901853, |
| "loss": 0.2031, |
| "step": 18570 |
| }, |
| { |
| "epoch": 0.8414092926365365, |
| "grad_norm": 0.4295203387737274, |
| "learning_rate": 0.00013382059444618744, |
| "loss": 0.1875, |
| "step": 18580 |
| }, |
| { |
| "epoch": 0.8418621501675573, |
| "grad_norm": 0.31987982988357544, |
| "learning_rate": 0.00013375011744007402, |
| "loss": 0.1756, |
| "step": 18590 |
| }, |
| { |
| "epoch": 0.842315007698578, |
| "grad_norm": 0.3537510931491852, |
| "learning_rate": 0.00013367962151019492, |
| "loss": 0.1782, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.8427678652295988, |
| "grad_norm": 0.4680691063404083, |
| "learning_rate": 0.0001336091066960773, |
| "loss": 0.1539, |
| "step": 18610 |
| }, |
| { |
| "epoch": 0.8432207227606195, |
| "grad_norm": 0.43693849444389343, |
| "learning_rate": 0.000133538573037259, |
| "loss": 0.1569, |
| "step": 18620 |
| }, |
| { |
| "epoch": 0.8436735802916403, |
| "grad_norm": 0.3837292790412903, |
| "learning_rate": 0.0001334680205732884, |
| "loss": 0.1864, |
| "step": 18630 |
| }, |
| { |
| "epoch": 0.844126437822661, |
| "grad_norm": 0.5856052041053772, |
| "learning_rate": 0.00013339744934372444, |
| "loss": 0.1763, |
| "step": 18640 |
| }, |
| { |
| "epoch": 0.8445792953536817, |
| "grad_norm": 0.2959698438644409, |
| "learning_rate": 0.00013332685938813657, |
| "loss": 0.1503, |
| "step": 18650 |
| }, |
| { |
| "epoch": 0.8450321528847025, |
| "grad_norm": 0.39290863275527954, |
| "learning_rate": 0.00013325625074610465, |
| "loss": 0.1512, |
| "step": 18660 |
| }, |
| { |
| "epoch": 0.8454850104157232, |
| "grad_norm": 0.400134414434433, |
| "learning_rate": 0.00013318562345721922, |
| "loss": 0.1758, |
| "step": 18670 |
| }, |
| { |
| "epoch": 0.845937867946744, |
| "grad_norm": 0.407697468996048, |
| "learning_rate": 0.0001331149775610811, |
| "loss": 0.1946, |
| "step": 18680 |
| }, |
| { |
| "epoch": 0.8463907254777647, |
| "grad_norm": 0.497427374124527, |
| "learning_rate": 0.00013304431309730158, |
| "loss": 0.2094, |
| "step": 18690 |
| }, |
| { |
| "epoch": 0.8468435830087855, |
| "grad_norm": 0.41305825114250183, |
| "learning_rate": 0.0001329736301055024, |
| "loss": 0.1808, |
| "step": 18700 |
| }, |
| { |
| "epoch": 0.8472964405398061, |
| "grad_norm": 0.39067190885543823, |
| "learning_rate": 0.0001329029286253157, |
| "loss": 0.1704, |
| "step": 18710 |
| }, |
| { |
| "epoch": 0.8477492980708269, |
| "grad_norm": 0.27752751111984253, |
| "learning_rate": 0.00013283220869638395, |
| "loss": 0.1722, |
| "step": 18720 |
| }, |
| { |
| "epoch": 0.8482021556018476, |
| "grad_norm": 0.3473440408706665, |
| "learning_rate": 0.00013276147035835994, |
| "loss": 0.165, |
| "step": 18730 |
| }, |
| { |
| "epoch": 0.8486550131328684, |
| "grad_norm": 0.3963128924369812, |
| "learning_rate": 0.00013269071365090682, |
| "loss": 0.1722, |
| "step": 18740 |
| }, |
| { |
| "epoch": 0.8491078706638892, |
| "grad_norm": 0.39070188999176025, |
| "learning_rate": 0.00013261993861369805, |
| "loss": 0.1658, |
| "step": 18750 |
| }, |
| { |
| "epoch": 0.8495607281949099, |
| "grad_norm": 0.3592500686645508, |
| "learning_rate": 0.00013254914528641732, |
| "loss": 0.1706, |
| "step": 18760 |
| }, |
| { |
| "epoch": 0.8500135857259307, |
| "grad_norm": 0.42990413308143616, |
| "learning_rate": 0.00013247833370875865, |
| "loss": 0.1676, |
| "step": 18770 |
| }, |
| { |
| "epoch": 0.8504664432569513, |
| "grad_norm": 0.41089728474617004, |
| "learning_rate": 0.00013240750392042621, |
| "loss": 0.1692, |
| "step": 18780 |
| }, |
| { |
| "epoch": 0.8509193007879721, |
| "grad_norm": 0.4188067615032196, |
| "learning_rate": 0.00013233665596113444, |
| "loss": 0.1655, |
| "step": 18790 |
| }, |
| { |
| "epoch": 0.8513721583189928, |
| "grad_norm": 0.41813355684280396, |
| "learning_rate": 0.00013226578987060795, |
| "loss": 0.1775, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.8518250158500136, |
| "grad_norm": 0.42807725071907043, |
| "learning_rate": 0.00013219490568858148, |
| "loss": 0.1607, |
| "step": 18810 |
| }, |
| { |
| "epoch": 0.8522778733810343, |
| "grad_norm": 0.4690854251384735, |
| "learning_rate": 0.00013212400345480001, |
| "loss": 0.1721, |
| "step": 18820 |
| }, |
| { |
| "epoch": 0.8527307309120551, |
| "grad_norm": 0.31033626198768616, |
| "learning_rate": 0.00013205308320901854, |
| "loss": 0.1679, |
| "step": 18830 |
| }, |
| { |
| "epoch": 0.8531835884430758, |
| "grad_norm": 0.5011230707168579, |
| "learning_rate": 0.0001319821449910022, |
| "loss": 0.1763, |
| "step": 18840 |
| }, |
| { |
| "epoch": 0.8536364459740966, |
| "grad_norm": 0.3080917000770569, |
| "learning_rate": 0.0001319111888405262, |
| "loss": 0.1638, |
| "step": 18850 |
| }, |
| { |
| "epoch": 0.8540893035051172, |
| "grad_norm": 0.39213791489601135, |
| "learning_rate": 0.00013184021479737586, |
| "loss": 0.1598, |
| "step": 18860 |
| }, |
| { |
| "epoch": 0.854542161036138, |
| "grad_norm": 0.38774001598358154, |
| "learning_rate": 0.00013176922290134645, |
| "loss": 0.1877, |
| "step": 18870 |
| }, |
| { |
| "epoch": 0.8549950185671588, |
| "grad_norm": 0.3466808497905731, |
| "learning_rate": 0.00013169821319224326, |
| "loss": 0.1871, |
| "step": 18880 |
| }, |
| { |
| "epoch": 0.8554478760981795, |
| "grad_norm": 0.2837083637714386, |
| "learning_rate": 0.00013162718570988166, |
| "loss": 0.1614, |
| "step": 18890 |
| }, |
| { |
| "epoch": 0.8559007336292003, |
| "grad_norm": 0.48217517137527466, |
| "learning_rate": 0.00013155614049408684, |
| "loss": 0.1823, |
| "step": 18900 |
| }, |
| { |
| "epoch": 0.856353591160221, |
| "grad_norm": 0.483885258436203, |
| "learning_rate": 0.00013148507758469407, |
| "loss": 0.1734, |
| "step": 18910 |
| }, |
| { |
| "epoch": 0.8568064486912418, |
| "grad_norm": 0.516879677772522, |
| "learning_rate": 0.00013141399702154848, |
| "loss": 0.1756, |
| "step": 18920 |
| }, |
| { |
| "epoch": 0.8572593062222624, |
| "grad_norm": 0.39171308279037476, |
| "learning_rate": 0.00013134289884450504, |
| "loss": 0.1522, |
| "step": 18930 |
| }, |
| { |
| "epoch": 0.8577121637532832, |
| "grad_norm": 0.4065245985984802, |
| "learning_rate": 0.0001312717830934287, |
| "loss": 0.1578, |
| "step": 18940 |
| }, |
| { |
| "epoch": 0.8581650212843039, |
| "grad_norm": 0.29680877923965454, |
| "learning_rate": 0.00013120064980819422, |
| "loss": 0.1882, |
| "step": 18950 |
| }, |
| { |
| "epoch": 0.8586178788153247, |
| "grad_norm": 0.29750049114227295, |
| "learning_rate": 0.00013112949902868617, |
| "loss": 0.1789, |
| "step": 18960 |
| }, |
| { |
| "epoch": 0.8590707363463455, |
| "grad_norm": 0.324982225894928, |
| "learning_rate": 0.00013105833079479897, |
| "loss": 0.1846, |
| "step": 18970 |
| }, |
| { |
| "epoch": 0.8595235938773662, |
| "grad_norm": 0.31095758080482483, |
| "learning_rate": 0.0001309871451464368, |
| "loss": 0.1584, |
| "step": 18980 |
| }, |
| { |
| "epoch": 0.859976451408387, |
| "grad_norm": 0.2947046458721161, |
| "learning_rate": 0.00013091594212351362, |
| "loss": 0.1834, |
| "step": 18990 |
| }, |
| { |
| "epoch": 0.8604293089394076, |
| "grad_norm": 0.40782859921455383, |
| "learning_rate": 0.00013084472176595307, |
| "loss": 0.1716, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.8604293089394076, |
| "eval_chrf": 75.47261980576427, |
| "eval_loss": 0.1526792198419571, |
| "eval_runtime": 26.5755, |
| "eval_samples_per_second": 0.376, |
| "eval_steps_per_second": 0.038, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.8608821664704284, |
| "grad_norm": 0.5305262804031372, |
| "learning_rate": 0.00013077348411368863, |
| "loss": 0.1745, |
| "step": 19010 |
| }, |
| { |
| "epoch": 0.8613350240014491, |
| "grad_norm": 0.3732976019382477, |
| "learning_rate": 0.0001307022292066634, |
| "loss": 0.1635, |
| "step": 19020 |
| }, |
| { |
| "epoch": 0.8617878815324699, |
| "grad_norm": 0.3454831838607788, |
| "learning_rate": 0.00013063095708483016, |
| "loss": 0.1764, |
| "step": 19030 |
| }, |
| { |
| "epoch": 0.8622407390634906, |
| "grad_norm": 0.4817901849746704, |
| "learning_rate": 0.00013055966778815128, |
| "loss": 0.1635, |
| "step": 19040 |
| }, |
| { |
| "epoch": 0.8626935965945114, |
| "grad_norm": 0.500036358833313, |
| "learning_rate": 0.00013048836135659891, |
| "loss": 0.154, |
| "step": 19050 |
| }, |
| { |
| "epoch": 0.8631464541255321, |
| "grad_norm": 0.2934643030166626, |
| "learning_rate": 0.00013041703783015472, |
| "loss": 0.1992, |
| "step": 19060 |
| }, |
| { |
| "epoch": 0.8635993116565529, |
| "grad_norm": 0.38243478536605835, |
| "learning_rate": 0.00013034569724880993, |
| "loss": 0.1561, |
| "step": 19070 |
| }, |
| { |
| "epoch": 0.8640521691875735, |
| "grad_norm": 0.25158563256263733, |
| "learning_rate": 0.0001302743396525654, |
| "loss": 0.1652, |
| "step": 19080 |
| }, |
| { |
| "epoch": 0.8645050267185943, |
| "grad_norm": 0.426620215177536, |
| "learning_rate": 0.00013020296508143143, |
| "loss": 0.1529, |
| "step": 19090 |
| }, |
| { |
| "epoch": 0.8649578842496151, |
| "grad_norm": 0.3732966184616089, |
| "learning_rate": 0.000130131573575428, |
| "loss": 0.1948, |
| "step": 19100 |
| }, |
| { |
| "epoch": 0.8654107417806358, |
| "grad_norm": 0.4865129590034485, |
| "learning_rate": 0.00013006016517458436, |
| "loss": 0.1756, |
| "step": 19110 |
| }, |
| { |
| "epoch": 0.8658635993116566, |
| "grad_norm": 0.3668878674507141, |
| "learning_rate": 0.00012998873991893948, |
| "loss": 0.1642, |
| "step": 19120 |
| }, |
| { |
| "epoch": 0.8663164568426773, |
| "grad_norm": 0.3702840209007263, |
| "learning_rate": 0.00012991729784854158, |
| "loss": 0.1598, |
| "step": 19130 |
| }, |
| { |
| "epoch": 0.8667693143736981, |
| "grad_norm": 0.40551936626434326, |
| "learning_rate": 0.0001298458390034484, |
| "loss": 0.1824, |
| "step": 19140 |
| }, |
| { |
| "epoch": 0.8672221719047187, |
| "grad_norm": 0.41427895426750183, |
| "learning_rate": 0.0001297743634237271, |
| "loss": 0.1862, |
| "step": 19150 |
| }, |
| { |
| "epoch": 0.8676750294357395, |
| "grad_norm": 0.42724162340164185, |
| "learning_rate": 0.00012970287114945418, |
| "loss": 0.1645, |
| "step": 19160 |
| }, |
| { |
| "epoch": 0.8681278869667602, |
| "grad_norm": 0.3212936818599701, |
| "learning_rate": 0.00012963136222071553, |
| "loss": 0.1619, |
| "step": 19170 |
| }, |
| { |
| "epoch": 0.868580744497781, |
| "grad_norm": 0.4648410975933075, |
| "learning_rate": 0.00012955983667760636, |
| "loss": 0.169, |
| "step": 19180 |
| }, |
| { |
| "epoch": 0.8690336020288018, |
| "grad_norm": 0.4471531808376312, |
| "learning_rate": 0.00012948829456023116, |
| "loss": 0.1645, |
| "step": 19190 |
| }, |
| { |
| "epoch": 0.8694864595598225, |
| "grad_norm": 0.518792986869812, |
| "learning_rate": 0.00012941673590870383, |
| "loss": 0.173, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.8699393170908433, |
| "grad_norm": 0.40794938802719116, |
| "learning_rate": 0.00012934516076314737, |
| "loss": 0.1732, |
| "step": 19210 |
| }, |
| { |
| "epoch": 0.8703921746218639, |
| "grad_norm": 0.37186163663864136, |
| "learning_rate": 0.00012927356916369424, |
| "loss": 0.1667, |
| "step": 19220 |
| }, |
| { |
| "epoch": 0.8708450321528847, |
| "grad_norm": 0.2797354757785797, |
| "learning_rate": 0.0001292019611504859, |
| "loss": 0.1723, |
| "step": 19230 |
| }, |
| { |
| "epoch": 0.8712978896839054, |
| "grad_norm": 0.43414950370788574, |
| "learning_rate": 0.00012913033676367316, |
| "loss": 0.179, |
| "step": 19240 |
| }, |
| { |
| "epoch": 0.8717507472149262, |
| "grad_norm": 0.41452449560165405, |
| "learning_rate": 0.000129058696043416, |
| "loss": 0.1482, |
| "step": 19250 |
| }, |
| { |
| "epoch": 0.8722036047459469, |
| "grad_norm": 0.3870978057384491, |
| "learning_rate": 0.00012898703902988352, |
| "loss": 0.1714, |
| "step": 19260 |
| }, |
| { |
| "epoch": 0.8726564622769677, |
| "grad_norm": 0.4780474007129669, |
| "learning_rate": 0.00012891536576325395, |
| "loss": 0.1803, |
| "step": 19270 |
| }, |
| { |
| "epoch": 0.8731093198079884, |
| "grad_norm": 0.3603726029396057, |
| "learning_rate": 0.00012884367628371464, |
| "loss": 0.1755, |
| "step": 19280 |
| }, |
| { |
| "epoch": 0.8735621773390091, |
| "grad_norm": 0.43098974227905273, |
| "learning_rate": 0.00012877197063146208, |
| "loss": 0.1659, |
| "step": 19290 |
| }, |
| { |
| "epoch": 0.8740150348700298, |
| "grad_norm": 0.27953872084617615, |
| "learning_rate": 0.00012870024884670172, |
| "loss": 0.1511, |
| "step": 19300 |
| }, |
| { |
| "epoch": 0.8744678924010506, |
| "grad_norm": 0.43370550870895386, |
| "learning_rate": 0.0001286285109696482, |
| "loss": 0.1984, |
| "step": 19310 |
| }, |
| { |
| "epoch": 0.8749207499320714, |
| "grad_norm": 0.45979931950569153, |
| "learning_rate": 0.00012855675704052507, |
| "loss": 0.2006, |
| "step": 19320 |
| }, |
| { |
| "epoch": 0.8753736074630921, |
| "grad_norm": 0.40239641070365906, |
| "learning_rate": 0.00012848498709956493, |
| "loss": 0.1909, |
| "step": 19330 |
| }, |
| { |
| "epoch": 0.8758264649941129, |
| "grad_norm": 0.3627179265022278, |
| "learning_rate": 0.00012841320118700932, |
| "loss": 0.1714, |
| "step": 19340 |
| }, |
| { |
| "epoch": 0.8762793225251336, |
| "grad_norm": 0.4519731104373932, |
| "learning_rate": 0.00012834139934310882, |
| "loss": 0.1651, |
| "step": 19350 |
| }, |
| { |
| "epoch": 0.8767321800561544, |
| "grad_norm": 0.43609437346458435, |
| "learning_rate": 0.00012826958160812278, |
| "loss": 0.18, |
| "step": 19360 |
| }, |
| { |
| "epoch": 0.877185037587175, |
| "grad_norm": 0.5031613111495972, |
| "learning_rate": 0.00012819774802231968, |
| "loss": 0.1814, |
| "step": 19370 |
| }, |
| { |
| "epoch": 0.8776378951181958, |
| "grad_norm": 0.32297253608703613, |
| "learning_rate": 0.00012812589862597665, |
| "loss": 0.162, |
| "step": 19380 |
| }, |
| { |
| "epoch": 0.8780907526492165, |
| "grad_norm": 0.507293701171875, |
| "learning_rate": 0.0001280540334593799, |
| "loss": 0.1762, |
| "step": 19390 |
| }, |
| { |
| "epoch": 0.8785436101802373, |
| "grad_norm": 0.24356497824192047, |
| "learning_rate": 0.0001279821525628244, |
| "loss": 0.1643, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.8789964677112581, |
| "grad_norm": 0.5529003739356995, |
| "learning_rate": 0.00012791025597661386, |
| "loss": 0.152, |
| "step": 19410 |
| }, |
| { |
| "epoch": 0.8794493252422788, |
| "grad_norm": 0.4408567249774933, |
| "learning_rate": 0.00012783834374106086, |
| "loss": 0.1831, |
| "step": 19420 |
| }, |
| { |
| "epoch": 0.8799021827732996, |
| "grad_norm": 0.5482146143913269, |
| "learning_rate": 0.00012776641589648676, |
| "loss": 0.1752, |
| "step": 19430 |
| }, |
| { |
| "epoch": 0.8803550403043202, |
| "grad_norm": 0.4384392499923706, |
| "learning_rate": 0.00012769447248322174, |
| "loss": 0.1712, |
| "step": 19440 |
| }, |
| { |
| "epoch": 0.880807897835341, |
| "grad_norm": 0.46668869256973267, |
| "learning_rate": 0.00012762251354160454, |
| "loss": 0.1736, |
| "step": 19450 |
| }, |
| { |
| "epoch": 0.8812607553663617, |
| "grad_norm": 0.2595170736312866, |
| "learning_rate": 0.0001275505391119827, |
| "loss": 0.1724, |
| "step": 19460 |
| }, |
| { |
| "epoch": 0.8817136128973825, |
| "grad_norm": 0.4810440242290497, |
| "learning_rate": 0.00012747854923471247, |
| "loss": 0.1842, |
| "step": 19470 |
| }, |
| { |
| "epoch": 0.8821664704284032, |
| "grad_norm": 0.43019989132881165, |
| "learning_rate": 0.00012740654395015875, |
| "loss": 0.1953, |
| "step": 19480 |
| }, |
| { |
| "epoch": 0.882619327959424, |
| "grad_norm": 0.4522167146205902, |
| "learning_rate": 0.00012733452329869504, |
| "loss": 0.1633, |
| "step": 19490 |
| }, |
| { |
| "epoch": 0.8830721854904447, |
| "grad_norm": 0.34009966254234314, |
| "learning_rate": 0.00012726248732070346, |
| "loss": 0.1387, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.8835250430214654, |
| "grad_norm": 0.3226310908794403, |
| "learning_rate": 0.00012719043605657478, |
| "loss": 0.1698, |
| "step": 19510 |
| }, |
| { |
| "epoch": 0.8839779005524862, |
| "grad_norm": 0.5143437385559082, |
| "learning_rate": 0.0001271183695467083, |
| "loss": 0.165, |
| "step": 19520 |
| }, |
| { |
| "epoch": 0.8844307580835069, |
| "grad_norm": 0.5247877836227417, |
| "learning_rate": 0.00012704628783151184, |
| "loss": 0.182, |
| "step": 19530 |
| }, |
| { |
| "epoch": 0.8848836156145277, |
| "grad_norm": 0.3131117522716522, |
| "learning_rate": 0.00012697419095140182, |
| "loss": 0.1618, |
| "step": 19540 |
| }, |
| { |
| "epoch": 0.8853364731455484, |
| "grad_norm": 0.4093931019306183, |
| "learning_rate": 0.00012690207894680306, |
| "loss": 0.1672, |
| "step": 19550 |
| }, |
| { |
| "epoch": 0.8857893306765692, |
| "grad_norm": 0.4105209708213806, |
| "learning_rate": 0.000126829951858149, |
| "loss": 0.186, |
| "step": 19560 |
| }, |
| { |
| "epoch": 0.8862421882075899, |
| "grad_norm": 0.4243008494377136, |
| "learning_rate": 0.00012675780972588142, |
| "loss": 0.1784, |
| "step": 19570 |
| }, |
| { |
| "epoch": 0.8866950457386107, |
| "grad_norm": 0.43030065298080444, |
| "learning_rate": 0.00012668565259045056, |
| "loss": 0.1774, |
| "step": 19580 |
| }, |
| { |
| "epoch": 0.8871479032696313, |
| "grad_norm": 0.42522814869880676, |
| "learning_rate": 0.00012661348049231507, |
| "loss": 0.1906, |
| "step": 19590 |
| }, |
| { |
| "epoch": 0.8876007608006521, |
| "grad_norm": 0.5001883506774902, |
| "learning_rate": 0.00012654129347194207, |
| "loss": 0.1915, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.8880536183316728, |
| "grad_norm": 0.31651952862739563, |
| "learning_rate": 0.00012646909156980692, |
| "loss": 0.1562, |
| "step": 19610 |
| }, |
| { |
| "epoch": 0.8885064758626936, |
| "grad_norm": 0.35776180028915405, |
| "learning_rate": 0.00012639687482639343, |
| "loss": 0.1728, |
| "step": 19620 |
| }, |
| { |
| "epoch": 0.8889593333937144, |
| "grad_norm": 0.39580416679382324, |
| "learning_rate": 0.00012632464328219363, |
| "loss": 0.1799, |
| "step": 19630 |
| }, |
| { |
| "epoch": 0.8894121909247351, |
| "grad_norm": 0.3365071713924408, |
| "learning_rate": 0.00012625239697770804, |
| "loss": 0.1586, |
| "step": 19640 |
| }, |
| { |
| "epoch": 0.8898650484557559, |
| "grad_norm": 0.3606979548931122, |
| "learning_rate": 0.00012618013595344516, |
| "loss": 0.1815, |
| "step": 19650 |
| }, |
| { |
| "epoch": 0.8903179059867765, |
| "grad_norm": 0.43666747212409973, |
| "learning_rate": 0.00012610786024992197, |
| "loss": 0.1538, |
| "step": 19660 |
| }, |
| { |
| "epoch": 0.8907707635177973, |
| "grad_norm": 0.4219381511211395, |
| "learning_rate": 0.00012603556990766366, |
| "loss": 0.1796, |
| "step": 19670 |
| }, |
| { |
| "epoch": 0.891223621048818, |
| "grad_norm": 0.38019394874572754, |
| "learning_rate": 0.00012596326496720355, |
| "loss": 0.1694, |
| "step": 19680 |
| }, |
| { |
| "epoch": 0.8916764785798388, |
| "grad_norm": 0.31997179985046387, |
| "learning_rate": 0.00012589094546908315, |
| "loss": 0.1744, |
| "step": 19690 |
| }, |
| { |
| "epoch": 0.8921293361108595, |
| "grad_norm": 0.365190714597702, |
| "learning_rate": 0.0001258186114538522, |
| "loss": 0.2, |
| "step": 19700 |
| }, |
| { |
| "epoch": 0.8925821936418803, |
| "grad_norm": 0.4236038029193878, |
| "learning_rate": 0.00012574626296206855, |
| "loss": 0.159, |
| "step": 19710 |
| }, |
| { |
| "epoch": 0.893035051172901, |
| "grad_norm": 0.5261380672454834, |
| "learning_rate": 0.00012567390003429812, |
| "loss": 0.178, |
| "step": 19720 |
| }, |
| { |
| "epoch": 0.8934879087039217, |
| "grad_norm": 0.5068031549453735, |
| "learning_rate": 0.00012560152271111504, |
| "loss": 0.1615, |
| "step": 19730 |
| }, |
| { |
| "epoch": 0.8939407662349425, |
| "grad_norm": 0.3815973401069641, |
| "learning_rate": 0.00012552913103310134, |
| "loss": 0.1653, |
| "step": 19740 |
| }, |
| { |
| "epoch": 0.8943936237659632, |
| "grad_norm": 0.309400349855423, |
| "learning_rate": 0.0001254567250408473, |
| "loss": 0.1759, |
| "step": 19750 |
| }, |
| { |
| "epoch": 0.894846481296984, |
| "grad_norm": 0.3322960138320923, |
| "learning_rate": 0.000125384304774951, |
| "loss": 0.1803, |
| "step": 19760 |
| }, |
| { |
| "epoch": 0.8952993388280047, |
| "grad_norm": 0.40339186787605286, |
| "learning_rate": 0.00012531187027601872, |
| "loss": 0.1832, |
| "step": 19770 |
| }, |
| { |
| "epoch": 0.8957521963590255, |
| "grad_norm": 0.47105276584625244, |
| "learning_rate": 0.0001252394215846646, |
| "loss": 0.1772, |
| "step": 19780 |
| }, |
| { |
| "epoch": 0.8962050538900462, |
| "grad_norm": 0.42406246066093445, |
| "learning_rate": 0.00012516695874151086, |
| "loss": 0.1874, |
| "step": 19790 |
| }, |
| { |
| "epoch": 0.8966579114210669, |
| "grad_norm": 0.4142908453941345, |
| "learning_rate": 0.00012509448178718753, |
| "loss": 0.1702, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.8971107689520876, |
| "grad_norm": 0.3621874451637268, |
| "learning_rate": 0.00012502199076233254, |
| "loss": 0.149, |
| "step": 19810 |
| }, |
| { |
| "epoch": 0.8975636264831084, |
| "grad_norm": 0.29331645369529724, |
| "learning_rate": 0.00012494948570759186, |
| "loss": 0.1819, |
| "step": 19820 |
| }, |
| { |
| "epoch": 0.8980164840141291, |
| "grad_norm": 0.32960814237594604, |
| "learning_rate": 0.00012487696666361924, |
| "loss": 0.1852, |
| "step": 19830 |
| }, |
| { |
| "epoch": 0.8984693415451499, |
| "grad_norm": 0.4001091420650482, |
| "learning_rate": 0.00012480443367107616, |
| "loss": 0.1491, |
| "step": 19840 |
| }, |
| { |
| "epoch": 0.8989221990761707, |
| "grad_norm": 0.31662559509277344, |
| "learning_rate": 0.00012473188677063215, |
| "loss": 0.1681, |
| "step": 19850 |
| }, |
| { |
| "epoch": 0.8993750566071914, |
| "grad_norm": 0.37801915407180786, |
| "learning_rate": 0.00012465932600296432, |
| "loss": 0.1531, |
| "step": 19860 |
| }, |
| { |
| "epoch": 0.8998279141382122, |
| "grad_norm": 0.43345585465431213, |
| "learning_rate": 0.00012458675140875774, |
| "loss": 0.1482, |
| "step": 19870 |
| }, |
| { |
| "epoch": 0.9002807716692328, |
| "grad_norm": 0.4435916543006897, |
| "learning_rate": 0.0001245141630287051, |
| "loss": 0.1631, |
| "step": 19880 |
| }, |
| { |
| "epoch": 0.9007336292002536, |
| "grad_norm": 0.35654473304748535, |
| "learning_rate": 0.00012444156090350687, |
| "loss": 0.1535, |
| "step": 19890 |
| }, |
| { |
| "epoch": 0.9011864867312743, |
| "grad_norm": 0.40578678250312805, |
| "learning_rate": 0.0001243689450738712, |
| "loss": 0.1862, |
| "step": 19900 |
| }, |
| { |
| "epoch": 0.9016393442622951, |
| "grad_norm": 0.386254221200943, |
| "learning_rate": 0.000124296315580514, |
| "loss": 0.164, |
| "step": 19910 |
| }, |
| { |
| "epoch": 0.9020922017933158, |
| "grad_norm": 0.4299822747707367, |
| "learning_rate": 0.00012422367246415873, |
| "loss": 0.1852, |
| "step": 19920 |
| }, |
| { |
| "epoch": 0.9025450593243366, |
| "grad_norm": 0.4686278700828552, |
| "learning_rate": 0.0001241510157655366, |
| "loss": 0.1773, |
| "step": 19930 |
| }, |
| { |
| "epoch": 0.9029979168553574, |
| "grad_norm": 0.35422420501708984, |
| "learning_rate": 0.00012407834552538637, |
| "loss": 0.1786, |
| "step": 19940 |
| }, |
| { |
| "epoch": 0.903450774386378, |
| "grad_norm": 0.3344101905822754, |
| "learning_rate": 0.00012400566178445436, |
| "loss": 0.1601, |
| "step": 19950 |
| }, |
| { |
| "epoch": 0.9039036319173988, |
| "grad_norm": 0.4716935157775879, |
| "learning_rate": 0.00012393296458349456, |
| "loss": 0.1395, |
| "step": 19960 |
| }, |
| { |
| "epoch": 0.9043564894484195, |
| "grad_norm": 0.3622031509876251, |
| "learning_rate": 0.0001238602539632684, |
| "loss": 0.1645, |
| "step": 19970 |
| }, |
| { |
| "epoch": 0.9048093469794403, |
| "grad_norm": 0.4005542993545532, |
| "learning_rate": 0.00012378752996454494, |
| "loss": 0.1799, |
| "step": 19980 |
| }, |
| { |
| "epoch": 0.905262204510461, |
| "grad_norm": 0.4990575909614563, |
| "learning_rate": 0.0001237147926281007, |
| "loss": 0.1653, |
| "step": 19990 |
| }, |
| { |
| "epoch": 0.9057150620414818, |
| "grad_norm": 0.47105804085731506, |
| "learning_rate": 0.00012364204199471956, |
| "loss": 0.1906, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.9057150620414818, |
| "eval_chrf": 82.21479011673927, |
| "eval_loss": 0.16131897270679474, |
| "eval_runtime": 10.4072, |
| "eval_samples_per_second": 0.961, |
| "eval_steps_per_second": 0.096, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.9061679195725025, |
| "grad_norm": 0.3149387240409851, |
| "learning_rate": 0.00012356927810519306, |
| "loss": 0.1668, |
| "step": 20010 |
| }, |
| { |
| "epoch": 0.9066207771035232, |
| "grad_norm": 0.3994668126106262, |
| "learning_rate": 0.00012349650100032004, |
| "loss": 0.1893, |
| "step": 20020 |
| }, |
| { |
| "epoch": 0.9070736346345439, |
| "grad_norm": 0.3106105327606201, |
| "learning_rate": 0.0001234237107209068, |
| "loss": 0.1739, |
| "step": 20030 |
| }, |
| { |
| "epoch": 0.9075264921655647, |
| "grad_norm": 0.3529313802719116, |
| "learning_rate": 0.000123350907307767, |
| "loss": 0.1554, |
| "step": 20040 |
| }, |
| { |
| "epoch": 0.9079793496965854, |
| "grad_norm": 0.4536036550998688, |
| "learning_rate": 0.00012327809080172163, |
| "loss": 0.1773, |
| "step": 20050 |
| }, |
| { |
| "epoch": 0.9084322072276062, |
| "grad_norm": 0.48275458812713623, |
| "learning_rate": 0.00012320526124359917, |
| "loss": 0.1731, |
| "step": 20060 |
| }, |
| { |
| "epoch": 0.908885064758627, |
| "grad_norm": 0.3880382776260376, |
| "learning_rate": 0.00012313241867423523, |
| "loss": 0.1476, |
| "step": 20070 |
| }, |
| { |
| "epoch": 0.9093379222896477, |
| "grad_norm": 0.3142046332359314, |
| "learning_rate": 0.00012305956313447285, |
| "loss": 0.1439, |
| "step": 20080 |
| }, |
| { |
| "epoch": 0.9097907798206685, |
| "grad_norm": 0.37252524495124817, |
| "learning_rate": 0.00012298669466516223, |
| "loss": 0.1763, |
| "step": 20090 |
| }, |
| { |
| "epoch": 0.9102436373516891, |
| "grad_norm": 0.3946685492992401, |
| "learning_rate": 0.00012291381330716095, |
| "loss": 0.1838, |
| "step": 20100 |
| }, |
| { |
| "epoch": 0.9106964948827099, |
| "grad_norm": 0.5468018054962158, |
| "learning_rate": 0.00012284091910133368, |
| "loss": 0.1552, |
| "step": 20110 |
| }, |
| { |
| "epoch": 0.9111493524137306, |
| "grad_norm": 0.38861414790153503, |
| "learning_rate": 0.00012276801208855245, |
| "loss": 0.1724, |
| "step": 20120 |
| }, |
| { |
| "epoch": 0.9116022099447514, |
| "grad_norm": 0.4579319655895233, |
| "learning_rate": 0.00012269509230969632, |
| "loss": 0.1714, |
| "step": 20130 |
| }, |
| { |
| "epoch": 0.9120550674757721, |
| "grad_norm": 0.3694315552711487, |
| "learning_rate": 0.0001226221598056516, |
| "loss": 0.1831, |
| "step": 20140 |
| }, |
| { |
| "epoch": 0.9125079250067929, |
| "grad_norm": 0.391891747713089, |
| "learning_rate": 0.00012254921461731167, |
| "loss": 0.152, |
| "step": 20150 |
| }, |
| { |
| "epoch": 0.9129607825378137, |
| "grad_norm": 0.4429548978805542, |
| "learning_rate": 0.0001224762567855771, |
| "loss": 0.1794, |
| "step": 20160 |
| }, |
| { |
| "epoch": 0.9134136400688343, |
| "grad_norm": 0.6164150834083557, |
| "learning_rate": 0.00012240328635135548, |
| "loss": 0.18, |
| "step": 20170 |
| }, |
| { |
| "epoch": 0.913866497599855, |
| "grad_norm": 0.3344804644584656, |
| "learning_rate": 0.00012233030335556157, |
| "loss": 0.1473, |
| "step": 20180 |
| }, |
| { |
| "epoch": 0.9143193551308758, |
| "grad_norm": 0.34692907333374023, |
| "learning_rate": 0.00012225730783911698, |
| "loss": 0.175, |
| "step": 20190 |
| }, |
| { |
| "epoch": 0.9147722126618966, |
| "grad_norm": 0.39275220036506653, |
| "learning_rate": 0.00012218429984295047, |
| "loss": 0.1679, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.9152250701929173, |
| "grad_norm": 0.3784923851490021, |
| "learning_rate": 0.0001221112794079979, |
| "loss": 0.1685, |
| "step": 20210 |
| }, |
| { |
| "epoch": 0.9156779277239381, |
| "grad_norm": 0.31384286284446716, |
| "learning_rate": 0.0001220382465752019, |
| "loss": 0.1795, |
| "step": 20220 |
| }, |
| { |
| "epoch": 0.9161307852549588, |
| "grad_norm": 0.4809572100639343, |
| "learning_rate": 0.00012196520138551215, |
| "loss": 0.165, |
| "step": 20230 |
| }, |
| { |
| "epoch": 0.9165836427859795, |
| "grad_norm": 0.3832622468471527, |
| "learning_rate": 0.00012189214387988525, |
| "loss": 0.1566, |
| "step": 20240 |
| }, |
| { |
| "epoch": 0.9170365003170002, |
| "grad_norm": 0.3654673993587494, |
| "learning_rate": 0.00012181907409928475, |
| "loss": 0.1637, |
| "step": 20250 |
| }, |
| { |
| "epoch": 0.917489357848021, |
| "grad_norm": 0.3105533719062805, |
| "learning_rate": 0.00012174599208468099, |
| "loss": 0.1506, |
| "step": 20260 |
| }, |
| { |
| "epoch": 0.9179422153790417, |
| "grad_norm": 0.3788946866989136, |
| "learning_rate": 0.0001216728978770512, |
| "loss": 0.1676, |
| "step": 20270 |
| }, |
| { |
| "epoch": 0.9183950729100625, |
| "grad_norm": 0.45621371269226074, |
| "learning_rate": 0.00012159979151737952, |
| "loss": 0.1642, |
| "step": 20280 |
| }, |
| { |
| "epoch": 0.9188479304410833, |
| "grad_norm": 0.39650899171829224, |
| "learning_rate": 0.00012152667304665683, |
| "loss": 0.1547, |
| "step": 20290 |
| }, |
| { |
| "epoch": 0.919300787972104, |
| "grad_norm": 0.41694116592407227, |
| "learning_rate": 0.00012145354250588079, |
| "loss": 0.1577, |
| "step": 20300 |
| }, |
| { |
| "epoch": 0.9197536455031247, |
| "grad_norm": 0.4147912859916687, |
| "learning_rate": 0.00012138039993605588, |
| "loss": 0.1747, |
| "step": 20310 |
| }, |
| { |
| "epoch": 0.9202065030341454, |
| "grad_norm": 0.6738628149032593, |
| "learning_rate": 0.00012130724537819332, |
| "loss": 0.1763, |
| "step": 20320 |
| }, |
| { |
| "epoch": 0.9206593605651662, |
| "grad_norm": 0.4486740529537201, |
| "learning_rate": 0.00012123407887331102, |
| "loss": 0.1649, |
| "step": 20330 |
| }, |
| { |
| "epoch": 0.9211122180961869, |
| "grad_norm": 0.35589438676834106, |
| "learning_rate": 0.00012116090046243355, |
| "loss": 0.1676, |
| "step": 20340 |
| }, |
| { |
| "epoch": 0.9215650756272077, |
| "grad_norm": 0.3210257291793823, |
| "learning_rate": 0.00012108771018659229, |
| "loss": 0.1736, |
| "step": 20350 |
| }, |
| { |
| "epoch": 0.9220179331582284, |
| "grad_norm": 0.39245256781578064, |
| "learning_rate": 0.00012101450808682513, |
| "loss": 0.1705, |
| "step": 20360 |
| }, |
| { |
| "epoch": 0.9224707906892492, |
| "grad_norm": 0.4380585849285126, |
| "learning_rate": 0.00012094129420417668, |
| "loss": 0.1705, |
| "step": 20370 |
| }, |
| { |
| "epoch": 0.92292364822027, |
| "grad_norm": 0.2744285762310028, |
| "learning_rate": 0.00012086806857969812, |
| "loss": 0.1471, |
| "step": 20380 |
| }, |
| { |
| "epoch": 0.9233765057512906, |
| "grad_norm": 0.41960641741752625, |
| "learning_rate": 0.00012079483125444719, |
| "loss": 0.1819, |
| "step": 20390 |
| }, |
| { |
| "epoch": 0.9238293632823114, |
| "grad_norm": 0.3407396674156189, |
| "learning_rate": 0.00012072158226948829, |
| "loss": 0.1854, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.9242822208133321, |
| "grad_norm": 0.4826781451702118, |
| "learning_rate": 0.00012064832166589222, |
| "loss": 0.1484, |
| "step": 20410 |
| }, |
| { |
| "epoch": 0.9247350783443529, |
| "grad_norm": 0.3638167083263397, |
| "learning_rate": 0.0001205750494847364, |
| "loss": 0.1497, |
| "step": 20420 |
| }, |
| { |
| "epoch": 0.9251879358753736, |
| "grad_norm": 0.4030158221721649, |
| "learning_rate": 0.00012050176576710468, |
| "loss": 0.1573, |
| "step": 20430 |
| }, |
| { |
| "epoch": 0.9256407934063944, |
| "grad_norm": 0.3100288510322571, |
| "learning_rate": 0.00012042847055408741, |
| "loss": 0.1558, |
| "step": 20440 |
| }, |
| { |
| "epoch": 0.9260936509374151, |
| "grad_norm": 0.32881560921669006, |
| "learning_rate": 0.00012035516388678143, |
| "loss": 0.1667, |
| "step": 20450 |
| }, |
| { |
| "epoch": 0.9265465084684358, |
| "grad_norm": 0.32451504468917847, |
| "learning_rate": 0.00012028184580628987, |
| "loss": 0.1761, |
| "step": 20460 |
| }, |
| { |
| "epoch": 0.9269993659994565, |
| "grad_norm": 0.4388659596443176, |
| "learning_rate": 0.00012020851635372236, |
| "loss": 0.1648, |
| "step": 20470 |
| }, |
| { |
| "epoch": 0.9274522235304773, |
| "grad_norm": 0.360840767621994, |
| "learning_rate": 0.00012013517557019494, |
| "loss": 0.1834, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.927905081061498, |
| "grad_norm": 0.3814234137535095, |
| "learning_rate": 0.0001200618234968299, |
| "loss": 0.1587, |
| "step": 20490 |
| }, |
| { |
| "epoch": 0.9283579385925188, |
| "grad_norm": 0.34749165177345276, |
| "learning_rate": 0.00011998846017475592, |
| "loss": 0.1834, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.9288107961235396, |
| "grad_norm": 0.340725839138031, |
| "learning_rate": 0.00011991508564510798, |
| "loss": 0.1534, |
| "step": 20510 |
| }, |
| { |
| "epoch": 0.9292636536545603, |
| "grad_norm": 1.043115496635437, |
| "learning_rate": 0.00011984169994902736, |
| "loss": 0.1837, |
| "step": 20520 |
| }, |
| { |
| "epoch": 0.929716511185581, |
| "grad_norm": 0.46120333671569824, |
| "learning_rate": 0.00011976830312766156, |
| "loss": 0.1585, |
| "step": 20530 |
| }, |
| { |
| "epoch": 0.9301693687166017, |
| "grad_norm": 0.39356350898742676, |
| "learning_rate": 0.00011969489522216433, |
| "loss": 0.1699, |
| "step": 20540 |
| }, |
| { |
| "epoch": 0.9306222262476225, |
| "grad_norm": 0.27802759408950806, |
| "learning_rate": 0.00011962147627369567, |
| "loss": 0.1558, |
| "step": 20550 |
| }, |
| { |
| "epoch": 0.9310750837786432, |
| "grad_norm": 0.48346295952796936, |
| "learning_rate": 0.00011954804632342176, |
| "loss": 0.1897, |
| "step": 20560 |
| }, |
| { |
| "epoch": 0.931527941309664, |
| "grad_norm": 0.4480477571487427, |
| "learning_rate": 0.00011947460541251487, |
| "loss": 0.1599, |
| "step": 20570 |
| }, |
| { |
| "epoch": 0.9319807988406847, |
| "grad_norm": 0.30732083320617676, |
| "learning_rate": 0.00011940115358215354, |
| "loss": 0.1808, |
| "step": 20580 |
| }, |
| { |
| "epoch": 0.9324336563717055, |
| "grad_norm": 0.3854066729545593, |
| "learning_rate": 0.00011932769087352236, |
| "loss": 0.1765, |
| "step": 20590 |
| }, |
| { |
| "epoch": 0.9328865139027263, |
| "grad_norm": 0.4150194525718689, |
| "learning_rate": 0.00011925421732781206, |
| "loss": 0.1785, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.9333393714337469, |
| "grad_norm": 0.2741736173629761, |
| "learning_rate": 0.00011918073298621936, |
| "loss": 0.1724, |
| "step": 20610 |
| }, |
| { |
| "epoch": 0.9337922289647677, |
| "grad_norm": 0.38691046833992004, |
| "learning_rate": 0.00011910723788994716, |
| "loss": 0.1639, |
| "step": 20620 |
| }, |
| { |
| "epoch": 0.9342450864957884, |
| "grad_norm": 0.4636295437812805, |
| "learning_rate": 0.00011903373208020425, |
| "loss": 0.152, |
| "step": 20630 |
| }, |
| { |
| "epoch": 0.9346979440268092, |
| "grad_norm": 0.43888625502586365, |
| "learning_rate": 0.0001189602155982056, |
| "loss": 0.168, |
| "step": 20640 |
| }, |
| { |
| "epoch": 0.9351508015578299, |
| "grad_norm": 0.3848056197166443, |
| "learning_rate": 0.00011888668848517198, |
| "loss": 0.1534, |
| "step": 20650 |
| }, |
| { |
| "epoch": 0.9356036590888507, |
| "grad_norm": 0.5369208455085754, |
| "learning_rate": 0.0001188131507823302, |
| "loss": 0.15, |
| "step": 20660 |
| }, |
| { |
| "epoch": 0.9360565166198714, |
| "grad_norm": 0.3236347734928131, |
| "learning_rate": 0.00011873960253091308, |
| "loss": 0.1584, |
| "step": 20670 |
| }, |
| { |
| "epoch": 0.9365093741508921, |
| "grad_norm": 0.43311768770217896, |
| "learning_rate": 0.00011866604377215922, |
| "loss": 0.1673, |
| "step": 20680 |
| }, |
| { |
| "epoch": 0.9369622316819128, |
| "grad_norm": 0.28458085656166077, |
| "learning_rate": 0.00011859247454731323, |
| "loss": 0.1377, |
| "step": 20690 |
| }, |
| { |
| "epoch": 0.9374150892129336, |
| "grad_norm": 0.3330056369304657, |
| "learning_rate": 0.0001185188948976255, |
| "loss": 0.1327, |
| "step": 20700 |
| }, |
| { |
| "epoch": 0.9378679467439544, |
| "grad_norm": 0.43871769309043884, |
| "learning_rate": 0.00011844530486435239, |
| "loss": 0.1771, |
| "step": 20710 |
| }, |
| { |
| "epoch": 0.9383208042749751, |
| "grad_norm": 0.34857964515686035, |
| "learning_rate": 0.00011837170448875587, |
| "loss": 0.1577, |
| "step": 20720 |
| }, |
| { |
| "epoch": 0.9387736618059959, |
| "grad_norm": 0.3620039224624634, |
| "learning_rate": 0.00011829809381210389, |
| "loss": 0.1583, |
| "step": 20730 |
| }, |
| { |
| "epoch": 0.9392265193370166, |
| "grad_norm": 0.3414750099182129, |
| "learning_rate": 0.00011822447287567014, |
| "loss": 0.1608, |
| "step": 20740 |
| }, |
| { |
| "epoch": 0.9396793768680373, |
| "grad_norm": 0.357601523399353, |
| "learning_rate": 0.000118150841720734, |
| "loss": 0.1558, |
| "step": 20750 |
| }, |
| { |
| "epoch": 0.940132234399058, |
| "grad_norm": 0.4363787770271301, |
| "learning_rate": 0.00011807720038858068, |
| "loss": 0.1757, |
| "step": 20760 |
| }, |
| { |
| "epoch": 0.9405850919300788, |
| "grad_norm": 0.3434285819530487, |
| "learning_rate": 0.00011800354892050093, |
| "loss": 0.1641, |
| "step": 20770 |
| }, |
| { |
| "epoch": 0.9410379494610995, |
| "grad_norm": 0.3395892083644867, |
| "learning_rate": 0.00011792988735779143, |
| "loss": 0.1491, |
| "step": 20780 |
| }, |
| { |
| "epoch": 0.9414908069921203, |
| "grad_norm": 0.41426268219947815, |
| "learning_rate": 0.00011785621574175431, |
| "loss": 0.1848, |
| "step": 20790 |
| }, |
| { |
| "epoch": 0.941943664523141, |
| "grad_norm": 0.39705103635787964, |
| "learning_rate": 0.0001177825341136974, |
| "loss": 0.1712, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.9423965220541618, |
| "grad_norm": 0.5175170302391052, |
| "learning_rate": 0.00011770884251493417, |
| "loss": 0.1831, |
| "step": 20810 |
| }, |
| { |
| "epoch": 0.9428493795851826, |
| "grad_norm": 0.3863191306591034, |
| "learning_rate": 0.00011763514098678366, |
| "loss": 0.1581, |
| "step": 20820 |
| }, |
| { |
| "epoch": 0.9433022371162032, |
| "grad_norm": 0.4071378707885742, |
| "learning_rate": 0.0001175614295705705, |
| "loss": 0.1622, |
| "step": 20830 |
| }, |
| { |
| "epoch": 0.943755094647224, |
| "grad_norm": 0.4960409998893738, |
| "learning_rate": 0.00011748770830762481, |
| "loss": 0.1541, |
| "step": 20840 |
| }, |
| { |
| "epoch": 0.9442079521782447, |
| "grad_norm": 0.38787540793418884, |
| "learning_rate": 0.00011741397723928228, |
| "loss": 0.1571, |
| "step": 20850 |
| }, |
| { |
| "epoch": 0.9446608097092655, |
| "grad_norm": 0.3468814194202423, |
| "learning_rate": 0.00011734023640688406, |
| "loss": 0.1446, |
| "step": 20860 |
| }, |
| { |
| "epoch": 0.9451136672402862, |
| "grad_norm": 0.5620803833007812, |
| "learning_rate": 0.00011726648585177685, |
| "loss": 0.1719, |
| "step": 20870 |
| }, |
| { |
| "epoch": 0.945566524771307, |
| "grad_norm": 0.4213656187057495, |
| "learning_rate": 0.0001171927256153127, |
| "loss": 0.1607, |
| "step": 20880 |
| }, |
| { |
| "epoch": 0.9460193823023277, |
| "grad_norm": 0.5288670659065247, |
| "learning_rate": 0.00011711895573884917, |
| "loss": 0.1607, |
| "step": 20890 |
| }, |
| { |
| "epoch": 0.9464722398333484, |
| "grad_norm": 0.4692736268043518, |
| "learning_rate": 0.00011704517626374918, |
| "loss": 0.1741, |
| "step": 20900 |
| }, |
| { |
| "epoch": 0.9469250973643691, |
| "grad_norm": 0.39539778232574463, |
| "learning_rate": 0.00011697138723138108, |
| "loss": 0.1605, |
| "step": 20910 |
| }, |
| { |
| "epoch": 0.9473779548953899, |
| "grad_norm": 0.4380393326282501, |
| "learning_rate": 0.00011689758868311846, |
| "loss": 0.2162, |
| "step": 20920 |
| }, |
| { |
| "epoch": 0.9478308124264107, |
| "grad_norm": 0.4683678448200226, |
| "learning_rate": 0.00011682378066034041, |
| "loss": 0.1818, |
| "step": 20930 |
| }, |
| { |
| "epoch": 0.9482836699574314, |
| "grad_norm": 0.4942455291748047, |
| "learning_rate": 0.00011674996320443123, |
| "loss": 0.1823, |
| "step": 20940 |
| }, |
| { |
| "epoch": 0.9487365274884522, |
| "grad_norm": 0.2584671378135681, |
| "learning_rate": 0.00011667613635678054, |
| "loss": 0.1601, |
| "step": 20950 |
| }, |
| { |
| "epoch": 0.9491893850194729, |
| "grad_norm": 0.30206599831581116, |
| "learning_rate": 0.00011660230015878316, |
| "loss": 0.1674, |
| "step": 20960 |
| }, |
| { |
| "epoch": 0.9496422425504936, |
| "grad_norm": 0.35249218344688416, |
| "learning_rate": 0.00011652845465183928, |
| "loss": 0.231, |
| "step": 20970 |
| }, |
| { |
| "epoch": 0.9500951000815143, |
| "grad_norm": 0.40178045630455017, |
| "learning_rate": 0.00011645459987735423, |
| "loss": 0.1838, |
| "step": 20980 |
| }, |
| { |
| "epoch": 0.9505479576125351, |
| "grad_norm": 0.2893231511116028, |
| "learning_rate": 0.0001163807358767385, |
| "loss": 0.151, |
| "step": 20990 |
| }, |
| { |
| "epoch": 0.9510008151435558, |
| "grad_norm": 0.3375166356563568, |
| "learning_rate": 0.00011630686269140782, |
| "loss": 0.168, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.9510008151435558, |
| "eval_chrf": 82.61639619570752, |
| "eval_loss": 0.13870181143283844, |
| "eval_runtime": 7.946, |
| "eval_samples_per_second": 1.258, |
| "eval_steps_per_second": 0.126, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.9514536726745766, |
| "grad_norm": 0.3449011445045471, |
| "learning_rate": 0.00011623298036278307, |
| "loss": 0.1653, |
| "step": 21010 |
| }, |
| { |
| "epoch": 0.9519065302055973, |
| "grad_norm": 0.4119865894317627, |
| "learning_rate": 0.00011615908893229027, |
| "loss": 0.1471, |
| "step": 21020 |
| }, |
| { |
| "epoch": 0.9523593877366181, |
| "grad_norm": 0.3247711956501007, |
| "learning_rate": 0.0001160851884413604, |
| "loss": 0.1378, |
| "step": 21030 |
| }, |
| { |
| "epoch": 0.9528122452676387, |
| "grad_norm": 0.4262673556804657, |
| "learning_rate": 0.00011601127893142971, |
| "loss": 0.1623, |
| "step": 21040 |
| }, |
| { |
| "epoch": 0.9532651027986595, |
| "grad_norm": 0.3229232132434845, |
| "learning_rate": 0.0001159373604439394, |
| "loss": 0.1606, |
| "step": 21050 |
| }, |
| { |
| "epoch": 0.9537179603296803, |
| "grad_norm": 0.45158863067626953, |
| "learning_rate": 0.00011586343302033573, |
| "loss": 0.1705, |
| "step": 21060 |
| }, |
| { |
| "epoch": 0.954170817860701, |
| "grad_norm": 0.4874420762062073, |
| "learning_rate": 0.00011578949670206998, |
| "loss": 0.1654, |
| "step": 21070 |
| }, |
| { |
| "epoch": 0.9546236753917218, |
| "grad_norm": 0.4906403720378876, |
| "learning_rate": 0.00011571555153059841, |
| "loss": 0.1764, |
| "step": 21080 |
| }, |
| { |
| "epoch": 0.9550765329227425, |
| "grad_norm": 0.44753482937812805, |
| "learning_rate": 0.00011564159754738223, |
| "loss": 0.1529, |
| "step": 21090 |
| }, |
| { |
| "epoch": 0.9555293904537633, |
| "grad_norm": 0.5234647989273071, |
| "learning_rate": 0.00011556763479388763, |
| "loss": 0.1671, |
| "step": 21100 |
| }, |
| { |
| "epoch": 0.955982247984784, |
| "grad_norm": 0.25665393471717834, |
| "learning_rate": 0.00011549366331158567, |
| "loss": 0.1399, |
| "step": 21110 |
| }, |
| { |
| "epoch": 0.9564351055158047, |
| "grad_norm": 0.4347527325153351, |
| "learning_rate": 0.00011541968314195228, |
| "loss": 0.1867, |
| "step": 21120 |
| }, |
| { |
| "epoch": 0.9568879630468254, |
| "grad_norm": 0.4241902232170105, |
| "learning_rate": 0.00011534569432646838, |
| "loss": 0.1716, |
| "step": 21130 |
| }, |
| { |
| "epoch": 0.9573408205778462, |
| "grad_norm": 0.3855469524860382, |
| "learning_rate": 0.00011527169690661965, |
| "loss": 0.1605, |
| "step": 21140 |
| }, |
| { |
| "epoch": 0.957793678108867, |
| "grad_norm": 0.4303516745567322, |
| "learning_rate": 0.00011519769092389653, |
| "loss": 0.1919, |
| "step": 21150 |
| }, |
| { |
| "epoch": 0.9582465356398877, |
| "grad_norm": 0.44530144333839417, |
| "learning_rate": 0.00011512367641979445, |
| "loss": 0.1452, |
| "step": 21160 |
| }, |
| { |
| "epoch": 0.9586993931709085, |
| "grad_norm": 0.4251921772956848, |
| "learning_rate": 0.00011504965343581345, |
| "loss": 0.1952, |
| "step": 21170 |
| }, |
| { |
| "epoch": 0.9591522507019292, |
| "grad_norm": 0.3609083294868469, |
| "learning_rate": 0.00011497562201345835, |
| "loss": 0.1586, |
| "step": 21180 |
| }, |
| { |
| "epoch": 0.9596051082329499, |
| "grad_norm": 0.30689379572868347, |
| "learning_rate": 0.00011490158219423879, |
| "loss": 0.1575, |
| "step": 21190 |
| }, |
| { |
| "epoch": 0.9600579657639706, |
| "grad_norm": 0.46872153878211975, |
| "learning_rate": 0.00011482753401966904, |
| "loss": 0.1583, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.9605108232949914, |
| "grad_norm": 0.5662645697593689, |
| "learning_rate": 0.00011475347753126809, |
| "loss": 0.1746, |
| "step": 21210 |
| }, |
| { |
| "epoch": 0.9609636808260121, |
| "grad_norm": 0.3720380365848541, |
| "learning_rate": 0.00011467941277055953, |
| "loss": 0.1445, |
| "step": 21220 |
| }, |
| { |
| "epoch": 0.9614165383570329, |
| "grad_norm": 0.49396568536758423, |
| "learning_rate": 0.00011460533977907166, |
| "loss": 0.159, |
| "step": 21230 |
| }, |
| { |
| "epoch": 0.9618693958880536, |
| "grad_norm": 0.3395419120788574, |
| "learning_rate": 0.0001145312585983374, |
| "loss": 0.1708, |
| "step": 21240 |
| }, |
| { |
| "epoch": 0.9623222534190744, |
| "grad_norm": 0.42840003967285156, |
| "learning_rate": 0.00011445716926989421, |
| "loss": 0.159, |
| "step": 21250 |
| }, |
| { |
| "epoch": 0.962775110950095, |
| "grad_norm": 0.4080982804298401, |
| "learning_rate": 0.00011438307183528413, |
| "loss": 0.1438, |
| "step": 21260 |
| }, |
| { |
| "epoch": 0.9632279684811158, |
| "grad_norm": 0.3299719989299774, |
| "learning_rate": 0.0001143089663360538, |
| "loss": 0.1622, |
| "step": 21270 |
| }, |
| { |
| "epoch": 0.9636808260121366, |
| "grad_norm": 0.4868001341819763, |
| "learning_rate": 0.00011423485281375426, |
| "loss": 0.173, |
| "step": 21280 |
| }, |
| { |
| "epoch": 0.9641336835431573, |
| "grad_norm": 0.8481622934341431, |
| "learning_rate": 0.0001141607313099412, |
| "loss": 0.1577, |
| "step": 21290 |
| }, |
| { |
| "epoch": 0.9645865410741781, |
| "grad_norm": 0.4979325234889984, |
| "learning_rate": 0.00011408660186617467, |
| "loss": 0.1587, |
| "step": 21300 |
| }, |
| { |
| "epoch": 0.9650393986051988, |
| "grad_norm": 0.4334494173526764, |
| "learning_rate": 0.00011401246452401923, |
| "loss": 0.172, |
| "step": 21310 |
| }, |
| { |
| "epoch": 0.9654922561362196, |
| "grad_norm": 0.3916884660720825, |
| "learning_rate": 0.00011393831932504384, |
| "loss": 0.178, |
| "step": 21320 |
| }, |
| { |
| "epoch": 0.9659451136672403, |
| "grad_norm": 0.2896512448787689, |
| "learning_rate": 0.00011386416631082193, |
| "loss": 0.1622, |
| "step": 21330 |
| }, |
| { |
| "epoch": 0.966397971198261, |
| "grad_norm": 0.48449796438217163, |
| "learning_rate": 0.00011379000552293117, |
| "loss": 0.1698, |
| "step": 21340 |
| }, |
| { |
| "epoch": 0.9668508287292817, |
| "grad_norm": 0.43414372205734253, |
| "learning_rate": 0.00011371583700295371, |
| "loss": 0.1722, |
| "step": 21350 |
| }, |
| { |
| "epoch": 0.9673036862603025, |
| "grad_norm": 0.39366286993026733, |
| "learning_rate": 0.00011364166079247604, |
| "loss": 0.1701, |
| "step": 21360 |
| }, |
| { |
| "epoch": 0.9677565437913233, |
| "grad_norm": 0.3768509328365326, |
| "learning_rate": 0.00011356747693308891, |
| "loss": 0.1692, |
| "step": 21370 |
| }, |
| { |
| "epoch": 0.968209401322344, |
| "grad_norm": 0.38022980093955994, |
| "learning_rate": 0.00011349328546638738, |
| "loss": 0.1917, |
| "step": 21380 |
| }, |
| { |
| "epoch": 0.9686622588533648, |
| "grad_norm": 0.3759568929672241, |
| "learning_rate": 0.00011341908643397072, |
| "loss": 0.1916, |
| "step": 21390 |
| }, |
| { |
| "epoch": 0.9691151163843855, |
| "grad_norm": 0.3977113366127014, |
| "learning_rate": 0.00011334487987744256, |
| "loss": 0.175, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.9695679739154062, |
| "grad_norm": 0.4212728440761566, |
| "learning_rate": 0.00011327066583841066, |
| "loss": 0.1672, |
| "step": 21410 |
| }, |
| { |
| "epoch": 0.9700208314464269, |
| "grad_norm": 0.36486679315567017, |
| "learning_rate": 0.00011319644435848697, |
| "loss": 0.176, |
| "step": 21420 |
| }, |
| { |
| "epoch": 0.9704736889774477, |
| "grad_norm": 0.46536704897880554, |
| "learning_rate": 0.00011312221547928766, |
| "loss": 0.1629, |
| "step": 21430 |
| }, |
| { |
| "epoch": 0.9709265465084684, |
| "grad_norm": 0.35601305961608887, |
| "learning_rate": 0.000113047979242433, |
| "loss": 0.1671, |
| "step": 21440 |
| }, |
| { |
| "epoch": 0.9713794040394892, |
| "grad_norm": 0.32921913266181946, |
| "learning_rate": 0.00011297373568954745, |
| "loss": 0.1455, |
| "step": 21450 |
| }, |
| { |
| "epoch": 0.97183226157051, |
| "grad_norm": 0.32573631405830383, |
| "learning_rate": 0.00011289948486225952, |
| "loss": 0.1607, |
| "step": 21460 |
| }, |
| { |
| "epoch": 0.9722851191015307, |
| "grad_norm": 0.47739171981811523, |
| "learning_rate": 0.0001128252268022018, |
| "loss": 0.1556, |
| "step": 21470 |
| }, |
| { |
| "epoch": 0.9727379766325513, |
| "grad_norm": 0.40861859917640686, |
| "learning_rate": 0.00011275096155101102, |
| "loss": 0.1824, |
| "step": 21480 |
| }, |
| { |
| "epoch": 0.9731908341635721, |
| "grad_norm": 0.41582047939300537, |
| "learning_rate": 0.00011267668915032776, |
| "loss": 0.1548, |
| "step": 21490 |
| }, |
| { |
| "epoch": 0.9736436916945929, |
| "grad_norm": 0.45832404494285583, |
| "learning_rate": 0.00011260240964179678, |
| "loss": 0.1585, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.9740965492256136, |
| "grad_norm": 0.3012300729751587, |
| "learning_rate": 0.00011252812306706677, |
| "loss": 0.1486, |
| "step": 21510 |
| }, |
| { |
| "epoch": 0.9745494067566344, |
| "grad_norm": 0.3662928640842438, |
| "learning_rate": 0.00011245382946779037, |
| "loss": 0.1588, |
| "step": 21520 |
| }, |
| { |
| "epoch": 0.9750022642876551, |
| "grad_norm": 0.39952388405799866, |
| "learning_rate": 0.00011237952888562415, |
| "loss": 0.1885, |
| "step": 21530 |
| }, |
| { |
| "epoch": 0.9754551218186759, |
| "grad_norm": 0.3074231743812561, |
| "learning_rate": 0.00011230522136222861, |
| "loss": 0.1519, |
| "step": 21540 |
| }, |
| { |
| "epoch": 0.9759079793496965, |
| "grad_norm": 0.44197362661361694, |
| "learning_rate": 0.00011223090693926817, |
| "loss": 0.1674, |
| "step": 21550 |
| }, |
| { |
| "epoch": 0.9763608368807173, |
| "grad_norm": 0.4374144971370697, |
| "learning_rate": 0.00011215658565841109, |
| "loss": 0.1624, |
| "step": 21560 |
| }, |
| { |
| "epoch": 0.976813694411738, |
| "grad_norm": 0.4729935824871063, |
| "learning_rate": 0.00011208225756132944, |
| "loss": 0.1807, |
| "step": 21570 |
| }, |
| { |
| "epoch": 0.9772665519427588, |
| "grad_norm": 0.2917758822441101, |
| "learning_rate": 0.0001120079226896992, |
| "loss": 0.1616, |
| "step": 21580 |
| }, |
| { |
| "epoch": 0.9777194094737796, |
| "grad_norm": 0.3587353527545929, |
| "learning_rate": 0.00011193358108520005, |
| "loss": 0.1721, |
| "step": 21590 |
| }, |
| { |
| "epoch": 0.9781722670048003, |
| "grad_norm": 0.3926864266395569, |
| "learning_rate": 0.00011185923278951556, |
| "loss": 0.1572, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.9786251245358211, |
| "grad_norm": 0.36469602584838867, |
| "learning_rate": 0.0001117848778443329, |
| "loss": 0.1521, |
| "step": 21610 |
| }, |
| { |
| "epoch": 0.9790779820668418, |
| "grad_norm": 0.39263635873794556, |
| "learning_rate": 0.00011171051629134308, |
| "loss": 0.1759, |
| "step": 21620 |
| }, |
| { |
| "epoch": 0.9795308395978625, |
| "grad_norm": 0.33528193831443787, |
| "learning_rate": 0.00011163614817224082, |
| "loss": 0.1388, |
| "step": 21630 |
| }, |
| { |
| "epoch": 0.9799836971288832, |
| "grad_norm": 0.48358967900276184, |
| "learning_rate": 0.00011156177352872446, |
| "loss": 0.1658, |
| "step": 21640 |
| }, |
| { |
| "epoch": 0.980436554659904, |
| "grad_norm": 0.47192758321762085, |
| "learning_rate": 0.00011148739240249603, |
| "loss": 0.1658, |
| "step": 21650 |
| }, |
| { |
| "epoch": 0.9808894121909247, |
| "grad_norm": 0.3485560119152069, |
| "learning_rate": 0.0001114130048352612, |
| "loss": 0.1754, |
| "step": 21660 |
| }, |
| { |
| "epoch": 0.9813422697219455, |
| "grad_norm": 0.42553985118865967, |
| "learning_rate": 0.00011133861086872928, |
| "loss": 0.1793, |
| "step": 21670 |
| }, |
| { |
| "epoch": 0.9817951272529662, |
| "grad_norm": 0.38573968410491943, |
| "learning_rate": 0.00011126421054461305, |
| "loss": 0.1605, |
| "step": 21680 |
| }, |
| { |
| "epoch": 0.982247984783987, |
| "grad_norm": 0.47721385955810547, |
| "learning_rate": 0.00011118980390462899, |
| "loss": 0.1615, |
| "step": 21690 |
| }, |
| { |
| "epoch": 0.9827008423150076, |
| "grad_norm": 0.47984451055526733, |
| "learning_rate": 0.00011111539099049705, |
| "loss": 0.1595, |
| "step": 21700 |
| }, |
| { |
| "epoch": 0.9831536998460284, |
| "grad_norm": 0.29439032077789307, |
| "learning_rate": 0.00011104097184394074, |
| "loss": 0.1643, |
| "step": 21710 |
| }, |
| { |
| "epoch": 0.9836065573770492, |
| "grad_norm": 0.29327526688575745, |
| "learning_rate": 0.00011096654650668705, |
| "loss": 0.149, |
| "step": 21720 |
| }, |
| { |
| "epoch": 0.9840594149080699, |
| "grad_norm": 0.4929738938808441, |
| "learning_rate": 0.00011089211502046634, |
| "loss": 0.1629, |
| "step": 21730 |
| }, |
| { |
| "epoch": 0.9845122724390907, |
| "grad_norm": 0.3322129547595978, |
| "learning_rate": 0.00011081767742701263, |
| "loss": 0.1437, |
| "step": 21740 |
| }, |
| { |
| "epoch": 0.9849651299701114, |
| "grad_norm": 0.4976238012313843, |
| "learning_rate": 0.0001107432337680632, |
| "loss": 0.1635, |
| "step": 21750 |
| }, |
| { |
| "epoch": 0.9854179875011322, |
| "grad_norm": 0.3639504611492157, |
| "learning_rate": 0.00011066878408535872, |
| "loss": 0.1469, |
| "step": 21760 |
| }, |
| { |
| "epoch": 0.9858708450321528, |
| "grad_norm": 0.45734038949012756, |
| "learning_rate": 0.00011059432842064336, |
| "loss": 0.1692, |
| "step": 21770 |
| }, |
| { |
| "epoch": 0.9863237025631736, |
| "grad_norm": 0.44936123490333557, |
| "learning_rate": 0.00011051986681566458, |
| "loss": 0.1744, |
| "step": 21780 |
| }, |
| { |
| "epoch": 0.9867765600941943, |
| "grad_norm": 0.40992605686187744, |
| "learning_rate": 0.00011044539931217312, |
| "loss": 0.1719, |
| "step": 21790 |
| }, |
| { |
| "epoch": 0.9872294176252151, |
| "grad_norm": 0.36595234274864197, |
| "learning_rate": 0.00011037092595192309, |
| "loss": 0.1749, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.9876822751562359, |
| "grad_norm": 0.36314117908477783, |
| "learning_rate": 0.00011029644677667188, |
| "loss": 0.1774, |
| "step": 21810 |
| }, |
| { |
| "epoch": 0.9881351326872566, |
| "grad_norm": 0.47040295600891113, |
| "learning_rate": 0.0001102219618281801, |
| "loss": 0.1817, |
| "step": 21820 |
| }, |
| { |
| "epoch": 0.9885879902182774, |
| "grad_norm": 0.3664730489253998, |
| "learning_rate": 0.00011014747114821167, |
| "loss": 0.1443, |
| "step": 21830 |
| }, |
| { |
| "epoch": 0.9890408477492981, |
| "grad_norm": 0.3812478184700012, |
| "learning_rate": 0.00011007297477853365, |
| "loss": 0.1578, |
| "step": 21840 |
| }, |
| { |
| "epoch": 0.9894937052803188, |
| "grad_norm": 0.4033992290496826, |
| "learning_rate": 0.00010999847276091632, |
| "loss": 0.1463, |
| "step": 21850 |
| }, |
| { |
| "epoch": 0.9899465628113395, |
| "grad_norm": 0.37883979082107544, |
| "learning_rate": 0.00010992396513713315, |
| "loss": 0.1476, |
| "step": 21860 |
| }, |
| { |
| "epoch": 0.9903994203423603, |
| "grad_norm": 0.39016664028167725, |
| "learning_rate": 0.00010984945194896073, |
| "loss": 0.1744, |
| "step": 21870 |
| }, |
| { |
| "epoch": 0.990852277873381, |
| "grad_norm": 0.3429701626300812, |
| "learning_rate": 0.00010977493323817873, |
| "loss": 0.1599, |
| "step": 21880 |
| }, |
| { |
| "epoch": 0.9913051354044018, |
| "grad_norm": 0.43505728244781494, |
| "learning_rate": 0.00010970040904656997, |
| "loss": 0.1693, |
| "step": 21890 |
| }, |
| { |
| "epoch": 0.9917579929354225, |
| "grad_norm": 0.3216778039932251, |
| "learning_rate": 0.00010962587941592036, |
| "loss": 0.1513, |
| "step": 21900 |
| }, |
| { |
| "epoch": 0.9922108504664433, |
| "grad_norm": 0.4504454433917999, |
| "learning_rate": 0.00010955134438801882, |
| "loss": 0.1396, |
| "step": 21910 |
| }, |
| { |
| "epoch": 0.992663707997464, |
| "grad_norm": 0.3114122450351715, |
| "learning_rate": 0.00010947680400465725, |
| "loss": 0.1453, |
| "step": 21920 |
| }, |
| { |
| "epoch": 0.9931165655284847, |
| "grad_norm": 0.4424838125705719, |
| "learning_rate": 0.00010940225830763066, |
| "loss": 0.1713, |
| "step": 21930 |
| }, |
| { |
| "epoch": 0.9935694230595055, |
| "grad_norm": 0.3902185261249542, |
| "learning_rate": 0.00010932770733873703, |
| "loss": 0.1813, |
| "step": 21940 |
| }, |
| { |
| "epoch": 0.9940222805905262, |
| "grad_norm": 0.34867456555366516, |
| "learning_rate": 0.00010925315113977719, |
| "loss": 0.173, |
| "step": 21950 |
| }, |
| { |
| "epoch": 0.994475138121547, |
| "grad_norm": 0.46827998757362366, |
| "learning_rate": 0.00010917858975255496, |
| "loss": 0.1607, |
| "step": 21960 |
| }, |
| { |
| "epoch": 0.9949279956525677, |
| "grad_norm": 0.37731778621673584, |
| "learning_rate": 0.00010910402321887709, |
| "loss": 0.1736, |
| "step": 21970 |
| }, |
| { |
| "epoch": 0.9953808531835885, |
| "grad_norm": 0.45258596539497375, |
| "learning_rate": 0.00010902945158055324, |
| "loss": 0.1583, |
| "step": 21980 |
| }, |
| { |
| "epoch": 0.9958337107146091, |
| "grad_norm": 0.32490190863609314, |
| "learning_rate": 0.00010895487487939582, |
| "loss": 0.1762, |
| "step": 21990 |
| }, |
| { |
| "epoch": 0.9962865682456299, |
| "grad_norm": 0.39621788263320923, |
| "learning_rate": 0.00010888029315722022, |
| "loss": 0.1548, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.9962865682456299, |
| "eval_chrf": 76.49056376513751, |
| "eval_loss": 0.1375478208065033, |
| "eval_runtime": 26.6076, |
| "eval_samples_per_second": 0.376, |
| "eval_steps_per_second": 0.038, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.9967394257766506, |
| "grad_norm": 0.35230210423469543, |
| "learning_rate": 0.00010880570645584452, |
| "loss": 0.1785, |
| "step": 22010 |
| }, |
| { |
| "epoch": 0.9971922833076714, |
| "grad_norm": 0.37812137603759766, |
| "learning_rate": 0.00010873111481708969, |
| "loss": 0.1567, |
| "step": 22020 |
| }, |
| { |
| "epoch": 0.9976451408386922, |
| "grad_norm": 0.4391620457172394, |
| "learning_rate": 0.0001086565182827794, |
| "loss": 0.1745, |
| "step": 22030 |
| }, |
| { |
| "epoch": 0.9980979983697129, |
| "grad_norm": 0.32292863726615906, |
| "learning_rate": 0.00010858191689474013, |
| "loss": 0.1548, |
| "step": 22040 |
| }, |
| { |
| "epoch": 0.9985508559007337, |
| "grad_norm": 0.3486003279685974, |
| "learning_rate": 0.00010850731069480102, |
| "loss": 0.1632, |
| "step": 22050 |
| }, |
| { |
| "epoch": 0.9990037134317544, |
| "grad_norm": 0.4655235707759857, |
| "learning_rate": 0.00010843269972479396, |
| "loss": 0.1687, |
| "step": 22060 |
| }, |
| { |
| "epoch": 0.9994565709627751, |
| "grad_norm": 0.36596444249153137, |
| "learning_rate": 0.00010835808402655341, |
| "loss": 0.1667, |
| "step": 22070 |
| }, |
| { |
| "epoch": 0.9999094284937958, |
| "grad_norm": 0.3869829475879669, |
| "learning_rate": 0.00010828346364191661, |
| "loss": 0.1676, |
| "step": 22080 |
| }, |
| { |
| "epoch": 1.0003622860248167, |
| "grad_norm": 0.28441983461380005, |
| "learning_rate": 0.00010820883861272339, |
| "loss": 0.1416, |
| "step": 22090 |
| }, |
| { |
| "epoch": 1.0008151435558372, |
| "grad_norm": 0.3512919545173645, |
| "learning_rate": 0.00010813420898081616, |
| "loss": 0.1395, |
| "step": 22100 |
| }, |
| { |
| "epoch": 1.001268001086858, |
| "grad_norm": 0.3900757431983948, |
| "learning_rate": 0.00010805957478803988, |
| "loss": 0.1396, |
| "step": 22110 |
| }, |
| { |
| "epoch": 1.0017208586178787, |
| "grad_norm": 0.32627588510513306, |
| "learning_rate": 0.00010798493607624214, |
| "loss": 0.1382, |
| "step": 22120 |
| }, |
| { |
| "epoch": 1.0021737161488995, |
| "grad_norm": 0.4319378733634949, |
| "learning_rate": 0.00010791029288727306, |
| "loss": 0.1418, |
| "step": 22130 |
| }, |
| { |
| "epoch": 1.0026265736799203, |
| "grad_norm": 0.32595789432525635, |
| "learning_rate": 0.0001078356452629852, |
| "loss": 0.1414, |
| "step": 22140 |
| }, |
| { |
| "epoch": 1.003079431210941, |
| "grad_norm": 0.28670698404312134, |
| "learning_rate": 0.00010776099324523363, |
| "loss": 0.1513, |
| "step": 22150 |
| }, |
| { |
| "epoch": 1.0035322887419618, |
| "grad_norm": 0.4010210633277893, |
| "learning_rate": 0.00010768633687587598, |
| "loss": 0.1329, |
| "step": 22160 |
| }, |
| { |
| "epoch": 1.0039851462729825, |
| "grad_norm": 0.38099223375320435, |
| "learning_rate": 0.00010761167619677214, |
| "loss": 0.1422, |
| "step": 22170 |
| }, |
| { |
| "epoch": 1.0044380038040033, |
| "grad_norm": 0.40705400705337524, |
| "learning_rate": 0.00010753701124978463, |
| "loss": 0.1401, |
| "step": 22180 |
| }, |
| { |
| "epoch": 1.004890861335024, |
| "grad_norm": 0.932197093963623, |
| "learning_rate": 0.00010746234207677817, |
| "loss": 0.1512, |
| "step": 22190 |
| }, |
| { |
| "epoch": 1.0053437188660448, |
| "grad_norm": 0.5024510025978088, |
| "learning_rate": 0.00010738766871961994, |
| "loss": 0.1488, |
| "step": 22200 |
| }, |
| { |
| "epoch": 1.0057965763970655, |
| "grad_norm": 0.3365090787410736, |
| "learning_rate": 0.00010731299122017948, |
| "loss": 0.1638, |
| "step": 22210 |
| }, |
| { |
| "epoch": 1.0062494339280863, |
| "grad_norm": 0.2693597078323364, |
| "learning_rate": 0.00010723830962032861, |
| "loss": 0.1397, |
| "step": 22220 |
| }, |
| { |
| "epoch": 1.006702291459107, |
| "grad_norm": 0.3750072419643402, |
| "learning_rate": 0.00010716362396194149, |
| "loss": 0.1546, |
| "step": 22230 |
| }, |
| { |
| "epoch": 1.0071551489901278, |
| "grad_norm": 0.29761919379234314, |
| "learning_rate": 0.00010708893428689453, |
| "loss": 0.1396, |
| "step": 22240 |
| }, |
| { |
| "epoch": 1.0076080065211483, |
| "grad_norm": 0.37203189730644226, |
| "learning_rate": 0.0001070142406370664, |
| "loss": 0.1436, |
| "step": 22250 |
| }, |
| { |
| "epoch": 1.008060864052169, |
| "grad_norm": 0.35829347372055054, |
| "learning_rate": 0.00010693954305433795, |
| "loss": 0.1564, |
| "step": 22260 |
| }, |
| { |
| "epoch": 1.0085137215831899, |
| "grad_norm": 0.3740488588809967, |
| "learning_rate": 0.00010686484158059234, |
| "loss": 0.1455, |
| "step": 22270 |
| }, |
| { |
| "epoch": 1.0089665791142106, |
| "grad_norm": 0.3748299777507782, |
| "learning_rate": 0.00010679013625771484, |
| "loss": 0.1524, |
| "step": 22280 |
| }, |
| { |
| "epoch": 1.0094194366452314, |
| "grad_norm": 0.5208830237388611, |
| "learning_rate": 0.00010671542712759285, |
| "loss": 0.1742, |
| "step": 22290 |
| }, |
| { |
| "epoch": 1.0098722941762521, |
| "grad_norm": 0.4118936359882355, |
| "learning_rate": 0.00010664071423211599, |
| "loss": 0.1486, |
| "step": 22300 |
| }, |
| { |
| "epoch": 1.0103251517072729, |
| "grad_norm": 0.3148258924484253, |
| "learning_rate": 0.00010656599761317591, |
| "loss": 0.1282, |
| "step": 22310 |
| }, |
| { |
| "epoch": 1.0107780092382936, |
| "grad_norm": 0.3419557213783264, |
| "learning_rate": 0.00010649127731266641, |
| "loss": 0.1379, |
| "step": 22320 |
| }, |
| { |
| "epoch": 1.0112308667693144, |
| "grad_norm": 0.40964436531066895, |
| "learning_rate": 0.00010641655337248335, |
| "loss": 0.1484, |
| "step": 22330 |
| }, |
| { |
| "epoch": 1.0116837243003352, |
| "grad_norm": 0.32588711380958557, |
| "learning_rate": 0.00010634182583452456, |
| "loss": 0.1606, |
| "step": 22340 |
| }, |
| { |
| "epoch": 1.012136581831356, |
| "grad_norm": 0.4060667157173157, |
| "learning_rate": 0.00010626709474068996, |
| "loss": 0.1553, |
| "step": 22350 |
| }, |
| { |
| "epoch": 1.0125894393623767, |
| "grad_norm": 0.3781129717826843, |
| "learning_rate": 0.00010619236013288143, |
| "loss": 0.1389, |
| "step": 22360 |
| }, |
| { |
| "epoch": 1.0130422968933974, |
| "grad_norm": 0.4548001289367676, |
| "learning_rate": 0.00010611762205300286, |
| "loss": 0.1704, |
| "step": 22370 |
| }, |
| { |
| "epoch": 1.0134951544244182, |
| "grad_norm": 0.3514746427536011, |
| "learning_rate": 0.00010604288054296001, |
| "loss": 0.1213, |
| "step": 22380 |
| }, |
| { |
| "epoch": 1.0139480119554387, |
| "grad_norm": 0.46039777994155884, |
| "learning_rate": 0.00010596813564466064, |
| "loss": 0.1597, |
| "step": 22390 |
| }, |
| { |
| "epoch": 1.0144008694864595, |
| "grad_norm": 0.7013415098190308, |
| "learning_rate": 0.00010589338740001438, |
| "loss": 0.1791, |
| "step": 22400 |
| }, |
| { |
| "epoch": 1.0148537270174802, |
| "grad_norm": 0.3029846251010895, |
| "learning_rate": 0.00010581863585093272, |
| "loss": 0.1366, |
| "step": 22410 |
| }, |
| { |
| "epoch": 1.015306584548501, |
| "grad_norm": 0.31886354088783264, |
| "learning_rate": 0.00010574388103932904, |
| "loss": 0.1605, |
| "step": 22420 |
| }, |
| { |
| "epoch": 1.0157594420795217, |
| "grad_norm": 0.3466900587081909, |
| "learning_rate": 0.00010566912300711854, |
| "loss": 0.1679, |
| "step": 22430 |
| }, |
| { |
| "epoch": 1.0162122996105425, |
| "grad_norm": 0.4567810297012329, |
| "learning_rate": 0.00010559436179621818, |
| "loss": 0.1647, |
| "step": 22440 |
| }, |
| { |
| "epoch": 1.0166651571415632, |
| "grad_norm": 0.4758337140083313, |
| "learning_rate": 0.00010551959744854673, |
| "loss": 0.1779, |
| "step": 22450 |
| }, |
| { |
| "epoch": 1.017118014672584, |
| "grad_norm": 0.28552138805389404, |
| "learning_rate": 0.00010544483000602477, |
| "loss": 0.1367, |
| "step": 22460 |
| }, |
| { |
| "epoch": 1.0175708722036048, |
| "grad_norm": 0.5133971571922302, |
| "learning_rate": 0.00010537005951057454, |
| "loss": 0.1613, |
| "step": 22470 |
| }, |
| { |
| "epoch": 1.0180237297346255, |
| "grad_norm": 0.4176846444606781, |
| "learning_rate": 0.00010529528600412005, |
| "loss": 0.1583, |
| "step": 22480 |
| }, |
| { |
| "epoch": 1.0184765872656463, |
| "grad_norm": 0.39284244179725647, |
| "learning_rate": 0.00010522050952858692, |
| "loss": 0.1496, |
| "step": 22490 |
| }, |
| { |
| "epoch": 1.018929444796667, |
| "grad_norm": 0.38390839099884033, |
| "learning_rate": 0.00010514573012590252, |
| "loss": 0.1524, |
| "step": 22500 |
| }, |
| { |
| "epoch": 1.0193823023276878, |
| "grad_norm": 0.5051958560943604, |
| "learning_rate": 0.00010507094783799583, |
| "loss": 0.1695, |
| "step": 22510 |
| }, |
| { |
| "epoch": 1.0198351598587085, |
| "grad_norm": 0.4255245327949524, |
| "learning_rate": 0.00010499616270679747, |
| "loss": 0.1525, |
| "step": 22520 |
| }, |
| { |
| "epoch": 1.0202880173897293, |
| "grad_norm": 0.5033703446388245, |
| "learning_rate": 0.00010492137477423955, |
| "loss": 0.1549, |
| "step": 22530 |
| }, |
| { |
| "epoch": 1.0207408749207498, |
| "grad_norm": 0.39385154843330383, |
| "learning_rate": 0.00010484658408225589, |
| "loss": 0.1412, |
| "step": 22540 |
| }, |
| { |
| "epoch": 1.0211937324517706, |
| "grad_norm": 0.34952855110168457, |
| "learning_rate": 0.0001047717906727818, |
| "loss": 0.158, |
| "step": 22550 |
| }, |
| { |
| "epoch": 1.0216465899827913, |
| "grad_norm": 0.35554689168930054, |
| "learning_rate": 0.0001046969945877541, |
| "loss": 0.1451, |
| "step": 22560 |
| }, |
| { |
| "epoch": 1.022099447513812, |
| "grad_norm": 0.313905268907547, |
| "learning_rate": 0.00010462219586911111, |
| "loss": 0.1746, |
| "step": 22570 |
| }, |
| { |
| "epoch": 1.0225523050448329, |
| "grad_norm": 0.3594076633453369, |
| "learning_rate": 0.00010454739455879263, |
| "loss": 0.1796, |
| "step": 22580 |
| }, |
| { |
| "epoch": 1.0230051625758536, |
| "grad_norm": 0.312707781791687, |
| "learning_rate": 0.00010447259069873993, |
| "loss": 0.149, |
| "step": 22590 |
| }, |
| { |
| "epoch": 1.0234580201068744, |
| "grad_norm": 0.3713543713092804, |
| "learning_rate": 0.0001043977843308957, |
| "loss": 0.1529, |
| "step": 22600 |
| }, |
| { |
| "epoch": 1.0239108776378951, |
| "grad_norm": 0.8227018713951111, |
| "learning_rate": 0.00010432297549720407, |
| "loss": 0.1308, |
| "step": 22610 |
| }, |
| { |
| "epoch": 1.0243637351689159, |
| "grad_norm": 0.38186246156692505, |
| "learning_rate": 0.00010424816423961046, |
| "loss": 0.1658, |
| "step": 22620 |
| }, |
| { |
| "epoch": 1.0248165926999366, |
| "grad_norm": 0.4258041977882385, |
| "learning_rate": 0.00010417335060006177, |
| "loss": 0.1493, |
| "step": 22630 |
| }, |
| { |
| "epoch": 1.0252694502309574, |
| "grad_norm": 0.3841048777103424, |
| "learning_rate": 0.00010409853462050611, |
| "loss": 0.1593, |
| "step": 22640 |
| }, |
| { |
| "epoch": 1.0257223077619781, |
| "grad_norm": 0.2536158561706543, |
| "learning_rate": 0.000104023716342893, |
| "loss": 0.1557, |
| "step": 22650 |
| }, |
| { |
| "epoch": 1.026175165292999, |
| "grad_norm": 0.28186845779418945, |
| "learning_rate": 0.00010394889580917325, |
| "loss": 0.151, |
| "step": 22660 |
| }, |
| { |
| "epoch": 1.0266280228240197, |
| "grad_norm": 0.40862837433815, |
| "learning_rate": 0.00010387407306129882, |
| "loss": 0.1464, |
| "step": 22670 |
| }, |
| { |
| "epoch": 1.0270808803550402, |
| "grad_norm": 0.4918057322502136, |
| "learning_rate": 0.00010379924814122304, |
| "loss": 0.1499, |
| "step": 22680 |
| }, |
| { |
| "epoch": 1.027533737886061, |
| "grad_norm": 0.39476755261421204, |
| "learning_rate": 0.00010372442109090039, |
| "loss": 0.1311, |
| "step": 22690 |
| }, |
| { |
| "epoch": 1.0279865954170817, |
| "grad_norm": 0.38193678855895996, |
| "learning_rate": 0.00010364959195228656, |
| "loss": 0.1567, |
| "step": 22700 |
| }, |
| { |
| "epoch": 1.0284394529481025, |
| "grad_norm": 0.40314528346061707, |
| "learning_rate": 0.00010357476076733847, |
| "loss": 0.1545, |
| "step": 22710 |
| }, |
| { |
| "epoch": 1.0288923104791232, |
| "grad_norm": 0.3467330038547516, |
| "learning_rate": 0.00010349992757801408, |
| "loss": 0.1469, |
| "step": 22720 |
| }, |
| { |
| "epoch": 1.029345168010144, |
| "grad_norm": 0.2559974789619446, |
| "learning_rate": 0.00010342509242627252, |
| "loss": 0.1517, |
| "step": 22730 |
| }, |
| { |
| "epoch": 1.0297980255411647, |
| "grad_norm": 0.5004274845123291, |
| "learning_rate": 0.00010335025535407403, |
| "loss": 0.1448, |
| "step": 22740 |
| }, |
| { |
| "epoch": 1.0302508830721855, |
| "grad_norm": 0.30660852789878845, |
| "learning_rate": 0.00010327541640337996, |
| "loss": 0.1423, |
| "step": 22750 |
| }, |
| { |
| "epoch": 1.0307037406032062, |
| "grad_norm": 0.22897501289844513, |
| "learning_rate": 0.00010320057561615262, |
| "loss": 0.162, |
| "step": 22760 |
| }, |
| { |
| "epoch": 1.031156598134227, |
| "grad_norm": 0.3936751186847687, |
| "learning_rate": 0.00010312573303435544, |
| "loss": 0.1315, |
| "step": 22770 |
| }, |
| { |
| "epoch": 1.0316094556652478, |
| "grad_norm": 0.3321261703968048, |
| "learning_rate": 0.0001030508886999528, |
| "loss": 0.1582, |
| "step": 22780 |
| }, |
| { |
| "epoch": 1.0320623131962685, |
| "grad_norm": 0.30580833554267883, |
| "learning_rate": 0.00010297604265491012, |
| "loss": 0.1694, |
| "step": 22790 |
| }, |
| { |
| "epoch": 1.0325151707272893, |
| "grad_norm": 0.3953670859336853, |
| "learning_rate": 0.00010290119494119372, |
| "loss": 0.1545, |
| "step": 22800 |
| }, |
| { |
| "epoch": 1.03296802825831, |
| "grad_norm": 0.41720178723335266, |
| "learning_rate": 0.00010282634560077087, |
| "loss": 0.1492, |
| "step": 22810 |
| }, |
| { |
| "epoch": 1.0334208857893308, |
| "grad_norm": 0.3415760397911072, |
| "learning_rate": 0.00010275149467560978, |
| "loss": 0.1699, |
| "step": 22820 |
| }, |
| { |
| "epoch": 1.0338737433203513, |
| "grad_norm": 0.5049695372581482, |
| "learning_rate": 0.00010267664220767954, |
| "loss": 0.1823, |
| "step": 22830 |
| }, |
| { |
| "epoch": 1.034326600851372, |
| "grad_norm": 0.25039663910865784, |
| "learning_rate": 0.00010260178823895005, |
| "loss": 0.1235, |
| "step": 22840 |
| }, |
| { |
| "epoch": 1.0347794583823928, |
| "grad_norm": 0.44545069336891174, |
| "learning_rate": 0.00010252693281139212, |
| "loss": 0.1686, |
| "step": 22850 |
| }, |
| { |
| "epoch": 1.0352323159134136, |
| "grad_norm": 0.4185168445110321, |
| "learning_rate": 0.00010245207596697737, |
| "loss": 0.1647, |
| "step": 22860 |
| }, |
| { |
| "epoch": 1.0356851734444343, |
| "grad_norm": 0.3525523841381073, |
| "learning_rate": 0.0001023772177476782, |
| "loss": 0.1423, |
| "step": 22870 |
| }, |
| { |
| "epoch": 1.036138030975455, |
| "grad_norm": 0.40866556763648987, |
| "learning_rate": 0.00010230235819546772, |
| "loss": 0.1598, |
| "step": 22880 |
| }, |
| { |
| "epoch": 1.0365908885064758, |
| "grad_norm": 0.244676873087883, |
| "learning_rate": 0.00010222749735231993, |
| "loss": 0.1442, |
| "step": 22890 |
| }, |
| { |
| "epoch": 1.0370437460374966, |
| "grad_norm": 0.47297218441963196, |
| "learning_rate": 0.00010215263526020943, |
| "loss": 0.1649, |
| "step": 22900 |
| }, |
| { |
| "epoch": 1.0374966035685174, |
| "grad_norm": 0.44725802540779114, |
| "learning_rate": 0.00010207777196111156, |
| "loss": 0.1343, |
| "step": 22910 |
| }, |
| { |
| "epoch": 1.0379494610995381, |
| "grad_norm": 0.4038509726524353, |
| "learning_rate": 0.00010200290749700234, |
| "loss": 0.1538, |
| "step": 22920 |
| }, |
| { |
| "epoch": 1.0384023186305589, |
| "grad_norm": 0.34586551785469055, |
| "learning_rate": 0.00010192804190985845, |
| "loss": 0.1473, |
| "step": 22930 |
| }, |
| { |
| "epoch": 1.0388551761615796, |
| "grad_norm": 0.3961934447288513, |
| "learning_rate": 0.0001018531752416572, |
| "loss": 0.1465, |
| "step": 22940 |
| }, |
| { |
| "epoch": 1.0393080336926004, |
| "grad_norm": 0.46139267086982727, |
| "learning_rate": 0.00010177830753437645, |
| "loss": 0.1622, |
| "step": 22950 |
| }, |
| { |
| "epoch": 1.0397608912236211, |
| "grad_norm": 0.4097284972667694, |
| "learning_rate": 0.00010170343882999473, |
| "loss": 0.1822, |
| "step": 22960 |
| }, |
| { |
| "epoch": 1.040213748754642, |
| "grad_norm": 0.34919238090515137, |
| "learning_rate": 0.00010162856917049109, |
| "loss": 0.1658, |
| "step": 22970 |
| }, |
| { |
| "epoch": 1.0406666062856624, |
| "grad_norm": 0.4870430827140808, |
| "learning_rate": 0.0001015536985978451, |
| "loss": 0.1514, |
| "step": 22980 |
| }, |
| { |
| "epoch": 1.0411194638166832, |
| "grad_norm": 0.38768109679222107, |
| "learning_rate": 0.00010147882715403683, |
| "loss": 0.1511, |
| "step": 22990 |
| }, |
| { |
| "epoch": 1.041572321347704, |
| "grad_norm": 0.35548967123031616, |
| "learning_rate": 0.00010140395488104693, |
| "loss": 0.1608, |
| "step": 23000 |
| }, |
| { |
| "epoch": 1.041572321347704, |
| "eval_chrf": 84.92145032155595, |
| "eval_loss": 0.14364776015281677, |
| "eval_runtime": 12.9171, |
| "eval_samples_per_second": 0.774, |
| "eval_steps_per_second": 0.077, |
| "step": 23000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 44164, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.023525553167401e+18, |
| "train_batch_size": 16, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|