| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.9961270333075136, |
| "eval_steps": 500, |
| "global_step": 1935, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.015491866769945779, |
| "grad_norm": 0.40339195728302, |
| "learning_rate": 9.278350515463919e-07, |
| "loss": 1.8526, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.030983733539891558, |
| "grad_norm": 0.3715074062347412, |
| "learning_rate": 1.9587628865979384e-06, |
| "loss": 1.8835, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.046475600309837335, |
| "grad_norm": 0.3445941209793091, |
| "learning_rate": 2.9896907216494846e-06, |
| "loss": 1.8733, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.061967467079783116, |
| "grad_norm": 0.3444959223270416, |
| "learning_rate": 4.020618556701032e-06, |
| "loss": 1.8952, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07745933384972889, |
| "grad_norm": 0.3230769634246826, |
| "learning_rate": 5.051546391752578e-06, |
| "loss": 1.8588, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09295120061967467, |
| "grad_norm": 0.33751699328422546, |
| "learning_rate": 6.082474226804124e-06, |
| "loss": 1.8307, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.10844306738962045, |
| "grad_norm": 0.2963869869709015, |
| "learning_rate": 7.113402061855671e-06, |
| "loss": 1.8419, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12393493415956623, |
| "grad_norm": 0.2550879716873169, |
| "learning_rate": 8.144329896907216e-06, |
| "loss": 1.8281, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.139426800929512, |
| "grad_norm": 0.22610870003700256, |
| "learning_rate": 9.175257731958764e-06, |
| "loss": 1.8005, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.15491866769945778, |
| "grad_norm": 0.1930716335773468, |
| "learning_rate": 1.0206185567010309e-05, |
| "loss": 1.7897, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.17041053446940357, |
| "grad_norm": 0.18139410018920898, |
| "learning_rate": 1.1237113402061856e-05, |
| "loss": 1.7181, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.18590240123934934, |
| "grad_norm": 0.1586223542690277, |
| "learning_rate": 1.2268041237113405e-05, |
| "loss": 1.7328, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2013942680092951, |
| "grad_norm": 0.2047862708568573, |
| "learning_rate": 1.3298969072164948e-05, |
| "loss": 1.7268, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2168861347792409, |
| "grad_norm": 0.16858699917793274, |
| "learning_rate": 1.4329896907216495e-05, |
| "loss": 1.7029, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.23237800154918667, |
| "grad_norm": 0.15446807444095612, |
| "learning_rate": 1.5360824742268042e-05, |
| "loss": 1.7114, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.24786986831913246, |
| "grad_norm": 0.1550971418619156, |
| "learning_rate": 1.6391752577319588e-05, |
| "loss": 1.7054, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.26336173508907823, |
| "grad_norm": 0.13774091005325317, |
| "learning_rate": 1.7422680412371137e-05, |
| "loss": 1.685, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.278853601859024, |
| "grad_norm": 0.13972033560276031, |
| "learning_rate": 1.8453608247422682e-05, |
| "loss": 1.6774, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.29434546862896976, |
| "grad_norm": 0.14723576605319977, |
| "learning_rate": 1.9484536082474227e-05, |
| "loss": 1.63, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.30983733539891556, |
| "grad_norm": 0.1486756056547165, |
| "learning_rate": 1.9999592986072886e-05, |
| "loss": 1.6429, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.32532920216886135, |
| "grad_norm": 0.1460903435945511, |
| "learning_rate": 1.9996337073445673e-05, |
| "loss": 1.669, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.34082106893880715, |
| "grad_norm": 0.16247884929180145, |
| "learning_rate": 1.9989826308331103e-05, |
| "loss": 1.6572, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.3563129357087529, |
| "grad_norm": 0.16369281709194183, |
| "learning_rate": 1.998006281066369e-05, |
| "loss": 1.6545, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3718048024786987, |
| "grad_norm": 0.1450214833021164, |
| "learning_rate": 1.996704975948236e-05, |
| "loss": 1.6762, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.3872966692486445, |
| "grad_norm": 0.17881567776203156, |
| "learning_rate": 1.9950791391895335e-05, |
| "loss": 1.6397, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.4027885360185902, |
| "grad_norm": 0.15863952040672302, |
| "learning_rate": 1.9931293001700518e-05, |
| "loss": 1.6597, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.418280402788536, |
| "grad_norm": 0.1730642467737198, |
| "learning_rate": 1.990856093766179e-05, |
| "loss": 1.6586, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.4337722695584818, |
| "grad_norm": 0.14790953695774078, |
| "learning_rate": 1.988260260144185e-05, |
| "loss": 1.6416, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.4492641363284276, |
| "grad_norm": 0.14577385783195496, |
| "learning_rate": 1.9853426445192175e-05, |
| "loss": 1.659, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.46475600309837334, |
| "grad_norm": 0.13978034257888794, |
| "learning_rate": 1.9821041968800982e-05, |
| "loss": 1.6362, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.48024786986831913, |
| "grad_norm": 0.17719446122646332, |
| "learning_rate": 1.9785459716800005e-05, |
| "loss": 1.6648, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4957397366382649, |
| "grad_norm": 0.1415952891111374, |
| "learning_rate": 1.9746691274931168e-05, |
| "loss": 1.6518, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.5112316034082107, |
| "grad_norm": 0.1455707997083664, |
| "learning_rate": 1.970474926637418e-05, |
| "loss": 1.6445, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.5267234701781565, |
| "grad_norm": 0.19560855627059937, |
| "learning_rate": 1.9659647347636422e-05, |
| "loss": 1.6557, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.5422153369481022, |
| "grad_norm": 0.15447896718978882, |
| "learning_rate": 1.961140020410627e-05, |
| "loss": 1.6361, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.557707203718048, |
| "grad_norm": 0.17043063044548035, |
| "learning_rate": 1.9560023545271512e-05, |
| "loss": 1.6289, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.5731990704879938, |
| "grad_norm": 0.16939246654510498, |
| "learning_rate": 1.9505534099604245e-05, |
| "loss": 1.6318, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5886909372579395, |
| "grad_norm": 0.1785915642976761, |
| "learning_rate": 1.9447949609114018e-05, |
| "loss": 1.6321, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.6041828040278854, |
| "grad_norm": 0.16734299063682556, |
| "learning_rate": 1.938728882357093e-05, |
| "loss": 1.6443, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.6196746707978311, |
| "grad_norm": 0.18670211732387543, |
| "learning_rate": 1.932357149440067e-05, |
| "loss": 1.6683, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.635166537567777, |
| "grad_norm": 0.15596874058246613, |
| "learning_rate": 1.925681836825331e-05, |
| "loss": 1.6336, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.6506584043377227, |
| "grad_norm": 0.18547730147838593, |
| "learning_rate": 1.9187051180248134e-05, |
| "loss": 1.6374, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.6661502711076684, |
| "grad_norm": 0.15439730882644653, |
| "learning_rate": 1.9114292646896574e-05, |
| "loss": 1.6298, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6816421378776143, |
| "grad_norm": 0.15702180564403534, |
| "learning_rate": 1.9038566458705615e-05, |
| "loss": 1.6235, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.69713400464756, |
| "grad_norm": 0.20213943719863892, |
| "learning_rate": 1.895989727246405e-05, |
| "loss": 1.6511, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.7126258714175058, |
| "grad_norm": 0.17505770921707153, |
| "learning_rate": 1.8878310703214148e-05, |
| "loss": 1.6385, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.7281177381874516, |
| "grad_norm": 0.29876482486724854, |
| "learning_rate": 1.879383331591123e-05, |
| "loss": 1.6508, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.7436096049573974, |
| "grad_norm": 0.16168834269046783, |
| "learning_rate": 1.8706492616774043e-05, |
| "loss": 1.6424, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.7591014717273431, |
| "grad_norm": 0.1786322444677353, |
| "learning_rate": 1.86163170443286e-05, |
| "loss": 1.6699, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.774593338497289, |
| "grad_norm": 0.16993215680122375, |
| "learning_rate": 1.8523335960148446e-05, |
| "loss": 1.6499, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7900852052672347, |
| "grad_norm": 0.15823154151439667, |
| "learning_rate": 1.8427579639294436e-05, |
| "loss": 1.6227, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.8055770720371804, |
| "grad_norm": 0.1717902421951294, |
| "learning_rate": 1.8329079260457e-05, |
| "loss": 1.6216, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.8210689388071263, |
| "grad_norm": 0.17375893890857697, |
| "learning_rate": 1.822786689580425e-05, |
| "loss": 1.6264, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.836560805577072, |
| "grad_norm": 0.18277738988399506, |
| "learning_rate": 1.8123975500539114e-05, |
| "loss": 1.6314, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.8520526723470179, |
| "grad_norm": 0.16117972135543823, |
| "learning_rate": 1.8017438902168987e-05, |
| "loss": 1.6274, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.8675445391169636, |
| "grad_norm": 0.1836647391319275, |
| "learning_rate": 1.7908291789491348e-05, |
| "loss": 1.6163, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.8830364058869093, |
| "grad_norm": 0.1835409700870514, |
| "learning_rate": 1.7796569701298906e-05, |
| "loss": 1.624, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.8985282726568552, |
| "grad_norm": 0.20427103340625763, |
| "learning_rate": 1.7682309014808043e-05, |
| "loss": 1.6575, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.9140201394268009, |
| "grad_norm": 0.18505772948265076, |
| "learning_rate": 1.756554693381419e-05, |
| "loss": 1.6478, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.9295120061967467, |
| "grad_norm": 0.16430599987506866, |
| "learning_rate": 1.7446321476578138e-05, |
| "loss": 1.6358, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.9450038729666925, |
| "grad_norm": 0.1757052093744278, |
| "learning_rate": 1.7324671463447092e-05, |
| "loss": 1.6383, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.9604957397366383, |
| "grad_norm": 0.1813608705997467, |
| "learning_rate": 1.7200636504214618e-05, |
| "loss": 1.6394, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.975987606506584, |
| "grad_norm": 0.16642023622989655, |
| "learning_rate": 1.7074256985223496e-05, |
| "loss": 1.6293, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.9914794732765299, |
| "grad_norm": 0.16024267673492432, |
| "learning_rate": 1.6945574056215742e-05, |
| "loss": 1.6186, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.0061967467079782, |
| "grad_norm": 0.1785087138414383, |
| "learning_rate": 1.6814629616934078e-05, |
| "loss": 1.6575, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.0216886134779242, |
| "grad_norm": 0.15702630579471588, |
| "learning_rate": 1.6681466303479196e-05, |
| "loss": 1.6431, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.03718048024787, |
| "grad_norm": 0.18289993703365326, |
| "learning_rate": 1.6546127474427217e-05, |
| "loss": 1.6231, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.0526723470178156, |
| "grad_norm": 0.18264566361904144, |
| "learning_rate": 1.6408657196711977e-05, |
| "loss": 1.6293, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.0681642137877614, |
| "grad_norm": 0.1770721971988678, |
| "learning_rate": 1.6269100231276617e-05, |
| "loss": 1.6123, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.0836560805577071, |
| "grad_norm": 0.18177859485149384, |
| "learning_rate": 1.6127502018499216e-05, |
| "loss": 1.6367, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.099147947327653, |
| "grad_norm": 0.17744611203670502, |
| "learning_rate": 1.598390866339721e-05, |
| "loss": 1.6037, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.1146398140975988, |
| "grad_norm": 0.17207013070583344, |
| "learning_rate": 1.5838366920615395e-05, |
| "loss": 1.6184, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.1301316808675446, |
| "grad_norm": 0.19843930006027222, |
| "learning_rate": 1.5690924179202375e-05, |
| "loss": 1.649, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.1456235476374903, |
| "grad_norm": 0.1967499703168869, |
| "learning_rate": 1.5541628447180494e-05, |
| "loss": 1.644, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.161115414407436, |
| "grad_norm": 0.2497672736644745, |
| "learning_rate": 1.5390528335914216e-05, |
| "loss": 1.6452, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.1766072811773818, |
| "grad_norm": 0.22346507012844086, |
| "learning_rate": 1.5237673044282028e-05, |
| "loss": 1.5909, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.1920991479473277, |
| "grad_norm": 0.1980983465909958, |
| "learning_rate": 1.5083112342657071e-05, |
| "loss": 1.6415, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.2075910147172735, |
| "grad_norm": 0.22096283733844757, |
| "learning_rate": 1.4926896556701676e-05, |
| "loss": 1.6281, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.2230828814872192, |
| "grad_norm": 0.20375041663646698, |
| "learning_rate": 1.4769076550981107e-05, |
| "loss": 1.6327, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.238574748257165, |
| "grad_norm": 0.1991424262523651, |
| "learning_rate": 1.4609703712401832e-05, |
| "loss": 1.628, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.2540666150271107, |
| "grad_norm": 0.1924315243959427, |
| "learning_rate": 1.44488299334797e-05, |
| "loss": 1.6228, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.2695584817970564, |
| "grad_norm": 0.200628861784935, |
| "learning_rate": 1.4286507595443527e-05, |
| "loss": 1.6252, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.2850503485670024, |
| "grad_norm": 0.18464049696922302, |
| "learning_rate": 1.4122789551179495e-05, |
| "loss": 1.6521, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.3005422153369481, |
| "grad_norm": 0.19593623280525208, |
| "learning_rate": 1.3957729108022057e-05, |
| "loss": 1.6357, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.3160340821068939, |
| "grad_norm": 0.1850130409002304, |
| "learning_rate": 1.37913800103968e-05, |
| "loss": 1.6214, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.3315259488768396, |
| "grad_norm": 0.19042982161045074, |
| "learning_rate": 1.3623796422321018e-05, |
| "loss": 1.6258, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.3470178156467854, |
| "grad_norm": 0.16621778905391693, |
| "learning_rate": 1.345503290976768e-05, |
| "loss": 1.5954, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.3625096824167313, |
| "grad_norm": 0.22534021735191345, |
| "learning_rate": 1.3285144422898486e-05, |
| "loss": 1.63, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.378001549186677, |
| "grad_norm": 0.20389960706233978, |
| "learning_rate": 1.3114186278171855e-05, |
| "loss": 1.6105, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.3934934159566228, |
| "grad_norm": 0.19341149926185608, |
| "learning_rate": 1.294221414033163e-05, |
| "loss": 1.6311, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.4089852827265685, |
| "grad_norm": 0.18202827870845795, |
| "learning_rate": 1.2769284004282398e-05, |
| "loss": 1.6071, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.4244771494965143, |
| "grad_norm": 0.24331383407115936, |
| "learning_rate": 1.2595452176857283e-05, |
| "loss": 1.6308, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.4399690162664602, |
| "grad_norm": 0.19823439419269562, |
| "learning_rate": 1.2420775258484194e-05, |
| "loss": 1.6338, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.4554608830364058, |
| "grad_norm": 0.20651289820671082, |
| "learning_rate": 1.224531012475647e-05, |
| "loss": 1.6289, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.4709527498063517, |
| "grad_norm": 0.19937390089035034, |
| "learning_rate": 1.2069113907913921e-05, |
| "loss": 1.6251, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.4864446165762975, |
| "grad_norm": 0.2028992474079132, |
| "learning_rate": 1.1892243978240332e-05, |
| "loss": 1.6032, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.5019364833462432, |
| "grad_norm": 0.19516679644584656, |
| "learning_rate": 1.1714757925383418e-05, |
| "loss": 1.658, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.5174283501161892, |
| "grad_norm": 0.19880403578281403, |
| "learning_rate": 1.1536713539603392e-05, |
| "loss": 1.6286, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.5329202168861347, |
| "grad_norm": 0.20677272975444794, |
| "learning_rate": 1.1358168792956178e-05, |
| "loss": 1.6366, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.5484120836560806, |
| "grad_norm": 0.19314983487129211, |
| "learning_rate": 1.1179181820417469e-05, |
| "loss": 1.6277, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.5639039504260264, |
| "grad_norm": 0.22714097797870636, |
| "learning_rate": 1.0999810900953701e-05, |
| "loss": 1.604, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.579395817195972, |
| "grad_norm": 0.19973160326480865, |
| "learning_rate": 1.0820114438546152e-05, |
| "loss": 1.6419, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.5948876839659178, |
| "grad_norm": 0.19317737221717834, |
| "learning_rate": 1.0640150943174368e-05, |
| "loss": 1.6208, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.6103795507358636, |
| "grad_norm": 0.19110378623008728, |
| "learning_rate": 1.045997901176503e-05, |
| "loss": 1.6419, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.6258714175058095, |
| "grad_norm": 0.19884715974330902, |
| "learning_rate": 1.0279657309112526e-05, |
| "loss": 1.6374, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.641363284275755, |
| "grad_norm": 0.2037876695394516, |
| "learning_rate": 1.0099244548777444e-05, |
| "loss": 1.6226, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.656855151045701, |
| "grad_norm": 0.19929030537605286, |
| "learning_rate": 9.918799473969162e-06, |
| "loss": 1.6246, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.6723470178156468, |
| "grad_norm": 0.2003982663154602, |
| "learning_rate": 9.738380838418804e-06, |
| "loss": 1.6284, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.6878388845855925, |
| "grad_norm": 0.19803060591220856, |
| "learning_rate": 9.558047387248736e-06, |
| "loss": 1.6735, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.7033307513555385, |
| "grad_norm": 0.1816408932209015, |
| "learning_rate": 9.37785783784492e-06, |
| "loss": 1.6476, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.718822618125484, |
| "grad_norm": 0.2075837105512619, |
| "learning_rate": 9.197870860738245e-06, |
| "loss": 1.6332, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.73431448489543, |
| "grad_norm": 0.18789494037628174, |
| "learning_rate": 9.018145060501152e-06, |
| "loss": 1.6261, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.7498063516653757, |
| "grad_norm": 0.2126590460538864, |
| "learning_rate": 8.838738956665709e-06, |
| "loss": 1.638, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.7652982184353214, |
| "grad_norm": 0.19256949424743652, |
| "learning_rate": 8.65971096466939e-06, |
| "loss": 1.633, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.7807900852052674, |
| "grad_norm": 0.22011205554008484, |
| "learning_rate": 8.481119376834753e-06, |
| "loss": 1.6397, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.796281951975213, |
| "grad_norm": 0.2018994390964508, |
| "learning_rate": 8.303022343389188e-06, |
| "loss": 1.6424, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.8117738187451589, |
| "grad_norm": 0.22901326417922974, |
| "learning_rate": 8.125477853530944e-06, |
| "loss": 1.594, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.8272656855151046, |
| "grad_norm": 0.20335343480110168, |
| "learning_rate": 7.948543716547584e-06, |
| "loss": 1.6246, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.8427575522850503, |
| "grad_norm": 0.19213028252124786, |
| "learning_rate": 7.772277542993006e-06, |
| "loss": 1.6424, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.858249419054996, |
| "grad_norm": 0.5027441382408142, |
| "learning_rate": 7.596736725929218e-06, |
| "loss": 1.618, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.8737412858249418, |
| "grad_norm": 0.19005711376667023, |
| "learning_rate": 7.421978422238871e-06, |
| "loss": 1.6082, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.8892331525948878, |
| "grad_norm": 0.21805432438850403, |
| "learning_rate": 7.248059534014728e-06, |
| "loss": 1.6259, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.9047250193648335, |
| "grad_norm": 0.21786893904209137, |
| "learning_rate": 7.075036690032088e-06, |
| "loss": 1.6041, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.9202168861347793, |
| "grad_norm": 0.20781753957271576, |
| "learning_rate": 6.9029662273102015e-06, |
| "loss": 1.6276, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.935708752904725, |
| "grad_norm": 0.20254895091056824, |
| "learning_rate": 6.731904172768668e-06, |
| "loss": 1.6146, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.9512006196746707, |
| "grad_norm": 0.17970645427703857, |
| "learning_rate": 6.561906224984844e-06, |
| "loss": 1.6505, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.9666924864446167, |
| "grad_norm": 0.19660718739032745, |
| "learning_rate": 6.393027736058117e-06, |
| "loss": 1.6383, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.9821843532145622, |
| "grad_norm": 0.21242186427116394, |
| "learning_rate": 6.225323693587014e-06, |
| "loss": 1.6003, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.9976762199845082, |
| "grad_norm": 0.20118798315525055, |
| "learning_rate": 6.0588487027649954e-06, |
| "loss": 1.671, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.0123934934159564, |
| "grad_norm": 0.19734402000904083, |
| "learning_rate": 5.89365696860075e-06, |
| "loss": 1.6098, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.0278853601859024, |
| "grad_norm": 0.2078474462032318, |
| "learning_rate": 5.729802278268813e-06, |
| "loss": 1.6456, |
| "step": 1310 |
| }, |
| { |
| "epoch": 2.0433772269558483, |
| "grad_norm": 0.1926499605178833, |
| "learning_rate": 5.567337983596201e-06, |
| "loss": 1.5858, |
| "step": 1320 |
| }, |
| { |
| "epoch": 2.058869093725794, |
| "grad_norm": 0.1910993903875351, |
| "learning_rate": 5.4063169836908355e-06, |
| "loss": 1.6251, |
| "step": 1330 |
| }, |
| { |
| "epoch": 2.07436096049574, |
| "grad_norm": 0.1886081099510193, |
| "learning_rate": 5.246791707717343e-06, |
| "loss": 1.6366, |
| "step": 1340 |
| }, |
| { |
| "epoch": 2.0898528272656853, |
| "grad_norm": 0.20095673203468323, |
| "learning_rate": 5.088814097825871e-06, |
| "loss": 1.6346, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.1053446940356313, |
| "grad_norm": 0.22384069859981537, |
| "learning_rate": 4.93243559223952e-06, |
| "loss": 1.6186, |
| "step": 1360 |
| }, |
| { |
| "epoch": 2.1208365608055773, |
| "grad_norm": 0.19841165840625763, |
| "learning_rate": 4.777707108505801e-06, |
| "loss": 1.6127, |
| "step": 1370 |
| }, |
| { |
| "epoch": 2.1363284275755228, |
| "grad_norm": 0.2496616691350937, |
| "learning_rate": 4.624679026917658e-06, |
| "loss": 1.6116, |
| "step": 1380 |
| }, |
| { |
| "epoch": 2.1518202943454687, |
| "grad_norm": 0.1951538771390915, |
| "learning_rate": 4.473401174109423e-06, |
| "loss": 1.6312, |
| "step": 1390 |
| }, |
| { |
| "epoch": 2.1673121611154142, |
| "grad_norm": 0.21068304777145386, |
| "learning_rate": 4.323922806833031e-06, |
| "loss": 1.6177, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.18280402788536, |
| "grad_norm": 0.20252861082553864, |
| "learning_rate": 4.176292595919803e-06, |
| "loss": 1.6402, |
| "step": 1410 |
| }, |
| { |
| "epoch": 2.198295894655306, |
| "grad_norm": 0.18212322890758514, |
| "learning_rate": 4.030558610433005e-06, |
| "loss": 1.6559, |
| "step": 1420 |
| }, |
| { |
| "epoch": 2.2137877614252517, |
| "grad_norm": 0.4208082854747772, |
| "learning_rate": 3.8867683020163446e-06, |
| "loss": 1.6085, |
| "step": 1430 |
| }, |
| { |
| "epoch": 2.2292796281951976, |
| "grad_norm": 0.21806304156780243, |
| "learning_rate": 3.744968489443488e-06, |
| "loss": 1.6341, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.244771494965143, |
| "grad_norm": 0.2629954516887665, |
| "learning_rate": 3.6052053433736777e-06, |
| "loss": 1.6332, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.260263361735089, |
| "grad_norm": 0.21003496646881104, |
| "learning_rate": 3.4675243713183436e-06, |
| "loss": 1.641, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.2757552285050346, |
| "grad_norm": 0.20624355971813202, |
| "learning_rate": 3.3319704028236553e-06, |
| "loss": 1.6215, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.2912470952749806, |
| "grad_norm": 0.20449386537075043, |
| "learning_rate": 3.1985875748738193e-06, |
| "loss": 1.6289, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.3067389620449266, |
| "grad_norm": 0.2125074714422226, |
| "learning_rate": 3.067419317519875e-06, |
| "loss": 1.6083, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.322230828814872, |
| "grad_norm": 0.22426113486289978, |
| "learning_rate": 2.938508339738683e-06, |
| "loss": 1.6209, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.337722695584818, |
| "grad_norm": 0.20228077471256256, |
| "learning_rate": 2.81189661552667e-06, |
| "loss": 1.6317, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.3532145623547636, |
| "grad_norm": 0.21674266457557678, |
| "learning_rate": 2.68762537023293e-06, |
| "loss": 1.6008, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.3687064291247095, |
| "grad_norm": 0.20280566811561584, |
| "learning_rate": 2.5657350671360514e-06, |
| "loss": 1.6458, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.3841982958946555, |
| "grad_norm": 0.19379937648773193, |
| "learning_rate": 2.4462653942690895e-06, |
| "loss": 1.6324, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.399690162664601, |
| "grad_norm": 0.19534389674663544, |
| "learning_rate": 2.3292552514969723e-06, |
| "loss": 1.6027, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.415182029434547, |
| "grad_norm": 0.19875919818878174, |
| "learning_rate": 2.214742737850514e-06, |
| "loss": 1.6078, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.4306738962044925, |
| "grad_norm": 0.2097163200378418, |
| "learning_rate": 2.1027651391212158e-06, |
| "loss": 1.6286, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.4461657629744384, |
| "grad_norm": 0.2157508283853531, |
| "learning_rate": 1.9933589157208356e-06, |
| "loss": 1.6266, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.4616576297443844, |
| "grad_norm": 0.2269178032875061, |
| "learning_rate": 1.8865596908097105e-06, |
| "loss": 1.6323, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.47714949651433, |
| "grad_norm": 0.19033896923065186, |
| "learning_rate": 1.7824022386977014e-06, |
| "loss": 1.6399, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.492641363284276, |
| "grad_norm": 0.19749732315540314, |
| "learning_rate": 1.6809204735215179e-06, |
| "loss": 1.6319, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.5081332300542214, |
| "grad_norm": 0.20683521032333374, |
| "learning_rate": 1.5821474382021128e-06, |
| "loss": 1.6358, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.5236250968241674, |
| "grad_norm": 0.20513305068016052, |
| "learning_rate": 1.4861152936857792e-06, |
| "loss": 1.6265, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.539116963594113, |
| "grad_norm": 0.209518700838089, |
| "learning_rate": 1.3928553084723828e-06, |
| "loss": 1.6172, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.554608830364059, |
| "grad_norm": 0.1861906498670578, |
| "learning_rate": 1.3023978484342027e-06, |
| "loss": 1.6316, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.570100697134005, |
| "grad_norm": 0.2016245722770691, |
| "learning_rate": 1.2147723669286703e-06, |
| "loss": 1.5947, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.5855925639039503, |
| "grad_norm": 0.213214710354805, |
| "learning_rate": 1.1300073952082147e-06, |
| "loss": 1.5899, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.6010844306738963, |
| "grad_norm": 0.20174169540405273, |
| "learning_rate": 1.0481305331303659e-06, |
| "loss": 1.6347, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.6165762974438422, |
| "grad_norm": 0.21556764841079712, |
| "learning_rate": 9.691684401711143e-07, |
| "loss": 1.641, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.6320681642137878, |
| "grad_norm": 0.21022352576255798, |
| "learning_rate": 8.93146826744462e-07, |
| "loss": 1.609, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.6475600309837333, |
| "grad_norm": 0.22143913805484772, |
| "learning_rate": 8.200904458310022e-07, |
| "loss": 1.6119, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.6630518977536792, |
| "grad_norm": 0.20341992378234863, |
| "learning_rate": 7.500230849182278e-07, |
| "loss": 1.5872, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.678543764523625, |
| "grad_norm": 0.20610594749450684, |
| "learning_rate": 6.829675582552253e-07, |
| "loss": 1.6578, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.6940356312935707, |
| "grad_norm": 0.2443259358406067, |
| "learning_rate": 6.189456994242516e-07, |
| "loss": 1.6138, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.7095274980635167, |
| "grad_norm": 0.1948014199733734, |
| "learning_rate": 5.579783542316175e-07, |
| "loss": 1.6071, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.7250193648334626, |
| "grad_norm": 0.21449612081050873, |
| "learning_rate": 5.000853739202039e-07, |
| "loss": 1.628, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.740511231603408, |
| "grad_norm": 0.21719783544540405, |
| "learning_rate": 4.452856087058044e-07, |
| "loss": 1.6278, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.756003098373354, |
| "grad_norm": 0.20155809819698334, |
| "learning_rate": 3.935969016394048e-07, |
| "loss": 1.6397, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.7714949651432996, |
| "grad_norm": 0.23018154501914978, |
| "learning_rate": 3.450360827974175e-07, |
| "loss": 1.6381, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.7869868319132456, |
| "grad_norm": 0.21632038056850433, |
| "learning_rate": 2.996189638017233e-07, |
| "loss": 1.6044, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.802478698683191, |
| "grad_norm": 0.22561421990394592, |
| "learning_rate": 2.57360332671337e-07, |
| "loss": 1.6358, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.817970565453137, |
| "grad_norm": 0.21071907877922058, |
| "learning_rate": 2.1827394900736377e-07, |
| "loss": 1.6223, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.833462432223083, |
| "grad_norm": 0.2211053967475891, |
| "learning_rate": 1.8237253951281287e-07, |
| "loss": 1.6293, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.8489542989930285, |
| "grad_norm": 0.21328748762607574, |
| "learning_rate": 1.4966779384871789e-07, |
| "loss": 1.602, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.8644461657629745, |
| "grad_norm": 0.23242874443531036, |
| "learning_rate": 1.2017036082793922e-07, |
| "loss": 1.6249, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.8799380325329205, |
| "grad_norm": 0.20212414860725403, |
| "learning_rate": 9.388984494785869e-08, |
| "loss": 1.6401, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.895429899302866, |
| "grad_norm": 0.21839973330497742, |
| "learning_rate": 7.08348032631101e-08, |
| "loss": 1.6185, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.9109217660728115, |
| "grad_norm": 0.2018660604953766, |
| "learning_rate": 5.101274259936451e-08, |
| "loss": 1.6284, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.9264136328427575, |
| "grad_norm": 0.2246350198984146, |
| "learning_rate": 3.443011710907662e-08, |
| "loss": 1.6129, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.9419054996127034, |
| "grad_norm": 0.2279912680387497, |
| "learning_rate": 2.109232616998247e-08, |
| "loss": 1.6084, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.957397366382649, |
| "grad_norm": 0.22195599973201752, |
| "learning_rate": 1.100371262703459e-08, |
| "loss": 1.6222, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.972889233152595, |
| "grad_norm": 0.20175737142562866, |
| "learning_rate": 4.1675613783565e-09, |
| "loss": 1.6109, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.988381099922541, |
| "grad_norm": 0.23770569264888763, |
| "learning_rate": 5.860983056604763e-10, |
| "loss": 1.629, |
| "step": 1930 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1935, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3807044630404874e+18, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |