{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 2555, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0019588638589618022, "grad_norm": 1.1779376668292296, "learning_rate": 0.0, "loss": 0.6095, "num_tokens": 957228.0, "step": 1 }, { "epoch": 0.0039177277179236044, "grad_norm": 1.14453866876595, "learning_rate": 1.298701298701299e-07, "loss": 0.5863, "num_tokens": 1903020.0, "step": 2 }, { "epoch": 0.005876591576885406, "grad_norm": 1.1558548993371778, "learning_rate": 2.597402597402598e-07, "loss": 0.5987, "num_tokens": 2835325.0, "step": 3 }, { "epoch": 0.007835455435847209, "grad_norm": 1.1580675566497325, "learning_rate": 3.896103896103896e-07, "loss": 0.6001, "num_tokens": 3804624.0, "step": 4 }, { "epoch": 0.009794319294809012, "grad_norm": 1.131687899148094, "learning_rate": 5.194805194805196e-07, "loss": 0.591, "num_tokens": 4760897.0, "step": 5 }, { "epoch": 0.011753183153770812, "grad_norm": 1.1365950486885912, "learning_rate": 6.493506493506493e-07, "loss": 0.5938, "num_tokens": 5696321.0, "step": 6 }, { "epoch": 0.013712047012732615, "grad_norm": 1.1369691359607021, "learning_rate": 7.792207792207792e-07, "loss": 0.5961, "num_tokens": 6645763.0, "step": 7 }, { "epoch": 0.015670910871694418, "grad_norm": 1.1233025219879869, "learning_rate": 9.090909090909091e-07, "loss": 0.5764, "num_tokens": 7583888.0, "step": 8 }, { "epoch": 0.01762977473065622, "grad_norm": 1.1513047413325934, "learning_rate": 1.0389610389610392e-06, "loss": 0.6001, "num_tokens": 8544514.0, "step": 9 }, { "epoch": 0.019588638589618023, "grad_norm": 1.090111147138629, "learning_rate": 1.168831168831169e-06, "loss": 0.5682, "num_tokens": 9505325.0, "step": 10 }, { "epoch": 0.021547502448579822, "grad_norm": 1.128435799580429, "learning_rate": 1.2987012987012986e-06, "loss": 0.5969, "num_tokens": 10496025.0, "step": 11 }, { "epoch": 0.023506366307541625, "grad_norm": 1.1419469297779599, "learning_rate": 1.4285714285714286e-06, "loss": 0.5918, "num_tokens": 11483781.0, "step": 12 }, { "epoch": 0.025465230166503428, "grad_norm": 1.0837958411329958, "learning_rate": 1.5584415584415584e-06, "loss": 0.5797, "num_tokens": 12421527.0, "step": 13 }, { "epoch": 0.02742409402546523, "grad_norm": 1.1202969148222806, "learning_rate": 1.6883116883116885e-06, "loss": 0.5968, "num_tokens": 13402688.0, "step": 14 }, { "epoch": 0.029382957884427033, "grad_norm": 1.0672780406526166, "learning_rate": 1.8181818181818183e-06, "loss": 0.5771, "num_tokens": 14367162.0, "step": 15 }, { "epoch": 0.031341821743388835, "grad_norm": 1.0931389983274584, "learning_rate": 1.9480519480519483e-06, "loss": 0.6058, "num_tokens": 15278974.0, "step": 16 }, { "epoch": 0.03330068560235064, "grad_norm": 0.9912007092939249, "learning_rate": 2.0779220779220784e-06, "loss": 0.5904, "num_tokens": 16229440.0, "step": 17 }, { "epoch": 0.03525954946131244, "grad_norm": 0.9334645992333939, "learning_rate": 2.207792207792208e-06, "loss": 0.5673, "num_tokens": 17174371.0, "step": 18 }, { "epoch": 0.03721841332027424, "grad_norm": 0.9154496415543664, "learning_rate": 2.337662337662338e-06, "loss": 0.5739, "num_tokens": 18132871.0, "step": 19 }, { "epoch": 0.039177277179236046, "grad_norm": 0.9180404646017777, "learning_rate": 2.4675324675324676e-06, "loss": 0.5808, "num_tokens": 19079741.0, "step": 20 }, { "epoch": 0.04113614103819784, "grad_norm": 0.8626980715207839, "learning_rate": 2.597402597402597e-06, "loss": 0.5777, "num_tokens": 20037323.0, "step": 21 }, { "epoch": 0.043095004897159644, "grad_norm": 0.8510957187588554, "learning_rate": 2.7272727272727272e-06, "loss": 0.5781, "num_tokens": 20992105.0, "step": 22 }, { "epoch": 0.04505386875612145, "grad_norm": 0.822763278842823, "learning_rate": 2.8571428571428573e-06, "loss": 0.5522, "num_tokens": 21917740.0, "step": 23 }, { "epoch": 0.04701273261508325, "grad_norm": 0.5682119647768065, "learning_rate": 2.9870129870129873e-06, "loss": 0.5346, "num_tokens": 22868921.0, "step": 24 }, { "epoch": 0.04897159647404505, "grad_norm": 0.5212911432498203, "learning_rate": 3.116883116883117e-06, "loss": 0.5392, "num_tokens": 23827074.0, "step": 25 }, { "epoch": 0.050930460333006855, "grad_norm": 0.5034383592828012, "learning_rate": 3.246753246753247e-06, "loss": 0.5447, "num_tokens": 24746581.0, "step": 26 }, { "epoch": 0.05288932419196866, "grad_norm": 0.47309618987632196, "learning_rate": 3.376623376623377e-06, "loss": 0.546, "num_tokens": 25705115.0, "step": 27 }, { "epoch": 0.05484818805093046, "grad_norm": 0.44261582789260967, "learning_rate": 3.506493506493507e-06, "loss": 0.5519, "num_tokens": 26632617.0, "step": 28 }, { "epoch": 0.05680705190989226, "grad_norm": 0.4182589041396013, "learning_rate": 3.6363636363636366e-06, "loss": 0.5313, "num_tokens": 27575977.0, "step": 29 }, { "epoch": 0.058765915768854066, "grad_norm": 0.38193294303014225, "learning_rate": 3.7662337662337666e-06, "loss": 0.5208, "num_tokens": 28568378.0, "step": 30 }, { "epoch": 0.06072477962781587, "grad_norm": 0.35997287973769965, "learning_rate": 3.896103896103897e-06, "loss": 0.5199, "num_tokens": 29517430.0, "step": 31 }, { "epoch": 0.06268364348677767, "grad_norm": 0.24736828742042855, "learning_rate": 4.025974025974026e-06, "loss": 0.5232, "num_tokens": 30460107.0, "step": 32 }, { "epoch": 0.06464250734573947, "grad_norm": 0.29430500472969917, "learning_rate": 4.155844155844157e-06, "loss": 0.4902, "num_tokens": 31376862.0, "step": 33 }, { "epoch": 0.06660137120470128, "grad_norm": 0.3813996244800663, "learning_rate": 4.2857142857142855e-06, "loss": 0.5095, "num_tokens": 32333039.0, "step": 34 }, { "epoch": 0.06856023506366307, "grad_norm": 0.3734526706562682, "learning_rate": 4.415584415584416e-06, "loss": 0.4982, "num_tokens": 33301775.0, "step": 35 }, { "epoch": 0.07051909892262488, "grad_norm": 0.4071231857824654, "learning_rate": 4.5454545454545455e-06, "loss": 0.5053, "num_tokens": 34281803.0, "step": 36 }, { "epoch": 0.07247796278158668, "grad_norm": 0.38881600781955006, "learning_rate": 4.675324675324676e-06, "loss": 0.5156, "num_tokens": 35278446.0, "step": 37 }, { "epoch": 0.07443682664054849, "grad_norm": 0.34959618219486355, "learning_rate": 4.805194805194806e-06, "loss": 0.495, "num_tokens": 36174529.0, "step": 38 }, { "epoch": 0.07639569049951028, "grad_norm": 0.30582243304816603, "learning_rate": 4.935064935064935e-06, "loss": 0.4981, "num_tokens": 37096798.0, "step": 39 }, { "epoch": 0.07835455435847209, "grad_norm": 0.2683098108248299, "learning_rate": 5.064935064935065e-06, "loss": 0.5024, "num_tokens": 38025690.0, "step": 40 }, { "epoch": 0.08031341821743389, "grad_norm": 0.2227478708148392, "learning_rate": 5.194805194805194e-06, "loss": 0.49, "num_tokens": 38967788.0, "step": 41 }, { "epoch": 0.08227228207639568, "grad_norm": 0.18108191554443825, "learning_rate": 5.324675324675325e-06, "loss": 0.5014, "num_tokens": 39929380.0, "step": 42 }, { "epoch": 0.0842311459353575, "grad_norm": 0.15497597357842827, "learning_rate": 5.4545454545454545e-06, "loss": 0.4946, "num_tokens": 40891263.0, "step": 43 }, { "epoch": 0.08619000979431929, "grad_norm": 0.15671125686656093, "learning_rate": 5.584415584415585e-06, "loss": 0.4799, "num_tokens": 41833850.0, "step": 44 }, { "epoch": 0.0881488736532811, "grad_norm": 0.17507981763637595, "learning_rate": 5.7142857142857145e-06, "loss": 0.5, "num_tokens": 42760693.0, "step": 45 }, { "epoch": 0.0901077375122429, "grad_norm": 0.18455600553146348, "learning_rate": 5.844155844155844e-06, "loss": 0.4942, "num_tokens": 43665379.0, "step": 46 }, { "epoch": 0.0920666013712047, "grad_norm": 0.20963781093615366, "learning_rate": 5.9740259740259746e-06, "loss": 0.4791, "num_tokens": 44609358.0, "step": 47 }, { "epoch": 0.0940254652301665, "grad_norm": 0.20219312988770385, "learning_rate": 6.103896103896104e-06, "loss": 0.493, "num_tokens": 45561854.0, "step": 48 }, { "epoch": 0.09598432908912831, "grad_norm": 0.18454774054032433, "learning_rate": 6.233766233766234e-06, "loss": 0.4865, "num_tokens": 46546409.0, "step": 49 }, { "epoch": 0.0979431929480901, "grad_norm": 0.16319189525503663, "learning_rate": 6.363636363636364e-06, "loss": 0.4849, "num_tokens": 47486971.0, "step": 50 }, { "epoch": 0.09990205680705191, "grad_norm": 0.14530181600723818, "learning_rate": 6.493506493506494e-06, "loss": 0.4934, "num_tokens": 48457717.0, "step": 51 }, { "epoch": 0.10186092066601371, "grad_norm": 0.12857250564688383, "learning_rate": 6.623376623376624e-06, "loss": 0.4863, "num_tokens": 49385047.0, "step": 52 }, { "epoch": 0.10381978452497552, "grad_norm": 0.13135618685388173, "learning_rate": 6.753246753246754e-06, "loss": 0.489, "num_tokens": 50295374.0, "step": 53 }, { "epoch": 0.10577864838393732, "grad_norm": 0.134389045356148, "learning_rate": 6.8831168831168835e-06, "loss": 0.4723, "num_tokens": 51245492.0, "step": 54 }, { "epoch": 0.10773751224289912, "grad_norm": 0.144607400296704, "learning_rate": 7.012987012987014e-06, "loss": 0.4751, "num_tokens": 52187156.0, "step": 55 }, { "epoch": 0.10969637610186092, "grad_norm": 0.15328301289443136, "learning_rate": 7.1428571428571436e-06, "loss": 0.4743, "num_tokens": 53175990.0, "step": 56 }, { "epoch": 0.11165523996082272, "grad_norm": 0.13884142700520555, "learning_rate": 7.272727272727273e-06, "loss": 0.4647, "num_tokens": 54116901.0, "step": 57 }, { "epoch": 0.11361410381978453, "grad_norm": 0.13359747069670078, "learning_rate": 7.402597402597404e-06, "loss": 0.4692, "num_tokens": 55059892.0, "step": 58 }, { "epoch": 0.11557296767874632, "grad_norm": 0.1200875386005197, "learning_rate": 7.532467532467533e-06, "loss": 0.478, "num_tokens": 56011558.0, "step": 59 }, { "epoch": 0.11753183153770813, "grad_norm": 0.11530192743485264, "learning_rate": 7.662337662337663e-06, "loss": 0.4694, "num_tokens": 56978072.0, "step": 60 }, { "epoch": 0.11949069539666993, "grad_norm": 0.11869136818231486, "learning_rate": 7.792207792207793e-06, "loss": 0.483, "num_tokens": 57932775.0, "step": 61 }, { "epoch": 0.12144955925563174, "grad_norm": 0.10469749897829281, "learning_rate": 7.922077922077924e-06, "loss": 0.4744, "num_tokens": 58921803.0, "step": 62 }, { "epoch": 0.12340842311459353, "grad_norm": 0.1220032554685136, "learning_rate": 8.051948051948052e-06, "loss": 0.4739, "num_tokens": 59825350.0, "step": 63 }, { "epoch": 0.12536728697355534, "grad_norm": 0.12522096927717064, "learning_rate": 8.181818181818183e-06, "loss": 0.4808, "num_tokens": 60736848.0, "step": 64 }, { "epoch": 0.12732615083251714, "grad_norm": 1.7728242669152647, "learning_rate": 8.311688311688313e-06, "loss": 0.4775, "num_tokens": 61692068.0, "step": 65 }, { "epoch": 0.12928501469147893, "grad_norm": 0.1248389564577605, "learning_rate": 8.441558441558442e-06, "loss": 0.4897, "num_tokens": 62657029.0, "step": 66 }, { "epoch": 0.13124387855044076, "grad_norm": 0.11025857585858609, "learning_rate": 8.571428571428571e-06, "loss": 0.4659, "num_tokens": 63579287.0, "step": 67 }, { "epoch": 0.13320274240940255, "grad_norm": 0.11299800436889519, "learning_rate": 8.701298701298701e-06, "loss": 0.4816, "num_tokens": 64536200.0, "step": 68 }, { "epoch": 0.13516160626836435, "grad_norm": 0.10967372205625224, "learning_rate": 8.831168831168832e-06, "loss": 0.4781, "num_tokens": 65474334.0, "step": 69 }, { "epoch": 0.13712047012732614, "grad_norm": 0.10362839198604146, "learning_rate": 8.96103896103896e-06, "loss": 0.4756, "num_tokens": 66428712.0, "step": 70 }, { "epoch": 0.13907933398628794, "grad_norm": 0.10866195870559414, "learning_rate": 9.090909090909091e-06, "loss": 0.4682, "num_tokens": 67373340.0, "step": 71 }, { "epoch": 0.14103819784524976, "grad_norm": 0.10862245165189673, "learning_rate": 9.220779220779221e-06, "loss": 0.4941, "num_tokens": 68279915.0, "step": 72 }, { "epoch": 0.14299706170421156, "grad_norm": 0.10756775407424338, "learning_rate": 9.350649350649352e-06, "loss": 0.4757, "num_tokens": 69239390.0, "step": 73 }, { "epoch": 0.14495592556317335, "grad_norm": 0.10241016679452052, "learning_rate": 9.48051948051948e-06, "loss": 0.4838, "num_tokens": 70186275.0, "step": 74 }, { "epoch": 0.14691478942213515, "grad_norm": 0.10414405498619668, "learning_rate": 9.610389610389611e-06, "loss": 0.4751, "num_tokens": 71150805.0, "step": 75 }, { "epoch": 0.14887365328109697, "grad_norm": 0.10111059772940759, "learning_rate": 9.740259740259742e-06, "loss": 0.4675, "num_tokens": 72095683.0, "step": 76 }, { "epoch": 0.15083251714005877, "grad_norm": 0.1014881117391909, "learning_rate": 9.87012987012987e-06, "loss": 0.4621, "num_tokens": 73031911.0, "step": 77 }, { "epoch": 0.15279138099902057, "grad_norm": 0.10404653706199633, "learning_rate": 1e-05, "loss": 0.48, "num_tokens": 73977616.0, "step": 78 }, { "epoch": 0.15475024485798236, "grad_norm": 0.10386392762988193, "learning_rate": 9.99999598174872e-06, "loss": 0.4722, "num_tokens": 74938755.0, "step": 79 }, { "epoch": 0.15670910871694418, "grad_norm": 0.10369883566275276, "learning_rate": 9.999983927001336e-06, "loss": 0.4826, "num_tokens": 75890007.0, "step": 80 }, { "epoch": 0.15866797257590598, "grad_norm": 0.10335357131879185, "learning_rate": 9.999963835777224e-06, "loss": 0.4733, "num_tokens": 76850365.0, "step": 81 }, { "epoch": 0.16062683643486778, "grad_norm": 0.1014465030505948, "learning_rate": 9.99993570810868e-06, "loss": 0.4725, "num_tokens": 77789082.0, "step": 82 }, { "epoch": 0.16258570029382957, "grad_norm": 0.0969864906933695, "learning_rate": 9.999899544040908e-06, "loss": 0.4603, "num_tokens": 78746891.0, "step": 83 }, { "epoch": 0.16454456415279137, "grad_norm": 0.09908553798822536, "learning_rate": 9.999855343632037e-06, "loss": 0.4705, "num_tokens": 79652749.0, "step": 84 }, { "epoch": 0.1665034280117532, "grad_norm": 0.10178596303441723, "learning_rate": 9.99980310695311e-06, "loss": 0.4718, "num_tokens": 80592581.0, "step": 85 }, { "epoch": 0.168462291870715, "grad_norm": 0.10039756909225854, "learning_rate": 9.99974283408809e-06, "loss": 0.4702, "num_tokens": 81534344.0, "step": 86 }, { "epoch": 0.17042115572967678, "grad_norm": 0.10292980005118785, "learning_rate": 9.99967452513385e-06, "loss": 0.4774, "num_tokens": 82487549.0, "step": 87 }, { "epoch": 0.17238001958863858, "grad_norm": 0.10086921342177411, "learning_rate": 9.999598180200186e-06, "loss": 0.4843, "num_tokens": 83472867.0, "step": 88 }, { "epoch": 0.1743388834476004, "grad_norm": 0.10124601224604896, "learning_rate": 9.999513799409804e-06, "loss": 0.4555, "num_tokens": 84390727.0, "step": 89 }, { "epoch": 0.1762977473065622, "grad_norm": 0.10185638919754106, "learning_rate": 9.99942138289833e-06, "loss": 0.4529, "num_tokens": 85371181.0, "step": 90 }, { "epoch": 0.178256611165524, "grad_norm": 0.09902084863732517, "learning_rate": 9.999320930814307e-06, "loss": 0.465, "num_tokens": 86357901.0, "step": 91 }, { "epoch": 0.1802154750244858, "grad_norm": 0.09762296388519529, "learning_rate": 9.999212443319191e-06, "loss": 0.4628, "num_tokens": 87343535.0, "step": 92 }, { "epoch": 0.1821743388834476, "grad_norm": 0.09448978900098406, "learning_rate": 9.999095920587352e-06, "loss": 0.4452, "num_tokens": 88297905.0, "step": 93 }, { "epoch": 0.1841332027424094, "grad_norm": 0.09431248116101847, "learning_rate": 9.998971362806081e-06, "loss": 0.4453, "num_tokens": 89225471.0, "step": 94 }, { "epoch": 0.1860920666013712, "grad_norm": 0.10055354255483136, "learning_rate": 9.998838770175575e-06, "loss": 0.4678, "num_tokens": 90164738.0, "step": 95 }, { "epoch": 0.188050930460333, "grad_norm": 0.10230246968909389, "learning_rate": 9.998698142908954e-06, "loss": 0.4664, "num_tokens": 91125556.0, "step": 96 }, { "epoch": 0.19000979431929482, "grad_norm": 0.10625479777481586, "learning_rate": 9.998549481232247e-06, "loss": 0.4568, "num_tokens": 92093541.0, "step": 97 }, { "epoch": 0.19196865817825662, "grad_norm": 0.09861753488551372, "learning_rate": 9.998392785384397e-06, "loss": 0.4807, "num_tokens": 93025537.0, "step": 98 }, { "epoch": 0.1939275220372184, "grad_norm": 0.09927039259565387, "learning_rate": 9.998228055617264e-06, "loss": 0.4584, "num_tokens": 93946672.0, "step": 99 }, { "epoch": 0.1958863858961802, "grad_norm": 0.10248383003320141, "learning_rate": 9.998055292195615e-06, "loss": 0.4696, "num_tokens": 94908060.0, "step": 100 }, { "epoch": 0.197845249755142, "grad_norm": 0.10451441636833611, "learning_rate": 9.997874495397135e-06, "loss": 0.4729, "num_tokens": 95832620.0, "step": 101 }, { "epoch": 0.19980411361410383, "grad_norm": 0.09930874844991569, "learning_rate": 9.997685665512418e-06, "loss": 0.4687, "num_tokens": 96768246.0, "step": 102 }, { "epoch": 0.20176297747306562, "grad_norm": 0.09437867539319489, "learning_rate": 9.99748880284497e-06, "loss": 0.4647, "num_tokens": 97763421.0, "step": 103 }, { "epoch": 0.20372184133202742, "grad_norm": 0.09494153272122935, "learning_rate": 9.997283907711208e-06, "loss": 0.4711, "num_tokens": 98710496.0, "step": 104 }, { "epoch": 0.20568070519098922, "grad_norm": 0.09366365651865821, "learning_rate": 9.997070980440463e-06, "loss": 0.4488, "num_tokens": 99659612.0, "step": 105 }, { "epoch": 0.20763956904995104, "grad_norm": 0.10299014324035684, "learning_rate": 9.996850021374969e-06, "loss": 0.4773, "num_tokens": 100617208.0, "step": 106 }, { "epoch": 0.20959843290891284, "grad_norm": 0.09487793679625174, "learning_rate": 9.996621030869876e-06, "loss": 0.4669, "num_tokens": 101615021.0, "step": 107 }, { "epoch": 0.21155729676787463, "grad_norm": 0.09766041109201681, "learning_rate": 9.99638400929324e-06, "loss": 0.4668, "num_tokens": 102528013.0, "step": 108 }, { "epoch": 0.21351616062683643, "grad_norm": 0.09904813750168198, "learning_rate": 9.996138957026026e-06, "loss": 0.4649, "num_tokens": 103493513.0, "step": 109 }, { "epoch": 0.21547502448579825, "grad_norm": 0.09869728621227473, "learning_rate": 9.99588587446211e-06, "loss": 0.4734, "num_tokens": 104431043.0, "step": 110 }, { "epoch": 0.21743388834476005, "grad_norm": 0.09597599562526722, "learning_rate": 9.995624762008265e-06, "loss": 0.4577, "num_tokens": 105379859.0, "step": 111 }, { "epoch": 0.21939275220372184, "grad_norm": 0.09831372165074696, "learning_rate": 9.995355620084182e-06, "loss": 0.4609, "num_tokens": 106311570.0, "step": 112 }, { "epoch": 0.22135161606268364, "grad_norm": 0.09551136337814933, "learning_rate": 9.99507844912245e-06, "loss": 0.4609, "num_tokens": 107290051.0, "step": 113 }, { "epoch": 0.22331047992164543, "grad_norm": 0.09801363147795901, "learning_rate": 9.994793249568568e-06, "loss": 0.4541, "num_tokens": 108225334.0, "step": 114 }, { "epoch": 0.22526934378060726, "grad_norm": 0.09284290995644295, "learning_rate": 9.994500021880939e-06, "loss": 0.4575, "num_tokens": 109192825.0, "step": 115 }, { "epoch": 0.22722820763956905, "grad_norm": 0.09904086714200677, "learning_rate": 9.994198766530864e-06, "loss": 0.4591, "num_tokens": 110153495.0, "step": 116 }, { "epoch": 0.22918707149853085, "grad_norm": 0.09637123963622646, "learning_rate": 9.993889484002553e-06, "loss": 0.4464, "num_tokens": 111098418.0, "step": 117 }, { "epoch": 0.23114593535749264, "grad_norm": 0.10312546519736356, "learning_rate": 9.993572174793116e-06, "loss": 0.4524, "num_tokens": 112058709.0, "step": 118 }, { "epoch": 0.23310479921645447, "grad_norm": 0.09479021598118789, "learning_rate": 9.993246839412565e-06, "loss": 0.4572, "num_tokens": 113041947.0, "step": 119 }, { "epoch": 0.23506366307541626, "grad_norm": 0.0970162426335051, "learning_rate": 9.99291347838381e-06, "loss": 0.4597, "num_tokens": 113981192.0, "step": 120 }, { "epoch": 0.23702252693437806, "grad_norm": 0.09632502927371162, "learning_rate": 9.992572092242664e-06, "loss": 0.4809, "num_tokens": 114966318.0, "step": 121 }, { "epoch": 0.23898139079333985, "grad_norm": 0.10416236432460022, "learning_rate": 9.992222681537835e-06, "loss": 0.4567, "num_tokens": 115939878.0, "step": 122 }, { "epoch": 0.24094025465230168, "grad_norm": 0.09873768878554416, "learning_rate": 9.991865246830935e-06, "loss": 0.4664, "num_tokens": 116915166.0, "step": 123 }, { "epoch": 0.24289911851126347, "grad_norm": 0.1003905240372282, "learning_rate": 9.991499788696463e-06, "loss": 0.4707, "num_tokens": 117866964.0, "step": 124 }, { "epoch": 0.24485798237022527, "grad_norm": 0.0935735181673574, "learning_rate": 9.991126307721826e-06, "loss": 0.4684, "num_tokens": 118837592.0, "step": 125 }, { "epoch": 0.24681684622918706, "grad_norm": 0.0982612410275377, "learning_rate": 9.990744804507315e-06, "loss": 0.4701, "num_tokens": 119778663.0, "step": 126 }, { "epoch": 0.24877571008814886, "grad_norm": 0.09777596468391736, "learning_rate": 9.990355279666124e-06, "loss": 0.4645, "num_tokens": 120718675.0, "step": 127 }, { "epoch": 0.2507345739471107, "grad_norm": 0.10350691006912617, "learning_rate": 9.989957733824336e-06, "loss": 0.4781, "num_tokens": 121638180.0, "step": 128 }, { "epoch": 0.25269343780607245, "grad_norm": 0.10022440245488685, "learning_rate": 9.989552167620926e-06, "loss": 0.4612, "num_tokens": 122582958.0, "step": 129 }, { "epoch": 0.2546523016650343, "grad_norm": 0.10176036166639991, "learning_rate": 9.98913858170776e-06, "loss": 0.4776, "num_tokens": 123522480.0, "step": 130 }, { "epoch": 0.2566111655239961, "grad_norm": 0.09743072937780196, "learning_rate": 9.988716976749596e-06, "loss": 0.4612, "num_tokens": 124434998.0, "step": 131 }, { "epoch": 0.25857002938295787, "grad_norm": 0.10040714573425007, "learning_rate": 9.988287353424077e-06, "loss": 0.4632, "num_tokens": 125399319.0, "step": 132 }, { "epoch": 0.2605288932419197, "grad_norm": 0.09584888758063531, "learning_rate": 9.987849712421741e-06, "loss": 0.4506, "num_tokens": 126331517.0, "step": 133 }, { "epoch": 0.2624877571008815, "grad_norm": 0.09510636035826173, "learning_rate": 9.987404054446009e-06, "loss": 0.435, "num_tokens": 127286047.0, "step": 134 }, { "epoch": 0.2644466209598433, "grad_norm": 0.09737635875932876, "learning_rate": 9.986950380213182e-06, "loss": 0.4664, "num_tokens": 128243241.0, "step": 135 }, { "epoch": 0.2664054848188051, "grad_norm": 0.10324901586881302, "learning_rate": 9.986488690452456e-06, "loss": 0.4458, "num_tokens": 129183998.0, "step": 136 }, { "epoch": 0.2683643486777669, "grad_norm": 0.09511139653578601, "learning_rate": 9.986018985905901e-06, "loss": 0.4508, "num_tokens": 130112534.0, "step": 137 }, { "epoch": 0.2703232125367287, "grad_norm": 0.09701502066571129, "learning_rate": 9.985541267328479e-06, "loss": 0.4565, "num_tokens": 131075496.0, "step": 138 }, { "epoch": 0.2722820763956905, "grad_norm": 0.09540233096015516, "learning_rate": 9.98505553548802e-06, "loss": 0.4587, "num_tokens": 132049561.0, "step": 139 }, { "epoch": 0.2742409402546523, "grad_norm": 0.09394609726026784, "learning_rate": 9.984561791165245e-06, "loss": 0.4464, "num_tokens": 133008081.0, "step": 140 }, { "epoch": 0.2761998041136141, "grad_norm": 0.09773689866820262, "learning_rate": 9.984060035153752e-06, "loss": 0.4695, "num_tokens": 133956658.0, "step": 141 }, { "epoch": 0.2781586679725759, "grad_norm": 0.0961859719144124, "learning_rate": 9.983550268260009e-06, "loss": 0.4546, "num_tokens": 134926369.0, "step": 142 }, { "epoch": 0.2801175318315377, "grad_norm": 0.098413033893867, "learning_rate": 9.983032491303368e-06, "loss": 0.4702, "num_tokens": 135845886.0, "step": 143 }, { "epoch": 0.2820763956904995, "grad_norm": 0.09579879342736346, "learning_rate": 9.98250670511605e-06, "loss": 0.4672, "num_tokens": 136808351.0, "step": 144 }, { "epoch": 0.2840352595494613, "grad_norm": 0.09754525670821552, "learning_rate": 9.981972910543151e-06, "loss": 0.4784, "num_tokens": 137754233.0, "step": 145 }, { "epoch": 0.2859941234084231, "grad_norm": 0.1010148746273363, "learning_rate": 9.981431108442644e-06, "loss": 0.4505, "num_tokens": 138693576.0, "step": 146 }, { "epoch": 0.28795298726738494, "grad_norm": 0.1048396205973233, "learning_rate": 9.98088129968536e-06, "loss": 0.4667, "num_tokens": 139639294.0, "step": 147 }, { "epoch": 0.2899118511263467, "grad_norm": 0.09495042331450833, "learning_rate": 9.980323485155013e-06, "loss": 0.4508, "num_tokens": 140623819.0, "step": 148 }, { "epoch": 0.29187071498530853, "grad_norm": 0.09758568279279468, "learning_rate": 9.979757665748177e-06, "loss": 0.4433, "num_tokens": 141575936.0, "step": 149 }, { "epoch": 0.2938295788442703, "grad_norm": 0.09686099257479716, "learning_rate": 9.979183842374294e-06, "loss": 0.4663, "num_tokens": 142543786.0, "step": 150 }, { "epoch": 0.2957884427032321, "grad_norm": 0.09499912774299393, "learning_rate": 9.97860201595567e-06, "loss": 0.4562, "num_tokens": 143541287.0, "step": 151 }, { "epoch": 0.29774730656219395, "grad_norm": 0.09994523700180594, "learning_rate": 9.978012187427474e-06, "loss": 0.4721, "num_tokens": 144461732.0, "step": 152 }, { "epoch": 0.2997061704211557, "grad_norm": 0.10057207703671751, "learning_rate": 9.97741435773774e-06, "loss": 0.4609, "num_tokens": 145407103.0, "step": 153 }, { "epoch": 0.30166503428011754, "grad_norm": 0.10188797169880409, "learning_rate": 9.976808527847357e-06, "loss": 0.4613, "num_tokens": 146342159.0, "step": 154 }, { "epoch": 0.3036238981390793, "grad_norm": 0.10147521760797401, "learning_rate": 9.97619469873008e-06, "loss": 0.4612, "num_tokens": 147243557.0, "step": 155 }, { "epoch": 0.30558276199804113, "grad_norm": 0.09355508554161461, "learning_rate": 9.975572871372513e-06, "loss": 0.4498, "num_tokens": 148193129.0, "step": 156 }, { "epoch": 0.30754162585700295, "grad_norm": 0.09932265253341842, "learning_rate": 9.974943046774121e-06, "loss": 0.4669, "num_tokens": 149159142.0, "step": 157 }, { "epoch": 0.3095004897159647, "grad_norm": 0.10334059832200888, "learning_rate": 9.97430522594722e-06, "loss": 0.4565, "num_tokens": 150120735.0, "step": 158 }, { "epoch": 0.31145935357492655, "grad_norm": 0.10284601693636423, "learning_rate": 9.973659409916982e-06, "loss": 0.4619, "num_tokens": 151058295.0, "step": 159 }, { "epoch": 0.31341821743388837, "grad_norm": 0.1031684679282704, "learning_rate": 9.973005599721425e-06, "loss": 0.474, "num_tokens": 152000140.0, "step": 160 }, { "epoch": 0.31537708129285014, "grad_norm": 0.09461282557066732, "learning_rate": 9.972343796411423e-06, "loss": 0.4488, "num_tokens": 152947362.0, "step": 161 }, { "epoch": 0.31733594515181196, "grad_norm": 0.09383580011255986, "learning_rate": 9.971674001050687e-06, "loss": 0.4547, "num_tokens": 153891057.0, "step": 162 }, { "epoch": 0.31929480901077373, "grad_norm": 0.10351535825224005, "learning_rate": 9.970996214715782e-06, "loss": 0.4461, "num_tokens": 154832165.0, "step": 163 }, { "epoch": 0.32125367286973555, "grad_norm": 0.10053717067009102, "learning_rate": 9.970310438496115e-06, "loss": 0.466, "num_tokens": 155778733.0, "step": 164 }, { "epoch": 0.3232125367286974, "grad_norm": 0.0977421156293791, "learning_rate": 9.969616673493932e-06, "loss": 0.456, "num_tokens": 156722675.0, "step": 165 }, { "epoch": 0.32517140058765914, "grad_norm": 0.09462641000536073, "learning_rate": 9.968914920824327e-06, "loss": 0.4481, "num_tokens": 157706865.0, "step": 166 }, { "epoch": 0.32713026444662097, "grad_norm": 0.09685626465188592, "learning_rate": 9.96820518161522e-06, "loss": 0.4612, "num_tokens": 158677094.0, "step": 167 }, { "epoch": 0.32908912830558273, "grad_norm": 0.10525691473208647, "learning_rate": 9.967487457007382e-06, "loss": 0.4725, "num_tokens": 159628301.0, "step": 168 }, { "epoch": 0.33104799216454456, "grad_norm": 0.10515616858242463, "learning_rate": 9.96676174815441e-06, "loss": 0.4647, "num_tokens": 160509714.0, "step": 169 }, { "epoch": 0.3330068560235064, "grad_norm": 0.09361787506160345, "learning_rate": 9.966028056222734e-06, "loss": 0.4592, "num_tokens": 161487383.0, "step": 170 }, { "epoch": 0.33496571988246815, "grad_norm": 0.10545719861616981, "learning_rate": 9.965286382391619e-06, "loss": 0.4717, "num_tokens": 162412904.0, "step": 171 }, { "epoch": 0.33692458374143, "grad_norm": 0.09518391320721968, "learning_rate": 9.964536727853158e-06, "loss": 0.4557, "num_tokens": 163353241.0, "step": 172 }, { "epoch": 0.3388834476003918, "grad_norm": 0.09686861040030187, "learning_rate": 9.96377909381227e-06, "loss": 0.4688, "num_tokens": 164297299.0, "step": 173 }, { "epoch": 0.34084231145935356, "grad_norm": 0.09648744616787576, "learning_rate": 9.963013481486704e-06, "loss": 0.4438, "num_tokens": 165233075.0, "step": 174 }, { "epoch": 0.3428011753183154, "grad_norm": 0.09489418835775852, "learning_rate": 9.962239892107024e-06, "loss": 0.4666, "num_tokens": 166195433.0, "step": 175 }, { "epoch": 0.34476003917727716, "grad_norm": 0.09528846230202102, "learning_rate": 9.961458326916624e-06, "loss": 0.4514, "num_tokens": 167149954.0, "step": 176 }, { "epoch": 0.346718903036239, "grad_norm": 0.09420971492743258, "learning_rate": 9.960668787171713e-06, "loss": 0.4456, "num_tokens": 168131808.0, "step": 177 }, { "epoch": 0.3486777668952008, "grad_norm": 0.09878621581394058, "learning_rate": 9.959871274141319e-06, "loss": 0.4499, "num_tokens": 169108543.0, "step": 178 }, { "epoch": 0.35063663075416257, "grad_norm": 0.09722607708153193, "learning_rate": 9.959065789107285e-06, "loss": 0.4631, "num_tokens": 170024930.0, "step": 179 }, { "epoch": 0.3525954946131244, "grad_norm": 0.09715582363770695, "learning_rate": 9.958252333364266e-06, "loss": 0.458, "num_tokens": 170956490.0, "step": 180 }, { "epoch": 0.3545543584720862, "grad_norm": 0.10091101459947915, "learning_rate": 9.957430908219734e-06, "loss": 0.4649, "num_tokens": 171888577.0, "step": 181 }, { "epoch": 0.356513222331048, "grad_norm": 0.0956927902389695, "learning_rate": 9.956601514993962e-06, "loss": 0.454, "num_tokens": 172846773.0, "step": 182 }, { "epoch": 0.3584720861900098, "grad_norm": 0.0996617214313076, "learning_rate": 9.955764155020037e-06, "loss": 0.4674, "num_tokens": 173781976.0, "step": 183 }, { "epoch": 0.3604309500489716, "grad_norm": 0.09237992855339172, "learning_rate": 9.954918829643845e-06, "loss": 0.4489, "num_tokens": 174750604.0, "step": 184 }, { "epoch": 0.3623898139079334, "grad_norm": 0.09898987379939003, "learning_rate": 9.954065540224081e-06, "loss": 0.4517, "num_tokens": 175654902.0, "step": 185 }, { "epoch": 0.3643486777668952, "grad_norm": 0.10068051655230977, "learning_rate": 9.953204288132234e-06, "loss": 0.4414, "num_tokens": 176586402.0, "step": 186 }, { "epoch": 0.366307541625857, "grad_norm": 0.09827300657050554, "learning_rate": 9.9523350747526e-06, "loss": 0.4648, "num_tokens": 177515759.0, "step": 187 }, { "epoch": 0.3682664054848188, "grad_norm": 0.09367935695651718, "learning_rate": 9.95145790148226e-06, "loss": 0.4525, "num_tokens": 178476413.0, "step": 188 }, { "epoch": 0.3702252693437806, "grad_norm": 0.0968048834780351, "learning_rate": 9.9505727697311e-06, "loss": 0.4586, "num_tokens": 179446696.0, "step": 189 }, { "epoch": 0.3721841332027424, "grad_norm": 0.10315128405197668, "learning_rate": 9.94967968092179e-06, "loss": 0.4597, "num_tokens": 180353014.0, "step": 190 }, { "epoch": 0.37414299706170423, "grad_norm": 0.09643704412573983, "learning_rate": 9.948778636489793e-06, "loss": 0.4585, "num_tokens": 181352431.0, "step": 191 }, { "epoch": 0.376101860920666, "grad_norm": 0.09753806609190649, "learning_rate": 9.947869637883359e-06, "loss": 0.4436, "num_tokens": 182300791.0, "step": 192 }, { "epoch": 0.3780607247796278, "grad_norm": 0.09620530562443642, "learning_rate": 9.94695268656352e-06, "loss": 0.4474, "num_tokens": 183242162.0, "step": 193 }, { "epoch": 0.38001958863858964, "grad_norm": 0.09601966800599304, "learning_rate": 9.946027784004096e-06, "loss": 0.4384, "num_tokens": 184192887.0, "step": 194 }, { "epoch": 0.3819784524975514, "grad_norm": 0.09912862956296686, "learning_rate": 9.945094931691677e-06, "loss": 0.4514, "num_tokens": 185101798.0, "step": 195 }, { "epoch": 0.38393731635651324, "grad_norm": 0.09356569357224666, "learning_rate": 9.944154131125643e-06, "loss": 0.4453, "num_tokens": 186050228.0, "step": 196 }, { "epoch": 0.385896180215475, "grad_norm": 0.10936038967219135, "learning_rate": 9.943205383818142e-06, "loss": 0.4523, "num_tokens": 187011757.0, "step": 197 }, { "epoch": 0.3878550440744368, "grad_norm": 0.10152135007297276, "learning_rate": 9.942248691294092e-06, "loss": 0.4437, "num_tokens": 187933276.0, "step": 198 }, { "epoch": 0.38981390793339865, "grad_norm": 0.09454429685683488, "learning_rate": 9.941284055091191e-06, "loss": 0.4566, "num_tokens": 188880767.0, "step": 199 }, { "epoch": 0.3917727717923604, "grad_norm": 0.09458942177389343, "learning_rate": 9.940311476759896e-06, "loss": 0.4529, "num_tokens": 189882809.0, "step": 200 }, { "epoch": 0.39373163565132224, "grad_norm": 0.09888960327025009, "learning_rate": 9.939330957863433e-06, "loss": 0.4524, "num_tokens": 190814376.0, "step": 201 }, { "epoch": 0.395690499510284, "grad_norm": 0.09688253630738085, "learning_rate": 9.938342499977791e-06, "loss": 0.4648, "num_tokens": 191767854.0, "step": 202 }, { "epoch": 0.39764936336924583, "grad_norm": 0.09527070915427467, "learning_rate": 9.93734610469172e-06, "loss": 0.4523, "num_tokens": 192778592.0, "step": 203 }, { "epoch": 0.39960822722820766, "grad_norm": 0.09610244703120024, "learning_rate": 9.936341773606723e-06, "loss": 0.4563, "num_tokens": 193743846.0, "step": 204 }, { "epoch": 0.4015670910871694, "grad_norm": 0.0954323129485936, "learning_rate": 9.935329508337066e-06, "loss": 0.4221, "num_tokens": 194703539.0, "step": 205 }, { "epoch": 0.40352595494613125, "grad_norm": 0.09224066794714314, "learning_rate": 9.934309310509761e-06, "loss": 0.4633, "num_tokens": 195642924.0, "step": 206 }, { "epoch": 0.4054848188050931, "grad_norm": 0.09215081705810675, "learning_rate": 9.933281181764573e-06, "loss": 0.451, "num_tokens": 196600794.0, "step": 207 }, { "epoch": 0.40744368266405484, "grad_norm": 0.092979103110267, "learning_rate": 9.932245123754017e-06, "loss": 0.472, "num_tokens": 197554677.0, "step": 208 }, { "epoch": 0.40940254652301666, "grad_norm": 0.08910947696799769, "learning_rate": 9.931201138143342e-06, "loss": 0.4524, "num_tokens": 198536397.0, "step": 209 }, { "epoch": 0.41136141038197843, "grad_norm": 0.09604903925261911, "learning_rate": 9.930149226610555e-06, "loss": 0.4588, "num_tokens": 199467040.0, "step": 210 }, { "epoch": 0.41332027424094026, "grad_norm": 0.0996152785331448, "learning_rate": 9.929089390846389e-06, "loss": 0.4684, "num_tokens": 200421819.0, "step": 211 }, { "epoch": 0.4152791380999021, "grad_norm": 0.09463592045098039, "learning_rate": 9.928021632554318e-06, "loss": 0.4499, "num_tokens": 201345914.0, "step": 212 }, { "epoch": 0.41723800195886385, "grad_norm": 0.09975797702654883, "learning_rate": 9.926945953450555e-06, "loss": 0.4541, "num_tokens": 202269876.0, "step": 213 }, { "epoch": 0.41919686581782567, "grad_norm": 0.08908783685003974, "learning_rate": 9.925862355264035e-06, "loss": 0.4456, "num_tokens": 203204620.0, "step": 214 }, { "epoch": 0.42115572967678744, "grad_norm": 0.09551668100580148, "learning_rate": 9.924770839736429e-06, "loss": 0.4768, "num_tokens": 204123253.0, "step": 215 }, { "epoch": 0.42311459353574926, "grad_norm": 0.09703383213342595, "learning_rate": 9.923671408622128e-06, "loss": 0.4612, "num_tokens": 205070357.0, "step": 216 }, { "epoch": 0.4250734573947111, "grad_norm": 0.10500185746203437, "learning_rate": 9.922564063688248e-06, "loss": 0.4541, "num_tokens": 205981152.0, "step": 217 }, { "epoch": 0.42703232125367285, "grad_norm": 0.0944810782117902, "learning_rate": 9.92144880671463e-06, "loss": 0.444, "num_tokens": 206925820.0, "step": 218 }, { "epoch": 0.4289911851126347, "grad_norm": 0.09409864592076014, "learning_rate": 9.920325639493821e-06, "loss": 0.4492, "num_tokens": 207894358.0, "step": 219 }, { "epoch": 0.4309500489715965, "grad_norm": 0.09397986303935289, "learning_rate": 9.919194563831093e-06, "loss": 0.4509, "num_tokens": 208853063.0, "step": 220 }, { "epoch": 0.43290891283055827, "grad_norm": 0.09483722894944634, "learning_rate": 9.918055581544422e-06, "loss": 0.451, "num_tokens": 209803005.0, "step": 221 }, { "epoch": 0.4348677766895201, "grad_norm": 0.09240936963096365, "learning_rate": 9.916908694464494e-06, "loss": 0.4402, "num_tokens": 210743515.0, "step": 222 }, { "epoch": 0.43682664054848186, "grad_norm": 0.09969646231609247, "learning_rate": 9.915753904434702e-06, "loss": 0.4649, "num_tokens": 211678649.0, "step": 223 }, { "epoch": 0.4387855044074437, "grad_norm": 0.09630734303627361, "learning_rate": 9.914591213311142e-06, "loss": 0.4436, "num_tokens": 212640252.0, "step": 224 }, { "epoch": 0.4407443682664055, "grad_norm": 0.0901899193736916, "learning_rate": 9.913420622962606e-06, "loss": 0.458, "num_tokens": 213589770.0, "step": 225 }, { "epoch": 0.4427032321253673, "grad_norm": 0.09172175274308668, "learning_rate": 9.912242135270585e-06, "loss": 0.4562, "num_tokens": 214576120.0, "step": 226 }, { "epoch": 0.4446620959843291, "grad_norm": 0.09612569240774674, "learning_rate": 9.911055752129264e-06, "loss": 0.4434, "num_tokens": 215521108.0, "step": 227 }, { "epoch": 0.44662095984329087, "grad_norm": 0.09344083762321673, "learning_rate": 9.909861475445517e-06, "loss": 0.447, "num_tokens": 216458762.0, "step": 228 }, { "epoch": 0.4485798237022527, "grad_norm": 0.09106724915275911, "learning_rate": 9.908659307138905e-06, "loss": 0.4467, "num_tokens": 217433105.0, "step": 229 }, { "epoch": 0.4505386875612145, "grad_norm": 0.10357482160918961, "learning_rate": 9.907449249141673e-06, "loss": 0.4602, "num_tokens": 218362026.0, "step": 230 }, { "epoch": 0.4524975514201763, "grad_norm": 0.09185665370892589, "learning_rate": 9.90623130339875e-06, "loss": 0.4506, "num_tokens": 219311418.0, "step": 231 }, { "epoch": 0.4544564152791381, "grad_norm": 0.09647090136629549, "learning_rate": 9.90500547186774e-06, "loss": 0.4714, "num_tokens": 220312439.0, "step": 232 }, { "epoch": 0.4564152791380999, "grad_norm": 0.09242560530540733, "learning_rate": 9.903771756518919e-06, "loss": 0.4423, "num_tokens": 221246761.0, "step": 233 }, { "epoch": 0.4583741429970617, "grad_norm": 0.09337682539103548, "learning_rate": 9.902530159335245e-06, "loss": 0.453, "num_tokens": 222199221.0, "step": 234 }, { "epoch": 0.4603330068560235, "grad_norm": 0.10012730710127711, "learning_rate": 9.90128068231233e-06, "loss": 0.4518, "num_tokens": 223155514.0, "step": 235 }, { "epoch": 0.4622918707149853, "grad_norm": 0.10405044615190953, "learning_rate": 9.900023327458466e-06, "loss": 0.4471, "num_tokens": 224050665.0, "step": 236 }, { "epoch": 0.4642507345739471, "grad_norm": 0.09671175081752656, "learning_rate": 9.898758096794598e-06, "loss": 0.4449, "num_tokens": 224973216.0, "step": 237 }, { "epoch": 0.46620959843290893, "grad_norm": 0.09221217335060064, "learning_rate": 9.897484992354329e-06, "loss": 0.4615, "num_tokens": 225905305.0, "step": 238 }, { "epoch": 0.4681684622918707, "grad_norm": 0.09305484057306082, "learning_rate": 9.896204016183924e-06, "loss": 0.4534, "num_tokens": 226893498.0, "step": 239 }, { "epoch": 0.4701273261508325, "grad_norm": 0.08985035827138547, "learning_rate": 9.894915170342297e-06, "loss": 0.4474, "num_tokens": 227847424.0, "step": 240 }, { "epoch": 0.4720861900097943, "grad_norm": 0.09671387702973935, "learning_rate": 9.893618456901006e-06, "loss": 0.4598, "num_tokens": 228770094.0, "step": 241 }, { "epoch": 0.4740450538687561, "grad_norm": 0.09511629695889492, "learning_rate": 9.892313877944263e-06, "loss": 0.4375, "num_tokens": 229729772.0, "step": 242 }, { "epoch": 0.47600391772771794, "grad_norm": 0.09380621456050775, "learning_rate": 9.891001435568917e-06, "loss": 0.4508, "num_tokens": 230686754.0, "step": 243 }, { "epoch": 0.4779627815866797, "grad_norm": 0.09198683670913939, "learning_rate": 9.889681131884459e-06, "loss": 0.4535, "num_tokens": 231634183.0, "step": 244 }, { "epoch": 0.47992164544564153, "grad_norm": 0.09502283634944488, "learning_rate": 9.888352969013014e-06, "loss": 0.4624, "num_tokens": 232579231.0, "step": 245 }, { "epoch": 0.48188050930460335, "grad_norm": 0.09040936834978072, "learning_rate": 9.887016949089334e-06, "loss": 0.4523, "num_tokens": 233559115.0, "step": 246 }, { "epoch": 0.4838393731635651, "grad_norm": 0.09254363806857926, "learning_rate": 9.88567307426081e-06, "loss": 0.4495, "num_tokens": 234481493.0, "step": 247 }, { "epoch": 0.48579823702252695, "grad_norm": 0.1009075494272476, "learning_rate": 9.88432134668745e-06, "loss": 0.4732, "num_tokens": 235430540.0, "step": 248 }, { "epoch": 0.4877571008814887, "grad_norm": 0.09171640568723448, "learning_rate": 9.882961768541887e-06, "loss": 0.4429, "num_tokens": 236376157.0, "step": 249 }, { "epoch": 0.48971596474045054, "grad_norm": 0.08764846780429664, "learning_rate": 9.881594342009374e-06, "loss": 0.4515, "num_tokens": 237357177.0, "step": 250 }, { "epoch": 0.49167482859941236, "grad_norm": 0.09579368673659945, "learning_rate": 9.88021906928777e-06, "loss": 0.4603, "num_tokens": 238282985.0, "step": 251 }, { "epoch": 0.49363369245837413, "grad_norm": 0.09445820333911897, "learning_rate": 9.878835952587559e-06, "loss": 0.4509, "num_tokens": 239244676.0, "step": 252 }, { "epoch": 0.49559255631733595, "grad_norm": 0.08951040263052458, "learning_rate": 9.87744499413182e-06, "loss": 0.4417, "num_tokens": 240205678.0, "step": 253 }, { "epoch": 0.4975514201762977, "grad_norm": 0.09826525864645287, "learning_rate": 9.876046196156243e-06, "loss": 0.4571, "num_tokens": 241166522.0, "step": 254 }, { "epoch": 0.49951028403525954, "grad_norm": 0.09249167457583525, "learning_rate": 9.874639560909118e-06, "loss": 0.4318, "num_tokens": 242118412.0, "step": 255 }, { "epoch": 0.5014691478942214, "grad_norm": 0.1043974877643401, "learning_rate": 9.87322509065133e-06, "loss": 0.4604, "num_tokens": 243072617.0, "step": 256 }, { "epoch": 0.5034280117531832, "grad_norm": 0.08975347248528376, "learning_rate": 9.871802787656356e-06, "loss": 0.4518, "num_tokens": 244029342.0, "step": 257 }, { "epoch": 0.5053868756121449, "grad_norm": 0.09772314602053161, "learning_rate": 9.870372654210265e-06, "loss": 0.4596, "num_tokens": 244972993.0, "step": 258 }, { "epoch": 0.5073457394711067, "grad_norm": 0.09532417433774386, "learning_rate": 9.868934692611712e-06, "loss": 0.4616, "num_tokens": 245928400.0, "step": 259 }, { "epoch": 0.5093046033300686, "grad_norm": 0.09888763723078879, "learning_rate": 9.867488905171934e-06, "loss": 0.4406, "num_tokens": 246856281.0, "step": 260 }, { "epoch": 0.5112634671890304, "grad_norm": 0.09822568450389157, "learning_rate": 9.866035294214744e-06, "loss": 0.4516, "num_tokens": 247805102.0, "step": 261 }, { "epoch": 0.5132223310479922, "grad_norm": 0.09195695916021966, "learning_rate": 9.864573862076533e-06, "loss": 0.448, "num_tokens": 248747606.0, "step": 262 }, { "epoch": 0.515181194906954, "grad_norm": 0.08829654375672243, "learning_rate": 9.863104611106262e-06, "loss": 0.4433, "num_tokens": 249691221.0, "step": 263 }, { "epoch": 0.5171400587659157, "grad_norm": 0.10011745782468426, "learning_rate": 9.861627543665456e-06, "loss": 0.4511, "num_tokens": 250604365.0, "step": 264 }, { "epoch": 0.5190989226248776, "grad_norm": 0.09731081354203551, "learning_rate": 9.86014266212821e-06, "loss": 0.4464, "num_tokens": 251551288.0, "step": 265 }, { "epoch": 0.5210577864838394, "grad_norm": 0.09819161282037218, "learning_rate": 9.858649968881173e-06, "loss": 0.4551, "num_tokens": 252523236.0, "step": 266 }, { "epoch": 0.5230166503428012, "grad_norm": 0.09666669659878116, "learning_rate": 9.85714946632355e-06, "loss": 0.467, "num_tokens": 253453639.0, "step": 267 }, { "epoch": 0.524975514201763, "grad_norm": 0.08908227776173835, "learning_rate": 9.855641156867103e-06, "loss": 0.4469, "num_tokens": 254401672.0, "step": 268 }, { "epoch": 0.5269343780607247, "grad_norm": 0.09282226968659334, "learning_rate": 9.854125042936137e-06, "loss": 0.4521, "num_tokens": 255371297.0, "step": 269 }, { "epoch": 0.5288932419196866, "grad_norm": 0.09828684199638049, "learning_rate": 9.852601126967502e-06, "loss": 0.4532, "num_tokens": 256321317.0, "step": 270 }, { "epoch": 0.5308521057786484, "grad_norm": 0.09683561527174155, "learning_rate": 9.85106941141059e-06, "loss": 0.4416, "num_tokens": 257257547.0, "step": 271 }, { "epoch": 0.5328109696376102, "grad_norm": 0.09016846920338978, "learning_rate": 9.849529898727328e-06, "loss": 0.4521, "num_tokens": 258196782.0, "step": 272 }, { "epoch": 0.534769833496572, "grad_norm": 0.10246950371582385, "learning_rate": 9.847982591392173e-06, "loss": 0.4533, "num_tokens": 259113699.0, "step": 273 }, { "epoch": 0.5367286973555337, "grad_norm": 0.09970486600889314, "learning_rate": 9.846427491892117e-06, "loss": 0.4531, "num_tokens": 260062450.0, "step": 274 }, { "epoch": 0.5386875612144956, "grad_norm": 0.09086206195124324, "learning_rate": 9.844864602726671e-06, "loss": 0.4339, "num_tokens": 261015365.0, "step": 275 }, { "epoch": 0.5406464250734574, "grad_norm": 0.08951612961854831, "learning_rate": 9.843293926407866e-06, "loss": 0.4416, "num_tokens": 261951943.0, "step": 276 }, { "epoch": 0.5426052889324192, "grad_norm": 0.09759160821842901, "learning_rate": 9.841715465460251e-06, "loss": 0.4672, "num_tokens": 262870691.0, "step": 277 }, { "epoch": 0.544564152791381, "grad_norm": 0.09110634732133736, "learning_rate": 9.84012922242089e-06, "loss": 0.4566, "num_tokens": 263836081.0, "step": 278 }, { "epoch": 0.5465230166503428, "grad_norm": 0.09072382602508929, "learning_rate": 9.838535199839348e-06, "loss": 0.4506, "num_tokens": 264812493.0, "step": 279 }, { "epoch": 0.5484818805093046, "grad_norm": 0.08893797740730669, "learning_rate": 9.8369334002777e-06, "loss": 0.4456, "num_tokens": 265757155.0, "step": 280 }, { "epoch": 0.5504407443682664, "grad_norm": 0.09018712653195393, "learning_rate": 9.835323826310522e-06, "loss": 0.4578, "num_tokens": 266692706.0, "step": 281 }, { "epoch": 0.5523996082272282, "grad_norm": 0.09131692346053066, "learning_rate": 9.833706480524878e-06, "loss": 0.449, "num_tokens": 267648410.0, "step": 282 }, { "epoch": 0.55435847208619, "grad_norm": 0.08739332252346171, "learning_rate": 9.832081365520334e-06, "loss": 0.4508, "num_tokens": 268595990.0, "step": 283 }, { "epoch": 0.5563173359451518, "grad_norm": 0.09139986037545283, "learning_rate": 9.830448483908935e-06, "loss": 0.434, "num_tokens": 269537447.0, "step": 284 }, { "epoch": 0.5582761998041136, "grad_norm": 0.09065811066751005, "learning_rate": 9.82880783831521e-06, "loss": 0.4561, "num_tokens": 270486740.0, "step": 285 }, { "epoch": 0.5602350636630754, "grad_norm": 0.09054968942661933, "learning_rate": 9.827159431376176e-06, "loss": 0.4465, "num_tokens": 271440384.0, "step": 286 }, { "epoch": 0.5621939275220372, "grad_norm": 0.08513515561595217, "learning_rate": 9.825503265741314e-06, "loss": 0.4312, "num_tokens": 272416401.0, "step": 287 }, { "epoch": 0.564152791380999, "grad_norm": 0.091901688905363, "learning_rate": 9.823839344072582e-06, "loss": 0.4486, "num_tokens": 273346804.0, "step": 288 }, { "epoch": 0.5661116552399609, "grad_norm": 0.08792598502990735, "learning_rate": 9.822167669044398e-06, "loss": 0.4495, "num_tokens": 274284897.0, "step": 289 }, { "epoch": 0.5680705190989226, "grad_norm": 0.0933845119975612, "learning_rate": 9.82048824334365e-06, "loss": 0.4505, "num_tokens": 275232899.0, "step": 290 }, { "epoch": 0.5700293829578844, "grad_norm": 0.08847993498549075, "learning_rate": 9.818801069669679e-06, "loss": 0.4532, "num_tokens": 276190165.0, "step": 291 }, { "epoch": 0.5719882468168462, "grad_norm": 0.08838487689527708, "learning_rate": 9.817106150734281e-06, "loss": 0.4653, "num_tokens": 277157577.0, "step": 292 }, { "epoch": 0.5739471106758081, "grad_norm": 0.0891535266335383, "learning_rate": 9.815403489261698e-06, "loss": 0.453, "num_tokens": 278086368.0, "step": 293 }, { "epoch": 0.5759059745347699, "grad_norm": 0.09765021390877121, "learning_rate": 9.81369308798862e-06, "loss": 0.4517, "num_tokens": 279014464.0, "step": 294 }, { "epoch": 0.5778648383937316, "grad_norm": 0.08813812734907714, "learning_rate": 9.811974949664176e-06, "loss": 0.4426, "num_tokens": 279972567.0, "step": 295 }, { "epoch": 0.5798237022526934, "grad_norm": 0.09170718468035989, "learning_rate": 9.810249077049929e-06, "loss": 0.4549, "num_tokens": 280921291.0, "step": 296 }, { "epoch": 0.5817825661116552, "grad_norm": 0.09043515510180414, "learning_rate": 9.808515472919876e-06, "loss": 0.4519, "num_tokens": 281892629.0, "step": 297 }, { "epoch": 0.5837414299706171, "grad_norm": 0.08901240213313642, "learning_rate": 9.80677414006044e-06, "loss": 0.4554, "num_tokens": 282842117.0, "step": 298 }, { "epoch": 0.5857002938295789, "grad_norm": 0.09261848310830277, "learning_rate": 9.805025081270467e-06, "loss": 0.4364, "num_tokens": 283790343.0, "step": 299 }, { "epoch": 0.5876591576885406, "grad_norm": 0.09437822926353868, "learning_rate": 9.803268299361217e-06, "loss": 0.4555, "num_tokens": 284725518.0, "step": 300 }, { "epoch": 0.5896180215475024, "grad_norm": 0.10839960974848895, "learning_rate": 9.80150379715637e-06, "loss": 0.4791, "num_tokens": 285646307.0, "step": 301 }, { "epoch": 0.5915768854064642, "grad_norm": 0.09620758857257257, "learning_rate": 9.79973157749201e-06, "loss": 0.4481, "num_tokens": 286598500.0, "step": 302 }, { "epoch": 0.5935357492654261, "grad_norm": 0.08658250998764305, "learning_rate": 9.797951643216628e-06, "loss": 0.4597, "num_tokens": 287563888.0, "step": 303 }, { "epoch": 0.5954946131243879, "grad_norm": 0.09191132805686522, "learning_rate": 9.79616399719111e-06, "loss": 0.4766, "num_tokens": 288542116.0, "step": 304 }, { "epoch": 0.5974534769833496, "grad_norm": 0.1017523258754097, "learning_rate": 9.794368642288741e-06, "loss": 0.4523, "num_tokens": 289439264.0, "step": 305 }, { "epoch": 0.5994123408423114, "grad_norm": 0.09229643659596272, "learning_rate": 9.7925655813952e-06, "loss": 0.4422, "num_tokens": 290388706.0, "step": 306 }, { "epoch": 0.6013712047012733, "grad_norm": 0.09180609004222215, "learning_rate": 9.790754817408541e-06, "loss": 0.453, "num_tokens": 291356017.0, "step": 307 }, { "epoch": 0.6033300685602351, "grad_norm": 0.08948438390689484, "learning_rate": 9.788936353239211e-06, "loss": 0.4597, "num_tokens": 292342082.0, "step": 308 }, { "epoch": 0.6052889324191969, "grad_norm": 0.0884597685046138, "learning_rate": 9.787110191810027e-06, "loss": 0.4454, "num_tokens": 293275934.0, "step": 309 }, { "epoch": 0.6072477962781586, "grad_norm": 0.09238464655217382, "learning_rate": 9.78527633605618e-06, "loss": 0.4518, "num_tokens": 294228528.0, "step": 310 }, { "epoch": 0.6092066601371204, "grad_norm": 0.08783977724736827, "learning_rate": 9.783434788925227e-06, "loss": 0.4489, "num_tokens": 295190320.0, "step": 311 }, { "epoch": 0.6111655239960823, "grad_norm": 0.09515052060454643, "learning_rate": 9.781585553377086e-06, "loss": 0.4505, "num_tokens": 296154943.0, "step": 312 }, { "epoch": 0.6131243878550441, "grad_norm": 0.09104776922006945, "learning_rate": 9.779728632384035e-06, "loss": 0.4464, "num_tokens": 297092062.0, "step": 313 }, { "epoch": 0.6150832517140059, "grad_norm": 0.08777636231747861, "learning_rate": 9.777864028930705e-06, "loss": 0.4512, "num_tokens": 298062069.0, "step": 314 }, { "epoch": 0.6170421155729677, "grad_norm": 0.09365530562426334, "learning_rate": 9.775991746014073e-06, "loss": 0.4356, "num_tokens": 298979969.0, "step": 315 }, { "epoch": 0.6190009794319294, "grad_norm": 0.09484238379783078, "learning_rate": 9.77411178664346e-06, "loss": 0.4628, "num_tokens": 299928548.0, "step": 316 }, { "epoch": 0.6209598432908913, "grad_norm": 0.0923923755037889, "learning_rate": 9.772224153840528e-06, "loss": 0.4427, "num_tokens": 300875247.0, "step": 317 }, { "epoch": 0.6229187071498531, "grad_norm": 0.09215877415929054, "learning_rate": 9.770328850639268e-06, "loss": 0.441, "num_tokens": 301809712.0, "step": 318 }, { "epoch": 0.6248775710088149, "grad_norm": 0.08874374870559448, "learning_rate": 9.768425880086002e-06, "loss": 0.4455, "num_tokens": 302773650.0, "step": 319 }, { "epoch": 0.6268364348677767, "grad_norm": 0.090207249747187, "learning_rate": 9.766515245239377e-06, "loss": 0.4444, "num_tokens": 303722502.0, "step": 320 }, { "epoch": 0.6287952987267384, "grad_norm": 0.08503990355250717, "learning_rate": 9.764596949170356e-06, "loss": 0.4338, "num_tokens": 304706080.0, "step": 321 }, { "epoch": 0.6307541625857003, "grad_norm": 0.09025974958469772, "learning_rate": 9.762670994962215e-06, "loss": 0.4404, "num_tokens": 305650384.0, "step": 322 }, { "epoch": 0.6327130264446621, "grad_norm": 0.08638379902993888, "learning_rate": 9.760737385710546e-06, "loss": 0.438, "num_tokens": 306635856.0, "step": 323 }, { "epoch": 0.6346718903036239, "grad_norm": 0.08985329866314863, "learning_rate": 9.758796124523238e-06, "loss": 0.4458, "num_tokens": 307625254.0, "step": 324 }, { "epoch": 0.6366307541625857, "grad_norm": 0.09014705464588635, "learning_rate": 9.756847214520482e-06, "loss": 0.4522, "num_tokens": 308593837.0, "step": 325 }, { "epoch": 0.6385896180215475, "grad_norm": 0.09122915265370712, "learning_rate": 9.754890658834759e-06, "loss": 0.4434, "num_tokens": 309554650.0, "step": 326 }, { "epoch": 0.6405484818805093, "grad_norm": 0.08461284621938435, "learning_rate": 9.752926460610846e-06, "loss": 0.4386, "num_tokens": 310526204.0, "step": 327 }, { "epoch": 0.6425073457394711, "grad_norm": 0.08801795933423717, "learning_rate": 9.750954623005795e-06, "loss": 0.4608, "num_tokens": 311492787.0, "step": 328 }, { "epoch": 0.6444662095984329, "grad_norm": 0.0901115841850163, "learning_rate": 9.748975149188946e-06, "loss": 0.4603, "num_tokens": 312441688.0, "step": 329 }, { "epoch": 0.6464250734573947, "grad_norm": 0.08901845538892801, "learning_rate": 9.746988042341907e-06, "loss": 0.4529, "num_tokens": 313376394.0, "step": 330 }, { "epoch": 0.6483839373163565, "grad_norm": 0.0849474795941554, "learning_rate": 9.744993305658556e-06, "loss": 0.4346, "num_tokens": 314315578.0, "step": 331 }, { "epoch": 0.6503428011753183, "grad_norm": 0.0860787213869078, "learning_rate": 9.742990942345032e-06, "loss": 0.4547, "num_tokens": 315273796.0, "step": 332 }, { "epoch": 0.6523016650342801, "grad_norm": 0.08995415547258594, "learning_rate": 9.74098095561974e-06, "loss": 0.4513, "num_tokens": 316218943.0, "step": 333 }, { "epoch": 0.6542605288932419, "grad_norm": 0.09049418205944079, "learning_rate": 9.738963348713324e-06, "loss": 0.4356, "num_tokens": 317141320.0, "step": 334 }, { "epoch": 0.6562193927522038, "grad_norm": 0.09074946806627159, "learning_rate": 9.736938124868693e-06, "loss": 0.4474, "num_tokens": 318074977.0, "step": 335 }, { "epoch": 0.6581782566111655, "grad_norm": 0.09407292204888344, "learning_rate": 9.734905287340985e-06, "loss": 0.4617, "num_tokens": 319036215.0, "step": 336 }, { "epoch": 0.6601371204701273, "grad_norm": 0.09482236087266514, "learning_rate": 9.732864839397585e-06, "loss": 0.4313, "num_tokens": 319961419.0, "step": 337 }, { "epoch": 0.6620959843290891, "grad_norm": 0.08750525545608744, "learning_rate": 9.730816784318103e-06, "loss": 0.4385, "num_tokens": 320887567.0, "step": 338 }, { "epoch": 0.6640548481880509, "grad_norm": 0.09008099656070187, "learning_rate": 9.728761125394379e-06, "loss": 0.4453, "num_tokens": 321858337.0, "step": 339 }, { "epoch": 0.6660137120470128, "grad_norm": 0.0923712313256544, "learning_rate": 9.726697865930477e-06, "loss": 0.4468, "num_tokens": 322814978.0, "step": 340 }, { "epoch": 0.6679725759059746, "grad_norm": 0.08994049077738044, "learning_rate": 9.724627009242673e-06, "loss": 0.4477, "num_tokens": 323770176.0, "step": 341 }, { "epoch": 0.6699314397649363, "grad_norm": 0.08719580060077986, "learning_rate": 9.722548558659457e-06, "loss": 0.4355, "num_tokens": 324699181.0, "step": 342 }, { "epoch": 0.6718903036238981, "grad_norm": 0.08997896557435177, "learning_rate": 9.720462517521522e-06, "loss": 0.4407, "num_tokens": 325636045.0, "step": 343 }, { "epoch": 0.67384916748286, "grad_norm": 0.09408528451076777, "learning_rate": 9.718368889181763e-06, "loss": 0.4559, "num_tokens": 326598812.0, "step": 344 }, { "epoch": 0.6758080313418218, "grad_norm": 0.08601851635722685, "learning_rate": 9.716267677005273e-06, "loss": 0.435, "num_tokens": 327527276.0, "step": 345 }, { "epoch": 0.6777668952007836, "grad_norm": 0.08934376371860722, "learning_rate": 9.71415888436933e-06, "loss": 0.4614, "num_tokens": 328481063.0, "step": 346 }, { "epoch": 0.6797257590597453, "grad_norm": 0.09134788028869219, "learning_rate": 9.712042514663396e-06, "loss": 0.4563, "num_tokens": 329408909.0, "step": 347 }, { "epoch": 0.6816846229187071, "grad_norm": 0.09070727995108552, "learning_rate": 9.709918571289114e-06, "loss": 0.4499, "num_tokens": 330338376.0, "step": 348 }, { "epoch": 0.683643486777669, "grad_norm": 0.09732961017889742, "learning_rate": 9.7077870576603e-06, "loss": 0.4505, "num_tokens": 331305055.0, "step": 349 }, { "epoch": 0.6856023506366308, "grad_norm": 0.084475019256982, "learning_rate": 9.705647977202937e-06, "loss": 0.4342, "num_tokens": 332259264.0, "step": 350 }, { "epoch": 0.6875612144955926, "grad_norm": 0.08864252586488931, "learning_rate": 9.703501333355167e-06, "loss": 0.446, "num_tokens": 333210070.0, "step": 351 }, { "epoch": 0.6895200783545543, "grad_norm": 0.08872160192459715, "learning_rate": 9.701347129567296e-06, "loss": 0.4484, "num_tokens": 334175126.0, "step": 352 }, { "epoch": 0.6914789422135161, "grad_norm": 0.0930468611145152, "learning_rate": 9.699185369301773e-06, "loss": 0.4418, "num_tokens": 335155541.0, "step": 353 }, { "epoch": 0.693437806072478, "grad_norm": 0.09593356596707556, "learning_rate": 9.697016056033202e-06, "loss": 0.448, "num_tokens": 336093931.0, "step": 354 }, { "epoch": 0.6953966699314398, "grad_norm": 0.09139549920653682, "learning_rate": 9.694839193248315e-06, "loss": 0.4448, "num_tokens": 337044870.0, "step": 355 }, { "epoch": 0.6973555337904016, "grad_norm": 0.08923685382483237, "learning_rate": 9.692654784445988e-06, "loss": 0.448, "num_tokens": 337957570.0, "step": 356 }, { "epoch": 0.6993143976493633, "grad_norm": 0.09319457465664423, "learning_rate": 9.690462833137222e-06, "loss": 0.4372, "num_tokens": 338915262.0, "step": 357 }, { "epoch": 0.7012732615083251, "grad_norm": 0.09144500484833851, "learning_rate": 9.68826334284514e-06, "loss": 0.4353, "num_tokens": 339840823.0, "step": 358 }, { "epoch": 0.703232125367287, "grad_norm": 0.09068135189565031, "learning_rate": 9.686056317104986e-06, "loss": 0.4342, "num_tokens": 340787790.0, "step": 359 }, { "epoch": 0.7051909892262488, "grad_norm": 0.0897309059628541, "learning_rate": 9.683841759464114e-06, "loss": 0.4382, "num_tokens": 341705934.0, "step": 360 }, { "epoch": 0.7071498530852106, "grad_norm": 0.09186132785983991, "learning_rate": 9.68161967348198e-06, "loss": 0.4737, "num_tokens": 342661299.0, "step": 361 }, { "epoch": 0.7091087169441724, "grad_norm": 0.0926437461265092, "learning_rate": 9.679390062730147e-06, "loss": 0.4419, "num_tokens": 343595015.0, "step": 362 }, { "epoch": 0.7110675808031341, "grad_norm": 0.09275070419172855, "learning_rate": 9.677152930792271e-06, "loss": 0.4616, "num_tokens": 344524727.0, "step": 363 }, { "epoch": 0.713026444662096, "grad_norm": 0.08591248310796139, "learning_rate": 9.67490828126409e-06, "loss": 0.4498, "num_tokens": 345491807.0, "step": 364 }, { "epoch": 0.7149853085210578, "grad_norm": 0.09257358320354882, "learning_rate": 9.672656117753435e-06, "loss": 0.4483, "num_tokens": 346433272.0, "step": 365 }, { "epoch": 0.7169441723800196, "grad_norm": 0.0924765321124182, "learning_rate": 9.670396443880208e-06, "loss": 0.4459, "num_tokens": 347388710.0, "step": 366 }, { "epoch": 0.7189030362389814, "grad_norm": 0.08878238593226352, "learning_rate": 9.668129263276384e-06, "loss": 0.4534, "num_tokens": 348339683.0, "step": 367 }, { "epoch": 0.7208619000979432, "grad_norm": 0.09381300424385022, "learning_rate": 9.665854579586003e-06, "loss": 0.4446, "num_tokens": 349282115.0, "step": 368 }, { "epoch": 0.722820763956905, "grad_norm": 0.08907529279081705, "learning_rate": 9.663572396465165e-06, "loss": 0.4352, "num_tokens": 350225227.0, "step": 369 }, { "epoch": 0.7247796278158668, "grad_norm": 0.08936131212725164, "learning_rate": 9.661282717582028e-06, "loss": 0.461, "num_tokens": 351156796.0, "step": 370 }, { "epoch": 0.7267384916748286, "grad_norm": 0.09185757621293171, "learning_rate": 9.658985546616787e-06, "loss": 0.4448, "num_tokens": 352096120.0, "step": 371 }, { "epoch": 0.7286973555337904, "grad_norm": 0.09153254942213147, "learning_rate": 9.656680887261693e-06, "loss": 0.4416, "num_tokens": 353038771.0, "step": 372 }, { "epoch": 0.7306562193927522, "grad_norm": 0.09460985738793483, "learning_rate": 9.654368743221022e-06, "loss": 0.4386, "num_tokens": 353961679.0, "step": 373 }, { "epoch": 0.732615083251714, "grad_norm": 0.09009951630424058, "learning_rate": 9.652049118211085e-06, "loss": 0.4446, "num_tokens": 354922684.0, "step": 374 }, { "epoch": 0.7345739471106758, "grad_norm": 0.09185998933597071, "learning_rate": 9.649722015960219e-06, "loss": 0.4352, "num_tokens": 355865391.0, "step": 375 }, { "epoch": 0.7365328109696376, "grad_norm": 0.08574006743953584, "learning_rate": 9.647387440208772e-06, "loss": 0.4265, "num_tokens": 356807318.0, "step": 376 }, { "epoch": 0.7384916748285995, "grad_norm": 0.09213513041723538, "learning_rate": 9.645045394709113e-06, "loss": 0.4519, "num_tokens": 357756208.0, "step": 377 }, { "epoch": 0.7404505386875612, "grad_norm": 0.0914118115135455, "learning_rate": 9.64269588322561e-06, "loss": 0.453, "num_tokens": 358690434.0, "step": 378 }, { "epoch": 0.742409402546523, "grad_norm": 0.08515546604270077, "learning_rate": 9.640338909534636e-06, "loss": 0.4338, "num_tokens": 359600815.0, "step": 379 }, { "epoch": 0.7443682664054848, "grad_norm": 0.09099837438538474, "learning_rate": 9.637974477424556e-06, "loss": 0.4465, "num_tokens": 360546023.0, "step": 380 }, { "epoch": 0.7463271302644466, "grad_norm": 0.0916984793702679, "learning_rate": 9.63560259069572e-06, "loss": 0.4474, "num_tokens": 361484235.0, "step": 381 }, { "epoch": 0.7482859941234085, "grad_norm": 0.0907311847590698, "learning_rate": 9.633223253160468e-06, "loss": 0.4351, "num_tokens": 362430720.0, "step": 382 }, { "epoch": 0.7502448579823702, "grad_norm": 0.08487893107581852, "learning_rate": 9.630836468643105e-06, "loss": 0.4471, "num_tokens": 363419069.0, "step": 383 }, { "epoch": 0.752203721841332, "grad_norm": 0.08527321582104172, "learning_rate": 9.628442240979915e-06, "loss": 0.4448, "num_tokens": 364373467.0, "step": 384 }, { "epoch": 0.7541625857002938, "grad_norm": 0.09142315875170678, "learning_rate": 9.62604057401914e-06, "loss": 0.4359, "num_tokens": 365321364.0, "step": 385 }, { "epoch": 0.7561214495592556, "grad_norm": 0.09108466254026837, "learning_rate": 9.62363147162098e-06, "loss": 0.4489, "num_tokens": 366255055.0, "step": 386 }, { "epoch": 0.7580803134182175, "grad_norm": 0.09116160041104576, "learning_rate": 9.621214937657585e-06, "loss": 0.4473, "num_tokens": 367188147.0, "step": 387 }, { "epoch": 0.7600391772771793, "grad_norm": 0.09229959461153356, "learning_rate": 9.618790976013056e-06, "loss": 0.4511, "num_tokens": 368117102.0, "step": 388 }, { "epoch": 0.761998041136141, "grad_norm": 0.09153196002525947, "learning_rate": 9.616359590583424e-06, "loss": 0.4503, "num_tokens": 369092244.0, "step": 389 }, { "epoch": 0.7639569049951028, "grad_norm": 0.09012900386813807, "learning_rate": 9.613920785276655e-06, "loss": 0.4478, "num_tokens": 370033011.0, "step": 390 }, { "epoch": 0.7659157688540646, "grad_norm": 0.09032377835086211, "learning_rate": 9.611474564012646e-06, "loss": 0.4453, "num_tokens": 370970221.0, "step": 391 }, { "epoch": 0.7678746327130265, "grad_norm": 0.09174864200834358, "learning_rate": 9.609020930723208e-06, "loss": 0.4294, "num_tokens": 371927360.0, "step": 392 }, { "epoch": 0.7698334965719883, "grad_norm": 0.08799839349782142, "learning_rate": 9.606559889352065e-06, "loss": 0.4258, "num_tokens": 372880974.0, "step": 393 }, { "epoch": 0.77179236043095, "grad_norm": 0.09857544007537455, "learning_rate": 9.604091443854853e-06, "loss": 0.4495, "num_tokens": 373809205.0, "step": 394 }, { "epoch": 0.7737512242899118, "grad_norm": 0.0890729432210977, "learning_rate": 9.601615598199105e-06, "loss": 0.4502, "num_tokens": 374750745.0, "step": 395 }, { "epoch": 0.7757100881488737, "grad_norm": 0.09614982654170924, "learning_rate": 9.599132356364247e-06, "loss": 0.4502, "num_tokens": 375694515.0, "step": 396 }, { "epoch": 0.7776689520078355, "grad_norm": 0.09497610635385678, "learning_rate": 9.596641722341597e-06, "loss": 0.4625, "num_tokens": 376648630.0, "step": 397 }, { "epoch": 0.7796278158667973, "grad_norm": 0.08869489109692409, "learning_rate": 9.594143700134354e-06, "loss": 0.4422, "num_tokens": 377601707.0, "step": 398 }, { "epoch": 0.781586679725759, "grad_norm": 0.08748225330410787, "learning_rate": 9.591638293757584e-06, "loss": 0.4422, "num_tokens": 378528785.0, "step": 399 }, { "epoch": 0.7835455435847208, "grad_norm": 0.09171552222842465, "learning_rate": 9.589125507238234e-06, "loss": 0.4377, "num_tokens": 379465500.0, "step": 400 }, { "epoch": 0.7855044074436827, "grad_norm": 0.0924306305287081, "learning_rate": 9.586605344615105e-06, "loss": 0.4476, "num_tokens": 380436852.0, "step": 401 }, { "epoch": 0.7874632713026445, "grad_norm": 0.09194022766426356, "learning_rate": 9.584077809938856e-06, "loss": 0.4619, "num_tokens": 381419522.0, "step": 402 }, { "epoch": 0.7894221351616063, "grad_norm": 0.0953069976595774, "learning_rate": 9.581542907271994e-06, "loss": 0.4475, "num_tokens": 382373518.0, "step": 403 }, { "epoch": 0.791380999020568, "grad_norm": 0.09219210135779037, "learning_rate": 9.57900064068887e-06, "loss": 0.4469, "num_tokens": 383314714.0, "step": 404 }, { "epoch": 0.7933398628795298, "grad_norm": 0.08819815778989788, "learning_rate": 9.57645101427567e-06, "loss": 0.4405, "num_tokens": 384296808.0, "step": 405 }, { "epoch": 0.7952987267384917, "grad_norm": 0.09152627082774994, "learning_rate": 9.573894032130411e-06, "loss": 0.447, "num_tokens": 385238202.0, "step": 406 }, { "epoch": 0.7972575905974535, "grad_norm": 0.09332193664312469, "learning_rate": 9.571329698362931e-06, "loss": 0.4421, "num_tokens": 386206872.0, "step": 407 }, { "epoch": 0.7992164544564153, "grad_norm": 0.09435719314839941, "learning_rate": 9.568758017094884e-06, "loss": 0.4515, "num_tokens": 387163411.0, "step": 408 }, { "epoch": 0.801175318315377, "grad_norm": 0.09062165292179064, "learning_rate": 9.566178992459736e-06, "loss": 0.4581, "num_tokens": 388121654.0, "step": 409 }, { "epoch": 0.8031341821743389, "grad_norm": 0.09249823612388672, "learning_rate": 9.563592628602751e-06, "loss": 0.4378, "num_tokens": 389069363.0, "step": 410 }, { "epoch": 0.8050930460333007, "grad_norm": 0.09671609347878848, "learning_rate": 9.560998929681e-06, "loss": 0.4403, "num_tokens": 390021562.0, "step": 411 }, { "epoch": 0.8070519098922625, "grad_norm": 0.0903169040894827, "learning_rate": 9.55839789986333e-06, "loss": 0.4412, "num_tokens": 390968170.0, "step": 412 }, { "epoch": 0.8090107737512243, "grad_norm": 0.09307469451515846, "learning_rate": 9.555789543330383e-06, "loss": 0.4454, "num_tokens": 391927899.0, "step": 413 }, { "epoch": 0.8109696376101861, "grad_norm": 0.09144767479348721, "learning_rate": 9.553173864274567e-06, "loss": 0.443, "num_tokens": 392851142.0, "step": 414 }, { "epoch": 0.8129285014691479, "grad_norm": 0.09277036645308719, "learning_rate": 9.550550866900066e-06, "loss": 0.4479, "num_tokens": 393839937.0, "step": 415 }, { "epoch": 0.8148873653281097, "grad_norm": 0.09018958429415018, "learning_rate": 9.547920555422826e-06, "loss": 0.4363, "num_tokens": 394796096.0, "step": 416 }, { "epoch": 0.8168462291870715, "grad_norm": 0.09085236470619103, "learning_rate": 9.545282934070549e-06, "loss": 0.4483, "num_tokens": 395713544.0, "step": 417 }, { "epoch": 0.8188050930460333, "grad_norm": 0.08407649581832123, "learning_rate": 9.542638007082679e-06, "loss": 0.4391, "num_tokens": 396689825.0, "step": 418 }, { "epoch": 0.8207639569049952, "grad_norm": 0.09950920573968482, "learning_rate": 9.539985778710417e-06, "loss": 0.4534, "num_tokens": 397652133.0, "step": 419 }, { "epoch": 0.8227228207639569, "grad_norm": 0.09307640606019055, "learning_rate": 9.537326253216685e-06, "loss": 0.4423, "num_tokens": 398608999.0, "step": 420 }, { "epoch": 0.8246816846229187, "grad_norm": 0.09357630630578798, "learning_rate": 9.53465943487614e-06, "loss": 0.4524, "num_tokens": 399553537.0, "step": 421 }, { "epoch": 0.8266405484818805, "grad_norm": 0.09116527434719511, "learning_rate": 9.531985327975166e-06, "loss": 0.4318, "num_tokens": 400478774.0, "step": 422 }, { "epoch": 0.8285994123408423, "grad_norm": 0.0877207115367518, "learning_rate": 9.529303936811848e-06, "loss": 0.4458, "num_tokens": 401453462.0, "step": 423 }, { "epoch": 0.8305582761998042, "grad_norm": 0.10429597192469464, "learning_rate": 9.526615265695996e-06, "loss": 0.4588, "num_tokens": 402371978.0, "step": 424 }, { "epoch": 0.8325171400587659, "grad_norm": 0.0921247742066358, "learning_rate": 9.523919318949107e-06, "loss": 0.4431, "num_tokens": 403299490.0, "step": 425 }, { "epoch": 0.8344760039177277, "grad_norm": 0.08535437886957582, "learning_rate": 9.521216100904379e-06, "loss": 0.4504, "num_tokens": 404272460.0, "step": 426 }, { "epoch": 0.8364348677766895, "grad_norm": 0.09086008435179309, "learning_rate": 9.518505615906695e-06, "loss": 0.4364, "num_tokens": 405227719.0, "step": 427 }, { "epoch": 0.8383937316356513, "grad_norm": 0.08852926690081821, "learning_rate": 9.51578786831262e-06, "loss": 0.4333, "num_tokens": 406172073.0, "step": 428 }, { "epoch": 0.8403525954946132, "grad_norm": 0.09296742826394402, "learning_rate": 9.51306286249039e-06, "loss": 0.4435, "num_tokens": 407135296.0, "step": 429 }, { "epoch": 0.8423114593535749, "grad_norm": 0.09195865414003453, "learning_rate": 9.510330602819908e-06, "loss": 0.4338, "num_tokens": 408025872.0, "step": 430 }, { "epoch": 0.8442703232125367, "grad_norm": 0.08592651925000429, "learning_rate": 9.50759109369274e-06, "loss": 0.4405, "num_tokens": 408952437.0, "step": 431 }, { "epoch": 0.8462291870714985, "grad_norm": 0.08720910332233216, "learning_rate": 9.504844339512096e-06, "loss": 0.4618, "num_tokens": 409927896.0, "step": 432 }, { "epoch": 0.8481880509304603, "grad_norm": 0.09237479059899237, "learning_rate": 9.502090344692839e-06, "loss": 0.444, "num_tokens": 410872172.0, "step": 433 }, { "epoch": 0.8501469147894222, "grad_norm": 0.08611080491290468, "learning_rate": 9.499329113661462e-06, "loss": 0.4522, "num_tokens": 411842501.0, "step": 434 }, { "epoch": 0.8521057786483839, "grad_norm": 0.08974433119079651, "learning_rate": 9.496560650856097e-06, "loss": 0.4389, "num_tokens": 412802958.0, "step": 435 }, { "epoch": 0.8540646425073457, "grad_norm": 0.09282486852664872, "learning_rate": 9.493784960726494e-06, "loss": 0.443, "num_tokens": 413703549.0, "step": 436 }, { "epoch": 0.8560235063663075, "grad_norm": 0.09598607127715932, "learning_rate": 9.491002047734022e-06, "loss": 0.4448, "num_tokens": 414662771.0, "step": 437 }, { "epoch": 0.8579823702252694, "grad_norm": 0.0868526618332538, "learning_rate": 9.488211916351656e-06, "loss": 0.4522, "num_tokens": 415605121.0, "step": 438 }, { "epoch": 0.8599412340842312, "grad_norm": 0.08726492885006254, "learning_rate": 9.485414571063978e-06, "loss": 0.4413, "num_tokens": 416533609.0, "step": 439 }, { "epoch": 0.861900097943193, "grad_norm": 0.08893599449523941, "learning_rate": 9.482610016367162e-06, "loss": 0.458, "num_tokens": 417475629.0, "step": 440 }, { "epoch": 0.8638589618021547, "grad_norm": 0.08765700775405233, "learning_rate": 9.479798256768971e-06, "loss": 0.4465, "num_tokens": 418417455.0, "step": 441 }, { "epoch": 0.8658178256611165, "grad_norm": 0.09834057321360593, "learning_rate": 9.476979296788746e-06, "loss": 0.4373, "num_tokens": 419330611.0, "step": 442 }, { "epoch": 0.8677766895200784, "grad_norm": 0.09680606791405261, "learning_rate": 9.474153140957404e-06, "loss": 0.436, "num_tokens": 420290222.0, "step": 443 }, { "epoch": 0.8697355533790402, "grad_norm": 0.09250835079848582, "learning_rate": 9.471319793817427e-06, "loss": 0.4434, "num_tokens": 421235105.0, "step": 444 }, { "epoch": 0.871694417238002, "grad_norm": 0.08946786551563851, "learning_rate": 9.468479259922853e-06, "loss": 0.4477, "num_tokens": 422178339.0, "step": 445 }, { "epoch": 0.8736532810969637, "grad_norm": 0.09808169538863153, "learning_rate": 9.465631543839275e-06, "loss": 0.4365, "num_tokens": 423122510.0, "step": 446 }, { "epoch": 0.8756121449559255, "grad_norm": 0.10179959020277803, "learning_rate": 9.46277665014383e-06, "loss": 0.4658, "num_tokens": 424026386.0, "step": 447 }, { "epoch": 0.8775710088148874, "grad_norm": 0.09532360921540196, "learning_rate": 9.45991458342519e-06, "loss": 0.4615, "num_tokens": 425002226.0, "step": 448 }, { "epoch": 0.8795298726738492, "grad_norm": 0.10363014085944199, "learning_rate": 9.457045348283552e-06, "loss": 0.4442, "num_tokens": 425954442.0, "step": 449 }, { "epoch": 0.881488736532811, "grad_norm": 0.09131988082293717, "learning_rate": 9.454168949330644e-06, "loss": 0.4454, "num_tokens": 426882944.0, "step": 450 }, { "epoch": 0.8834476003917727, "grad_norm": 0.09361637101900908, "learning_rate": 9.451285391189701e-06, "loss": 0.4392, "num_tokens": 427859282.0, "step": 451 }, { "epoch": 0.8854064642507345, "grad_norm": 0.09877788574597322, "learning_rate": 9.448394678495469e-06, "loss": 0.4506, "num_tokens": 428809528.0, "step": 452 }, { "epoch": 0.8873653281096964, "grad_norm": 0.09549308711562945, "learning_rate": 9.445496815894192e-06, "loss": 0.4506, "num_tokens": 429768305.0, "step": 453 }, { "epoch": 0.8893241919686582, "grad_norm": 0.08871508739494441, "learning_rate": 9.442591808043604e-06, "loss": 0.4495, "num_tokens": 430729678.0, "step": 454 }, { "epoch": 0.89128305582762, "grad_norm": 0.0892212119857271, "learning_rate": 9.439679659612926e-06, "loss": 0.4267, "num_tokens": 431714983.0, "step": 455 }, { "epoch": 0.8932419196865817, "grad_norm": 0.09491973188699437, "learning_rate": 9.436760375282858e-06, "loss": 0.4422, "num_tokens": 432699169.0, "step": 456 }, { "epoch": 0.8952007835455436, "grad_norm": 0.09226562786893142, "learning_rate": 9.433833959745566e-06, "loss": 0.4502, "num_tokens": 433675703.0, "step": 457 }, { "epoch": 0.8971596474045054, "grad_norm": 0.10131891049086757, "learning_rate": 9.430900417704679e-06, "loss": 0.4572, "num_tokens": 434585297.0, "step": 458 }, { "epoch": 0.8991185112634672, "grad_norm": 0.09282059855847394, "learning_rate": 9.42795975387528e-06, "loss": 0.4597, "num_tokens": 435508190.0, "step": 459 }, { "epoch": 0.901077375122429, "grad_norm": 0.09559706065058528, "learning_rate": 9.4250119729839e-06, "loss": 0.4591, "num_tokens": 436464412.0, "step": 460 }, { "epoch": 0.9030362389813908, "grad_norm": 0.08935181573765572, "learning_rate": 9.422057079768508e-06, "loss": 0.4531, "num_tokens": 437430752.0, "step": 461 }, { "epoch": 0.9049951028403526, "grad_norm": 0.09115518617016825, "learning_rate": 9.419095078978506e-06, "loss": 0.4418, "num_tokens": 438400386.0, "step": 462 }, { "epoch": 0.9069539666993144, "grad_norm": 0.09088668038130367, "learning_rate": 9.416125975374722e-06, "loss": 0.4458, "num_tokens": 439320945.0, "step": 463 }, { "epoch": 0.9089128305582762, "grad_norm": 0.0877448959315513, "learning_rate": 9.413149773729393e-06, "loss": 0.4348, "num_tokens": 440254668.0, "step": 464 }, { "epoch": 0.910871694417238, "grad_norm": 0.09474539542626276, "learning_rate": 9.410166478826172e-06, "loss": 0.4618, "num_tokens": 441190447.0, "step": 465 }, { "epoch": 0.9128305582761999, "grad_norm": 0.08817808200419723, "learning_rate": 9.407176095460111e-06, "loss": 0.433, "num_tokens": 442141327.0, "step": 466 }, { "epoch": 0.9147894221351616, "grad_norm": 0.09427837087162373, "learning_rate": 9.404178628437652e-06, "loss": 0.4477, "num_tokens": 443083315.0, "step": 467 }, { "epoch": 0.9167482859941234, "grad_norm": 0.08978197754273248, "learning_rate": 9.40117408257663e-06, "loss": 0.4423, "num_tokens": 444004714.0, "step": 468 }, { "epoch": 0.9187071498530852, "grad_norm": 0.08991167756068928, "learning_rate": 9.398162462706249e-06, "loss": 0.4494, "num_tokens": 444898486.0, "step": 469 }, { "epoch": 0.920666013712047, "grad_norm": 0.08704060556036504, "learning_rate": 9.395143773667089e-06, "loss": 0.443, "num_tokens": 445863285.0, "step": 470 }, { "epoch": 0.9226248775710089, "grad_norm": 0.09328182815982616, "learning_rate": 9.392118020311088e-06, "loss": 0.4517, "num_tokens": 446830884.0, "step": 471 }, { "epoch": 0.9245837414299706, "grad_norm": 0.09946253242310275, "learning_rate": 9.389085207501542e-06, "loss": 0.433, "num_tokens": 447794765.0, "step": 472 }, { "epoch": 0.9265426052889324, "grad_norm": 0.08744402771416822, "learning_rate": 9.386045340113098e-06, "loss": 0.4447, "num_tokens": 448729864.0, "step": 473 }, { "epoch": 0.9285014691478942, "grad_norm": 0.09066553449079683, "learning_rate": 9.382998423031728e-06, "loss": 0.4412, "num_tokens": 449684785.0, "step": 474 }, { "epoch": 0.930460333006856, "grad_norm": 0.09078287148442435, "learning_rate": 9.379944461154747e-06, "loss": 0.4528, "num_tokens": 450636455.0, "step": 475 }, { "epoch": 0.9324191968658179, "grad_norm": 0.0943177774625156, "learning_rate": 9.37688345939079e-06, "loss": 0.4445, "num_tokens": 451543099.0, "step": 476 }, { "epoch": 0.9343780607247796, "grad_norm": 0.08755700718803858, "learning_rate": 9.373815422659806e-06, "loss": 0.4424, "num_tokens": 452484486.0, "step": 477 }, { "epoch": 0.9363369245837414, "grad_norm": 0.09366369233320135, "learning_rate": 9.370740355893054e-06, "loss": 0.4458, "num_tokens": 453400247.0, "step": 478 }, { "epoch": 0.9382957884427032, "grad_norm": 0.0894783976375864, "learning_rate": 9.367658264033089e-06, "loss": 0.4344, "num_tokens": 454365225.0, "step": 479 }, { "epoch": 0.940254652301665, "grad_norm": 0.09324735140123487, "learning_rate": 9.364569152033756e-06, "loss": 0.4532, "num_tokens": 455347718.0, "step": 480 }, { "epoch": 0.9422135161606269, "grad_norm": 0.09558477482548243, "learning_rate": 9.361473024860191e-06, "loss": 0.4503, "num_tokens": 456299070.0, "step": 481 }, { "epoch": 0.9441723800195886, "grad_norm": 0.08950461213093072, "learning_rate": 9.358369887488798e-06, "loss": 0.4422, "num_tokens": 457249785.0, "step": 482 }, { "epoch": 0.9461312438785504, "grad_norm": 0.08592069835625851, "learning_rate": 9.355259744907252e-06, "loss": 0.4533, "num_tokens": 458175313.0, "step": 483 }, { "epoch": 0.9480901077375122, "grad_norm": 0.09232436789338523, "learning_rate": 9.352142602114487e-06, "loss": 0.4516, "num_tokens": 459094336.0, "step": 484 }, { "epoch": 0.9500489715964741, "grad_norm": 0.0978701291776305, "learning_rate": 9.349018464120688e-06, "loss": 0.4434, "num_tokens": 460047468.0, "step": 485 }, { "epoch": 0.9520078354554359, "grad_norm": 0.08981980194266946, "learning_rate": 9.345887335947281e-06, "loss": 0.4315, "num_tokens": 461042755.0, "step": 486 }, { "epoch": 0.9539666993143977, "grad_norm": 0.08782893865713717, "learning_rate": 9.342749222626935e-06, "loss": 0.4355, "num_tokens": 461968138.0, "step": 487 }, { "epoch": 0.9559255631733594, "grad_norm": 0.09388681547459392, "learning_rate": 9.339604129203538e-06, "loss": 0.4296, "num_tokens": 462931977.0, "step": 488 }, { "epoch": 0.9578844270323212, "grad_norm": 0.09136708170681833, "learning_rate": 9.336452060732201e-06, "loss": 0.4553, "num_tokens": 463859433.0, "step": 489 }, { "epoch": 0.9598432908912831, "grad_norm": 0.08704641178999947, "learning_rate": 9.333293022279245e-06, "loss": 0.4473, "num_tokens": 464847912.0, "step": 490 }, { "epoch": 0.9618021547502449, "grad_norm": 0.08687383420459688, "learning_rate": 9.330127018922195e-06, "loss": 0.4588, "num_tokens": 465830652.0, "step": 491 }, { "epoch": 0.9637610186092067, "grad_norm": 0.09087441610941079, "learning_rate": 9.326954055749767e-06, "loss": 0.4488, "num_tokens": 466741968.0, "step": 492 }, { "epoch": 0.9657198824681684, "grad_norm": 0.0926523727188182, "learning_rate": 9.323774137861869e-06, "loss": 0.4372, "num_tokens": 467697264.0, "step": 493 }, { "epoch": 0.9676787463271302, "grad_norm": 0.09008100047290227, "learning_rate": 9.320587270369586e-06, "loss": 0.4415, "num_tokens": 468616073.0, "step": 494 }, { "epoch": 0.9696376101860921, "grad_norm": 0.0909613049269523, "learning_rate": 9.317393458395167e-06, "loss": 0.4486, "num_tokens": 469579890.0, "step": 495 }, { "epoch": 0.9715964740450539, "grad_norm": 0.08656271434525915, "learning_rate": 9.314192707072031e-06, "loss": 0.4296, "num_tokens": 470535384.0, "step": 496 }, { "epoch": 0.9735553379040157, "grad_norm": 0.09028048270821377, "learning_rate": 9.310985021544749e-06, "loss": 0.4505, "num_tokens": 471470747.0, "step": 497 }, { "epoch": 0.9755142017629774, "grad_norm": 0.08686211011620942, "learning_rate": 9.307770406969032e-06, "loss": 0.4325, "num_tokens": 472425879.0, "step": 498 }, { "epoch": 0.9774730656219393, "grad_norm": 0.10047507152636914, "learning_rate": 9.304548868511731e-06, "loss": 0.4386, "num_tokens": 473364667.0, "step": 499 }, { "epoch": 0.9794319294809011, "grad_norm": 0.09171293142795227, "learning_rate": 9.30132041135083e-06, "loss": 0.4326, "num_tokens": 474283129.0, "step": 500 }, { "epoch": 0.9813907933398629, "grad_norm": 0.08391550491357692, "learning_rate": 9.298085040675429e-06, "loss": 0.4317, "num_tokens": 475260218.0, "step": 501 }, { "epoch": 0.9833496571988247, "grad_norm": 0.09443388042449521, "learning_rate": 9.29484276168574e-06, "loss": 0.4311, "num_tokens": 476205061.0, "step": 502 }, { "epoch": 0.9853085210577864, "grad_norm": 0.09282268296918562, "learning_rate": 9.291593579593077e-06, "loss": 0.4311, "num_tokens": 477175331.0, "step": 503 }, { "epoch": 0.9872673849167483, "grad_norm": 0.09243941597551834, "learning_rate": 9.288337499619856e-06, "loss": 0.4432, "num_tokens": 478132730.0, "step": 504 }, { "epoch": 0.9892262487757101, "grad_norm": 0.08810022148509583, "learning_rate": 9.285074526999577e-06, "loss": 0.4351, "num_tokens": 479086824.0, "step": 505 }, { "epoch": 0.9911851126346719, "grad_norm": 0.08841532311346595, "learning_rate": 9.281804666976813e-06, "loss": 0.4279, "num_tokens": 479995722.0, "step": 506 }, { "epoch": 0.9931439764936337, "grad_norm": 0.08789829013435767, "learning_rate": 9.278527924807216e-06, "loss": 0.4458, "num_tokens": 480924330.0, "step": 507 }, { "epoch": 0.9951028403525954, "grad_norm": 0.09797516690458226, "learning_rate": 9.275244305757492e-06, "loss": 0.4475, "num_tokens": 481859349.0, "step": 508 }, { "epoch": 0.9970617042115573, "grad_norm": 0.08900989430801746, "learning_rate": 9.271953815105406e-06, "loss": 0.4461, "num_tokens": 482805292.0, "step": 509 }, { "epoch": 0.9990205680705191, "grad_norm": 0.09041197365212955, "learning_rate": 9.268656458139763e-06, "loss": 0.4436, "num_tokens": 483742048.0, "step": 510 }, { "epoch": 1.0, "grad_norm": 0.09041197365212955, "learning_rate": 9.265352240160408e-06, "loss": 0.4253, "num_tokens": 484242036.0, "step": 511 }, { "epoch": 1.0019588638589618, "grad_norm": 0.1346384417662723, "learning_rate": 9.262041166478215e-06, "loss": 0.4222, "num_tokens": 485179674.0, "step": 512 }, { "epoch": 1.0039177277179236, "grad_norm": 0.088029458816831, "learning_rate": 9.25872324241507e-06, "loss": 0.4338, "num_tokens": 486117403.0, "step": 513 }, { "epoch": 1.0058765915768855, "grad_norm": 0.08878702794859034, "learning_rate": 9.255398473303873e-06, "loss": 0.4351, "num_tokens": 487071335.0, "step": 514 }, { "epoch": 1.0078354554358473, "grad_norm": 0.08822281799871601, "learning_rate": 9.252066864488532e-06, "loss": 0.4269, "num_tokens": 488069775.0, "step": 515 }, { "epoch": 1.0097943192948091, "grad_norm": 0.08939115776709602, "learning_rate": 9.24872842132394e-06, "loss": 0.428, "num_tokens": 489010259.0, "step": 516 }, { "epoch": 1.0117531831537707, "grad_norm": 0.08703369305219627, "learning_rate": 9.245383149175981e-06, "loss": 0.4301, "num_tokens": 489975699.0, "step": 517 }, { "epoch": 1.0137120470127325, "grad_norm": 0.08623685636983926, "learning_rate": 9.242031053421511e-06, "loss": 0.4405, "num_tokens": 490910628.0, "step": 518 }, { "epoch": 1.0156709108716944, "grad_norm": 0.08640005446608966, "learning_rate": 9.238672139448354e-06, "loss": 0.4328, "num_tokens": 491864581.0, "step": 519 }, { "epoch": 1.0176297747306562, "grad_norm": 0.08964058489696397, "learning_rate": 9.235306412655298e-06, "loss": 0.4445, "num_tokens": 492810592.0, "step": 520 }, { "epoch": 1.019588638589618, "grad_norm": 0.09360178318670839, "learning_rate": 9.231933878452075e-06, "loss": 0.4286, "num_tokens": 493725212.0, "step": 521 }, { "epoch": 1.0215475024485798, "grad_norm": 0.0849877652894003, "learning_rate": 9.22855454225936e-06, "loss": 0.4328, "num_tokens": 494682257.0, "step": 522 }, { "epoch": 1.0235063663075417, "grad_norm": 0.08922178043950774, "learning_rate": 9.225168409508763e-06, "loss": 0.4439, "num_tokens": 495656406.0, "step": 523 }, { "epoch": 1.0254652301665035, "grad_norm": 0.0872230033667074, "learning_rate": 9.221775485642817e-06, "loss": 0.4383, "num_tokens": 496573891.0, "step": 524 }, { "epoch": 1.0274240940254653, "grad_norm": 0.08890668382286442, "learning_rate": 9.21837577611497e-06, "loss": 0.4274, "num_tokens": 497525699.0, "step": 525 }, { "epoch": 1.0293829578844271, "grad_norm": 0.0878795163671064, "learning_rate": 9.214969286389577e-06, "loss": 0.4207, "num_tokens": 498456085.0, "step": 526 }, { "epoch": 1.0313418217433887, "grad_norm": 0.08547117859073326, "learning_rate": 9.21155602194189e-06, "loss": 0.4239, "num_tokens": 499434510.0, "step": 527 }, { "epoch": 1.0333006856023506, "grad_norm": 0.08449758677555291, "learning_rate": 9.20813598825805e-06, "loss": 0.4312, "num_tokens": 500402663.0, "step": 528 }, { "epoch": 1.0352595494613124, "grad_norm": 0.08679352085531529, "learning_rate": 9.204709190835081e-06, "loss": 0.4468, "num_tokens": 501376158.0, "step": 529 }, { "epoch": 1.0372184133202742, "grad_norm": 0.09127157452556493, "learning_rate": 9.201275635180877e-06, "loss": 0.4282, "num_tokens": 502317709.0, "step": 530 }, { "epoch": 1.039177277179236, "grad_norm": 0.08423552533543917, "learning_rate": 9.19783532681419e-06, "loss": 0.4399, "num_tokens": 503287116.0, "step": 531 }, { "epoch": 1.0411361410381978, "grad_norm": 0.09162636138190586, "learning_rate": 9.194388271264634e-06, "loss": 0.439, "num_tokens": 504232930.0, "step": 532 }, { "epoch": 1.0430950048971597, "grad_norm": 0.09074860458297218, "learning_rate": 9.190934474072658e-06, "loss": 0.4355, "num_tokens": 505193880.0, "step": 533 }, { "epoch": 1.0450538687561215, "grad_norm": 0.09258516439738017, "learning_rate": 9.187473940789558e-06, "loss": 0.4207, "num_tokens": 506177865.0, "step": 534 }, { "epoch": 1.0470127326150833, "grad_norm": 0.08837416243576877, "learning_rate": 9.184006676977444e-06, "loss": 0.4287, "num_tokens": 507129459.0, "step": 535 }, { "epoch": 1.0489715964740451, "grad_norm": 0.08628378811997846, "learning_rate": 9.180532688209256e-06, "loss": 0.4412, "num_tokens": 508068980.0, "step": 536 }, { "epoch": 1.050930460333007, "grad_norm": 0.08781731978793558, "learning_rate": 9.177051980068738e-06, "loss": 0.416, "num_tokens": 509021271.0, "step": 537 }, { "epoch": 1.0528893241919686, "grad_norm": 0.08690062866814949, "learning_rate": 9.173564558150429e-06, "loss": 0.4339, "num_tokens": 509978396.0, "step": 538 }, { "epoch": 1.0548481880509304, "grad_norm": 0.08606956839244426, "learning_rate": 9.17007042805967e-06, "loss": 0.4417, "num_tokens": 510933733.0, "step": 539 }, { "epoch": 1.0568070519098922, "grad_norm": 0.09521025231984558, "learning_rate": 9.166569595412576e-06, "loss": 0.4288, "num_tokens": 511841279.0, "step": 540 }, { "epoch": 1.058765915768854, "grad_norm": 0.0887698843811598, "learning_rate": 9.163062065836035e-06, "loss": 0.4382, "num_tokens": 512800311.0, "step": 541 }, { "epoch": 1.0607247796278159, "grad_norm": 0.0897644692815152, "learning_rate": 9.159547844967703e-06, "loss": 0.4309, "num_tokens": 513718965.0, "step": 542 }, { "epoch": 1.0626836434867777, "grad_norm": 0.10022140749613061, "learning_rate": 9.15602693845599e-06, "loss": 0.4341, "num_tokens": 514682927.0, "step": 543 }, { "epoch": 1.0646425073457395, "grad_norm": 0.09351376434364068, "learning_rate": 9.15249935196005e-06, "loss": 0.4414, "num_tokens": 515617431.0, "step": 544 }, { "epoch": 1.0666013712047013, "grad_norm": 0.08904044489307142, "learning_rate": 9.14896509114977e-06, "loss": 0.4471, "num_tokens": 516569839.0, "step": 545 }, { "epoch": 1.0685602350636632, "grad_norm": 0.09092820781392638, "learning_rate": 9.145424161705777e-06, "loss": 0.4432, "num_tokens": 517510193.0, "step": 546 }, { "epoch": 1.070519098922625, "grad_norm": 0.09330544530740836, "learning_rate": 9.141876569319405e-06, "loss": 0.4369, "num_tokens": 518445486.0, "step": 547 }, { "epoch": 1.0724779627815866, "grad_norm": 0.09016614273965744, "learning_rate": 9.138322319692701e-06, "loss": 0.4529, "num_tokens": 519407293.0, "step": 548 }, { "epoch": 1.0744368266405484, "grad_norm": 0.08597232757876415, "learning_rate": 9.134761418538411e-06, "loss": 0.4305, "num_tokens": 520351218.0, "step": 549 }, { "epoch": 1.0763956904995102, "grad_norm": 0.08545941413061858, "learning_rate": 9.131193871579975e-06, "loss": 0.4284, "num_tokens": 521304087.0, "step": 550 }, { "epoch": 1.078354554358472, "grad_norm": 0.09322913844311503, "learning_rate": 9.127619684551514e-06, "loss": 0.4369, "num_tokens": 522234338.0, "step": 551 }, { "epoch": 1.0803134182174339, "grad_norm": 0.08621966914343301, "learning_rate": 9.124038863197817e-06, "loss": 0.4223, "num_tokens": 523155859.0, "step": 552 }, { "epoch": 1.0822722820763957, "grad_norm": 0.09160924475456932, "learning_rate": 9.120451413274346e-06, "loss": 0.4364, "num_tokens": 524088614.0, "step": 553 }, { "epoch": 1.0842311459353575, "grad_norm": 0.08458715251971163, "learning_rate": 9.116857340547203e-06, "loss": 0.439, "num_tokens": 525041740.0, "step": 554 }, { "epoch": 1.0861900097943193, "grad_norm": 0.0898259350766786, "learning_rate": 9.11325665079315e-06, "loss": 0.451, "num_tokens": 525986172.0, "step": 555 }, { "epoch": 1.0881488736532812, "grad_norm": 0.08779432275187664, "learning_rate": 9.109649349799575e-06, "loss": 0.4395, "num_tokens": 526927161.0, "step": 556 }, { "epoch": 1.090107737512243, "grad_norm": 0.09289699034996378, "learning_rate": 9.106035443364493e-06, "loss": 0.4369, "num_tokens": 527843298.0, "step": 557 }, { "epoch": 1.0920666013712048, "grad_norm": 0.09181620819782729, "learning_rate": 9.102414937296542e-06, "loss": 0.4456, "num_tokens": 528785650.0, "step": 558 }, { "epoch": 1.0940254652301664, "grad_norm": 0.08490441522975409, "learning_rate": 9.098787837414957e-06, "loss": 0.4356, "num_tokens": 529760264.0, "step": 559 }, { "epoch": 1.0959843290891282, "grad_norm": 0.09289100848996348, "learning_rate": 9.095154149549584e-06, "loss": 0.4312, "num_tokens": 530731237.0, "step": 560 }, { "epoch": 1.09794319294809, "grad_norm": 0.08744910222088205, "learning_rate": 9.091513879540845e-06, "loss": 0.4249, "num_tokens": 531697682.0, "step": 561 }, { "epoch": 1.0999020568070519, "grad_norm": 0.08763731036004777, "learning_rate": 9.087867033239754e-06, "loss": 0.4455, "num_tokens": 532640988.0, "step": 562 }, { "epoch": 1.1018609206660137, "grad_norm": 0.09206640614015055, "learning_rate": 9.084213616507887e-06, "loss": 0.4235, "num_tokens": 533610700.0, "step": 563 }, { "epoch": 1.1038197845249755, "grad_norm": 0.0864258756510009, "learning_rate": 9.080553635217379e-06, "loss": 0.4462, "num_tokens": 534568004.0, "step": 564 }, { "epoch": 1.1057786483839374, "grad_norm": 0.09050850449279439, "learning_rate": 9.076887095250924e-06, "loss": 0.445, "num_tokens": 535487804.0, "step": 565 }, { "epoch": 1.1077375122428992, "grad_norm": 0.08886233313886589, "learning_rate": 9.07321400250175e-06, "loss": 0.4304, "num_tokens": 536431050.0, "step": 566 }, { "epoch": 1.109696376101861, "grad_norm": 0.09119087376797808, "learning_rate": 9.069534362873627e-06, "loss": 0.4403, "num_tokens": 537413743.0, "step": 567 }, { "epoch": 1.1116552399608226, "grad_norm": 0.08832284757786994, "learning_rate": 9.065848182280835e-06, "loss": 0.4237, "num_tokens": 538358346.0, "step": 568 }, { "epoch": 1.1136141038197844, "grad_norm": 0.09035342822708291, "learning_rate": 9.062155466648177e-06, "loss": 0.4336, "num_tokens": 539260955.0, "step": 569 }, { "epoch": 1.1155729676787463, "grad_norm": 0.08818202236921634, "learning_rate": 9.058456221910956e-06, "loss": 0.4718, "num_tokens": 540224054.0, "step": 570 }, { "epoch": 1.117531831537708, "grad_norm": 0.09349504095923021, "learning_rate": 9.05475045401497e-06, "loss": 0.4431, "num_tokens": 541167909.0, "step": 571 }, { "epoch": 1.11949069539667, "grad_norm": 0.09360804157578115, "learning_rate": 9.051038168916502e-06, "loss": 0.4372, "num_tokens": 542110697.0, "step": 572 }, { "epoch": 1.1214495592556317, "grad_norm": 0.08740201831701315, "learning_rate": 9.04731937258231e-06, "loss": 0.4276, "num_tokens": 543078503.0, "step": 573 }, { "epoch": 1.1234084231145935, "grad_norm": 0.08990834925551848, "learning_rate": 9.043594070989619e-06, "loss": 0.4283, "num_tokens": 544041670.0, "step": 574 }, { "epoch": 1.1253672869735554, "grad_norm": 0.09019501205224968, "learning_rate": 9.039862270126102e-06, "loss": 0.4402, "num_tokens": 544955716.0, "step": 575 }, { "epoch": 1.1273261508325172, "grad_norm": 0.09007146534175918, "learning_rate": 9.036123975989893e-06, "loss": 0.4464, "num_tokens": 545868897.0, "step": 576 }, { "epoch": 1.129285014691479, "grad_norm": 0.09113325715101009, "learning_rate": 9.032379194589546e-06, "loss": 0.4191, "num_tokens": 546840812.0, "step": 577 }, { "epoch": 1.1312438785504408, "grad_norm": 0.08730973899060335, "learning_rate": 9.028627931944054e-06, "loss": 0.4407, "num_tokens": 547780715.0, "step": 578 }, { "epoch": 1.1332027424094027, "grad_norm": 0.09309473090760453, "learning_rate": 9.024870194082824e-06, "loss": 0.4405, "num_tokens": 548711835.0, "step": 579 }, { "epoch": 1.1351616062683643, "grad_norm": 0.0909014800583608, "learning_rate": 9.021105987045668e-06, "loss": 0.4311, "num_tokens": 549694336.0, "step": 580 }, { "epoch": 1.137120470127326, "grad_norm": 0.08925871959051611, "learning_rate": 9.0173353168828e-06, "loss": 0.4416, "num_tokens": 550623086.0, "step": 581 }, { "epoch": 1.139079333986288, "grad_norm": 0.08698679272649354, "learning_rate": 9.013558189654819e-06, "loss": 0.4348, "num_tokens": 551575073.0, "step": 582 }, { "epoch": 1.1410381978452497, "grad_norm": 0.10080409823346292, "learning_rate": 9.009774611432703e-06, "loss": 0.416, "num_tokens": 552504245.0, "step": 583 }, { "epoch": 1.1429970617042116, "grad_norm": 0.09186094665712399, "learning_rate": 9.0059845882978e-06, "loss": 0.4396, "num_tokens": 553466835.0, "step": 584 }, { "epoch": 1.1449559255631734, "grad_norm": 0.08544410038312396, "learning_rate": 9.002188126341815e-06, "loss": 0.4168, "num_tokens": 554393292.0, "step": 585 }, { "epoch": 1.1469147894221352, "grad_norm": 0.09036750755990369, "learning_rate": 8.998385231666807e-06, "loss": 0.4428, "num_tokens": 555351017.0, "step": 586 }, { "epoch": 1.148873653281097, "grad_norm": 0.0941430314187494, "learning_rate": 8.994575910385164e-06, "loss": 0.4292, "num_tokens": 556298669.0, "step": 587 }, { "epoch": 1.1508325171400589, "grad_norm": 0.09190677924156279, "learning_rate": 8.990760168619616e-06, "loss": 0.4354, "num_tokens": 557209186.0, "step": 588 }, { "epoch": 1.1527913809990205, "grad_norm": 0.09438033334326829, "learning_rate": 8.986938012503203e-06, "loss": 0.4443, "num_tokens": 558171451.0, "step": 589 }, { "epoch": 1.1547502448579823, "grad_norm": 0.0910359513541571, "learning_rate": 8.983109448179281e-06, "loss": 0.4448, "num_tokens": 559135698.0, "step": 590 }, { "epoch": 1.156709108716944, "grad_norm": 0.08989170045297915, "learning_rate": 8.979274481801501e-06, "loss": 0.4302, "num_tokens": 560063946.0, "step": 591 }, { "epoch": 1.158667972575906, "grad_norm": 0.09086054010786371, "learning_rate": 8.975433119533809e-06, "loss": 0.4359, "num_tokens": 560999098.0, "step": 592 }, { "epoch": 1.1606268364348677, "grad_norm": 0.08845012178342421, "learning_rate": 8.971585367550426e-06, "loss": 0.4431, "num_tokens": 561936325.0, "step": 593 }, { "epoch": 1.1625857002938296, "grad_norm": 0.08962649423988109, "learning_rate": 8.967731232035848e-06, "loss": 0.4434, "num_tokens": 562877802.0, "step": 594 }, { "epoch": 1.1645445641527914, "grad_norm": 0.10172094046132377, "learning_rate": 8.963870719184829e-06, "loss": 0.4327, "num_tokens": 563804266.0, "step": 595 }, { "epoch": 1.1665034280117532, "grad_norm": 0.09120617978253029, "learning_rate": 8.960003835202369e-06, "loss": 0.4292, "num_tokens": 564776814.0, "step": 596 }, { "epoch": 1.168462291870715, "grad_norm": 0.09748825916269319, "learning_rate": 8.956130586303718e-06, "loss": 0.4155, "num_tokens": 565708746.0, "step": 597 }, { "epoch": 1.1704211557296769, "grad_norm": 0.09515446940350762, "learning_rate": 8.95225097871435e-06, "loss": 0.4335, "num_tokens": 566691749.0, "step": 598 }, { "epoch": 1.1723800195886387, "grad_norm": 0.09237283816575288, "learning_rate": 8.948365018669956e-06, "loss": 0.4434, "num_tokens": 567602865.0, "step": 599 }, { "epoch": 1.1743388834476005, "grad_norm": 0.0851188141049216, "learning_rate": 8.944472712416448e-06, "loss": 0.4235, "num_tokens": 568553534.0, "step": 600 }, { "epoch": 1.1762977473065621, "grad_norm": 0.09329542673487902, "learning_rate": 8.940574066209925e-06, "loss": 0.4197, "num_tokens": 569475834.0, "step": 601 }, { "epoch": 1.178256611165524, "grad_norm": 0.09616537034414288, "learning_rate": 8.93666908631669e-06, "loss": 0.4382, "num_tokens": 570398515.0, "step": 602 }, { "epoch": 1.1802154750244858, "grad_norm": 0.08888109211301855, "learning_rate": 8.932757779013214e-06, "loss": 0.4405, "num_tokens": 571317760.0, "step": 603 }, { "epoch": 1.1821743388834476, "grad_norm": 0.09427241505946338, "learning_rate": 8.928840150586145e-06, "loss": 0.4414, "num_tokens": 572262537.0, "step": 604 }, { "epoch": 1.1841332027424094, "grad_norm": 0.08439789988449452, "learning_rate": 8.924916207332288e-06, "loss": 0.4066, "num_tokens": 573190293.0, "step": 605 }, { "epoch": 1.1860920666013712, "grad_norm": 0.09258147163465, "learning_rate": 8.9209859555586e-06, "loss": 0.444, "num_tokens": 574116845.0, "step": 606 }, { "epoch": 1.188050930460333, "grad_norm": 0.09691647341818313, "learning_rate": 8.917049401582178e-06, "loss": 0.4416, "num_tokens": 575043499.0, "step": 607 }, { "epoch": 1.1900097943192949, "grad_norm": 0.09873606919009019, "learning_rate": 8.913106551730247e-06, "loss": 0.4327, "num_tokens": 576005125.0, "step": 608 }, { "epoch": 1.1919686581782567, "grad_norm": 0.08724247174727258, "learning_rate": 8.90915741234015e-06, "loss": 0.4271, "num_tokens": 576979176.0, "step": 609 }, { "epoch": 1.1939275220372183, "grad_norm": 0.08491734755637224, "learning_rate": 8.90520198975934e-06, "loss": 0.4386, "num_tokens": 577932131.0, "step": 610 }, { "epoch": 1.1958863858961801, "grad_norm": 0.08882017914507026, "learning_rate": 8.901240290345372e-06, "loss": 0.4183, "num_tokens": 578869316.0, "step": 611 }, { "epoch": 1.197845249755142, "grad_norm": 0.08848950314924899, "learning_rate": 8.897272320465887e-06, "loss": 0.4374, "num_tokens": 579828766.0, "step": 612 }, { "epoch": 1.1998041136141038, "grad_norm": 0.09418538155264382, "learning_rate": 8.893298086498604e-06, "loss": 0.4407, "num_tokens": 580760358.0, "step": 613 }, { "epoch": 1.2017629774730656, "grad_norm": 0.0951586839511284, "learning_rate": 8.889317594831313e-06, "loss": 0.4559, "num_tokens": 581739411.0, "step": 614 }, { "epoch": 1.2037218413320274, "grad_norm": 0.08649794305940979, "learning_rate": 8.885330851861858e-06, "loss": 0.448, "num_tokens": 582697954.0, "step": 615 }, { "epoch": 1.2056807051909892, "grad_norm": 0.090851455587811, "learning_rate": 8.881337863998137e-06, "loss": 0.4307, "num_tokens": 583654639.0, "step": 616 }, { "epoch": 1.207639569049951, "grad_norm": 0.08879120337346492, "learning_rate": 8.877338637658074e-06, "loss": 0.432, "num_tokens": 584588030.0, "step": 617 }, { "epoch": 1.209598432908913, "grad_norm": 0.0899944248152233, "learning_rate": 8.873333179269635e-06, "loss": 0.4434, "num_tokens": 585533383.0, "step": 618 }, { "epoch": 1.2115572967678747, "grad_norm": 0.09846557093533091, "learning_rate": 8.869321495270794e-06, "loss": 0.4223, "num_tokens": 586464702.0, "step": 619 }, { "epoch": 1.2135161606268365, "grad_norm": 0.09703993219197968, "learning_rate": 8.865303592109528e-06, "loss": 0.434, "num_tokens": 587434572.0, "step": 620 }, { "epoch": 1.2154750244857984, "grad_norm": 0.08482944334878705, "learning_rate": 8.86127947624382e-06, "loss": 0.4272, "num_tokens": 588407834.0, "step": 621 }, { "epoch": 1.21743388834476, "grad_norm": 0.08877645120102982, "learning_rate": 8.857249154141632e-06, "loss": 0.429, "num_tokens": 589366470.0, "step": 622 }, { "epoch": 1.2193927522037218, "grad_norm": 0.09451097486733544, "learning_rate": 8.853212632280901e-06, "loss": 0.4396, "num_tokens": 590324534.0, "step": 623 }, { "epoch": 1.2213516160626836, "grad_norm": 0.09283507786572938, "learning_rate": 8.849169917149532e-06, "loss": 0.4316, "num_tokens": 591288272.0, "step": 624 }, { "epoch": 1.2233104799216454, "grad_norm": 0.09120151965940637, "learning_rate": 8.845121015245384e-06, "loss": 0.435, "num_tokens": 592274577.0, "step": 625 }, { "epoch": 1.2252693437806073, "grad_norm": 0.08867685030216749, "learning_rate": 8.841065933076258e-06, "loss": 0.4526, "num_tokens": 593219134.0, "step": 626 }, { "epoch": 1.227228207639569, "grad_norm": 0.0873309302541802, "learning_rate": 8.83700467715989e-06, "loss": 0.4442, "num_tokens": 594165772.0, "step": 627 }, { "epoch": 1.229187071498531, "grad_norm": 0.0841848045908713, "learning_rate": 8.832937254023938e-06, "loss": 0.4159, "num_tokens": 595116617.0, "step": 628 }, { "epoch": 1.2311459353574927, "grad_norm": 0.09512852122433438, "learning_rate": 8.828863670205973e-06, "loss": 0.4298, "num_tokens": 596063491.0, "step": 629 }, { "epoch": 1.2331047992164546, "grad_norm": 0.09200472118564294, "learning_rate": 8.824783932253472e-06, "loss": 0.4128, "num_tokens": 597019290.0, "step": 630 }, { "epoch": 1.2350636630754162, "grad_norm": 0.0970630323465728, "learning_rate": 8.820698046723796e-06, "loss": 0.4428, "num_tokens": 597975720.0, "step": 631 }, { "epoch": 1.237022526934378, "grad_norm": 0.08991662641692448, "learning_rate": 8.816606020184191e-06, "loss": 0.4265, "num_tokens": 598874821.0, "step": 632 }, { "epoch": 1.2389813907933398, "grad_norm": 0.09894834121305396, "learning_rate": 8.812507859211775e-06, "loss": 0.4377, "num_tokens": 599815897.0, "step": 633 }, { "epoch": 1.2409402546523016, "grad_norm": 0.08975554446089534, "learning_rate": 8.808403570393525e-06, "loss": 0.4297, "num_tokens": 600767656.0, "step": 634 }, { "epoch": 1.2428991185112634, "grad_norm": 0.08981109968851751, "learning_rate": 8.804293160326263e-06, "loss": 0.438, "num_tokens": 601727806.0, "step": 635 }, { "epoch": 1.2448579823702253, "grad_norm": 0.08777442501895265, "learning_rate": 8.800176635616658e-06, "loss": 0.4264, "num_tokens": 602666137.0, "step": 636 }, { "epoch": 1.246816846229187, "grad_norm": 0.08464531266226324, "learning_rate": 8.796054002881196e-06, "loss": 0.4485, "num_tokens": 603593900.0, "step": 637 }, { "epoch": 1.248775710088149, "grad_norm": 0.09454628489660366, "learning_rate": 8.791925268746193e-06, "loss": 0.4236, "num_tokens": 604535548.0, "step": 638 }, { "epoch": 1.2507345739471107, "grad_norm": 0.08654271881186104, "learning_rate": 8.787790439847762e-06, "loss": 0.4161, "num_tokens": 605535752.0, "step": 639 }, { "epoch": 1.2526934378060726, "grad_norm": 0.08524410656224386, "learning_rate": 8.783649522831816e-06, "loss": 0.4247, "num_tokens": 606481476.0, "step": 640 }, { "epoch": 1.2546523016650344, "grad_norm": 0.08525806913461748, "learning_rate": 8.779502524354054e-06, "loss": 0.4362, "num_tokens": 607411055.0, "step": 641 }, { "epoch": 1.2566111655239962, "grad_norm": 0.0936767382051949, "learning_rate": 8.775349451079948e-06, "loss": 0.4396, "num_tokens": 608356108.0, "step": 642 }, { "epoch": 1.2585700293829578, "grad_norm": 0.08980676844521197, "learning_rate": 8.771190309684737e-06, "loss": 0.4133, "num_tokens": 609301945.0, "step": 643 }, { "epoch": 1.2605288932419196, "grad_norm": 0.08618636223485227, "learning_rate": 8.767025106853407e-06, "loss": 0.4156, "num_tokens": 610239096.0, "step": 644 }, { "epoch": 1.2624877571008815, "grad_norm": 0.08214414891863256, "learning_rate": 8.762853849280692e-06, "loss": 0.4292, "num_tokens": 611199816.0, "step": 645 }, { "epoch": 1.2644466209598433, "grad_norm": 0.08708778558013325, "learning_rate": 8.758676543671059e-06, "loss": 0.4354, "num_tokens": 612158169.0, "step": 646 }, { "epoch": 1.266405484818805, "grad_norm": 0.08721017054043191, "learning_rate": 8.754493196738692e-06, "loss": 0.4418, "num_tokens": 613075173.0, "step": 647 }, { "epoch": 1.268364348677767, "grad_norm": 0.09924361764756602, "learning_rate": 8.750303815207487e-06, "loss": 0.4139, "num_tokens": 614043095.0, "step": 648 }, { "epoch": 1.2703232125367288, "grad_norm": 0.0898894914120729, "learning_rate": 8.746108405811036e-06, "loss": 0.4384, "num_tokens": 614975110.0, "step": 649 }, { "epoch": 1.2722820763956906, "grad_norm": 0.0859163297853911, "learning_rate": 8.741906975292626e-06, "loss": 0.4349, "num_tokens": 615918684.0, "step": 650 }, { "epoch": 1.2742409402546522, "grad_norm": 0.09237780533611917, "learning_rate": 8.737699530405218e-06, "loss": 0.4398, "num_tokens": 616878905.0, "step": 651 }, { "epoch": 1.276199804113614, "grad_norm": 0.0995344708616368, "learning_rate": 8.73348607791144e-06, "loss": 0.4585, "num_tokens": 617842176.0, "step": 652 }, { "epoch": 1.2781586679725758, "grad_norm": 0.09267000447613578, "learning_rate": 8.729266624583574e-06, "loss": 0.4254, "num_tokens": 618781622.0, "step": 653 }, { "epoch": 1.2801175318315376, "grad_norm": 0.09010105944541431, "learning_rate": 8.725041177203556e-06, "loss": 0.4436, "num_tokens": 619724561.0, "step": 654 }, { "epoch": 1.2820763956904995, "grad_norm": 0.0884000275855435, "learning_rate": 8.720809742562942e-06, "loss": 0.4429, "num_tokens": 620692580.0, "step": 655 }, { "epoch": 1.2840352595494613, "grad_norm": 0.08443674869288785, "learning_rate": 8.716572327462923e-06, "loss": 0.4387, "num_tokens": 621602721.0, "step": 656 }, { "epoch": 1.2859941234084231, "grad_norm": 0.09753336457459878, "learning_rate": 8.712328938714299e-06, "loss": 0.4283, "num_tokens": 622548333.0, "step": 657 }, { "epoch": 1.287952987267385, "grad_norm": 0.08516333994023409, "learning_rate": 8.708079583137469e-06, "loss": 0.4375, "num_tokens": 623449645.0, "step": 658 }, { "epoch": 1.2899118511263468, "grad_norm": 0.09342874946755898, "learning_rate": 8.703824267562424e-06, "loss": 0.4435, "num_tokens": 624421631.0, "step": 659 }, { "epoch": 1.2918707149853086, "grad_norm": 0.0929486810483253, "learning_rate": 8.699562998828739e-06, "loss": 0.437, "num_tokens": 625372051.0, "step": 660 }, { "epoch": 1.2938295788442704, "grad_norm": 0.08918991633816341, "learning_rate": 8.695295783785549e-06, "loss": 0.431, "num_tokens": 626313867.0, "step": 661 }, { "epoch": 1.2957884427032322, "grad_norm": 0.09284217963926021, "learning_rate": 8.691022629291553e-06, "loss": 0.4426, "num_tokens": 627298613.0, "step": 662 }, { "epoch": 1.297747306562194, "grad_norm": 0.09180686540593182, "learning_rate": 8.686743542214994e-06, "loss": 0.4237, "num_tokens": 628224380.0, "step": 663 }, { "epoch": 1.2997061704211557, "grad_norm": 0.08633319535636999, "learning_rate": 8.682458529433651e-06, "loss": 0.4291, "num_tokens": 629166811.0, "step": 664 }, { "epoch": 1.3016650342801175, "grad_norm": 0.08866719171050685, "learning_rate": 8.678167597834825e-06, "loss": 0.4191, "num_tokens": 630106286.0, "step": 665 }, { "epoch": 1.3036238981390793, "grad_norm": 0.0865093360650587, "learning_rate": 8.673870754315336e-06, "loss": 0.4341, "num_tokens": 631049408.0, "step": 666 }, { "epoch": 1.3055827619980411, "grad_norm": 0.09236539788484789, "learning_rate": 8.669568005781503e-06, "loss": 0.4256, "num_tokens": 631990643.0, "step": 667 }, { "epoch": 1.307541625857003, "grad_norm": 0.08748296294076853, "learning_rate": 8.665259359149132e-06, "loss": 0.4424, "num_tokens": 632969463.0, "step": 668 }, { "epoch": 1.3095004897159648, "grad_norm": 0.0884592018171943, "learning_rate": 8.660944821343516e-06, "loss": 0.425, "num_tokens": 633925921.0, "step": 669 }, { "epoch": 1.3114593535749266, "grad_norm": 0.09219670228189061, "learning_rate": 8.656624399299414e-06, "loss": 0.4267, "num_tokens": 634873252.0, "step": 670 }, { "epoch": 1.3134182174338884, "grad_norm": 0.0934173112646401, "learning_rate": 8.652298099961041e-06, "loss": 0.4257, "num_tokens": 635791641.0, "step": 671 }, { "epoch": 1.31537708129285, "grad_norm": 0.08899742527365999, "learning_rate": 8.64796593028206e-06, "loss": 0.4316, "num_tokens": 636738438.0, "step": 672 }, { "epoch": 1.3173359451518118, "grad_norm": 0.09119691239377391, "learning_rate": 8.64362789722557e-06, "loss": 0.4269, "num_tokens": 637679723.0, "step": 673 }, { "epoch": 1.3192948090107737, "grad_norm": 0.09007407850006857, "learning_rate": 8.639284007764095e-06, "loss": 0.436, "num_tokens": 638618644.0, "step": 674 }, { "epoch": 1.3212536728697355, "grad_norm": 0.09146593267787054, "learning_rate": 8.63493426887957e-06, "loss": 0.4339, "num_tokens": 639568481.0, "step": 675 }, { "epoch": 1.3232125367286973, "grad_norm": 0.08399176778639582, "learning_rate": 8.630578687563331e-06, "loss": 0.4406, "num_tokens": 640515741.0, "step": 676 }, { "epoch": 1.3251714005876591, "grad_norm": 0.0880932161054987, "learning_rate": 8.62621727081611e-06, "loss": 0.4195, "num_tokens": 641452866.0, "step": 677 }, { "epoch": 1.327130264446621, "grad_norm": 0.08856200671088392, "learning_rate": 8.621850025648008e-06, "loss": 0.4537, "num_tokens": 642409372.0, "step": 678 }, { "epoch": 1.3290891283055828, "grad_norm": 0.09542380676899659, "learning_rate": 8.617476959078507e-06, "loss": 0.4415, "num_tokens": 643340797.0, "step": 679 }, { "epoch": 1.3310479921645446, "grad_norm": 0.09151225164745504, "learning_rate": 8.613098078136436e-06, "loss": 0.447, "num_tokens": 644281695.0, "step": 680 }, { "epoch": 1.3330068560235064, "grad_norm": 0.09136175561815078, "learning_rate": 8.608713389859974e-06, "loss": 0.4361, "num_tokens": 645255661.0, "step": 681 }, { "epoch": 1.3349657198824683, "grad_norm": 0.09106928179725267, "learning_rate": 8.60432290129663e-06, "loss": 0.4468, "num_tokens": 646193281.0, "step": 682 }, { "epoch": 1.33692458374143, "grad_norm": 0.08772595573487554, "learning_rate": 8.59992661950324e-06, "loss": 0.4227, "num_tokens": 647120974.0, "step": 683 }, { "epoch": 1.338883447600392, "grad_norm": 0.09534180082475596, "learning_rate": 8.59552455154595e-06, "loss": 0.4234, "num_tokens": 648053667.0, "step": 684 }, { "epoch": 1.3408423114593535, "grad_norm": 0.0934432959803238, "learning_rate": 8.591116704500208e-06, "loss": 0.4285, "num_tokens": 649004374.0, "step": 685 }, { "epoch": 1.3428011753183153, "grad_norm": 0.09172664210031523, "learning_rate": 8.586703085450746e-06, "loss": 0.4327, "num_tokens": 649928772.0, "step": 686 }, { "epoch": 1.3447600391772772, "grad_norm": 0.085466539788077, "learning_rate": 8.582283701491576e-06, "loss": 0.4237, "num_tokens": 650861928.0, "step": 687 }, { "epoch": 1.346718903036239, "grad_norm": 0.09167034135696252, "learning_rate": 8.577858559725978e-06, "loss": 0.4299, "num_tokens": 651825940.0, "step": 688 }, { "epoch": 1.3486777668952008, "grad_norm": 0.0970531851277602, "learning_rate": 8.573427667266485e-06, "loss": 0.4371, "num_tokens": 652783108.0, "step": 689 }, { "epoch": 1.3506366307541626, "grad_norm": 0.09072779638303974, "learning_rate": 8.56899103123487e-06, "loss": 0.4412, "num_tokens": 653718931.0, "step": 690 }, { "epoch": 1.3525954946131244, "grad_norm": 0.08960104365095135, "learning_rate": 8.564548658762143e-06, "loss": 0.4351, "num_tokens": 654637215.0, "step": 691 }, { "epoch": 1.3545543584720863, "grad_norm": 0.09304129721029504, "learning_rate": 8.56010055698853e-06, "loss": 0.4265, "num_tokens": 655560101.0, "step": 692 }, { "epoch": 1.3565132223310479, "grad_norm": 0.08696641031349327, "learning_rate": 8.555646733063469e-06, "loss": 0.4332, "num_tokens": 656510677.0, "step": 693 }, { "epoch": 1.3584720861900097, "grad_norm": 0.09476845731340805, "learning_rate": 8.551187194145591e-06, "loss": 0.4307, "num_tokens": 657450174.0, "step": 694 }, { "epoch": 1.3604309500489715, "grad_norm": 0.09233630418006109, "learning_rate": 8.54672194740272e-06, "loss": 0.4372, "num_tokens": 658393088.0, "step": 695 }, { "epoch": 1.3623898139079333, "grad_norm": 0.08536818889372678, "learning_rate": 8.54225100001184e-06, "loss": 0.4324, "num_tokens": 659330001.0, "step": 696 }, { "epoch": 1.3643486777668952, "grad_norm": 0.0899119840527955, "learning_rate": 8.537774359159117e-06, "loss": 0.4397, "num_tokens": 660298957.0, "step": 697 }, { "epoch": 1.366307541625857, "grad_norm": 0.09321774430912169, "learning_rate": 8.533292032039853e-06, "loss": 0.4277, "num_tokens": 661230044.0, "step": 698 }, { "epoch": 1.3682664054848188, "grad_norm": 0.09133714290085906, "learning_rate": 8.528804025858494e-06, "loss": 0.4353, "num_tokens": 662110838.0, "step": 699 }, { "epoch": 1.3702252693437806, "grad_norm": 0.08931284953678228, "learning_rate": 8.52431034782862e-06, "loss": 0.427, "num_tokens": 663060531.0, "step": 700 }, { "epoch": 1.3721841332027425, "grad_norm": 0.08622021222265192, "learning_rate": 8.519811005172916e-06, "loss": 0.441, "num_tokens": 664014829.0, "step": 701 }, { "epoch": 1.3741429970617043, "grad_norm": 0.0887106044030063, "learning_rate": 8.51530600512318e-06, "loss": 0.445, "num_tokens": 665012979.0, "step": 702 }, { "epoch": 1.376101860920666, "grad_norm": 0.08644656242360411, "learning_rate": 8.510795354920301e-06, "loss": 0.4048, "num_tokens": 665985315.0, "step": 703 }, { "epoch": 1.378060724779628, "grad_norm": 0.08195711926338874, "learning_rate": 8.506279061814248e-06, "loss": 0.4309, "num_tokens": 666939271.0, "step": 704 }, { "epoch": 1.3800195886385898, "grad_norm": 0.09408883419964502, "learning_rate": 8.501757133064066e-06, "loss": 0.429, "num_tokens": 667887572.0, "step": 705 }, { "epoch": 1.3819784524975514, "grad_norm": 0.08811305636721038, "learning_rate": 8.497229575937848e-06, "loss": 0.4357, "num_tokens": 668819611.0, "step": 706 }, { "epoch": 1.3839373163565132, "grad_norm": 0.08775982223125199, "learning_rate": 8.492696397712741e-06, "loss": 0.4343, "num_tokens": 669776929.0, "step": 707 }, { "epoch": 1.385896180215475, "grad_norm": 0.08536772157566545, "learning_rate": 8.488157605674924e-06, "loss": 0.4283, "num_tokens": 670736923.0, "step": 708 }, { "epoch": 1.3878550440744368, "grad_norm": 0.08791249109250661, "learning_rate": 8.483613207119602e-06, "loss": 0.4289, "num_tokens": 671662081.0, "step": 709 }, { "epoch": 1.3898139079333987, "grad_norm": 0.09847314582224101, "learning_rate": 8.479063209350986e-06, "loss": 0.4371, "num_tokens": 672630075.0, "step": 710 }, { "epoch": 1.3917727717923605, "grad_norm": 0.0924208886089575, "learning_rate": 8.47450761968229e-06, "loss": 0.427, "num_tokens": 673510735.0, "step": 711 }, { "epoch": 1.3937316356513223, "grad_norm": 0.09513808239437288, "learning_rate": 8.469946445435719e-06, "loss": 0.4292, "num_tokens": 674461452.0, "step": 712 }, { "epoch": 1.395690499510284, "grad_norm": 0.09173600111784203, "learning_rate": 8.465379693942448e-06, "loss": 0.4305, "num_tokens": 675426239.0, "step": 713 }, { "epoch": 1.3976493633692457, "grad_norm": 0.08839984025128103, "learning_rate": 8.460807372542618e-06, "loss": 0.4614, "num_tokens": 676375387.0, "step": 714 }, { "epoch": 1.3996082272282075, "grad_norm": 0.09374198120073221, "learning_rate": 8.456229488585328e-06, "loss": 0.4213, "num_tokens": 677327368.0, "step": 715 }, { "epoch": 1.4015670910871694, "grad_norm": 0.09072590715344986, "learning_rate": 8.451646049428607e-06, "loss": 0.4235, "num_tokens": 678283492.0, "step": 716 }, { "epoch": 1.4035259549461312, "grad_norm": 0.09335990897846304, "learning_rate": 8.447057062439425e-06, "loss": 0.433, "num_tokens": 679254020.0, "step": 717 }, { "epoch": 1.405484818805093, "grad_norm": 0.09508005476618756, "learning_rate": 8.44246253499366e-06, "loss": 0.4288, "num_tokens": 680237522.0, "step": 718 }, { "epoch": 1.4074436826640548, "grad_norm": 0.09470801427949821, "learning_rate": 8.437862474476099e-06, "loss": 0.4283, "num_tokens": 681198137.0, "step": 719 }, { "epoch": 1.4094025465230167, "grad_norm": 0.09040661798704991, "learning_rate": 8.433256888280422e-06, "loss": 0.4342, "num_tokens": 682109134.0, "step": 720 }, { "epoch": 1.4113614103819785, "grad_norm": 0.09718020598488185, "learning_rate": 8.428645783809187e-06, "loss": 0.4447, "num_tokens": 683041802.0, "step": 721 }, { "epoch": 1.4133202742409403, "grad_norm": 0.09386655251014273, "learning_rate": 8.424029168473829e-06, "loss": 0.4194, "num_tokens": 684005481.0, "step": 722 }, { "epoch": 1.4152791380999021, "grad_norm": 0.08941083494167625, "learning_rate": 8.419407049694634e-06, "loss": 0.4363, "num_tokens": 684974162.0, "step": 723 }, { "epoch": 1.417238001958864, "grad_norm": 0.08558228587510855, "learning_rate": 8.414779434900736e-06, "loss": 0.4388, "num_tokens": 685922624.0, "step": 724 }, { "epoch": 1.4191968658178258, "grad_norm": 0.08820784119716409, "learning_rate": 8.410146331530102e-06, "loss": 0.4401, "num_tokens": 686900397.0, "step": 725 }, { "epoch": 1.4211557296767874, "grad_norm": 0.08852337982491124, "learning_rate": 8.405507747029524e-06, "loss": 0.4253, "num_tokens": 687835960.0, "step": 726 }, { "epoch": 1.4231145935357492, "grad_norm": 0.09168139513037091, "learning_rate": 8.400863688854598e-06, "loss": 0.4409, "num_tokens": 688795372.0, "step": 727 }, { "epoch": 1.425073457394711, "grad_norm": 0.0878237833737238, "learning_rate": 8.396214164469723e-06, "loss": 0.4391, "num_tokens": 689754498.0, "step": 728 }, { "epoch": 1.4270323212536729, "grad_norm": 0.08916921173918818, "learning_rate": 8.391559181348081e-06, "loss": 0.4326, "num_tokens": 690700068.0, "step": 729 }, { "epoch": 1.4289911851126347, "grad_norm": 0.08885595175037213, "learning_rate": 8.38689874697163e-06, "loss": 0.425, "num_tokens": 691625569.0, "step": 730 }, { "epoch": 1.4309500489715965, "grad_norm": 0.09355201498428904, "learning_rate": 8.382232868831088e-06, "loss": 0.4247, "num_tokens": 692549153.0, "step": 731 }, { "epoch": 1.4329089128305583, "grad_norm": 0.08706942822188166, "learning_rate": 8.377561554425923e-06, "loss": 0.4386, "num_tokens": 693508761.0, "step": 732 }, { "epoch": 1.4348677766895201, "grad_norm": 0.09133725236247704, "learning_rate": 8.372884811264339e-06, "loss": 0.4408, "num_tokens": 694431253.0, "step": 733 }, { "epoch": 1.4368266405484817, "grad_norm": 0.08836235986857442, "learning_rate": 8.368202646863273e-06, "loss": 0.4312, "num_tokens": 695387922.0, "step": 734 }, { "epoch": 1.4387855044074436, "grad_norm": 0.09730075311717222, "learning_rate": 8.363515068748365e-06, "loss": 0.4301, "num_tokens": 696352116.0, "step": 735 }, { "epoch": 1.4407443682664054, "grad_norm": 0.0928184606574738, "learning_rate": 8.358822084453964e-06, "loss": 0.4446, "num_tokens": 697336443.0, "step": 736 }, { "epoch": 1.4427032321253672, "grad_norm": 0.09866917309104557, "learning_rate": 8.354123701523107e-06, "loss": 0.4301, "num_tokens": 698300399.0, "step": 737 }, { "epoch": 1.444662095984329, "grad_norm": 0.09600606306106796, "learning_rate": 8.349419927507505e-06, "loss": 0.437, "num_tokens": 699243353.0, "step": 738 }, { "epoch": 1.4466209598432909, "grad_norm": 0.09237112341447015, "learning_rate": 8.344710769967537e-06, "loss": 0.4272, "num_tokens": 700171912.0, "step": 739 }, { "epoch": 1.4485798237022527, "grad_norm": 0.08572563052517873, "learning_rate": 8.339996236472236e-06, "loss": 0.4351, "num_tokens": 701139037.0, "step": 740 }, { "epoch": 1.4505386875612145, "grad_norm": 0.08834229641252914, "learning_rate": 8.335276334599274e-06, "loss": 0.4296, "num_tokens": 702073841.0, "step": 741 }, { "epoch": 1.4524975514201763, "grad_norm": 0.0871311174805632, "learning_rate": 8.33055107193495e-06, "loss": 0.4299, "num_tokens": 703047084.0, "step": 742 }, { "epoch": 1.4544564152791382, "grad_norm": 0.0968390574225456, "learning_rate": 8.325820456074181e-06, "loss": 0.4363, "num_tokens": 704006650.0, "step": 743 }, { "epoch": 1.4564152791381, "grad_norm": 0.09770693695080601, "learning_rate": 8.321084494620489e-06, "loss": 0.4291, "num_tokens": 705005752.0, "step": 744 }, { "epoch": 1.4583741429970618, "grad_norm": 0.09042781818496945, "learning_rate": 8.316343195185988e-06, "loss": 0.433, "num_tokens": 705943750.0, "step": 745 }, { "epoch": 1.4603330068560236, "grad_norm": 0.09074059900347199, "learning_rate": 8.31159656539137e-06, "loss": 0.4443, "num_tokens": 706857464.0, "step": 746 }, { "epoch": 1.4622918707149852, "grad_norm": 0.09296990315373345, "learning_rate": 8.306844612865895e-06, "loss": 0.4278, "num_tokens": 707798061.0, "step": 747 }, { "epoch": 1.464250734573947, "grad_norm": 0.09192749101371916, "learning_rate": 8.30208734524738e-06, "loss": 0.4443, "num_tokens": 708757874.0, "step": 748 }, { "epoch": 1.4662095984329089, "grad_norm": 0.0920452893115045, "learning_rate": 8.297324770182183e-06, "loss": 0.4379, "num_tokens": 709711444.0, "step": 749 }, { "epoch": 1.4681684622918707, "grad_norm": 0.08734169812439341, "learning_rate": 8.292556895325195e-06, "loss": 0.4434, "num_tokens": 710675618.0, "step": 750 }, { "epoch": 1.4701273261508325, "grad_norm": 0.08918617592674521, "learning_rate": 8.28778372833982e-06, "loss": 0.4209, "num_tokens": 711617502.0, "step": 751 }, { "epoch": 1.4720861900097943, "grad_norm": 0.08764649345766538, "learning_rate": 8.283005276897975e-06, "loss": 0.4483, "num_tokens": 712567364.0, "step": 752 }, { "epoch": 1.4740450538687562, "grad_norm": 0.09092598413252254, "learning_rate": 8.278221548680068e-06, "loss": 0.4346, "num_tokens": 713498779.0, "step": 753 }, { "epoch": 1.476003917727718, "grad_norm": 0.09283053237607067, "learning_rate": 8.273432551374984e-06, "loss": 0.4308, "num_tokens": 714454721.0, "step": 754 }, { "epoch": 1.4779627815866796, "grad_norm": 0.09624790658111503, "learning_rate": 8.268638292680085e-06, "loss": 0.4348, "num_tokens": 715410148.0, "step": 755 }, { "epoch": 1.4799216454456414, "grad_norm": 0.0905438810326495, "learning_rate": 8.263838780301182e-06, "loss": 0.42, "num_tokens": 716371428.0, "step": 756 }, { "epoch": 1.4818805093046032, "grad_norm": 0.08900777996620862, "learning_rate": 8.259034021952537e-06, "loss": 0.4275, "num_tokens": 717334918.0, "step": 757 }, { "epoch": 1.483839373163565, "grad_norm": 0.09539894517349785, "learning_rate": 8.254224025356838e-06, "loss": 0.4309, "num_tokens": 718268899.0, "step": 758 }, { "epoch": 1.485798237022527, "grad_norm": 0.08907059530467444, "learning_rate": 8.249408798245196e-06, "loss": 0.4402, "num_tokens": 719225615.0, "step": 759 }, { "epoch": 1.4877571008814887, "grad_norm": 0.08749241883163766, "learning_rate": 8.244588348357127e-06, "loss": 0.4342, "num_tokens": 720155206.0, "step": 760 }, { "epoch": 1.4897159647404505, "grad_norm": 0.0881335687273988, "learning_rate": 8.239762683440542e-06, "loss": 0.4304, "num_tokens": 721082402.0, "step": 761 }, { "epoch": 1.4916748285994124, "grad_norm": 0.09397566961188934, "learning_rate": 8.234931811251739e-06, "loss": 0.4245, "num_tokens": 722036368.0, "step": 762 }, { "epoch": 1.4936336924583742, "grad_norm": 0.08868494862611866, "learning_rate": 8.230095739555377e-06, "loss": 0.426, "num_tokens": 722977150.0, "step": 763 }, { "epoch": 1.495592556317336, "grad_norm": 0.08822709518087088, "learning_rate": 8.225254476124479e-06, "loss": 0.4382, "num_tokens": 723921610.0, "step": 764 }, { "epoch": 1.4975514201762978, "grad_norm": 0.09222431290824455, "learning_rate": 8.220408028740408e-06, "loss": 0.4406, "num_tokens": 724882118.0, "step": 765 }, { "epoch": 1.4995102840352597, "grad_norm": 0.08633570371148419, "learning_rate": 8.215556405192863e-06, "loss": 0.4207, "num_tokens": 725829599.0, "step": 766 }, { "epoch": 1.5014691478942215, "grad_norm": 0.0868716952312777, "learning_rate": 8.21069961327986e-06, "loss": 0.4231, "num_tokens": 726772987.0, "step": 767 }, { "epoch": 1.5034280117531833, "grad_norm": 0.08768214349778954, "learning_rate": 8.205837660807726e-06, "loss": 0.4224, "num_tokens": 727706645.0, "step": 768 }, { "epoch": 1.505386875612145, "grad_norm": 0.09487800672671816, "learning_rate": 8.200970555591073e-06, "loss": 0.4434, "num_tokens": 728681109.0, "step": 769 }, { "epoch": 1.5073457394711067, "grad_norm": 0.08487833690630645, "learning_rate": 8.19609830545281e-06, "loss": 0.4337, "num_tokens": 729617569.0, "step": 770 }, { "epoch": 1.5093046033300686, "grad_norm": 0.09218774680180625, "learning_rate": 8.191220918224102e-06, "loss": 0.4281, "num_tokens": 730579210.0, "step": 771 }, { "epoch": 1.5112634671890304, "grad_norm": 0.08650153794869085, "learning_rate": 8.186338401744377e-06, "loss": 0.4446, "num_tokens": 731526742.0, "step": 772 }, { "epoch": 1.5132223310479922, "grad_norm": 0.08973200677638382, "learning_rate": 8.181450763861306e-06, "loss": 0.439, "num_tokens": 732472379.0, "step": 773 }, { "epoch": 1.515181194906954, "grad_norm": 0.08939353084355083, "learning_rate": 8.176558012430792e-06, "loss": 0.4327, "num_tokens": 733447256.0, "step": 774 }, { "epoch": 1.5171400587659156, "grad_norm": 0.08695642985328696, "learning_rate": 8.171660155316958e-06, "loss": 0.4347, "num_tokens": 734428538.0, "step": 775 }, { "epoch": 1.5190989226248774, "grad_norm": 0.08813042852208332, "learning_rate": 8.16675720039213e-06, "loss": 0.4313, "num_tokens": 735343982.0, "step": 776 }, { "epoch": 1.5210577864838393, "grad_norm": 0.09122332242998978, "learning_rate": 8.161849155536834e-06, "loss": 0.4309, "num_tokens": 736281010.0, "step": 777 }, { "epoch": 1.523016650342801, "grad_norm": 0.08858224521663992, "learning_rate": 8.156936028639768e-06, "loss": 0.433, "num_tokens": 737241458.0, "step": 778 }, { "epoch": 1.524975514201763, "grad_norm": 0.08752140528851679, "learning_rate": 8.152017827597807e-06, "loss": 0.4291, "num_tokens": 738191019.0, "step": 779 }, { "epoch": 1.5269343780607247, "grad_norm": 0.08985526676071166, "learning_rate": 8.147094560315978e-06, "loss": 0.4267, "num_tokens": 739125705.0, "step": 780 }, { "epoch": 1.5288932419196866, "grad_norm": 0.08674954335352558, "learning_rate": 8.142166234707448e-06, "loss": 0.4429, "num_tokens": 740067530.0, "step": 781 }, { "epoch": 1.5308521057786484, "grad_norm": 0.09647413877933882, "learning_rate": 8.13723285869352e-06, "loss": 0.433, "num_tokens": 741012286.0, "step": 782 }, { "epoch": 1.5328109696376102, "grad_norm": 0.08867424938439858, "learning_rate": 8.132294440203609e-06, "loss": 0.4392, "num_tokens": 742006599.0, "step": 783 }, { "epoch": 1.534769833496572, "grad_norm": 0.08614258269227894, "learning_rate": 8.127350987175242e-06, "loss": 0.4298, "num_tokens": 742928475.0, "step": 784 }, { "epoch": 1.5367286973555339, "grad_norm": 0.08941741651572559, "learning_rate": 8.12240250755403e-06, "loss": 0.4271, "num_tokens": 743897067.0, "step": 785 }, { "epoch": 1.5386875612144957, "grad_norm": 0.08307451835655041, "learning_rate": 8.117449009293668e-06, "loss": 0.415, "num_tokens": 744846396.0, "step": 786 }, { "epoch": 1.5406464250734575, "grad_norm": 0.08388009373690888, "learning_rate": 8.112490500355916e-06, "loss": 0.4321, "num_tokens": 745818252.0, "step": 787 }, { "epoch": 1.5426052889324193, "grad_norm": 0.08974967024873853, "learning_rate": 8.10752698871059e-06, "loss": 0.4361, "num_tokens": 746776422.0, "step": 788 }, { "epoch": 1.5445641527913812, "grad_norm": 0.08706306024197821, "learning_rate": 8.10255848233554e-06, "loss": 0.4301, "num_tokens": 747726198.0, "step": 789 }, { "epoch": 1.5465230166503428, "grad_norm": 0.08687534246069954, "learning_rate": 8.097584989216653e-06, "loss": 0.4191, "num_tokens": 748651636.0, "step": 790 }, { "epoch": 1.5484818805093046, "grad_norm": 0.08716398270684944, "learning_rate": 8.092606517347828e-06, "loss": 0.4342, "num_tokens": 749589769.0, "step": 791 }, { "epoch": 1.5504407443682664, "grad_norm": 0.086505218555598, "learning_rate": 8.08762307473096e-06, "loss": 0.4475, "num_tokens": 750588105.0, "step": 792 }, { "epoch": 1.5523996082272282, "grad_norm": 0.08746637865179134, "learning_rate": 8.082634669375944e-06, "loss": 0.4332, "num_tokens": 751558123.0, "step": 793 }, { "epoch": 1.55435847208619, "grad_norm": 0.0840126542938758, "learning_rate": 8.077641309300643e-06, "loss": 0.4138, "num_tokens": 752500192.0, "step": 794 }, { "epoch": 1.5563173359451516, "grad_norm": 0.08652533631916479, "learning_rate": 8.072643002530891e-06, "loss": 0.4258, "num_tokens": 753482524.0, "step": 795 }, { "epoch": 1.5582761998041135, "grad_norm": 0.08596044230527805, "learning_rate": 8.067639757100465e-06, "loss": 0.4314, "num_tokens": 754425718.0, "step": 796 }, { "epoch": 1.5602350636630753, "grad_norm": 0.09017531758130216, "learning_rate": 8.062631581051088e-06, "loss": 0.4343, "num_tokens": 755342924.0, "step": 797 }, { "epoch": 1.5621939275220371, "grad_norm": 0.08539720776155492, "learning_rate": 8.057618482432399e-06, "loss": 0.4099, "num_tokens": 756294294.0, "step": 798 }, { "epoch": 1.564152791380999, "grad_norm": 0.08714477235939926, "learning_rate": 8.052600469301958e-06, "loss": 0.4441, "num_tokens": 757290351.0, "step": 799 }, { "epoch": 1.5661116552399608, "grad_norm": 0.08212796379618922, "learning_rate": 8.047577549725219e-06, "loss": 0.427, "num_tokens": 758262720.0, "step": 800 }, { "epoch": 1.5680705190989226, "grad_norm": 0.08693270850999413, "learning_rate": 8.042549731775521e-06, "loss": 0.4102, "num_tokens": 759177946.0, "step": 801 }, { "epoch": 1.5700293829578844, "grad_norm": 0.08633453071203538, "learning_rate": 8.037517023534082e-06, "loss": 0.4446, "num_tokens": 760130872.0, "step": 802 }, { "epoch": 1.5719882468168462, "grad_norm": 0.09225219773572015, "learning_rate": 8.032479433089976e-06, "loss": 0.4411, "num_tokens": 761084377.0, "step": 803 }, { "epoch": 1.573947110675808, "grad_norm": 0.08702763450061118, "learning_rate": 8.027436968540124e-06, "loss": 0.4426, "num_tokens": 762015812.0, "step": 804 }, { "epoch": 1.5759059745347699, "grad_norm": 0.08846277303169, "learning_rate": 8.022389637989278e-06, "loss": 0.4322, "num_tokens": 762984337.0, "step": 805 }, { "epoch": 1.5778648383937317, "grad_norm": 0.09587811911424722, "learning_rate": 8.01733744955002e-06, "loss": 0.4328, "num_tokens": 763964568.0, "step": 806 }, { "epoch": 1.5798237022526935, "grad_norm": 0.08818955709529418, "learning_rate": 8.012280411342733e-06, "loss": 0.4465, "num_tokens": 764901824.0, "step": 807 }, { "epoch": 1.5817825661116554, "grad_norm": 0.09579378209022592, "learning_rate": 8.0072185314956e-06, "loss": 0.4245, "num_tokens": 765842584.0, "step": 808 }, { "epoch": 1.5837414299706172, "grad_norm": 0.09192508480952484, "learning_rate": 8.002151818144577e-06, "loss": 0.4381, "num_tokens": 766792416.0, "step": 809 }, { "epoch": 1.585700293829579, "grad_norm": 0.08778112997632395, "learning_rate": 7.997080279433402e-06, "loss": 0.4331, "num_tokens": 767750154.0, "step": 810 }, { "epoch": 1.5876591576885406, "grad_norm": 0.08591223462924581, "learning_rate": 7.992003923513557e-06, "loss": 0.4395, "num_tokens": 768681114.0, "step": 811 }, { "epoch": 1.5896180215475024, "grad_norm": 0.09197418425156342, "learning_rate": 7.986922758544272e-06, "loss": 0.4442, "num_tokens": 769613251.0, "step": 812 }, { "epoch": 1.5915768854064642, "grad_norm": 0.08981432669383796, "learning_rate": 7.981836792692508e-06, "loss": 0.4289, "num_tokens": 770564128.0, "step": 813 }, { "epoch": 1.593535749265426, "grad_norm": 0.08670656993160512, "learning_rate": 7.976746034132938e-06, "loss": 0.4487, "num_tokens": 771553074.0, "step": 814 }, { "epoch": 1.595494613124388, "grad_norm": 0.08789310833337932, "learning_rate": 7.971650491047944e-06, "loss": 0.4212, "num_tokens": 772481970.0, "step": 815 }, { "epoch": 1.5974534769833495, "grad_norm": 0.08862878479775865, "learning_rate": 7.966550171627592e-06, "loss": 0.4317, "num_tokens": 773369883.0, "step": 816 }, { "epoch": 1.5994123408423113, "grad_norm": 0.09019671038303287, "learning_rate": 7.96144508406963e-06, "loss": 0.4281, "num_tokens": 774334951.0, "step": 817 }, { "epoch": 1.6013712047012731, "grad_norm": 0.08631560650425865, "learning_rate": 7.956335236579468e-06, "loss": 0.4357, "num_tokens": 775272444.0, "step": 818 }, { "epoch": 1.603330068560235, "grad_norm": 0.09653679998604583, "learning_rate": 7.951220637370166e-06, "loss": 0.435, "num_tokens": 776199956.0, "step": 819 }, { "epoch": 1.6052889324191968, "grad_norm": 0.08792046484591014, "learning_rate": 7.946101294662418e-06, "loss": 0.4426, "num_tokens": 777169727.0, "step": 820 }, { "epoch": 1.6072477962781586, "grad_norm": 0.08557432522056063, "learning_rate": 7.940977216684553e-06, "loss": 0.4356, "num_tokens": 778113352.0, "step": 821 }, { "epoch": 1.6092066601371204, "grad_norm": 0.08792107016892484, "learning_rate": 7.935848411672499e-06, "loss": 0.4396, "num_tokens": 779071100.0, "step": 822 }, { "epoch": 1.6111655239960823, "grad_norm": 0.08278917692252104, "learning_rate": 7.93071488786979e-06, "loss": 0.4273, "num_tokens": 779994414.0, "step": 823 }, { "epoch": 1.613124387855044, "grad_norm": 0.0937239451472487, "learning_rate": 7.92557665352754e-06, "loss": 0.4343, "num_tokens": 780910389.0, "step": 824 }, { "epoch": 1.615083251714006, "grad_norm": 0.09212306734669672, "learning_rate": 7.920433716904436e-06, "loss": 0.4239, "num_tokens": 781843035.0, "step": 825 }, { "epoch": 1.6170421155729677, "grad_norm": 0.08931960362521048, "learning_rate": 7.915286086266723e-06, "loss": 0.427, "num_tokens": 782810958.0, "step": 826 }, { "epoch": 1.6190009794319296, "grad_norm": 0.0905389419758957, "learning_rate": 7.91013376988819e-06, "loss": 0.43, "num_tokens": 783786712.0, "step": 827 }, { "epoch": 1.6209598432908914, "grad_norm": 0.08644014656081704, "learning_rate": 7.904976776050156e-06, "loss": 0.4352, "num_tokens": 784736540.0, "step": 828 }, { "epoch": 1.6229187071498532, "grad_norm": 0.09199717032916593, "learning_rate": 7.899815113041463e-06, "loss": 0.4276, "num_tokens": 785728597.0, "step": 829 }, { "epoch": 1.624877571008815, "grad_norm": 0.08842558324087799, "learning_rate": 7.894648789158452e-06, "loss": 0.4468, "num_tokens": 786655785.0, "step": 830 }, { "epoch": 1.6268364348677768, "grad_norm": 0.10262169363913219, "learning_rate": 7.88947781270496e-06, "loss": 0.4191, "num_tokens": 787590282.0, "step": 831 }, { "epoch": 1.6287952987267384, "grad_norm": 0.09512038984159299, "learning_rate": 7.8843021919923e-06, "loss": 0.4421, "num_tokens": 788510805.0, "step": 832 }, { "epoch": 1.6307541625857003, "grad_norm": 0.0900043810946666, "learning_rate": 7.879121935339249e-06, "loss": 0.4198, "num_tokens": 789398786.0, "step": 833 }, { "epoch": 1.632713026444662, "grad_norm": 0.09477714069470886, "learning_rate": 7.873937051072037e-06, "loss": 0.4365, "num_tokens": 790336897.0, "step": 834 }, { "epoch": 1.634671890303624, "grad_norm": 0.08952014362266324, "learning_rate": 7.868747547524326e-06, "loss": 0.4265, "num_tokens": 791289556.0, "step": 835 }, { "epoch": 1.6366307541625857, "grad_norm": 0.09778589170144017, "learning_rate": 7.863553433037217e-06, "loss": 0.4462, "num_tokens": 792272860.0, "step": 836 }, { "epoch": 1.6385896180215473, "grad_norm": 0.09992143697321337, "learning_rate": 7.858354715959207e-06, "loss": 0.4414, "num_tokens": 793189011.0, "step": 837 }, { "epoch": 1.6405484818805092, "grad_norm": 0.09708392669394192, "learning_rate": 7.853151404646195e-06, "loss": 0.4259, "num_tokens": 794149245.0, "step": 838 }, { "epoch": 1.642507345739471, "grad_norm": 0.0875146819737148, "learning_rate": 7.847943507461471e-06, "loss": 0.4315, "num_tokens": 795082319.0, "step": 839 }, { "epoch": 1.6444662095984328, "grad_norm": 0.08844400418164805, "learning_rate": 7.842731032775688e-06, "loss": 0.4325, "num_tokens": 796044534.0, "step": 840 }, { "epoch": 1.6464250734573946, "grad_norm": 0.08993448791702277, "learning_rate": 7.83751398896686e-06, "loss": 0.4274, "num_tokens": 797002349.0, "step": 841 }, { "epoch": 1.6483839373163565, "grad_norm": 0.09779719083256716, "learning_rate": 7.832292384420342e-06, "loss": 0.427, "num_tokens": 797976708.0, "step": 842 }, { "epoch": 1.6503428011753183, "grad_norm": 0.09313945626158736, "learning_rate": 7.827066227528825e-06, "loss": 0.4324, "num_tokens": 798895955.0, "step": 843 }, { "epoch": 1.65230166503428, "grad_norm": 0.08946785732709807, "learning_rate": 7.821835526692311e-06, "loss": 0.4317, "num_tokens": 799870680.0, "step": 844 }, { "epoch": 1.654260528893242, "grad_norm": 0.0887193935127447, "learning_rate": 7.81660029031811e-06, "loss": 0.4385, "num_tokens": 800831985.0, "step": 845 }, { "epoch": 1.6562193927522038, "grad_norm": 0.08719254083646917, "learning_rate": 7.81136052682082e-06, "loss": 0.4296, "num_tokens": 801786190.0, "step": 846 }, { "epoch": 1.6581782566111656, "grad_norm": 0.08792814450101948, "learning_rate": 7.806116244622316e-06, "loss": 0.429, "num_tokens": 802786436.0, "step": 847 }, { "epoch": 1.6601371204701274, "grad_norm": 0.09345059438799186, "learning_rate": 7.80086745215173e-06, "loss": 0.4345, "num_tokens": 803764644.0, "step": 848 }, { "epoch": 1.6620959843290892, "grad_norm": 0.08817644282627775, "learning_rate": 7.795614157845457e-06, "loss": 0.4337, "num_tokens": 804691651.0, "step": 849 }, { "epoch": 1.664054848188051, "grad_norm": 0.09032795103795767, "learning_rate": 7.790356370147115e-06, "loss": 0.4229, "num_tokens": 805632028.0, "step": 850 }, { "epoch": 1.6660137120470129, "grad_norm": 0.08514604231993507, "learning_rate": 7.78509409750755e-06, "loss": 0.4238, "num_tokens": 806587305.0, "step": 851 }, { "epoch": 1.6679725759059747, "grad_norm": 0.08510216243372119, "learning_rate": 7.779827348384814e-06, "loss": 0.4276, "num_tokens": 807515943.0, "step": 852 }, { "epoch": 1.6699314397649363, "grad_norm": 0.09082030671758394, "learning_rate": 7.774556131244158e-06, "loss": 0.4262, "num_tokens": 808432184.0, "step": 853 }, { "epoch": 1.6718903036238981, "grad_norm": 0.09036312344258203, "learning_rate": 7.769280454558009e-06, "loss": 0.4348, "num_tokens": 809386146.0, "step": 854 }, { "epoch": 1.67384916748286, "grad_norm": 0.09592697261344595, "learning_rate": 7.764000326805967e-06, "loss": 0.4403, "num_tokens": 810355041.0, "step": 855 }, { "epoch": 1.6758080313418218, "grad_norm": 0.08847463534602111, "learning_rate": 7.758715756474785e-06, "loss": 0.4345, "num_tokens": 811304494.0, "step": 856 }, { "epoch": 1.6777668952007836, "grad_norm": 0.08942030009235737, "learning_rate": 7.753426752058354e-06, "loss": 0.4371, "num_tokens": 812264811.0, "step": 857 }, { "epoch": 1.6797257590597452, "grad_norm": 0.09016146484851315, "learning_rate": 7.748133322057693e-06, "loss": 0.4368, "num_tokens": 813213485.0, "step": 858 }, { "epoch": 1.681684622918707, "grad_norm": 0.09156888554192899, "learning_rate": 7.742835474980936e-06, "loss": 0.4525, "num_tokens": 814150318.0, "step": 859 }, { "epoch": 1.6836434867776688, "grad_norm": 0.08900818567746938, "learning_rate": 7.737533219343314e-06, "loss": 0.4324, "num_tokens": 815063347.0, "step": 860 }, { "epoch": 1.6856023506366307, "grad_norm": 0.09431275952391421, "learning_rate": 7.732226563667146e-06, "loss": 0.4361, "num_tokens": 816056695.0, "step": 861 }, { "epoch": 1.6875612144955925, "grad_norm": 0.08600326004812062, "learning_rate": 7.726915516481824e-06, "loss": 0.4276, "num_tokens": 816992130.0, "step": 862 }, { "epoch": 1.6895200783545543, "grad_norm": 0.09421732905820908, "learning_rate": 7.721600086323795e-06, "loss": 0.4333, "num_tokens": 817917134.0, "step": 863 }, { "epoch": 1.6914789422135161, "grad_norm": 0.08822550736347473, "learning_rate": 7.716280281736553e-06, "loss": 0.4417, "num_tokens": 818858756.0, "step": 864 }, { "epoch": 1.693437806072478, "grad_norm": 0.09570061746940511, "learning_rate": 7.710956111270621e-06, "loss": 0.4296, "num_tokens": 819800195.0, "step": 865 }, { "epoch": 1.6953966699314398, "grad_norm": 0.09744285532510016, "learning_rate": 7.705627583483543e-06, "loss": 0.4319, "num_tokens": 820767605.0, "step": 866 }, { "epoch": 1.6973555337904016, "grad_norm": 0.0897795504517481, "learning_rate": 7.700294706939864e-06, "loss": 0.4516, "num_tokens": 821738487.0, "step": 867 }, { "epoch": 1.6993143976493634, "grad_norm": 0.08888776807121071, "learning_rate": 7.69495749021112e-06, "loss": 0.4414, "num_tokens": 822682076.0, "step": 868 }, { "epoch": 1.7012732615083253, "grad_norm": 0.08984604263470013, "learning_rate": 7.68961594187582e-06, "loss": 0.4296, "num_tokens": 823672726.0, "step": 869 }, { "epoch": 1.703232125367287, "grad_norm": 0.08721830032120235, "learning_rate": 7.68427007051944e-06, "loss": 0.4438, "num_tokens": 824599914.0, "step": 870 }, { "epoch": 1.705190989226249, "grad_norm": 0.09343695241717352, "learning_rate": 7.6789198847344e-06, "loss": 0.4283, "num_tokens": 825498810.0, "step": 871 }, { "epoch": 1.7071498530852107, "grad_norm": 0.08978704569061438, "learning_rate": 7.673565393120056e-06, "loss": 0.4191, "num_tokens": 826460436.0, "step": 872 }, { "epoch": 1.7091087169441725, "grad_norm": 0.09005628935518344, "learning_rate": 7.668206604282687e-06, "loss": 0.4438, "num_tokens": 827426291.0, "step": 873 }, { "epoch": 1.7110675808031341, "grad_norm": 0.09040088145401211, "learning_rate": 7.662843526835475e-06, "loss": 0.4467, "num_tokens": 828379720.0, "step": 874 }, { "epoch": 1.713026444662096, "grad_norm": 0.08774775733491094, "learning_rate": 7.657476169398497e-06, "loss": 0.4198, "num_tokens": 829353691.0, "step": 875 }, { "epoch": 1.7149853085210578, "grad_norm": 0.0859301718564589, "learning_rate": 7.652104540598712e-06, "loss": 0.4294, "num_tokens": 830315563.0, "step": 876 }, { "epoch": 1.7169441723800196, "grad_norm": 0.08690655564830525, "learning_rate": 7.64672864906994e-06, "loss": 0.4319, "num_tokens": 831294006.0, "step": 877 }, { "epoch": 1.7189030362389814, "grad_norm": 0.0870783563945675, "learning_rate": 7.641348503452855e-06, "loss": 0.4221, "num_tokens": 832217236.0, "step": 878 }, { "epoch": 1.720861900097943, "grad_norm": 0.09367961873370947, "learning_rate": 7.635964112394966e-06, "loss": 0.4339, "num_tokens": 833159623.0, "step": 879 }, { "epoch": 1.7228207639569049, "grad_norm": 0.08801713160713069, "learning_rate": 7.630575484550611e-06, "loss": 0.4357, "num_tokens": 834099714.0, "step": 880 }, { "epoch": 1.7247796278158667, "grad_norm": 0.0867577050607559, "learning_rate": 7.6251826285809294e-06, "loss": 0.4378, "num_tokens": 835025020.0, "step": 881 }, { "epoch": 1.7267384916748285, "grad_norm": 0.08771599581109289, "learning_rate": 7.619785553153864e-06, "loss": 0.4341, "num_tokens": 835976099.0, "step": 882 }, { "epoch": 1.7286973555337903, "grad_norm": 0.09010045775355124, "learning_rate": 7.614384266944139e-06, "loss": 0.4338, "num_tokens": 836942966.0, "step": 883 }, { "epoch": 1.7306562193927522, "grad_norm": 0.08569022958178449, "learning_rate": 7.60897877863324e-06, "loss": 0.4356, "num_tokens": 837878994.0, "step": 884 }, { "epoch": 1.732615083251714, "grad_norm": 0.09054588136575797, "learning_rate": 7.603569096909414e-06, "loss": 0.4407, "num_tokens": 838812064.0, "step": 885 }, { "epoch": 1.7345739471106758, "grad_norm": 0.08455869017277894, "learning_rate": 7.598155230467645e-06, "loss": 0.4359, "num_tokens": 839761821.0, "step": 886 }, { "epoch": 1.7365328109696376, "grad_norm": 0.08910208308129197, "learning_rate": 7.592737188009643e-06, "loss": 0.4277, "num_tokens": 840698534.0, "step": 887 }, { "epoch": 1.7384916748285995, "grad_norm": 0.08419612378419906, "learning_rate": 7.5873149782438295e-06, "loss": 0.4428, "num_tokens": 841661118.0, "step": 888 }, { "epoch": 1.7404505386875613, "grad_norm": 0.09286261300836257, "learning_rate": 7.581888609885328e-06, "loss": 0.4477, "num_tokens": 842610772.0, "step": 889 }, { "epoch": 1.742409402546523, "grad_norm": 0.08567682262627445, "learning_rate": 7.5764580916559405e-06, "loss": 0.4219, "num_tokens": 843557703.0, "step": 890 }, { "epoch": 1.744368266405485, "grad_norm": 0.0874265045179033, "learning_rate": 7.571023432284142e-06, "loss": 0.4325, "num_tokens": 844529841.0, "step": 891 }, { "epoch": 1.7463271302644467, "grad_norm": 0.08527093511593209, "learning_rate": 7.565584640505061e-06, "loss": 0.4238, "num_tokens": 845454092.0, "step": 892 }, { "epoch": 1.7482859941234086, "grad_norm": 0.08786028916442253, "learning_rate": 7.560141725060477e-06, "loss": 0.4541, "num_tokens": 846388019.0, "step": 893 }, { "epoch": 1.7502448579823702, "grad_norm": 0.08810151204616025, "learning_rate": 7.554694694698783e-06, "loss": 0.4285, "num_tokens": 847341800.0, "step": 894 }, { "epoch": 1.752203721841332, "grad_norm": 0.090679696767153, "learning_rate": 7.549243558174999e-06, "loss": 0.4369, "num_tokens": 848297768.0, "step": 895 }, { "epoch": 1.7541625857002938, "grad_norm": 0.08945682012956972, "learning_rate": 7.543788324250738e-06, "loss": 0.4314, "num_tokens": 849233737.0, "step": 896 }, { "epoch": 1.7561214495592556, "grad_norm": 0.09006761166760972, "learning_rate": 7.5383290016942e-06, "loss": 0.4375, "num_tokens": 850207916.0, "step": 897 }, { "epoch": 1.7580803134182175, "grad_norm": 0.09358780555112266, "learning_rate": 7.532865599280157e-06, "loss": 0.4489, "num_tokens": 851122512.0, "step": 898 }, { "epoch": 1.7600391772771793, "grad_norm": 0.09016876350666427, "learning_rate": 7.5273981257899395e-06, "loss": 0.4327, "num_tokens": 852071033.0, "step": 899 }, { "epoch": 1.761998041136141, "grad_norm": 0.0915091856625939, "learning_rate": 7.521926590011419e-06, "loss": 0.4401, "num_tokens": 853014004.0, "step": 900 }, { "epoch": 1.7639569049951027, "grad_norm": 0.08645157239780024, "learning_rate": 7.516451000738998e-06, "loss": 0.4423, "num_tokens": 853945279.0, "step": 901 }, { "epoch": 1.7659157688540645, "grad_norm": 0.09315232618093792, "learning_rate": 7.5109713667735955e-06, "loss": 0.4327, "num_tokens": 854889767.0, "step": 902 }, { "epoch": 1.7678746327130264, "grad_norm": 0.08294235251717945, "learning_rate": 7.505487696922629e-06, "loss": 0.4306, "num_tokens": 855875946.0, "step": 903 }, { "epoch": 1.7698334965719882, "grad_norm": 0.08990498197963341, "learning_rate": 7.500000000000001e-06, "loss": 0.4272, "num_tokens": 856838276.0, "step": 904 }, { "epoch": 1.77179236043095, "grad_norm": 0.08574846062900512, "learning_rate": 7.494508284826093e-06, "loss": 0.4289, "num_tokens": 857779416.0, "step": 905 }, { "epoch": 1.7737512242899118, "grad_norm": 0.08718139719809676, "learning_rate": 7.489012560227742e-06, "loss": 0.4408, "num_tokens": 858742500.0, "step": 906 }, { "epoch": 1.7757100881488737, "grad_norm": 0.09264817251194245, "learning_rate": 7.483512835038227e-06, "loss": 0.4392, "num_tokens": 859679580.0, "step": 907 }, { "epoch": 1.7776689520078355, "grad_norm": 0.0928835377010871, "learning_rate": 7.47800911809726e-06, "loss": 0.4439, "num_tokens": 860614797.0, "step": 908 }, { "epoch": 1.7796278158667973, "grad_norm": 0.08645145576581885, "learning_rate": 7.472501418250968e-06, "loss": 0.4277, "num_tokens": 861590111.0, "step": 909 }, { "epoch": 1.7815866797257591, "grad_norm": 0.08750451884708292, "learning_rate": 7.46698974435188e-06, "loss": 0.4462, "num_tokens": 862532036.0, "step": 910 }, { "epoch": 1.783545543584721, "grad_norm": 0.09262248516729513, "learning_rate": 7.461474105258911e-06, "loss": 0.4351, "num_tokens": 863526420.0, "step": 911 }, { "epoch": 1.7855044074436828, "grad_norm": 0.08474017515659142, "learning_rate": 7.455954509837352e-06, "loss": 0.4121, "num_tokens": 864448770.0, "step": 912 }, { "epoch": 1.7874632713026446, "grad_norm": 0.093179644270419, "learning_rate": 7.450430966958851e-06, "loss": 0.446, "num_tokens": 865388472.0, "step": 913 }, { "epoch": 1.7894221351616064, "grad_norm": 0.09517537990548135, "learning_rate": 7.444903485501402e-06, "loss": 0.4477, "num_tokens": 866354253.0, "step": 914 }, { "epoch": 1.791380999020568, "grad_norm": 0.08913437808815497, "learning_rate": 7.439372074349328e-06, "loss": 0.4186, "num_tokens": 867334882.0, "step": 915 }, { "epoch": 1.7933398628795298, "grad_norm": 0.09609945805750214, "learning_rate": 7.43383674239327e-06, "loss": 0.4316, "num_tokens": 868257353.0, "step": 916 }, { "epoch": 1.7952987267384917, "grad_norm": 0.08434468534837705, "learning_rate": 7.428297498530168e-06, "loss": 0.4165, "num_tokens": 869172717.0, "step": 917 }, { "epoch": 1.7972575905974535, "grad_norm": 0.08753164647695391, "learning_rate": 7.422754351663252e-06, "loss": 0.4342, "num_tokens": 870135639.0, "step": 918 }, { "epoch": 1.7992164544564153, "grad_norm": 0.08935410902352021, "learning_rate": 7.417207310702025e-06, "loss": 0.4452, "num_tokens": 871098727.0, "step": 919 }, { "epoch": 1.801175318315377, "grad_norm": 0.09277493809398578, "learning_rate": 7.411656384562251e-06, "loss": 0.451, "num_tokens": 872054169.0, "step": 920 }, { "epoch": 1.8031341821743387, "grad_norm": 0.09091615700569183, "learning_rate": 7.406101582165933e-06, "loss": 0.4239, "num_tokens": 872998557.0, "step": 921 }, { "epoch": 1.8050930460333006, "grad_norm": 0.08686796941755942, "learning_rate": 7.400542912441309e-06, "loss": 0.4342, "num_tokens": 873932558.0, "step": 922 }, { "epoch": 1.8070519098922624, "grad_norm": 0.09195894264684597, "learning_rate": 7.394980384322834e-06, "loss": 0.4262, "num_tokens": 874903980.0, "step": 923 }, { "epoch": 1.8090107737512242, "grad_norm": 0.08911363486327942, "learning_rate": 7.389414006751159e-06, "loss": 0.4321, "num_tokens": 875848643.0, "step": 924 }, { "epoch": 1.810969637610186, "grad_norm": 0.09332062468254551, "learning_rate": 7.3838437886731264e-06, "loss": 0.4314, "num_tokens": 876794234.0, "step": 925 }, { "epoch": 1.8129285014691479, "grad_norm": 0.09478438211278445, "learning_rate": 7.378269739041751e-06, "loss": 0.4394, "num_tokens": 877747088.0, "step": 926 }, { "epoch": 1.8148873653281097, "grad_norm": 0.08971306555226388, "learning_rate": 7.3726918668162065e-06, "loss": 0.4485, "num_tokens": 878707539.0, "step": 927 }, { "epoch": 1.8168462291870715, "grad_norm": 0.08569446482743738, "learning_rate": 7.367110180961807e-06, "loss": 0.438, "num_tokens": 879651206.0, "step": 928 }, { "epoch": 1.8188050930460333, "grad_norm": 0.09119524659336786, "learning_rate": 7.3615246904500025e-06, "loss": 0.4192, "num_tokens": 880563698.0, "step": 929 }, { "epoch": 1.8207639569049952, "grad_norm": 0.0889603261333572, "learning_rate": 7.355935404258354e-06, "loss": 0.4325, "num_tokens": 881515407.0, "step": 930 }, { "epoch": 1.822722820763957, "grad_norm": 0.09026967037671438, "learning_rate": 7.350342331370521e-06, "loss": 0.4227, "num_tokens": 882453009.0, "step": 931 }, { "epoch": 1.8246816846229188, "grad_norm": 0.08652547878079349, "learning_rate": 7.3447454807762565e-06, "loss": 0.44, "num_tokens": 883449631.0, "step": 932 }, { "epoch": 1.8266405484818806, "grad_norm": 0.09162557590468148, "learning_rate": 7.339144861471379e-06, "loss": 0.4482, "num_tokens": 884419434.0, "step": 933 }, { "epoch": 1.8285994123408424, "grad_norm": 0.0938185836612543, "learning_rate": 7.333540482457766e-06, "loss": 0.4352, "num_tokens": 885398978.0, "step": 934 }, { "epoch": 1.8305582761998043, "grad_norm": 0.08630327514659179, "learning_rate": 7.327932352743341e-06, "loss": 0.4315, "num_tokens": 886348605.0, "step": 935 }, { "epoch": 1.8325171400587659, "grad_norm": 0.08570655142834177, "learning_rate": 7.322320481342053e-06, "loss": 0.41, "num_tokens": 887284913.0, "step": 936 }, { "epoch": 1.8344760039177277, "grad_norm": 0.08377633482101021, "learning_rate": 7.316704877273866e-06, "loss": 0.4266, "num_tokens": 888208855.0, "step": 937 }, { "epoch": 1.8364348677766895, "grad_norm": 0.08713332313204401, "learning_rate": 7.311085549564742e-06, "loss": 0.4228, "num_tokens": 889171102.0, "step": 938 }, { "epoch": 1.8383937316356513, "grad_norm": 0.10240927862922675, "learning_rate": 7.30546250724663e-06, "loss": 0.4324, "num_tokens": 890162092.0, "step": 939 }, { "epoch": 1.8403525954946132, "grad_norm": 0.09400164210750693, "learning_rate": 7.2998357593574485e-06, "loss": 0.4254, "num_tokens": 891131622.0, "step": 940 }, { "epoch": 1.8423114593535748, "grad_norm": 0.08731919697434702, "learning_rate": 7.294205314941074e-06, "loss": 0.4372, "num_tokens": 892096819.0, "step": 941 }, { "epoch": 1.8442703232125366, "grad_norm": 0.08865328013583497, "learning_rate": 7.288571183047321e-06, "loss": 0.4288, "num_tokens": 893011990.0, "step": 942 }, { "epoch": 1.8462291870714984, "grad_norm": 0.09348427289631799, "learning_rate": 7.282933372731933e-06, "loss": 0.4368, "num_tokens": 893946270.0, "step": 943 }, { "epoch": 1.8481880509304602, "grad_norm": 0.09244859490667034, "learning_rate": 7.277291893056565e-06, "loss": 0.4433, "num_tokens": 894870549.0, "step": 944 }, { "epoch": 1.850146914789422, "grad_norm": 0.0863169278489715, "learning_rate": 7.27164675308877e-06, "loss": 0.4366, "num_tokens": 895834311.0, "step": 945 }, { "epoch": 1.8521057786483839, "grad_norm": 0.08465771790873794, "learning_rate": 7.265997961901987e-06, "loss": 0.4441, "num_tokens": 896791695.0, "step": 946 }, { "epoch": 1.8540646425073457, "grad_norm": 0.08650374600013917, "learning_rate": 7.260345528575518e-06, "loss": 0.4229, "num_tokens": 897749765.0, "step": 947 }, { "epoch": 1.8560235063663075, "grad_norm": 0.0874449822383469, "learning_rate": 7.254689462194522e-06, "loss": 0.4355, "num_tokens": 898730878.0, "step": 948 }, { "epoch": 1.8579823702252694, "grad_norm": 0.09010161028563324, "learning_rate": 7.249029771849999e-06, "loss": 0.4204, "num_tokens": 899652213.0, "step": 949 }, { "epoch": 1.8599412340842312, "grad_norm": 0.0857329910328507, "learning_rate": 7.243366466638771e-06, "loss": 0.4296, "num_tokens": 900622061.0, "step": 950 }, { "epoch": 1.861900097943193, "grad_norm": 0.08716299164988617, "learning_rate": 7.237699555663472e-06, "loss": 0.4536, "num_tokens": 901545497.0, "step": 951 }, { "epoch": 1.8638589618021548, "grad_norm": 0.09347516841706188, "learning_rate": 7.232029048032532e-06, "loss": 0.4291, "num_tokens": 902490485.0, "step": 952 }, { "epoch": 1.8658178256611166, "grad_norm": 0.0854290686625699, "learning_rate": 7.226354952860157e-06, "loss": 0.4158, "num_tokens": 903429848.0, "step": 953 }, { "epoch": 1.8677766895200785, "grad_norm": 0.09123677673598038, "learning_rate": 7.220677279266327e-06, "loss": 0.4321, "num_tokens": 904397266.0, "step": 954 }, { "epoch": 1.8697355533790403, "grad_norm": 0.0875787526272707, "learning_rate": 7.214996036376769e-06, "loss": 0.4303, "num_tokens": 905341373.0, "step": 955 }, { "epoch": 1.8716944172380021, "grad_norm": 0.09385489528693176, "learning_rate": 7.2093112333229476e-06, "loss": 0.4367, "num_tokens": 906287476.0, "step": 956 }, { "epoch": 1.8736532810969637, "grad_norm": 0.08579907833807023, "learning_rate": 7.203622879242047e-06, "loss": 0.4205, "num_tokens": 907208011.0, "step": 957 }, { "epoch": 1.8756121449559255, "grad_norm": 0.08982081921348842, "learning_rate": 7.197930983276964e-06, "loss": 0.4565, "num_tokens": 908157808.0, "step": 958 }, { "epoch": 1.8775710088148874, "grad_norm": 0.09428717961917073, "learning_rate": 7.192235554576287e-06, "loss": 0.4298, "num_tokens": 909123454.0, "step": 959 }, { "epoch": 1.8795298726738492, "grad_norm": 0.08813977535803645, "learning_rate": 7.186536602294278e-06, "loss": 0.4298, "num_tokens": 910062164.0, "step": 960 }, { "epoch": 1.881488736532811, "grad_norm": 0.08709066569776673, "learning_rate": 7.180834135590869e-06, "loss": 0.4314, "num_tokens": 911009601.0, "step": 961 }, { "epoch": 1.8834476003917726, "grad_norm": 0.08719550875015987, "learning_rate": 7.175128163631636e-06, "loss": 0.4279, "num_tokens": 911954589.0, "step": 962 }, { "epoch": 1.8854064642507344, "grad_norm": 0.08836589818991197, "learning_rate": 7.169418695587791e-06, "loss": 0.4215, "num_tokens": 912889182.0, "step": 963 }, { "epoch": 1.8873653281096963, "grad_norm": 0.09352451611567275, "learning_rate": 7.1637057406361656e-06, "loss": 0.4474, "num_tokens": 913825514.0, "step": 964 }, { "epoch": 1.889324191968658, "grad_norm": 0.09249184003628412, "learning_rate": 7.1579893079591955e-06, "loss": 0.4419, "num_tokens": 914762452.0, "step": 965 }, { "epoch": 1.89128305582762, "grad_norm": 0.0883449220618962, "learning_rate": 7.152269406744904e-06, "loss": 0.4349, "num_tokens": 915690159.0, "step": 966 }, { "epoch": 1.8932419196865817, "grad_norm": 0.09220825335778882, "learning_rate": 7.146546046186893e-06, "loss": 0.4296, "num_tokens": 916610333.0, "step": 967 }, { "epoch": 1.8952007835455436, "grad_norm": 0.0872142532151571, "learning_rate": 7.140819235484322e-06, "loss": 0.4318, "num_tokens": 917540458.0, "step": 968 }, { "epoch": 1.8971596474045054, "grad_norm": 0.08882587423699995, "learning_rate": 7.135088983841898e-06, "loss": 0.4401, "num_tokens": 918487705.0, "step": 969 }, { "epoch": 1.8991185112634672, "grad_norm": 0.0936949575425075, "learning_rate": 7.129355300469854e-06, "loss": 0.43, "num_tokens": 919431101.0, "step": 970 }, { "epoch": 1.901077375122429, "grad_norm": 0.08974361619962315, "learning_rate": 7.123618194583947e-06, "loss": 0.4412, "num_tokens": 920379795.0, "step": 971 }, { "epoch": 1.9030362389813908, "grad_norm": 0.08792011295575772, "learning_rate": 7.117877675405427e-06, "loss": 0.4192, "num_tokens": 921314131.0, "step": 972 }, { "epoch": 1.9049951028403527, "grad_norm": 0.08528632260883838, "learning_rate": 7.112133752161034e-06, "loss": 0.4341, "num_tokens": 922226945.0, "step": 973 }, { "epoch": 1.9069539666993145, "grad_norm": 0.09026828044995538, "learning_rate": 7.106386434082979e-06, "loss": 0.4387, "num_tokens": 923195804.0, "step": 974 }, { "epoch": 1.9089128305582763, "grad_norm": 0.08532245075495483, "learning_rate": 7.10063573040893e-06, "loss": 0.432, "num_tokens": 924164462.0, "step": 975 }, { "epoch": 1.9108716944172381, "grad_norm": 0.08838154486612344, "learning_rate": 7.094881650381995e-06, "loss": 0.4166, "num_tokens": 925095845.0, "step": 976 }, { "epoch": 1.9128305582762, "grad_norm": 0.0894884729283402, "learning_rate": 7.089124203250709e-06, "loss": 0.4278, "num_tokens": 926021976.0, "step": 977 }, { "epoch": 1.9147894221351616, "grad_norm": 0.08564215618334733, "learning_rate": 7.083363398269022e-06, "loss": 0.4186, "num_tokens": 926979607.0, "step": 978 }, { "epoch": 1.9167482859941234, "grad_norm": 0.08583123541938284, "learning_rate": 7.077599244696278e-06, "loss": 0.4208, "num_tokens": 927957218.0, "step": 979 }, { "epoch": 1.9187071498530852, "grad_norm": 0.08357222255382876, "learning_rate": 7.071831751797202e-06, "loss": 0.4316, "num_tokens": 928925433.0, "step": 980 }, { "epoch": 1.920666013712047, "grad_norm": 0.08632462955622228, "learning_rate": 7.066060928841891e-06, "loss": 0.4249, "num_tokens": 929875395.0, "step": 981 }, { "epoch": 1.9226248775710089, "grad_norm": 0.08457636543108527, "learning_rate": 7.0602867851057925e-06, "loss": 0.4123, "num_tokens": 930807456.0, "step": 982 }, { "epoch": 1.9245837414299705, "grad_norm": 0.08805920979356217, "learning_rate": 7.054509329869685e-06, "loss": 0.4356, "num_tokens": 931763590.0, "step": 983 }, { "epoch": 1.9265426052889323, "grad_norm": 0.08623461210441301, "learning_rate": 7.048728572419681e-06, "loss": 0.4337, "num_tokens": 932714866.0, "step": 984 }, { "epoch": 1.928501469147894, "grad_norm": 0.0846789684294999, "learning_rate": 7.042944522047193e-06, "loss": 0.429, "num_tokens": 933676337.0, "step": 985 }, { "epoch": 1.930460333006856, "grad_norm": 0.08941298064535665, "learning_rate": 7.0371571880489274e-06, "loss": 0.43, "num_tokens": 934627135.0, "step": 986 }, { "epoch": 1.9324191968658178, "grad_norm": 0.08647750253606112, "learning_rate": 7.0313665797268704e-06, "loss": 0.4406, "num_tokens": 935574842.0, "step": 987 }, { "epoch": 1.9343780607247796, "grad_norm": 0.08733646879164099, "learning_rate": 7.025572706388268e-06, "loss": 0.4437, "num_tokens": 936533645.0, "step": 988 }, { "epoch": 1.9363369245837414, "grad_norm": 0.08713719779617433, "learning_rate": 7.019775577345618e-06, "loss": 0.4293, "num_tokens": 937489716.0, "step": 989 }, { "epoch": 1.9382957884427032, "grad_norm": 0.08620697776474966, "learning_rate": 7.0139752019166474e-06, "loss": 0.4166, "num_tokens": 938462701.0, "step": 990 }, { "epoch": 1.940254652301665, "grad_norm": 0.08687060493220213, "learning_rate": 7.008171589424302e-06, "loss": 0.4254, "num_tokens": 939428068.0, "step": 991 }, { "epoch": 1.9422135161606269, "grad_norm": 0.08367694885617118, "learning_rate": 7.0023647491967336e-06, "loss": 0.4191, "num_tokens": 940380046.0, "step": 992 }, { "epoch": 1.9441723800195887, "grad_norm": 0.08779227586016532, "learning_rate": 6.996554690567276e-06, "loss": 0.4353, "num_tokens": 941292759.0, "step": 993 }, { "epoch": 1.9461312438785505, "grad_norm": 0.09292159580303222, "learning_rate": 6.990741422874443e-06, "loss": 0.441, "num_tokens": 942242549.0, "step": 994 }, { "epoch": 1.9480901077375123, "grad_norm": 0.0834052261589026, "learning_rate": 6.984924955461901e-06, "loss": 0.4196, "num_tokens": 943203026.0, "step": 995 }, { "epoch": 1.9500489715964742, "grad_norm": 0.08523032764682975, "learning_rate": 6.979105297678462e-06, "loss": 0.4438, "num_tokens": 944179700.0, "step": 996 }, { "epoch": 1.952007835455436, "grad_norm": 0.09110533923101696, "learning_rate": 6.9732824588780635e-06, "loss": 0.44, "num_tokens": 945138832.0, "step": 997 }, { "epoch": 1.9539666993143978, "grad_norm": 0.08650261050569312, "learning_rate": 6.967456448419756e-06, "loss": 0.4327, "num_tokens": 946133903.0, "step": 998 }, { "epoch": 1.9559255631733594, "grad_norm": 0.08491087154817847, "learning_rate": 6.961627275667693e-06, "loss": 0.4222, "num_tokens": 947096770.0, "step": 999 }, { "epoch": 1.9578844270323212, "grad_norm": 0.08281487740287748, "learning_rate": 6.955794949991104e-06, "loss": 0.438, "num_tokens": 948045301.0, "step": 1000 }, { "epoch": 1.959843290891283, "grad_norm": 0.08839472335258977, "learning_rate": 6.9499594807642905e-06, "loss": 0.4351, "num_tokens": 949014144.0, "step": 1001 }, { "epoch": 1.9618021547502449, "grad_norm": 0.08677248282074429, "learning_rate": 6.944120877366605e-06, "loss": 0.4323, "num_tokens": 949983103.0, "step": 1002 }, { "epoch": 1.9637610186092067, "grad_norm": 0.08765327355089926, "learning_rate": 6.938279149182435e-06, "loss": 0.4335, "num_tokens": 950960048.0, "step": 1003 }, { "epoch": 1.9657198824681683, "grad_norm": 0.08593327105152829, "learning_rate": 6.932434305601197e-06, "loss": 0.4252, "num_tokens": 951887650.0, "step": 1004 }, { "epoch": 1.9676787463271301, "grad_norm": 0.08324228774379007, "learning_rate": 6.92658635601731e-06, "loss": 0.413, "num_tokens": 952856948.0, "step": 1005 }, { "epoch": 1.969637610186092, "grad_norm": 0.08417073884935165, "learning_rate": 6.920735309830184e-06, "loss": 0.4276, "num_tokens": 953790803.0, "step": 1006 }, { "epoch": 1.9715964740450538, "grad_norm": 0.08310230438234872, "learning_rate": 6.914881176444213e-06, "loss": 0.4334, "num_tokens": 954734145.0, "step": 1007 }, { "epoch": 1.9735553379040156, "grad_norm": 0.08890830673621009, "learning_rate": 6.909023965268746e-06, "loss": 0.4232, "num_tokens": 955704667.0, "step": 1008 }, { "epoch": 1.9755142017629774, "grad_norm": 0.0832389330871157, "learning_rate": 6.9031636857180795e-06, "loss": 0.4067, "num_tokens": 956644147.0, "step": 1009 }, { "epoch": 1.9774730656219393, "grad_norm": 0.08722851640571877, "learning_rate": 6.897300347211446e-06, "loss": 0.4313, "num_tokens": 957601288.0, "step": 1010 }, { "epoch": 1.979431929480901, "grad_norm": 0.09022898911624623, "learning_rate": 6.891433959172994e-06, "loss": 0.4287, "num_tokens": 958584024.0, "step": 1011 }, { "epoch": 1.981390793339863, "grad_norm": 0.08985029693519823, "learning_rate": 6.88556453103177e-06, "loss": 0.4312, "num_tokens": 959535098.0, "step": 1012 }, { "epoch": 1.9833496571988247, "grad_norm": 0.08472538395205044, "learning_rate": 6.879692072221709e-06, "loss": 0.416, "num_tokens": 960475278.0, "step": 1013 }, { "epoch": 1.9853085210577865, "grad_norm": 0.08796363623101505, "learning_rate": 6.873816592181617e-06, "loss": 0.4368, "num_tokens": 961396426.0, "step": 1014 }, { "epoch": 1.9872673849167484, "grad_norm": 0.09073188833185505, "learning_rate": 6.867938100355157e-06, "loss": 0.434, "num_tokens": 962325943.0, "step": 1015 }, { "epoch": 1.9892262487757102, "grad_norm": 0.08785016560910433, "learning_rate": 6.86205660619083e-06, "loss": 0.4367, "num_tokens": 963289686.0, "step": 1016 }, { "epoch": 1.991185112634672, "grad_norm": 0.08790596218123922, "learning_rate": 6.856172119141966e-06, "loss": 0.435, "num_tokens": 964244718.0, "step": 1017 }, { "epoch": 1.9931439764936338, "grad_norm": 0.08641718763995186, "learning_rate": 6.850284648666706e-06, "loss": 0.4283, "num_tokens": 965189309.0, "step": 1018 }, { "epoch": 1.9951028403525954, "grad_norm": 0.08831923186959566, "learning_rate": 6.844394204227981e-06, "loss": 0.4347, "num_tokens": 966118494.0, "step": 1019 }, { "epoch": 1.9970617042115573, "grad_norm": 0.08785677300124596, "learning_rate": 6.838500795293506e-06, "loss": 0.4374, "num_tokens": 967063563.0, "step": 1020 }, { "epoch": 1.999020568070519, "grad_norm": 0.09001442137711776, "learning_rate": 6.832604431335762e-06, "loss": 0.4204, "num_tokens": 968000355.0, "step": 1021 }, { "epoch": 2.0, "grad_norm": 0.13990886438124683, "learning_rate": 6.8267051218319766e-06, "loss": 0.4285, "num_tokens": 968473493.0, "step": 1022 }, { "epoch": 2.001958863858962, "grad_norm": 0.09042271698661719, "learning_rate": 6.820802876264112e-06, "loss": 0.4339, "num_tokens": 969409433.0, "step": 1023 }, { "epoch": 2.0039177277179236, "grad_norm": 0.0894873000540885, "learning_rate": 6.814897704118851e-06, "loss": 0.445, "num_tokens": 970344157.0, "step": 1024 }, { "epoch": 2.0058765915768855, "grad_norm": 0.08828463646358492, "learning_rate": 6.808989614887582e-06, "loss": 0.4254, "num_tokens": 971264637.0, "step": 1025 }, { "epoch": 2.0078354554358473, "grad_norm": 0.08863652952689459, "learning_rate": 6.803078618066378e-06, "loss": 0.4221, "num_tokens": 972185363.0, "step": 1026 }, { "epoch": 2.009794319294809, "grad_norm": 0.0869582376314397, "learning_rate": 6.797164723155987e-06, "loss": 0.4186, "num_tokens": 973135478.0, "step": 1027 }, { "epoch": 2.011753183153771, "grad_norm": 0.0912341087882137, "learning_rate": 6.7912479396618155e-06, "loss": 0.4339, "num_tokens": 974087860.0, "step": 1028 }, { "epoch": 2.0137120470127328, "grad_norm": 0.08879814060006445, "learning_rate": 6.785328277093912e-06, "loss": 0.4243, "num_tokens": 975022047.0, "step": 1029 }, { "epoch": 2.0156709108716946, "grad_norm": 0.09211748606068026, "learning_rate": 6.7794057449669545e-06, "loss": 0.4426, "num_tokens": 975946956.0, "step": 1030 }, { "epoch": 2.0176297747306564, "grad_norm": 0.08710182517592917, "learning_rate": 6.773480352800234e-06, "loss": 0.4266, "num_tokens": 976870592.0, "step": 1031 }, { "epoch": 2.0195886385896182, "grad_norm": 0.08733356753832412, "learning_rate": 6.767552110117631e-06, "loss": 0.4175, "num_tokens": 977800895.0, "step": 1032 }, { "epoch": 2.0215475024485796, "grad_norm": 0.0871460092075375, "learning_rate": 6.761621026447616e-06, "loss": 0.4147, "num_tokens": 978770781.0, "step": 1033 }, { "epoch": 2.0235063663075414, "grad_norm": 0.08878069977801947, "learning_rate": 6.7556871113232234e-06, "loss": 0.4386, "num_tokens": 979720092.0, "step": 1034 }, { "epoch": 2.0254652301665033, "grad_norm": 0.08835686419539676, "learning_rate": 6.749750374282039e-06, "loss": 0.4288, "num_tokens": 980678968.0, "step": 1035 }, { "epoch": 2.027424094025465, "grad_norm": 0.0874628032269614, "learning_rate": 6.743810824866179e-06, "loss": 0.4125, "num_tokens": 981610602.0, "step": 1036 }, { "epoch": 2.029382957884427, "grad_norm": 0.08450736471281808, "learning_rate": 6.7378684726222875e-06, "loss": 0.4068, "num_tokens": 982563335.0, "step": 1037 }, { "epoch": 2.0313418217433887, "grad_norm": 0.08505868516958862, "learning_rate": 6.7319233271015104e-06, "loss": 0.4205, "num_tokens": 983513105.0, "step": 1038 }, { "epoch": 2.0333006856023506, "grad_norm": 0.08730489468212824, "learning_rate": 6.725975397859481e-06, "loss": 0.4407, "num_tokens": 984446753.0, "step": 1039 }, { "epoch": 2.0352595494613124, "grad_norm": 0.08587029827647688, "learning_rate": 6.720024694456311e-06, "loss": 0.4185, "num_tokens": 985383770.0, "step": 1040 }, { "epoch": 2.037218413320274, "grad_norm": 0.08595039343020731, "learning_rate": 6.714071226456568e-06, "loss": 0.4273, "num_tokens": 986353533.0, "step": 1041 }, { "epoch": 2.039177277179236, "grad_norm": 0.08648519850540613, "learning_rate": 6.708115003429265e-06, "loss": 0.4112, "num_tokens": 987291394.0, "step": 1042 }, { "epoch": 2.041136141038198, "grad_norm": 0.09070720713842331, "learning_rate": 6.702156034947842e-06, "loss": 0.4165, "num_tokens": 988209521.0, "step": 1043 }, { "epoch": 2.0430950048971597, "grad_norm": 0.08720773104648802, "learning_rate": 6.6961943305901515e-06, "loss": 0.4244, "num_tokens": 989143282.0, "step": 1044 }, { "epoch": 2.0450538687561215, "grad_norm": 0.08356870633033667, "learning_rate": 6.690229899938445e-06, "loss": 0.4244, "num_tokens": 990087629.0, "step": 1045 }, { "epoch": 2.0470127326150833, "grad_norm": 0.0885256763687902, "learning_rate": 6.684262752579354e-06, "loss": 0.4303, "num_tokens": 991029591.0, "step": 1046 }, { "epoch": 2.048971596474045, "grad_norm": 0.08845802673876547, "learning_rate": 6.678292898103879e-06, "loss": 0.422, "num_tokens": 991974727.0, "step": 1047 }, { "epoch": 2.050930460333007, "grad_norm": 0.08578130284905115, "learning_rate": 6.672320346107367e-06, "loss": 0.4242, "num_tokens": 992917717.0, "step": 1048 }, { "epoch": 2.052889324191969, "grad_norm": 0.09364007726543205, "learning_rate": 6.666345106189508e-06, "loss": 0.4237, "num_tokens": 993862862.0, "step": 1049 }, { "epoch": 2.0548481880509306, "grad_norm": 0.0860871126577842, "learning_rate": 6.660367187954304e-06, "loss": 0.4252, "num_tokens": 994830885.0, "step": 1050 }, { "epoch": 2.0568070519098924, "grad_norm": 0.09066905547094299, "learning_rate": 6.65438660101007e-06, "loss": 0.4233, "num_tokens": 995780263.0, "step": 1051 }, { "epoch": 2.0587659157688543, "grad_norm": 0.08451666053149698, "learning_rate": 6.648403354969404e-06, "loss": 0.4322, "num_tokens": 996762312.0, "step": 1052 }, { "epoch": 2.060724779627816, "grad_norm": 0.08147946498227737, "learning_rate": 6.642417459449181e-06, "loss": 0.4147, "num_tokens": 997760014.0, "step": 1053 }, { "epoch": 2.0626836434867775, "grad_norm": 0.0924097896999832, "learning_rate": 6.6364289240705324e-06, "loss": 0.4225, "num_tokens": 998707194.0, "step": 1054 }, { "epoch": 2.0646425073457393, "grad_norm": 0.08880772747352325, "learning_rate": 6.630437758458839e-06, "loss": 0.4222, "num_tokens": 999624872.0, "step": 1055 }, { "epoch": 2.066601371204701, "grad_norm": 0.08535151578135977, "learning_rate": 6.6244439722436985e-06, "loss": 0.4199, "num_tokens": 1000555666.0, "step": 1056 }, { "epoch": 2.068560235063663, "grad_norm": 0.08645940123807073, "learning_rate": 6.618447575058929e-06, "loss": 0.4278, "num_tokens": 1001482665.0, "step": 1057 }, { "epoch": 2.0705190989226248, "grad_norm": 0.08850702822202812, "learning_rate": 6.612448576542545e-06, "loss": 0.4124, "num_tokens": 1002454920.0, "step": 1058 }, { "epoch": 2.0724779627815866, "grad_norm": 0.08638920339828096, "learning_rate": 6.606446986336736e-06, "loss": 0.4184, "num_tokens": 1003425275.0, "step": 1059 }, { "epoch": 2.0744368266405484, "grad_norm": 0.0908983416828278, "learning_rate": 6.600442814087865e-06, "loss": 0.426, "num_tokens": 1004345342.0, "step": 1060 }, { "epoch": 2.0763956904995102, "grad_norm": 0.09052632215917387, "learning_rate": 6.594436069446437e-06, "loss": 0.4408, "num_tokens": 1005315478.0, "step": 1061 }, { "epoch": 2.078354554358472, "grad_norm": 0.08250486255633122, "learning_rate": 6.588426762067099e-06, "loss": 0.4113, "num_tokens": 1006245838.0, "step": 1062 }, { "epoch": 2.080313418217434, "grad_norm": 0.08852451483879102, "learning_rate": 6.582414901608611e-06, "loss": 0.4342, "num_tokens": 1007229627.0, "step": 1063 }, { "epoch": 2.0822722820763957, "grad_norm": 0.09031206410672629, "learning_rate": 6.576400497733841e-06, "loss": 0.4254, "num_tokens": 1008162960.0, "step": 1064 }, { "epoch": 2.0842311459353575, "grad_norm": 0.0864045142590658, "learning_rate": 6.570383560109745e-06, "loss": 0.4293, "num_tokens": 1009101606.0, "step": 1065 }, { "epoch": 2.0861900097943193, "grad_norm": 0.08922509587318524, "learning_rate": 6.564364098407348e-06, "loss": 0.434, "num_tokens": 1010024213.0, "step": 1066 }, { "epoch": 2.088148873653281, "grad_norm": 0.08691060584575491, "learning_rate": 6.558342122301733e-06, "loss": 0.4147, "num_tokens": 1010962414.0, "step": 1067 }, { "epoch": 2.090107737512243, "grad_norm": 0.08807462348673281, "learning_rate": 6.552317641472027e-06, "loss": 0.4292, "num_tokens": 1011926691.0, "step": 1068 }, { "epoch": 2.092066601371205, "grad_norm": 0.08543679010461865, "learning_rate": 6.5462906656013805e-06, "loss": 0.4237, "num_tokens": 1012871933.0, "step": 1069 }, { "epoch": 2.0940254652301666, "grad_norm": 0.08742364903188327, "learning_rate": 6.540261204376954e-06, "loss": 0.4274, "num_tokens": 1013811271.0, "step": 1070 }, { "epoch": 2.0959843290891285, "grad_norm": 0.08865922941911121, "learning_rate": 6.5342292674899065e-06, "loss": 0.4435, "num_tokens": 1014750776.0, "step": 1071 }, { "epoch": 2.0979431929480903, "grad_norm": 0.08850207818224096, "learning_rate": 6.52819486463537e-06, "loss": 0.4222, "num_tokens": 1015691930.0, "step": 1072 }, { "epoch": 2.099902056807052, "grad_norm": 0.08485792660808923, "learning_rate": 6.522158005512445e-06, "loss": 0.4059, "num_tokens": 1016642924.0, "step": 1073 }, { "epoch": 2.101860920666014, "grad_norm": 0.09106027897419366, "learning_rate": 6.516118699824179e-06, "loss": 0.4332, "num_tokens": 1017589589.0, "step": 1074 }, { "epoch": 2.1038197845249753, "grad_norm": 0.09628235122455635, "learning_rate": 6.510076957277549e-06, "loss": 0.4236, "num_tokens": 1018536454.0, "step": 1075 }, { "epoch": 2.105778648383937, "grad_norm": 0.09137519333502027, "learning_rate": 6.504032787583452e-06, "loss": 0.4332, "num_tokens": 1019489810.0, "step": 1076 }, { "epoch": 2.107737512242899, "grad_norm": 0.08896344922548269, "learning_rate": 6.497986200456684e-06, "loss": 0.4229, "num_tokens": 1020408403.0, "step": 1077 }, { "epoch": 2.109696376101861, "grad_norm": 0.09144395841876914, "learning_rate": 6.49193720561593e-06, "loss": 0.419, "num_tokens": 1021371009.0, "step": 1078 }, { "epoch": 2.1116552399608226, "grad_norm": 0.08419306375593151, "learning_rate": 6.48588581278374e-06, "loss": 0.4208, "num_tokens": 1022353024.0, "step": 1079 }, { "epoch": 2.1136141038197844, "grad_norm": 0.08908143142604028, "learning_rate": 6.479832031686522e-06, "loss": 0.4225, "num_tokens": 1023330905.0, "step": 1080 }, { "epoch": 2.1155729676787463, "grad_norm": 0.09110437348208388, "learning_rate": 6.473775872054522e-06, "loss": 0.4148, "num_tokens": 1024323661.0, "step": 1081 }, { "epoch": 2.117531831537708, "grad_norm": 0.08828334528110036, "learning_rate": 6.467717343621807e-06, "loss": 0.4261, "num_tokens": 1025321459.0, "step": 1082 }, { "epoch": 2.11949069539667, "grad_norm": 0.08723822032570723, "learning_rate": 6.461656456126254e-06, "loss": 0.4243, "num_tokens": 1026278632.0, "step": 1083 }, { "epoch": 2.1214495592556317, "grad_norm": 0.0913536062911726, "learning_rate": 6.45559321930953e-06, "loss": 0.4344, "num_tokens": 1027227510.0, "step": 1084 }, { "epoch": 2.1234084231145935, "grad_norm": 0.08782548059322276, "learning_rate": 6.449527642917079e-06, "loss": 0.4065, "num_tokens": 1028156348.0, "step": 1085 }, { "epoch": 2.1253672869735554, "grad_norm": 0.08968160127978006, "learning_rate": 6.443459736698106e-06, "loss": 0.4238, "num_tokens": 1029105257.0, "step": 1086 }, { "epoch": 2.127326150832517, "grad_norm": 0.08757246543778505, "learning_rate": 6.4373895104055575e-06, "loss": 0.4168, "num_tokens": 1030059584.0, "step": 1087 }, { "epoch": 2.129285014691479, "grad_norm": 0.0925208798180306, "learning_rate": 6.431316973796114e-06, "loss": 0.4254, "num_tokens": 1031018195.0, "step": 1088 }, { "epoch": 2.131243878550441, "grad_norm": 0.09027882568785589, "learning_rate": 6.425242136630163e-06, "loss": 0.4326, "num_tokens": 1031915323.0, "step": 1089 }, { "epoch": 2.1332027424094027, "grad_norm": 0.08698448537140553, "learning_rate": 6.419165008671796e-06, "loss": 0.4168, "num_tokens": 1032860249.0, "step": 1090 }, { "epoch": 2.1351616062683645, "grad_norm": 0.0904908892556419, "learning_rate": 6.413085599688786e-06, "loss": 0.4165, "num_tokens": 1033827505.0, "step": 1091 }, { "epoch": 2.1371204701273263, "grad_norm": 0.0903982867050371, "learning_rate": 6.407003919452565e-06, "loss": 0.4243, "num_tokens": 1034761620.0, "step": 1092 }, { "epoch": 2.139079333986288, "grad_norm": 0.08664524783286122, "learning_rate": 6.400919977738222e-06, "loss": 0.426, "num_tokens": 1035720831.0, "step": 1093 }, { "epoch": 2.14103819784525, "grad_norm": 0.09074898123458312, "learning_rate": 6.394833784324485e-06, "loss": 0.4153, "num_tokens": 1036661948.0, "step": 1094 }, { "epoch": 2.142997061704212, "grad_norm": 0.08912674580637855, "learning_rate": 6.388745348993689e-06, "loss": 0.419, "num_tokens": 1037608963.0, "step": 1095 }, { "epoch": 2.144955925563173, "grad_norm": 0.08584834284684544, "learning_rate": 6.3826546815317835e-06, "loss": 0.408, "num_tokens": 1038556073.0, "step": 1096 }, { "epoch": 2.146914789422135, "grad_norm": 0.08516378615177235, "learning_rate": 6.3765617917282985e-06, "loss": 0.4228, "num_tokens": 1039529123.0, "step": 1097 }, { "epoch": 2.148873653281097, "grad_norm": 0.08620322833951624, "learning_rate": 6.370466689376343e-06, "loss": 0.4344, "num_tokens": 1040489386.0, "step": 1098 }, { "epoch": 2.1508325171400586, "grad_norm": 0.08632778561522257, "learning_rate": 6.364369384272573e-06, "loss": 0.4088, "num_tokens": 1041423415.0, "step": 1099 }, { "epoch": 2.1527913809990205, "grad_norm": 0.0912855425604682, "learning_rate": 6.3582698862171945e-06, "loss": 0.4253, "num_tokens": 1042352125.0, "step": 1100 }, { "epoch": 2.1547502448579823, "grad_norm": 0.09338332366594856, "learning_rate": 6.3521682050139325e-06, "loss": 0.4271, "num_tokens": 1043332301.0, "step": 1101 }, { "epoch": 2.156709108716944, "grad_norm": 0.08712608719213073, "learning_rate": 6.346064350470019e-06, "loss": 0.4112, "num_tokens": 1044303032.0, "step": 1102 }, { "epoch": 2.158667972575906, "grad_norm": 0.08406921815221968, "learning_rate": 6.3399583323961864e-06, "loss": 0.4129, "num_tokens": 1045225033.0, "step": 1103 }, { "epoch": 2.1606268364348677, "grad_norm": 0.08523689137761957, "learning_rate": 6.333850160606641e-06, "loss": 0.4182, "num_tokens": 1046200892.0, "step": 1104 }, { "epoch": 2.1625857002938296, "grad_norm": 0.08849746271017883, "learning_rate": 6.327739844919047e-06, "loss": 0.419, "num_tokens": 1047132811.0, "step": 1105 }, { "epoch": 2.1645445641527914, "grad_norm": 0.09296922046548894, "learning_rate": 6.321627395154522e-06, "loss": 0.413, "num_tokens": 1048082197.0, "step": 1106 }, { "epoch": 2.166503428011753, "grad_norm": 0.0868108858528206, "learning_rate": 6.315512821137606e-06, "loss": 0.4343, "num_tokens": 1049080403.0, "step": 1107 }, { "epoch": 2.168462291870715, "grad_norm": 0.09214250216477304, "learning_rate": 6.30939613269626e-06, "loss": 0.4344, "num_tokens": 1050035591.0, "step": 1108 }, { "epoch": 2.170421155729677, "grad_norm": 0.08449882559402659, "learning_rate": 6.303277339661839e-06, "loss": 0.4193, "num_tokens": 1050975812.0, "step": 1109 }, { "epoch": 2.1723800195886387, "grad_norm": 0.09423781044233816, "learning_rate": 6.297156451869082e-06, "loss": 0.4347, "num_tokens": 1051895775.0, "step": 1110 }, { "epoch": 2.1743388834476005, "grad_norm": 0.09075475482659402, "learning_rate": 6.291033479156094e-06, "loss": 0.4351, "num_tokens": 1052848543.0, "step": 1111 }, { "epoch": 2.1762977473065623, "grad_norm": 0.08949276553966692, "learning_rate": 6.284908431364336e-06, "loss": 0.4243, "num_tokens": 1053810946.0, "step": 1112 }, { "epoch": 2.178256611165524, "grad_norm": 0.08706268467759441, "learning_rate": 6.278781318338597e-06, "loss": 0.4399, "num_tokens": 1054804296.0, "step": 1113 }, { "epoch": 2.180215475024486, "grad_norm": 0.08873194457519597, "learning_rate": 6.272652149926989e-06, "loss": 0.4205, "num_tokens": 1055765758.0, "step": 1114 }, { "epoch": 2.182174338883448, "grad_norm": 0.08708002276397027, "learning_rate": 6.26652093598093e-06, "loss": 0.4249, "num_tokens": 1056721494.0, "step": 1115 }, { "epoch": 2.1841332027424096, "grad_norm": 0.08684815034937152, "learning_rate": 6.260387686355121e-06, "loss": 0.4096, "num_tokens": 1057665717.0, "step": 1116 }, { "epoch": 2.186092066601371, "grad_norm": 0.08676643946482575, "learning_rate": 6.254252410907536e-06, "loss": 0.4168, "num_tokens": 1058645384.0, "step": 1117 }, { "epoch": 2.188050930460333, "grad_norm": 0.08720475768234767, "learning_rate": 6.2481151194994105e-06, "loss": 0.4305, "num_tokens": 1059591036.0, "step": 1118 }, { "epoch": 2.1900097943192947, "grad_norm": 0.08636773101059496, "learning_rate": 6.241975821995213e-06, "loss": 0.416, "num_tokens": 1060528437.0, "step": 1119 }, { "epoch": 2.1919686581782565, "grad_norm": 0.08644496055481303, "learning_rate": 6.235834528262641e-06, "loss": 0.4181, "num_tokens": 1061491146.0, "step": 1120 }, { "epoch": 2.1939275220372183, "grad_norm": 0.08715824158543502, "learning_rate": 6.229691248172599e-06, "loss": 0.4114, "num_tokens": 1062393644.0, "step": 1121 }, { "epoch": 2.19588638589618, "grad_norm": 0.08812619878603911, "learning_rate": 6.223545991599184e-06, "loss": 0.4261, "num_tokens": 1063329990.0, "step": 1122 }, { "epoch": 2.197845249755142, "grad_norm": 0.09132206578888642, "learning_rate": 6.217398768419668e-06, "loss": 0.4288, "num_tokens": 1064269256.0, "step": 1123 }, { "epoch": 2.1998041136141038, "grad_norm": 0.08502664270876768, "learning_rate": 6.211249588514489e-06, "loss": 0.4087, "num_tokens": 1065164894.0, "step": 1124 }, { "epoch": 2.2017629774730656, "grad_norm": 0.08789264797629244, "learning_rate": 6.2050984617672275e-06, "loss": 0.4136, "num_tokens": 1066078469.0, "step": 1125 }, { "epoch": 2.2037218413320274, "grad_norm": 0.08596189310712543, "learning_rate": 6.198945398064589e-06, "loss": 0.4292, "num_tokens": 1067041602.0, "step": 1126 }, { "epoch": 2.2056807051909892, "grad_norm": 0.08946989522624431, "learning_rate": 6.192790407296399e-06, "loss": 0.4299, "num_tokens": 1067977225.0, "step": 1127 }, { "epoch": 2.207639569049951, "grad_norm": 0.08530660571718063, "learning_rate": 6.186633499355576e-06, "loss": 0.4274, "num_tokens": 1068925447.0, "step": 1128 }, { "epoch": 2.209598432908913, "grad_norm": 0.08223275716237784, "learning_rate": 6.18047468413812e-06, "loss": 0.418, "num_tokens": 1069898052.0, "step": 1129 }, { "epoch": 2.2115572967678747, "grad_norm": 0.08439202910237378, "learning_rate": 6.1743139715431e-06, "loss": 0.4113, "num_tokens": 1070859566.0, "step": 1130 }, { "epoch": 2.2135161606268365, "grad_norm": 0.08836386425823387, "learning_rate": 6.168151371472634e-06, "loss": 0.4156, "num_tokens": 1071812516.0, "step": 1131 }, { "epoch": 2.2154750244857984, "grad_norm": 0.08253391333509251, "learning_rate": 6.161986893831867e-06, "loss": 0.4085, "num_tokens": 1072788298.0, "step": 1132 }, { "epoch": 2.21743388834476, "grad_norm": 0.08734497027399377, "learning_rate": 6.155820548528972e-06, "loss": 0.4098, "num_tokens": 1073729223.0, "step": 1133 }, { "epoch": 2.219392752203722, "grad_norm": 0.0872983973605972, "learning_rate": 6.149652345475118e-06, "loss": 0.4199, "num_tokens": 1074663715.0, "step": 1134 }, { "epoch": 2.221351616062684, "grad_norm": 0.08553499760591111, "learning_rate": 6.143482294584459e-06, "loss": 0.4327, "num_tokens": 1075616314.0, "step": 1135 }, { "epoch": 2.223310479921645, "grad_norm": 0.08813226772588481, "learning_rate": 6.137310405774122e-06, "loss": 0.4153, "num_tokens": 1076569428.0, "step": 1136 }, { "epoch": 2.2252693437806075, "grad_norm": 0.08466239192875728, "learning_rate": 6.1311366889641875e-06, "loss": 0.4267, "num_tokens": 1077546687.0, "step": 1137 }, { "epoch": 2.227228207639569, "grad_norm": 0.090136417478135, "learning_rate": 6.124961154077672e-06, "loss": 0.4172, "num_tokens": 1078449090.0, "step": 1138 }, { "epoch": 2.2291870714985307, "grad_norm": 0.08482683736174175, "learning_rate": 6.118783811040518e-06, "loss": 0.4105, "num_tokens": 1079416873.0, "step": 1139 }, { "epoch": 2.2311459353574925, "grad_norm": 0.08520286917446548, "learning_rate": 6.112604669781572e-06, "loss": 0.4122, "num_tokens": 1080347126.0, "step": 1140 }, { "epoch": 2.2331047992164543, "grad_norm": 0.08782176324254114, "learning_rate": 6.106423740232571e-06, "loss": 0.425, "num_tokens": 1081289664.0, "step": 1141 }, { "epoch": 2.235063663075416, "grad_norm": 0.08680345708766998, "learning_rate": 6.100241032328125e-06, "loss": 0.4326, "num_tokens": 1082237218.0, "step": 1142 }, { "epoch": 2.237022526934378, "grad_norm": 0.09063147118544396, "learning_rate": 6.094056556005703e-06, "loss": 0.4296, "num_tokens": 1083165502.0, "step": 1143 }, { "epoch": 2.23898139079334, "grad_norm": 0.0849138051543222, "learning_rate": 6.087870321205619e-06, "loss": 0.4224, "num_tokens": 1084127711.0, "step": 1144 }, { "epoch": 2.2409402546523016, "grad_norm": 0.08643945489123467, "learning_rate": 6.0816823378710086e-06, "loss": 0.4157, "num_tokens": 1085059824.0, "step": 1145 }, { "epoch": 2.2428991185112634, "grad_norm": 0.0895327436928162, "learning_rate": 6.075492615947824e-06, "loss": 0.4289, "num_tokens": 1085988313.0, "step": 1146 }, { "epoch": 2.2448579823702253, "grad_norm": 0.0879595063795222, "learning_rate": 6.069301165384807e-06, "loss": 0.4179, "num_tokens": 1086926661.0, "step": 1147 }, { "epoch": 2.246816846229187, "grad_norm": 0.09157089650253557, "learning_rate": 6.063107996133477e-06, "loss": 0.4356, "num_tokens": 1087829770.0, "step": 1148 }, { "epoch": 2.248775710088149, "grad_norm": 0.09318446896644245, "learning_rate": 6.056913118148122e-06, "loss": 0.4299, "num_tokens": 1088788350.0, "step": 1149 }, { "epoch": 2.2507345739471107, "grad_norm": 0.087298596899998, "learning_rate": 6.05071654138577e-06, "loss": 0.4161, "num_tokens": 1089765315.0, "step": 1150 }, { "epoch": 2.2526934378060726, "grad_norm": 0.08361724539249257, "learning_rate": 6.044518275806184e-06, "loss": 0.3956, "num_tokens": 1090656166.0, "step": 1151 }, { "epoch": 2.2546523016650344, "grad_norm": 0.08685786823092265, "learning_rate": 6.038318331371836e-06, "loss": 0.4331, "num_tokens": 1091577603.0, "step": 1152 }, { "epoch": 2.256611165523996, "grad_norm": 0.08956135119922624, "learning_rate": 6.032116718047905e-06, "loss": 0.4167, "num_tokens": 1092518192.0, "step": 1153 }, { "epoch": 2.258570029382958, "grad_norm": 0.09705063375610096, "learning_rate": 6.025913445802242e-06, "loss": 0.4174, "num_tokens": 1093426114.0, "step": 1154 }, { "epoch": 2.26052889324192, "grad_norm": 0.08764165278411953, "learning_rate": 6.019708524605372e-06, "loss": 0.4154, "num_tokens": 1094372037.0, "step": 1155 }, { "epoch": 2.2624877571008817, "grad_norm": 0.08653416837402352, "learning_rate": 6.013501964430468e-06, "loss": 0.4201, "num_tokens": 1095349695.0, "step": 1156 }, { "epoch": 2.264446620959843, "grad_norm": 0.086947342784056, "learning_rate": 6.007293775253339e-06, "loss": 0.4172, "num_tokens": 1096289633.0, "step": 1157 }, { "epoch": 2.2664054848188053, "grad_norm": 0.09222607274740083, "learning_rate": 6.001083967052408e-06, "loss": 0.4369, "num_tokens": 1097268758.0, "step": 1158 }, { "epoch": 2.2683643486777667, "grad_norm": 0.09039180454960108, "learning_rate": 5.994872549808702e-06, "loss": 0.4136, "num_tokens": 1098237143.0, "step": 1159 }, { "epoch": 2.2703232125367285, "grad_norm": 0.0910752643051445, "learning_rate": 5.988659533505839e-06, "loss": 0.4238, "num_tokens": 1099175653.0, "step": 1160 }, { "epoch": 2.2722820763956904, "grad_norm": 0.08708378242269066, "learning_rate": 5.982444928130001e-06, "loss": 0.4385, "num_tokens": 1100137220.0, "step": 1161 }, { "epoch": 2.274240940254652, "grad_norm": 0.08831809469667551, "learning_rate": 5.976228743669926e-06, "loss": 0.4136, "num_tokens": 1101100621.0, "step": 1162 }, { "epoch": 2.276199804113614, "grad_norm": 0.08991776132591153, "learning_rate": 5.970010990116892e-06, "loss": 0.425, "num_tokens": 1102049160.0, "step": 1163 }, { "epoch": 2.278158667972576, "grad_norm": 0.08300228647211032, "learning_rate": 5.963791677464696e-06, "loss": 0.4185, "num_tokens": 1103011905.0, "step": 1164 }, { "epoch": 2.2801175318315376, "grad_norm": 0.09026104617757048, "learning_rate": 5.957570815709643e-06, "loss": 0.4205, "num_tokens": 1103958699.0, "step": 1165 }, { "epoch": 2.2820763956904995, "grad_norm": 0.08812812922118217, "learning_rate": 5.951348414850529e-06, "loss": 0.4207, "num_tokens": 1104914696.0, "step": 1166 }, { "epoch": 2.2840352595494613, "grad_norm": 0.08719939930912697, "learning_rate": 5.94512448488862e-06, "loss": 0.4313, "num_tokens": 1105860497.0, "step": 1167 }, { "epoch": 2.285994123408423, "grad_norm": 0.09334636444021462, "learning_rate": 5.938899035827641e-06, "loss": 0.42, "num_tokens": 1106806217.0, "step": 1168 }, { "epoch": 2.287952987267385, "grad_norm": 0.08758036200203533, "learning_rate": 5.93267207767376e-06, "loss": 0.4235, "num_tokens": 1107719105.0, "step": 1169 }, { "epoch": 2.2899118511263468, "grad_norm": 0.0865580879757578, "learning_rate": 5.926443620435572e-06, "loss": 0.4266, "num_tokens": 1108682386.0, "step": 1170 }, { "epoch": 2.2918707149853086, "grad_norm": 0.08826947457120272, "learning_rate": 5.920213674124079e-06, "loss": 0.4237, "num_tokens": 1109596326.0, "step": 1171 }, { "epoch": 2.2938295788442704, "grad_norm": 0.08879768641173116, "learning_rate": 5.913982248752675e-06, "loss": 0.4264, "num_tokens": 1110537632.0, "step": 1172 }, { "epoch": 2.2957884427032322, "grad_norm": 0.09002457607624295, "learning_rate": 5.907749354337133e-06, "loss": 0.4157, "num_tokens": 1111464017.0, "step": 1173 }, { "epoch": 2.297747306562194, "grad_norm": 0.08699029703191959, "learning_rate": 5.901515000895591e-06, "loss": 0.4356, "num_tokens": 1112388838.0, "step": 1174 }, { "epoch": 2.299706170421156, "grad_norm": 0.09153113371090561, "learning_rate": 5.8952791984485245e-06, "loss": 0.4347, "num_tokens": 1113323477.0, "step": 1175 }, { "epoch": 2.3016650342801177, "grad_norm": 0.09107919287896965, "learning_rate": 5.889041957018745e-06, "loss": 0.419, "num_tokens": 1114287175.0, "step": 1176 }, { "epoch": 2.3036238981390795, "grad_norm": 0.09021894631319483, "learning_rate": 5.8828032866313725e-06, "loss": 0.4215, "num_tokens": 1115217358.0, "step": 1177 }, { "epoch": 2.305582761998041, "grad_norm": 0.08630194272004957, "learning_rate": 5.876563197313824e-06, "loss": 0.4117, "num_tokens": 1116132185.0, "step": 1178 }, { "epoch": 2.307541625857003, "grad_norm": 0.08634061646782061, "learning_rate": 5.8703216990958015e-06, "loss": 0.4332, "num_tokens": 1117096741.0, "step": 1179 }, { "epoch": 2.3095004897159646, "grad_norm": 0.08881950285497699, "learning_rate": 5.8640788020092634e-06, "loss": 0.4277, "num_tokens": 1118023443.0, "step": 1180 }, { "epoch": 2.3114593535749264, "grad_norm": 0.08708093470736415, "learning_rate": 5.857834516088426e-06, "loss": 0.4196, "num_tokens": 1118991817.0, "step": 1181 }, { "epoch": 2.313418217433888, "grad_norm": 0.09735277120936721, "learning_rate": 5.85158885136973e-06, "loss": 0.4278, "num_tokens": 1119946883.0, "step": 1182 }, { "epoch": 2.31537708129285, "grad_norm": 0.09013207888988778, "learning_rate": 5.845341817891838e-06, "loss": 0.4288, "num_tokens": 1120899043.0, "step": 1183 }, { "epoch": 2.317335945151812, "grad_norm": 0.08656646884508337, "learning_rate": 5.839093425695609e-06, "loss": 0.421, "num_tokens": 1121848313.0, "step": 1184 }, { "epoch": 2.3192948090107737, "grad_norm": 0.08680555823074611, "learning_rate": 5.832843684824087e-06, "loss": 0.4256, "num_tokens": 1122829981.0, "step": 1185 }, { "epoch": 2.3212536728697355, "grad_norm": 0.08741721014384901, "learning_rate": 5.826592605322482e-06, "loss": 0.425, "num_tokens": 1123749809.0, "step": 1186 }, { "epoch": 2.3232125367286973, "grad_norm": 0.08642901810838186, "learning_rate": 5.820340197238161e-06, "loss": 0.4226, "num_tokens": 1124698596.0, "step": 1187 }, { "epoch": 2.325171400587659, "grad_norm": 0.08588498258919515, "learning_rate": 5.81408647062062e-06, "loss": 0.4393, "num_tokens": 1125657206.0, "step": 1188 }, { "epoch": 2.327130264446621, "grad_norm": 0.08748373647641175, "learning_rate": 5.807831435521477e-06, "loss": 0.4232, "num_tokens": 1126610387.0, "step": 1189 }, { "epoch": 2.329089128305583, "grad_norm": 0.08927452797282809, "learning_rate": 5.8015751019944555e-06, "loss": 0.4213, "num_tokens": 1127539698.0, "step": 1190 }, { "epoch": 2.3310479921645446, "grad_norm": 0.0893519642205304, "learning_rate": 5.795317480095361e-06, "loss": 0.4125, "num_tokens": 1128510407.0, "step": 1191 }, { "epoch": 2.3330068560235064, "grad_norm": 0.08491636345490047, "learning_rate": 5.789058579882074e-06, "loss": 0.4121, "num_tokens": 1129470063.0, "step": 1192 }, { "epoch": 2.3349657198824683, "grad_norm": 0.08984252196487613, "learning_rate": 5.782798411414525e-06, "loss": 0.4331, "num_tokens": 1130367777.0, "step": 1193 }, { "epoch": 2.33692458374143, "grad_norm": 0.09053581250815942, "learning_rate": 5.776536984754691e-06, "loss": 0.4202, "num_tokens": 1131311686.0, "step": 1194 }, { "epoch": 2.338883447600392, "grad_norm": 0.09063978299387467, "learning_rate": 5.770274309966562e-06, "loss": 0.4223, "num_tokens": 1132235248.0, "step": 1195 }, { "epoch": 2.3408423114593537, "grad_norm": 0.08800259601948023, "learning_rate": 5.76401039711614e-06, "loss": 0.4338, "num_tokens": 1133183331.0, "step": 1196 }, { "epoch": 2.3428011753183156, "grad_norm": 0.08719622932715004, "learning_rate": 5.757745256271416e-06, "loss": 0.4308, "num_tokens": 1134132519.0, "step": 1197 }, { "epoch": 2.3447600391772774, "grad_norm": 0.08572206568995963, "learning_rate": 5.751478897502353e-06, "loss": 0.4096, "num_tokens": 1135054379.0, "step": 1198 }, { "epoch": 2.3467189030362388, "grad_norm": 0.08817276524972734, "learning_rate": 5.745211330880872e-06, "loss": 0.4294, "num_tokens": 1136022955.0, "step": 1199 }, { "epoch": 2.348677766895201, "grad_norm": 0.091228551530654, "learning_rate": 5.738942566480839e-06, "loss": 0.4303, "num_tokens": 1136958778.0, "step": 1200 }, { "epoch": 2.3506366307541624, "grad_norm": 0.08868420902004148, "learning_rate": 5.7326726143780394e-06, "loss": 0.4284, "num_tokens": 1137912240.0, "step": 1201 }, { "epoch": 2.3525954946131242, "grad_norm": 0.08948749533257055, "learning_rate": 5.726401484650171e-06, "loss": 0.4406, "num_tokens": 1138838672.0, "step": 1202 }, { "epoch": 2.354554358472086, "grad_norm": 0.09142155592338352, "learning_rate": 5.720129187376825e-06, "loss": 0.4306, "num_tokens": 1139765318.0, "step": 1203 }, { "epoch": 2.356513222331048, "grad_norm": 0.08728357874374511, "learning_rate": 5.713855732639466e-06, "loss": 0.4121, "num_tokens": 1140687203.0, "step": 1204 }, { "epoch": 2.3584720861900097, "grad_norm": 0.0923934935729649, "learning_rate": 5.707581130521424e-06, "loss": 0.423, "num_tokens": 1141640022.0, "step": 1205 }, { "epoch": 2.3604309500489715, "grad_norm": 0.08824418035661111, "learning_rate": 5.701305391107868e-06, "loss": 0.4153, "num_tokens": 1142588951.0, "step": 1206 }, { "epoch": 2.3623898139079333, "grad_norm": 0.08604299060885007, "learning_rate": 5.695028524485797e-06, "loss": 0.4159, "num_tokens": 1143525481.0, "step": 1207 }, { "epoch": 2.364348677766895, "grad_norm": 0.08486896431466706, "learning_rate": 5.688750540744021e-06, "loss": 0.4283, "num_tokens": 1144478548.0, "step": 1208 }, { "epoch": 2.366307541625857, "grad_norm": 0.08550242628203791, "learning_rate": 5.6824714499731505e-06, "loss": 0.4234, "num_tokens": 1145416931.0, "step": 1209 }, { "epoch": 2.368266405484819, "grad_norm": 0.08380409463016955, "learning_rate": 5.6761912622655675e-06, "loss": 0.4171, "num_tokens": 1146393240.0, "step": 1210 }, { "epoch": 2.3702252693437806, "grad_norm": 0.08748035599931478, "learning_rate": 5.6699099877154214e-06, "loss": 0.4148, "num_tokens": 1147358141.0, "step": 1211 }, { "epoch": 2.3721841332027425, "grad_norm": 0.08850046424705908, "learning_rate": 5.663627636418611e-06, "loss": 0.4325, "num_tokens": 1148312744.0, "step": 1212 }, { "epoch": 2.3741429970617043, "grad_norm": 0.08528350097972932, "learning_rate": 5.65734421847276e-06, "loss": 0.4274, "num_tokens": 1149299127.0, "step": 1213 }, { "epoch": 2.376101860920666, "grad_norm": 0.0858978532254676, "learning_rate": 5.6510597439772085e-06, "loss": 0.4281, "num_tokens": 1150243554.0, "step": 1214 }, { "epoch": 2.378060724779628, "grad_norm": 0.08659252218510509, "learning_rate": 5.644774223032997e-06, "loss": 0.4309, "num_tokens": 1151178767.0, "step": 1215 }, { "epoch": 2.3800195886385898, "grad_norm": 0.08581836773259258, "learning_rate": 5.638487665742849e-06, "loss": 0.4279, "num_tokens": 1152102608.0, "step": 1216 }, { "epoch": 2.3819784524975516, "grad_norm": 0.0867447544185725, "learning_rate": 5.632200082211146e-06, "loss": 0.4206, "num_tokens": 1153091123.0, "step": 1217 }, { "epoch": 2.3839373163565134, "grad_norm": 0.08594354254331536, "learning_rate": 5.625911482543928e-06, "loss": 0.427, "num_tokens": 1154072296.0, "step": 1218 }, { "epoch": 2.3858961802154752, "grad_norm": 0.08852540163719144, "learning_rate": 5.619621876848864e-06, "loss": 0.4211, "num_tokens": 1154992011.0, "step": 1219 }, { "epoch": 2.3878550440744366, "grad_norm": 0.08588867297587977, "learning_rate": 5.61333127523524e-06, "loss": 0.4348, "num_tokens": 1155926089.0, "step": 1220 }, { "epoch": 2.389813907933399, "grad_norm": 0.08344556567626972, "learning_rate": 5.607039687813942e-06, "loss": 0.4257, "num_tokens": 1156889859.0, "step": 1221 }, { "epoch": 2.3917727717923603, "grad_norm": 0.08750373287023493, "learning_rate": 5.600747124697442e-06, "loss": 0.4332, "num_tokens": 1157807428.0, "step": 1222 }, { "epoch": 2.393731635651322, "grad_norm": 0.0885322929769485, "learning_rate": 5.5944535959997825e-06, "loss": 0.4271, "num_tokens": 1158772647.0, "step": 1223 }, { "epoch": 2.395690499510284, "grad_norm": 0.08584501160678022, "learning_rate": 5.588159111836553e-06, "loss": 0.416, "num_tokens": 1159719729.0, "step": 1224 }, { "epoch": 2.3976493633692457, "grad_norm": 0.08383944177589096, "learning_rate": 5.581863682324882e-06, "loss": 0.4251, "num_tokens": 1160705818.0, "step": 1225 }, { "epoch": 2.3996082272282075, "grad_norm": 0.08580460156302355, "learning_rate": 5.575567317583415e-06, "loss": 0.4183, "num_tokens": 1161669076.0, "step": 1226 }, { "epoch": 2.4015670910871694, "grad_norm": 0.09267158512984244, "learning_rate": 5.569270027732306e-06, "loss": 0.4257, "num_tokens": 1162619969.0, "step": 1227 }, { "epoch": 2.403525954946131, "grad_norm": 0.08421035900643646, "learning_rate": 5.562971822893187e-06, "loss": 0.4094, "num_tokens": 1163571952.0, "step": 1228 }, { "epoch": 2.405484818805093, "grad_norm": 0.08596664522543533, "learning_rate": 5.55667271318917e-06, "loss": 0.4203, "num_tokens": 1164539426.0, "step": 1229 }, { "epoch": 2.407443682664055, "grad_norm": 0.09233976790408117, "learning_rate": 5.5503727087448155e-06, "loss": 0.4227, "num_tokens": 1165474032.0, "step": 1230 }, { "epoch": 2.4094025465230167, "grad_norm": 0.08688320768846537, "learning_rate": 5.544071819686125e-06, "loss": 0.4248, "num_tokens": 1166422493.0, "step": 1231 }, { "epoch": 2.4113614103819785, "grad_norm": 0.0945756226192389, "learning_rate": 5.53777005614052e-06, "loss": 0.4302, "num_tokens": 1167390753.0, "step": 1232 }, { "epoch": 2.4133202742409403, "grad_norm": 0.08552777818490351, "learning_rate": 5.531467428236827e-06, "loss": 0.429, "num_tokens": 1168369616.0, "step": 1233 }, { "epoch": 2.415279138099902, "grad_norm": 0.08703907151650343, "learning_rate": 5.525163946105266e-06, "loss": 0.4264, "num_tokens": 1169323260.0, "step": 1234 }, { "epoch": 2.417238001958864, "grad_norm": 0.0855132558577357, "learning_rate": 5.5188596198774244e-06, "loss": 0.4236, "num_tokens": 1170253457.0, "step": 1235 }, { "epoch": 2.419196865817826, "grad_norm": 0.09911091065156342, "learning_rate": 5.5125544596862505e-06, "loss": 0.4286, "num_tokens": 1171178861.0, "step": 1236 }, { "epoch": 2.4211557296767876, "grad_norm": 0.09515489730191913, "learning_rate": 5.506248475666031e-06, "loss": 0.433, "num_tokens": 1172118925.0, "step": 1237 }, { "epoch": 2.4231145935357494, "grad_norm": 0.0922862855799988, "learning_rate": 5.499941677952376e-06, "loss": 0.4112, "num_tokens": 1173043753.0, "step": 1238 }, { "epoch": 2.4250734573947113, "grad_norm": 0.0927490877834443, "learning_rate": 5.493634076682206e-06, "loss": 0.4255, "num_tokens": 1173983910.0, "step": 1239 }, { "epoch": 2.427032321253673, "grad_norm": 0.08991829452505166, "learning_rate": 5.4873256819937325e-06, "loss": 0.4304, "num_tokens": 1174924986.0, "step": 1240 }, { "epoch": 2.4289911851126345, "grad_norm": 0.09654364592883412, "learning_rate": 5.48101650402644e-06, "loss": 0.4223, "num_tokens": 1175880543.0, "step": 1241 }, { "epoch": 2.4309500489715967, "grad_norm": 0.09143355884515676, "learning_rate": 5.474706552921074e-06, "loss": 0.4293, "num_tokens": 1176851342.0, "step": 1242 }, { "epoch": 2.432908912830558, "grad_norm": 0.08487914794201284, "learning_rate": 5.468395838819624e-06, "loss": 0.4189, "num_tokens": 1177790517.0, "step": 1243 }, { "epoch": 2.43486777668952, "grad_norm": 0.08484709848706648, "learning_rate": 5.462084371865301e-06, "loss": 0.4103, "num_tokens": 1178737565.0, "step": 1244 }, { "epoch": 2.4368266405484817, "grad_norm": 0.08781895956074455, "learning_rate": 5.455772162202531e-06, "loss": 0.4263, "num_tokens": 1179660472.0, "step": 1245 }, { "epoch": 2.4387855044074436, "grad_norm": 0.0911733217803357, "learning_rate": 5.44945921997693e-06, "loss": 0.4263, "num_tokens": 1180588262.0, "step": 1246 }, { "epoch": 2.4407443682664054, "grad_norm": 0.08748336699199348, "learning_rate": 5.443145555335296e-06, "loss": 0.4336, "num_tokens": 1181543520.0, "step": 1247 }, { "epoch": 2.442703232125367, "grad_norm": 0.08665842688163865, "learning_rate": 5.436831178425582e-06, "loss": 0.4205, "num_tokens": 1182481681.0, "step": 1248 }, { "epoch": 2.444662095984329, "grad_norm": 0.08652704081826909, "learning_rate": 5.430516099396892e-06, "loss": 0.4119, "num_tokens": 1183408064.0, "step": 1249 }, { "epoch": 2.446620959843291, "grad_norm": 0.09089265625207198, "learning_rate": 5.424200328399456e-06, "loss": 0.4302, "num_tokens": 1184398506.0, "step": 1250 }, { "epoch": 2.4485798237022527, "grad_norm": 0.08665999856715143, "learning_rate": 5.417883875584613e-06, "loss": 0.4126, "num_tokens": 1185351585.0, "step": 1251 }, { "epoch": 2.4505386875612145, "grad_norm": 0.08772877346785295, "learning_rate": 5.411566751104804e-06, "loss": 0.4188, "num_tokens": 1186298168.0, "step": 1252 }, { "epoch": 2.4524975514201763, "grad_norm": 0.08846717146727087, "learning_rate": 5.405248965113546e-06, "loss": 0.4216, "num_tokens": 1187220175.0, "step": 1253 }, { "epoch": 2.454456415279138, "grad_norm": 0.09323937399381285, "learning_rate": 5.398930527765416e-06, "loss": 0.4173, "num_tokens": 1188169761.0, "step": 1254 }, { "epoch": 2.4564152791381, "grad_norm": 0.08635935317945805, "learning_rate": 5.392611449216045e-06, "loss": 0.4071, "num_tokens": 1189100120.0, "step": 1255 }, { "epoch": 2.458374142997062, "grad_norm": 0.09471159385477396, "learning_rate": 5.3862917396220925e-06, "loss": 0.4161, "num_tokens": 1190045776.0, "step": 1256 }, { "epoch": 2.4603330068560236, "grad_norm": 0.08940277625598954, "learning_rate": 5.379971409141227e-06, "loss": 0.4244, "num_tokens": 1190965780.0, "step": 1257 }, { "epoch": 2.4622918707149855, "grad_norm": 0.08333095986467484, "learning_rate": 5.373650467932122e-06, "loss": 0.4071, "num_tokens": 1191942307.0, "step": 1258 }, { "epoch": 2.4642507345739473, "grad_norm": 0.08622640845177042, "learning_rate": 5.367328926154428e-06, "loss": 0.4189, "num_tokens": 1192906717.0, "step": 1259 }, { "epoch": 2.466209598432909, "grad_norm": 0.08819087470337499, "learning_rate": 5.361006793968764e-06, "loss": 0.4399, "num_tokens": 1193847834.0, "step": 1260 }, { "epoch": 2.468168462291871, "grad_norm": 0.08392409479166801, "learning_rate": 5.354684081536693e-06, "loss": 0.4055, "num_tokens": 1194800527.0, "step": 1261 }, { "epoch": 2.4701273261508323, "grad_norm": 0.08873929760927023, "learning_rate": 5.348360799020717e-06, "loss": 0.4275, "num_tokens": 1195738245.0, "step": 1262 }, { "epoch": 2.472086190009794, "grad_norm": 0.0870619869669245, "learning_rate": 5.342036956584251e-06, "loss": 0.4162, "num_tokens": 1196709708.0, "step": 1263 }, { "epoch": 2.474045053868756, "grad_norm": 0.08833235986296435, "learning_rate": 5.335712564391609e-06, "loss": 0.4314, "num_tokens": 1197646062.0, "step": 1264 }, { "epoch": 2.4760039177277178, "grad_norm": 0.08809126754844328, "learning_rate": 5.329387632607989e-06, "loss": 0.4203, "num_tokens": 1198575151.0, "step": 1265 }, { "epoch": 2.4779627815866796, "grad_norm": 0.09220084623013948, "learning_rate": 5.32306217139946e-06, "loss": 0.4212, "num_tokens": 1199546127.0, "step": 1266 }, { "epoch": 2.4799216454456414, "grad_norm": 0.08788363086287648, "learning_rate": 5.316736190932936e-06, "loss": 0.4289, "num_tokens": 1200487335.0, "step": 1267 }, { "epoch": 2.4818805093046032, "grad_norm": 0.08637705985091716, "learning_rate": 5.31040970137617e-06, "loss": 0.4284, "num_tokens": 1201402960.0, "step": 1268 }, { "epoch": 2.483839373163565, "grad_norm": 0.08505942012147649, "learning_rate": 5.304082712897734e-06, "loss": 0.4073, "num_tokens": 1202357356.0, "step": 1269 }, { "epoch": 2.485798237022527, "grad_norm": 0.08793991445308141, "learning_rate": 5.297755235666996e-06, "loss": 0.4272, "num_tokens": 1203318073.0, "step": 1270 }, { "epoch": 2.4877571008814887, "grad_norm": 0.08715273201098046, "learning_rate": 5.291427279854117e-06, "loss": 0.4109, "num_tokens": 1204275795.0, "step": 1271 }, { "epoch": 2.4897159647404505, "grad_norm": 0.08511146390681669, "learning_rate": 5.28509885563002e-06, "loss": 0.4107, "num_tokens": 1205252135.0, "step": 1272 }, { "epoch": 2.4916748285994124, "grad_norm": 0.08417989921996578, "learning_rate": 5.2787699731663865e-06, "loss": 0.4074, "num_tokens": 1206216487.0, "step": 1273 }, { "epoch": 2.493633692458374, "grad_norm": 0.08565702386459073, "learning_rate": 5.2724406426356325e-06, "loss": 0.4189, "num_tokens": 1207143688.0, "step": 1274 }, { "epoch": 2.495592556317336, "grad_norm": 0.08517590404259694, "learning_rate": 5.266110874210893e-06, "loss": 0.4211, "num_tokens": 1208077199.0, "step": 1275 }, { "epoch": 2.497551420176298, "grad_norm": 0.08473219993111532, "learning_rate": 5.259780678066012e-06, "loss": 0.4222, "num_tokens": 1209022544.0, "step": 1276 }, { "epoch": 2.4995102840352597, "grad_norm": 0.08838379088708623, "learning_rate": 5.253450064375512e-06, "loss": 0.418, "num_tokens": 1209974535.0, "step": 1277 }, { "epoch": 2.5014691478942215, "grad_norm": 0.0935118444195652, "learning_rate": 5.247119043314592e-06, "loss": 0.4234, "num_tokens": 1210907077.0, "step": 1278 }, { "epoch": 2.5034280117531833, "grad_norm": 0.08726823635723774, "learning_rate": 5.24078762505911e-06, "loss": 0.4232, "num_tokens": 1211840227.0, "step": 1279 }, { "epoch": 2.505386875612145, "grad_norm": 0.08689683850520893, "learning_rate": 5.234455819785554e-06, "loss": 0.4266, "num_tokens": 1212811578.0, "step": 1280 }, { "epoch": 2.5073457394711065, "grad_norm": 0.08668393909851933, "learning_rate": 5.228123637671037e-06, "loss": 0.4243, "num_tokens": 1213755099.0, "step": 1281 }, { "epoch": 2.5093046033300688, "grad_norm": 0.08635148027331238, "learning_rate": 5.221791088893282e-06, "loss": 0.4186, "num_tokens": 1214715473.0, "step": 1282 }, { "epoch": 2.51126346718903, "grad_norm": 0.08660314040039935, "learning_rate": 5.215458183630597e-06, "loss": 0.4123, "num_tokens": 1215671606.0, "step": 1283 }, { "epoch": 2.5132223310479924, "grad_norm": 0.08970370400613235, "learning_rate": 5.209124932061862e-06, "loss": 0.4251, "num_tokens": 1216615570.0, "step": 1284 }, { "epoch": 2.515181194906954, "grad_norm": 0.09140977797954425, "learning_rate": 5.202791344366517e-06, "loss": 0.4104, "num_tokens": 1217500696.0, "step": 1285 }, { "epoch": 2.5171400587659156, "grad_norm": 0.09726820085104514, "learning_rate": 5.196457430724541e-06, "loss": 0.4518, "num_tokens": 1218426160.0, "step": 1286 }, { "epoch": 2.5190989226248774, "grad_norm": 0.09152544566287074, "learning_rate": 5.190123201316435e-06, "loss": 0.4338, "num_tokens": 1219386445.0, "step": 1287 }, { "epoch": 2.5210577864838393, "grad_norm": 0.08702100478160023, "learning_rate": 5.183788666323209e-06, "loss": 0.4083, "num_tokens": 1220323923.0, "step": 1288 }, { "epoch": 2.523016650342801, "grad_norm": 0.08763610530813823, "learning_rate": 5.177453835926366e-06, "loss": 0.4335, "num_tokens": 1221246258.0, "step": 1289 }, { "epoch": 2.524975514201763, "grad_norm": 0.09035880771317832, "learning_rate": 5.1711187203078826e-06, "loss": 0.4364, "num_tokens": 1222161108.0, "step": 1290 }, { "epoch": 2.5269343780607247, "grad_norm": 0.08886694883924055, "learning_rate": 5.164783329650192e-06, "loss": 0.422, "num_tokens": 1223089888.0, "step": 1291 }, { "epoch": 2.5288932419196866, "grad_norm": 0.0902986541107478, "learning_rate": 5.15844767413617e-06, "loss": 0.4333, "num_tokens": 1224025492.0, "step": 1292 }, { "epoch": 2.5308521057786484, "grad_norm": 0.08450434755008468, "learning_rate": 5.152111763949122e-06, "loss": 0.4113, "num_tokens": 1224962829.0, "step": 1293 }, { "epoch": 2.53281096963761, "grad_norm": 0.0842951630380676, "learning_rate": 5.145775609272757e-06, "loss": 0.4107, "num_tokens": 1225936176.0, "step": 1294 }, { "epoch": 2.534769833496572, "grad_norm": 0.09140174623138231, "learning_rate": 5.13943922029118e-06, "loss": 0.4277, "num_tokens": 1226884735.0, "step": 1295 }, { "epoch": 2.536728697355534, "grad_norm": 0.08431087858868833, "learning_rate": 5.133102607188875e-06, "loss": 0.427, "num_tokens": 1227836437.0, "step": 1296 }, { "epoch": 2.5386875612144957, "grad_norm": 0.09111438925586439, "learning_rate": 5.126765780150678e-06, "loss": 0.4398, "num_tokens": 1228767154.0, "step": 1297 }, { "epoch": 2.5406464250734575, "grad_norm": 0.09498884391890484, "learning_rate": 5.120428749361781e-06, "loss": 0.433, "num_tokens": 1229723280.0, "step": 1298 }, { "epoch": 2.5426052889324193, "grad_norm": 0.0889692529410911, "learning_rate": 5.1140915250076914e-06, "loss": 0.4161, "num_tokens": 1230657337.0, "step": 1299 }, { "epoch": 2.544564152791381, "grad_norm": 0.0868340073908007, "learning_rate": 5.107754117274236e-06, "loss": 0.4309, "num_tokens": 1231629360.0, "step": 1300 }, { "epoch": 2.546523016650343, "grad_norm": 0.08610435407697006, "learning_rate": 5.101416536347532e-06, "loss": 0.4287, "num_tokens": 1232581778.0, "step": 1301 }, { "epoch": 2.5484818805093044, "grad_norm": 0.08719213873117067, "learning_rate": 5.095078792413976e-06, "loss": 0.4213, "num_tokens": 1233538873.0, "step": 1302 }, { "epoch": 2.5504407443682666, "grad_norm": 0.08634673821346445, "learning_rate": 5.0887408956602316e-06, "loss": 0.4195, "num_tokens": 1234483567.0, "step": 1303 }, { "epoch": 2.552399608227228, "grad_norm": 0.09344386614951955, "learning_rate": 5.082402856273198e-06, "loss": 0.4174, "num_tokens": 1235412735.0, "step": 1304 }, { "epoch": 2.5543584720861903, "grad_norm": 0.08676201793077444, "learning_rate": 5.0760646844400105e-06, "loss": 0.4285, "num_tokens": 1236381151.0, "step": 1305 }, { "epoch": 2.5563173359451516, "grad_norm": 0.08656747436579534, "learning_rate": 5.069726390348019e-06, "loss": 0.4193, "num_tokens": 1237289451.0, "step": 1306 }, { "epoch": 2.5582761998041135, "grad_norm": 0.08486631831734234, "learning_rate": 5.063387984184763e-06, "loss": 0.427, "num_tokens": 1238216386.0, "step": 1307 }, { "epoch": 2.5602350636630753, "grad_norm": 0.09018745711808192, "learning_rate": 5.057049476137968e-06, "loss": 0.4369, "num_tokens": 1239152655.0, "step": 1308 }, { "epoch": 2.562193927522037, "grad_norm": 0.08541194231580124, "learning_rate": 5.05071087639552e-06, "loss": 0.4149, "num_tokens": 1240077458.0, "step": 1309 }, { "epoch": 2.564152791380999, "grad_norm": 0.09055586101454287, "learning_rate": 5.044372195145455e-06, "loss": 0.4256, "num_tokens": 1241039487.0, "step": 1310 }, { "epoch": 2.5661116552399608, "grad_norm": 0.08645832979757842, "learning_rate": 5.038033442575939e-06, "loss": 0.4135, "num_tokens": 1242000722.0, "step": 1311 }, { "epoch": 2.5680705190989226, "grad_norm": 0.08787443136369189, "learning_rate": 5.03169462887525e-06, "loss": 0.4254, "num_tokens": 1242927159.0, "step": 1312 }, { "epoch": 2.5700293829578844, "grad_norm": 0.08809899335198573, "learning_rate": 5.025355764231769e-06, "loss": 0.4267, "num_tokens": 1243890829.0, "step": 1313 }, { "epoch": 2.5719882468168462, "grad_norm": 0.09132544297416752, "learning_rate": 5.019016858833954e-06, "loss": 0.424, "num_tokens": 1244841292.0, "step": 1314 }, { "epoch": 2.573947110675808, "grad_norm": 0.08722016010171965, "learning_rate": 5.012677922870332e-06, "loss": 0.4221, "num_tokens": 1245805034.0, "step": 1315 }, { "epoch": 2.57590597453477, "grad_norm": 0.08219176566970758, "learning_rate": 5.006338966529479e-06, "loss": 0.422, "num_tokens": 1246760183.0, "step": 1316 }, { "epoch": 2.5778648383937317, "grad_norm": 0.08664017458337375, "learning_rate": 5e-06, "loss": 0.4288, "num_tokens": 1247697466.0, "step": 1317 }, { "epoch": 2.5798237022526935, "grad_norm": 0.08679633888158816, "learning_rate": 4.993661033470522e-06, "loss": 0.4233, "num_tokens": 1248618984.0, "step": 1318 }, { "epoch": 2.5817825661116554, "grad_norm": 0.08405379996897977, "learning_rate": 4.98732207712967e-06, "loss": 0.4293, "num_tokens": 1249596593.0, "step": 1319 }, { "epoch": 2.583741429970617, "grad_norm": 0.0876393179149807, "learning_rate": 4.980983141166047e-06, "loss": 0.4376, "num_tokens": 1250566838.0, "step": 1320 }, { "epoch": 2.585700293829579, "grad_norm": 0.08667325691006618, "learning_rate": 4.974644235768232e-06, "loss": 0.4462, "num_tokens": 1251527132.0, "step": 1321 }, { "epoch": 2.587659157688541, "grad_norm": 0.08681222762067749, "learning_rate": 4.9683053711247515e-06, "loss": 0.4271, "num_tokens": 1252486319.0, "step": 1322 }, { "epoch": 2.589618021547502, "grad_norm": 0.08560882662756536, "learning_rate": 4.961966557424063e-06, "loss": 0.4274, "num_tokens": 1253446832.0, "step": 1323 }, { "epoch": 2.5915768854064645, "grad_norm": 0.08701258458632462, "learning_rate": 4.9556278048545445e-06, "loss": 0.4228, "num_tokens": 1254392072.0, "step": 1324 }, { "epoch": 2.593535749265426, "grad_norm": 0.08551688483760193, "learning_rate": 4.949289123604481e-06, "loss": 0.4264, "num_tokens": 1255359716.0, "step": 1325 }, { "epoch": 2.595494613124388, "grad_norm": 0.08665994615475629, "learning_rate": 4.942950523862033e-06, "loss": 0.4441, "num_tokens": 1256344914.0, "step": 1326 }, { "epoch": 2.5974534769833495, "grad_norm": 0.08572831448333348, "learning_rate": 4.936612015815237e-06, "loss": 0.4255, "num_tokens": 1257311586.0, "step": 1327 }, { "epoch": 2.5994123408423113, "grad_norm": 0.08490205995471985, "learning_rate": 4.930273609651983e-06, "loss": 0.4107, "num_tokens": 1258277915.0, "step": 1328 }, { "epoch": 2.601371204701273, "grad_norm": 0.08611105268036397, "learning_rate": 4.9239353155599894e-06, "loss": 0.4151, "num_tokens": 1259241846.0, "step": 1329 }, { "epoch": 2.603330068560235, "grad_norm": 0.08371809003984203, "learning_rate": 4.917597143726802e-06, "loss": 0.4089, "num_tokens": 1260175108.0, "step": 1330 }, { "epoch": 2.605288932419197, "grad_norm": 0.08308040980042024, "learning_rate": 4.911259104339771e-06, "loss": 0.4026, "num_tokens": 1261131640.0, "step": 1331 }, { "epoch": 2.6072477962781586, "grad_norm": 0.08405177452374366, "learning_rate": 4.904921207586025e-06, "loss": 0.4115, "num_tokens": 1262089996.0, "step": 1332 }, { "epoch": 2.6092066601371204, "grad_norm": 0.08749694772730317, "learning_rate": 4.89858346365247e-06, "loss": 0.4085, "num_tokens": 1263023865.0, "step": 1333 }, { "epoch": 2.6111655239960823, "grad_norm": 0.08982798796986177, "learning_rate": 4.892245882725766e-06, "loss": 0.4291, "num_tokens": 1263981873.0, "step": 1334 }, { "epoch": 2.613124387855044, "grad_norm": 0.08715085872050161, "learning_rate": 4.88590847499231e-06, "loss": 0.4107, "num_tokens": 1264934131.0, "step": 1335 }, { "epoch": 2.615083251714006, "grad_norm": 0.08348569251643344, "learning_rate": 4.879571250638221e-06, "loss": 0.4313, "num_tokens": 1265881116.0, "step": 1336 }, { "epoch": 2.6170421155729677, "grad_norm": 0.08581338711154483, "learning_rate": 4.873234219849322e-06, "loss": 0.4221, "num_tokens": 1266830553.0, "step": 1337 }, { "epoch": 2.6190009794319296, "grad_norm": 0.0829385128259635, "learning_rate": 4.866897392811127e-06, "loss": 0.4105, "num_tokens": 1267818561.0, "step": 1338 }, { "epoch": 2.6209598432908914, "grad_norm": 0.08724719932512014, "learning_rate": 4.860560779708821e-06, "loss": 0.4315, "num_tokens": 1268764694.0, "step": 1339 }, { "epoch": 2.622918707149853, "grad_norm": 0.08767125652252683, "learning_rate": 4.8542243907272445e-06, "loss": 0.4325, "num_tokens": 1269711703.0, "step": 1340 }, { "epoch": 2.624877571008815, "grad_norm": 0.09372252963411086, "learning_rate": 4.847888236050879e-06, "loss": 0.4168, "num_tokens": 1270612157.0, "step": 1341 }, { "epoch": 2.626836434867777, "grad_norm": 0.08405713241309688, "learning_rate": 4.8415523258638306e-06, "loss": 0.4241, "num_tokens": 1271576623.0, "step": 1342 }, { "epoch": 2.6287952987267387, "grad_norm": 0.09110545895885466, "learning_rate": 4.83521667034981e-06, "loss": 0.4222, "num_tokens": 1272522556.0, "step": 1343 }, { "epoch": 2.6307541625857, "grad_norm": 0.08877118077984512, "learning_rate": 4.828881279692118e-06, "loss": 0.4246, "num_tokens": 1273476719.0, "step": 1344 }, { "epoch": 2.6327130264446623, "grad_norm": 0.08532625599007881, "learning_rate": 4.822546164073635e-06, "loss": 0.4214, "num_tokens": 1274440726.0, "step": 1345 }, { "epoch": 2.6346718903036237, "grad_norm": 0.08601418206646565, "learning_rate": 4.816211333676792e-06, "loss": 0.4246, "num_tokens": 1275390298.0, "step": 1346 }, { "epoch": 2.636630754162586, "grad_norm": 0.0844819289001155, "learning_rate": 4.809876798683567e-06, "loss": 0.4257, "num_tokens": 1276383425.0, "step": 1347 }, { "epoch": 2.6385896180215473, "grad_norm": 0.08513663433905257, "learning_rate": 4.803542569275461e-06, "loss": 0.4068, "num_tokens": 1277332941.0, "step": 1348 }, { "epoch": 2.640548481880509, "grad_norm": 0.0843116595718068, "learning_rate": 4.797208655633484e-06, "loss": 0.4133, "num_tokens": 1278246978.0, "step": 1349 }, { "epoch": 2.642507345739471, "grad_norm": 0.08388506139157995, "learning_rate": 4.7908750679381386e-06, "loss": 0.4188, "num_tokens": 1279192153.0, "step": 1350 }, { "epoch": 2.644466209598433, "grad_norm": 0.09031713181014736, "learning_rate": 4.784541816369403e-06, "loss": 0.4243, "num_tokens": 1280128374.0, "step": 1351 }, { "epoch": 2.6464250734573946, "grad_norm": 0.08951869887136361, "learning_rate": 4.778208911106718e-06, "loss": 0.412, "num_tokens": 1281048912.0, "step": 1352 }, { "epoch": 2.6483839373163565, "grad_norm": 0.08454202196508212, "learning_rate": 4.771876362328962e-06, "loss": 0.4201, "num_tokens": 1282010060.0, "step": 1353 }, { "epoch": 2.6503428011753183, "grad_norm": 0.0841667336847675, "learning_rate": 4.765544180214447e-06, "loss": 0.4237, "num_tokens": 1282970943.0, "step": 1354 }, { "epoch": 2.65230166503428, "grad_norm": 0.0909589185939779, "learning_rate": 4.7592123749408904e-06, "loss": 0.4118, "num_tokens": 1283937168.0, "step": 1355 }, { "epoch": 2.654260528893242, "grad_norm": 0.08799476261814788, "learning_rate": 4.752880956685407e-06, "loss": 0.4262, "num_tokens": 1284878060.0, "step": 1356 }, { "epoch": 2.6562193927522038, "grad_norm": 0.08905967083052413, "learning_rate": 4.746549935624491e-06, "loss": 0.4315, "num_tokens": 1285839805.0, "step": 1357 }, { "epoch": 2.6581782566111656, "grad_norm": 0.08352530100551155, "learning_rate": 4.740219321933991e-06, "loss": 0.4111, "num_tokens": 1286780353.0, "step": 1358 }, { "epoch": 2.6601371204701274, "grad_norm": 0.08869645410002136, "learning_rate": 4.7338891257891085e-06, "loss": 0.425, "num_tokens": 1287753264.0, "step": 1359 }, { "epoch": 2.6620959843290892, "grad_norm": 0.08426981837149752, "learning_rate": 4.72755935736437e-06, "loss": 0.4075, "num_tokens": 1288663758.0, "step": 1360 }, { "epoch": 2.664054848188051, "grad_norm": 0.08424764410311818, "learning_rate": 4.721230026833615e-06, "loss": 0.4235, "num_tokens": 1289635743.0, "step": 1361 }, { "epoch": 2.666013712047013, "grad_norm": 0.08468526648090785, "learning_rate": 4.714901144369982e-06, "loss": 0.4133, "num_tokens": 1290615669.0, "step": 1362 }, { "epoch": 2.6679725759059747, "grad_norm": 0.08674077530770553, "learning_rate": 4.708572720145886e-06, "loss": 0.4262, "num_tokens": 1291577461.0, "step": 1363 }, { "epoch": 2.6699314397649365, "grad_norm": 0.0879098956928888, "learning_rate": 4.702244764333006e-06, "loss": 0.4291, "num_tokens": 1292498086.0, "step": 1364 }, { "epoch": 2.671890303623898, "grad_norm": 0.08513834128773212, "learning_rate": 4.695917287102267e-06, "loss": 0.4232, "num_tokens": 1293435143.0, "step": 1365 }, { "epoch": 2.67384916748286, "grad_norm": 0.08643486884422578, "learning_rate": 4.689590298623831e-06, "loss": 0.4247, "num_tokens": 1294378264.0, "step": 1366 }, { "epoch": 2.6758080313418215, "grad_norm": 0.08745936633848232, "learning_rate": 4.683263809067065e-06, "loss": 0.4132, "num_tokens": 1295322682.0, "step": 1367 }, { "epoch": 2.677766895200784, "grad_norm": 0.08535549088178032, "learning_rate": 4.676937828600542e-06, "loss": 0.4225, "num_tokens": 1296243735.0, "step": 1368 }, { "epoch": 2.679725759059745, "grad_norm": 0.08626784633146645, "learning_rate": 4.670612367392012e-06, "loss": 0.4259, "num_tokens": 1297168892.0, "step": 1369 }, { "epoch": 2.681684622918707, "grad_norm": 0.0865592449270761, "learning_rate": 4.664287435608393e-06, "loss": 0.4221, "num_tokens": 1298117653.0, "step": 1370 }, { "epoch": 2.683643486777669, "grad_norm": 0.08852648756125193, "learning_rate": 4.657963043415751e-06, "loss": 0.4135, "num_tokens": 1299091473.0, "step": 1371 }, { "epoch": 2.6856023506366307, "grad_norm": 0.08803130086410157, "learning_rate": 4.651639200979284e-06, "loss": 0.4289, "num_tokens": 1300046508.0, "step": 1372 }, { "epoch": 2.6875612144955925, "grad_norm": 0.08728864770970363, "learning_rate": 4.645315918463308e-06, "loss": 0.4311, "num_tokens": 1300982987.0, "step": 1373 }, { "epoch": 2.6895200783545543, "grad_norm": 0.08542773274024708, "learning_rate": 4.638993206031238e-06, "loss": 0.4254, "num_tokens": 1301936022.0, "step": 1374 }, { "epoch": 2.691478942213516, "grad_norm": 0.08754845624593002, "learning_rate": 4.632671073845574e-06, "loss": 0.4166, "num_tokens": 1302874334.0, "step": 1375 }, { "epoch": 2.693437806072478, "grad_norm": 0.09093501073826295, "learning_rate": 4.626349532067879e-06, "loss": 0.4217, "num_tokens": 1303814830.0, "step": 1376 }, { "epoch": 2.69539666993144, "grad_norm": 0.08949043713831442, "learning_rate": 4.620028590858773e-06, "loss": 0.4151, "num_tokens": 1304721413.0, "step": 1377 }, { "epoch": 2.6973555337904016, "grad_norm": 0.08427046742392057, "learning_rate": 4.613708260377909e-06, "loss": 0.4013, "num_tokens": 1305673260.0, "step": 1378 }, { "epoch": 2.6993143976493634, "grad_norm": 0.08786768786837658, "learning_rate": 4.607388550783955e-06, "loss": 0.4373, "num_tokens": 1306626238.0, "step": 1379 }, { "epoch": 2.7012732615083253, "grad_norm": 0.09030499429070131, "learning_rate": 4.601069472234584e-06, "loss": 0.4281, "num_tokens": 1307559897.0, "step": 1380 }, { "epoch": 2.703232125367287, "grad_norm": 0.08626019707122737, "learning_rate": 4.594751034886457e-06, "loss": 0.4078, "num_tokens": 1308463462.0, "step": 1381 }, { "epoch": 2.705190989226249, "grad_norm": 0.09258734342286046, "learning_rate": 4.588433248895196e-06, "loss": 0.4219, "num_tokens": 1309366698.0, "step": 1382 }, { "epoch": 2.7071498530852107, "grad_norm": 0.08996091511584008, "learning_rate": 4.5821161244153885e-06, "loss": 0.4327, "num_tokens": 1310328296.0, "step": 1383 }, { "epoch": 2.7091087169441725, "grad_norm": 0.086387115622684, "learning_rate": 4.575799671600547e-06, "loss": 0.4131, "num_tokens": 1311296001.0, "step": 1384 }, { "epoch": 2.7110675808031344, "grad_norm": 0.08251139972896727, "learning_rate": 4.569483900603109e-06, "loss": 0.4102, "num_tokens": 1312243912.0, "step": 1385 }, { "epoch": 2.7130264446620957, "grad_norm": 0.0863231830166427, "learning_rate": 4.56316882157442e-06, "loss": 0.4275, "num_tokens": 1313180857.0, "step": 1386 }, { "epoch": 2.714985308521058, "grad_norm": 0.08047455742931099, "learning_rate": 4.556854444664706e-06, "loss": 0.4145, "num_tokens": 1314147053.0, "step": 1387 }, { "epoch": 2.7169441723800194, "grad_norm": 0.08545677192809085, "learning_rate": 4.5505407800230715e-06, "loss": 0.4177, "num_tokens": 1315061581.0, "step": 1388 }, { "epoch": 2.7189030362389817, "grad_norm": 0.08748335614431532, "learning_rate": 4.544227837797471e-06, "loss": 0.4288, "num_tokens": 1315977754.0, "step": 1389 }, { "epoch": 2.720861900097943, "grad_norm": 0.08688618754690962, "learning_rate": 4.5379156281347e-06, "loss": 0.425, "num_tokens": 1316917513.0, "step": 1390 }, { "epoch": 2.722820763956905, "grad_norm": 0.08300622578487153, "learning_rate": 4.5316041611803775e-06, "loss": 0.4073, "num_tokens": 1317872866.0, "step": 1391 }, { "epoch": 2.7247796278158667, "grad_norm": 0.08851146163457943, "learning_rate": 4.525293447078927e-06, "loss": 0.4242, "num_tokens": 1318779793.0, "step": 1392 }, { "epoch": 2.7267384916748285, "grad_norm": 0.08246445330676379, "learning_rate": 4.518983495973561e-06, "loss": 0.4181, "num_tokens": 1319728628.0, "step": 1393 }, { "epoch": 2.7286973555337903, "grad_norm": 0.08732434459091828, "learning_rate": 4.512674318006268e-06, "loss": 0.4294, "num_tokens": 1320692125.0, "step": 1394 }, { "epoch": 2.730656219392752, "grad_norm": 0.08878515380250739, "learning_rate": 4.506365923317796e-06, "loss": 0.4266, "num_tokens": 1321627423.0, "step": 1395 }, { "epoch": 2.732615083251714, "grad_norm": 0.08932684385252695, "learning_rate": 4.500058322047626e-06, "loss": 0.4331, "num_tokens": 1322577825.0, "step": 1396 }, { "epoch": 2.734573947110676, "grad_norm": 0.08577537821073135, "learning_rate": 4.493751524333971e-06, "loss": 0.4212, "num_tokens": 1323525074.0, "step": 1397 }, { "epoch": 2.7365328109696376, "grad_norm": 0.08887611706237718, "learning_rate": 4.487445540313752e-06, "loss": 0.4227, "num_tokens": 1324431844.0, "step": 1398 }, { "epoch": 2.7384916748285995, "grad_norm": 0.08666775303969908, "learning_rate": 4.481140380122576e-06, "loss": 0.4297, "num_tokens": 1325369039.0, "step": 1399 }, { "epoch": 2.7404505386875613, "grad_norm": 0.08570871349890383, "learning_rate": 4.474836053894735e-06, "loss": 0.4238, "num_tokens": 1326314871.0, "step": 1400 }, { "epoch": 2.742409402546523, "grad_norm": 0.0857332714167006, "learning_rate": 4.468532571763174e-06, "loss": 0.4227, "num_tokens": 1327305903.0, "step": 1401 }, { "epoch": 2.744368266405485, "grad_norm": 0.08624918511174663, "learning_rate": 4.462229943859481e-06, "loss": 0.4158, "num_tokens": 1328242905.0, "step": 1402 }, { "epoch": 2.7463271302644467, "grad_norm": 0.0906246795423964, "learning_rate": 4.455928180313876e-06, "loss": 0.4349, "num_tokens": 1329196365.0, "step": 1403 }, { "epoch": 2.7482859941234086, "grad_norm": 0.0868823878426707, "learning_rate": 4.4496272912551845e-06, "loss": 0.4238, "num_tokens": 1330152019.0, "step": 1404 }, { "epoch": 2.75024485798237, "grad_norm": 0.08839265708997726, "learning_rate": 4.44332728681083e-06, "loss": 0.4274, "num_tokens": 1331141273.0, "step": 1405 }, { "epoch": 2.752203721841332, "grad_norm": 0.08436293399608402, "learning_rate": 4.4370281771068135e-06, "loss": 0.4262, "num_tokens": 1332088742.0, "step": 1406 }, { "epoch": 2.7541625857002936, "grad_norm": 0.08799429873505175, "learning_rate": 4.430729972267695e-06, "loss": 0.4084, "num_tokens": 1333004796.0, "step": 1407 }, { "epoch": 2.756121449559256, "grad_norm": 0.08826510870107931, "learning_rate": 4.424432682416585e-06, "loss": 0.436, "num_tokens": 1333929542.0, "step": 1408 }, { "epoch": 2.7580803134182172, "grad_norm": 0.08445417563135003, "learning_rate": 4.418136317675119e-06, "loss": 0.4203, "num_tokens": 1334870165.0, "step": 1409 }, { "epoch": 2.7600391772771795, "grad_norm": 0.08253451675574697, "learning_rate": 4.411840888163449e-06, "loss": 0.4097, "num_tokens": 1335840737.0, "step": 1410 }, { "epoch": 2.761998041136141, "grad_norm": 0.08519991754388842, "learning_rate": 4.405546404000219e-06, "loss": 0.4294, "num_tokens": 1336789179.0, "step": 1411 }, { "epoch": 2.7639569049951027, "grad_norm": 0.08726131229535722, "learning_rate": 4.39925287530256e-06, "loss": 0.4093, "num_tokens": 1337739721.0, "step": 1412 }, { "epoch": 2.7659157688540645, "grad_norm": 0.08397532110608683, "learning_rate": 4.392960312186061e-06, "loss": 0.4333, "num_tokens": 1338727437.0, "step": 1413 }, { "epoch": 2.7678746327130264, "grad_norm": 0.08362502030987641, "learning_rate": 4.386668724764763e-06, "loss": 0.4031, "num_tokens": 1339705363.0, "step": 1414 }, { "epoch": 2.769833496571988, "grad_norm": 0.08847889470054021, "learning_rate": 4.380378123151139e-06, "loss": 0.4323, "num_tokens": 1340627521.0, "step": 1415 }, { "epoch": 2.77179236043095, "grad_norm": 0.08874335681524745, "learning_rate": 4.374088517456074e-06, "loss": 0.4297, "num_tokens": 1341568553.0, "step": 1416 }, { "epoch": 2.773751224289912, "grad_norm": 0.08843479663044887, "learning_rate": 4.367799917788855e-06, "loss": 0.4132, "num_tokens": 1342488029.0, "step": 1417 }, { "epoch": 2.7757100881488737, "grad_norm": 0.08588344536442485, "learning_rate": 4.361512334257153e-06, "loss": 0.424, "num_tokens": 1343431146.0, "step": 1418 }, { "epoch": 2.7776689520078355, "grad_norm": 0.08419682853981998, "learning_rate": 4.355225776967004e-06, "loss": 0.4122, "num_tokens": 1344386333.0, "step": 1419 }, { "epoch": 2.7796278158667973, "grad_norm": 0.08614637232405228, "learning_rate": 4.348940256022793e-06, "loss": 0.4191, "num_tokens": 1345337799.0, "step": 1420 }, { "epoch": 2.781586679725759, "grad_norm": 0.08609453928325388, "learning_rate": 4.342655781527242e-06, "loss": 0.4279, "num_tokens": 1346278180.0, "step": 1421 }, { "epoch": 2.783545543584721, "grad_norm": 0.08399908294756268, "learning_rate": 4.336372363581391e-06, "loss": 0.3971, "num_tokens": 1347235109.0, "step": 1422 }, { "epoch": 2.7855044074436828, "grad_norm": 0.08607710324760467, "learning_rate": 4.330090012284579e-06, "loss": 0.4106, "num_tokens": 1348195228.0, "step": 1423 }, { "epoch": 2.7874632713026446, "grad_norm": 0.08830464407820007, "learning_rate": 4.323808737734434e-06, "loss": 0.4243, "num_tokens": 1349140603.0, "step": 1424 }, { "epoch": 2.7894221351616064, "grad_norm": 0.08465188461532855, "learning_rate": 4.317528550026852e-06, "loss": 0.4166, "num_tokens": 1350080074.0, "step": 1425 }, { "epoch": 2.791380999020568, "grad_norm": 0.0853482332122759, "learning_rate": 4.3112494592559796e-06, "loss": 0.4375, "num_tokens": 1351042372.0, "step": 1426 }, { "epoch": 2.79333986287953, "grad_norm": 0.08623633168182623, "learning_rate": 4.304971475514204e-06, "loss": 0.4109, "num_tokens": 1351976139.0, "step": 1427 }, { "epoch": 2.7952987267384914, "grad_norm": 0.09794545108240034, "learning_rate": 4.298694608892134e-06, "loss": 0.4276, "num_tokens": 1352887962.0, "step": 1428 }, { "epoch": 2.7972575905974537, "grad_norm": 0.08865652806882626, "learning_rate": 4.292418869478577e-06, "loss": 0.4186, "num_tokens": 1353811342.0, "step": 1429 }, { "epoch": 2.799216454456415, "grad_norm": 0.08408678839635018, "learning_rate": 4.286144267360534e-06, "loss": 0.433, "num_tokens": 1354766417.0, "step": 1430 }, { "epoch": 2.801175318315377, "grad_norm": 0.08467383283729038, "learning_rate": 4.2798708126231756e-06, "loss": 0.4166, "num_tokens": 1355719962.0, "step": 1431 }, { "epoch": 2.8031341821743387, "grad_norm": 0.08517287543720582, "learning_rate": 4.2735985153498295e-06, "loss": 0.4185, "num_tokens": 1356676496.0, "step": 1432 }, { "epoch": 2.8050930460333006, "grad_norm": 0.08571886488062591, "learning_rate": 4.267327385621961e-06, "loss": 0.4272, "num_tokens": 1357628861.0, "step": 1433 }, { "epoch": 2.8070519098922624, "grad_norm": 0.08645861300094004, "learning_rate": 4.2610574335191615e-06, "loss": 0.4181, "num_tokens": 1358579701.0, "step": 1434 }, { "epoch": 2.809010773751224, "grad_norm": 0.08765285916897982, "learning_rate": 4.254788669119127e-06, "loss": 0.4426, "num_tokens": 1359550557.0, "step": 1435 }, { "epoch": 2.810969637610186, "grad_norm": 0.08613431350147782, "learning_rate": 4.248521102497649e-06, "loss": 0.423, "num_tokens": 1360494519.0, "step": 1436 }, { "epoch": 2.812928501469148, "grad_norm": 0.08199107687960296, "learning_rate": 4.242254743728586e-06, "loss": 0.4206, "num_tokens": 1361469588.0, "step": 1437 }, { "epoch": 2.8148873653281097, "grad_norm": 0.08731083624025529, "learning_rate": 4.235989602883862e-06, "loss": 0.4141, "num_tokens": 1362438118.0, "step": 1438 }, { "epoch": 2.8168462291870715, "grad_norm": 0.0844344702064925, "learning_rate": 4.22972569003344e-06, "loss": 0.4243, "num_tokens": 1363403750.0, "step": 1439 }, { "epoch": 2.8188050930460333, "grad_norm": 0.08283393914695329, "learning_rate": 4.223463015245311e-06, "loss": 0.417, "num_tokens": 1364397979.0, "step": 1440 }, { "epoch": 2.820763956904995, "grad_norm": 0.08904784088420996, "learning_rate": 4.217201588585475e-06, "loss": 0.442, "num_tokens": 1365365382.0, "step": 1441 }, { "epoch": 2.822722820763957, "grad_norm": 0.08607038877058544, "learning_rate": 4.210941420117929e-06, "loss": 0.4226, "num_tokens": 1366346611.0, "step": 1442 }, { "epoch": 2.824681684622919, "grad_norm": 0.08789243523209957, "learning_rate": 4.204682519904641e-06, "loss": 0.4252, "num_tokens": 1367319906.0, "step": 1443 }, { "epoch": 2.8266405484818806, "grad_norm": 0.0846557832077161, "learning_rate": 4.198424898005546e-06, "loss": 0.4267, "num_tokens": 1368275343.0, "step": 1444 }, { "epoch": 2.8285994123408424, "grad_norm": 0.08386220182738313, "learning_rate": 4.192168564478524e-06, "loss": 0.4133, "num_tokens": 1369274064.0, "step": 1445 }, { "epoch": 2.8305582761998043, "grad_norm": 0.08420007395172191, "learning_rate": 4.185913529379381e-06, "loss": 0.4157, "num_tokens": 1370227156.0, "step": 1446 }, { "epoch": 2.8325171400587656, "grad_norm": 0.08878866775347316, "learning_rate": 4.1796598027618406e-06, "loss": 0.4293, "num_tokens": 1371180761.0, "step": 1447 }, { "epoch": 2.834476003917728, "grad_norm": 0.08371984206402905, "learning_rate": 4.173407394677519e-06, "loss": 0.4233, "num_tokens": 1372171555.0, "step": 1448 }, { "epoch": 2.8364348677766893, "grad_norm": 0.08440853168229509, "learning_rate": 4.167156315175915e-06, "loss": 0.4038, "num_tokens": 1373114490.0, "step": 1449 }, { "epoch": 2.8383937316356516, "grad_norm": 0.08860207759262753, "learning_rate": 4.160906574304392e-06, "loss": 0.4165, "num_tokens": 1374077054.0, "step": 1450 }, { "epoch": 2.840352595494613, "grad_norm": 0.08813572880219034, "learning_rate": 4.154658182108163e-06, "loss": 0.4236, "num_tokens": 1375000445.0, "step": 1451 }, { "epoch": 2.8423114593535748, "grad_norm": 0.08708887346865538, "learning_rate": 4.148411148630271e-06, "loss": 0.4467, "num_tokens": 1375974231.0, "step": 1452 }, { "epoch": 2.8442703232125366, "grad_norm": 0.08739088490670505, "learning_rate": 4.142165483911575e-06, "loss": 0.4485, "num_tokens": 1376927049.0, "step": 1453 }, { "epoch": 2.8462291870714984, "grad_norm": 0.089933453616257, "learning_rate": 4.135921197990737e-06, "loss": 0.4181, "num_tokens": 1377873883.0, "step": 1454 }, { "epoch": 2.8481880509304602, "grad_norm": 0.08555850125055711, "learning_rate": 4.129678300904201e-06, "loss": 0.4252, "num_tokens": 1378793565.0, "step": 1455 }, { "epoch": 2.850146914789422, "grad_norm": 0.08400564416104261, "learning_rate": 4.123436802686176e-06, "loss": 0.4217, "num_tokens": 1379745839.0, "step": 1456 }, { "epoch": 2.852105778648384, "grad_norm": 0.08704734713245992, "learning_rate": 4.117196713368629e-06, "loss": 0.4292, "num_tokens": 1380716393.0, "step": 1457 }, { "epoch": 2.8540646425073457, "grad_norm": 0.08667779032238235, "learning_rate": 4.110958042981256e-06, "loss": 0.4191, "num_tokens": 1381659575.0, "step": 1458 }, { "epoch": 2.8560235063663075, "grad_norm": 0.08312185377817422, "learning_rate": 4.104720801551475e-06, "loss": 0.4211, "num_tokens": 1382617119.0, "step": 1459 }, { "epoch": 2.8579823702252694, "grad_norm": 0.08361729500249165, "learning_rate": 4.098484999104409e-06, "loss": 0.4199, "num_tokens": 1383604919.0, "step": 1460 }, { "epoch": 2.859941234084231, "grad_norm": 0.0883469687869528, "learning_rate": 4.092250645662867e-06, "loss": 0.4183, "num_tokens": 1384539101.0, "step": 1461 }, { "epoch": 2.861900097943193, "grad_norm": 0.08365888362340465, "learning_rate": 4.086017751247328e-06, "loss": 0.4154, "num_tokens": 1385494745.0, "step": 1462 }, { "epoch": 2.863858961802155, "grad_norm": 0.0857829252017472, "learning_rate": 4.079786325875924e-06, "loss": 0.4243, "num_tokens": 1386459640.0, "step": 1463 }, { "epoch": 2.8658178256611166, "grad_norm": 0.08579543156355285, "learning_rate": 4.073556379564429e-06, "loss": 0.4157, "num_tokens": 1387422495.0, "step": 1464 }, { "epoch": 2.8677766895200785, "grad_norm": 0.08403844361349817, "learning_rate": 4.067327922326242e-06, "loss": 0.4176, "num_tokens": 1388405073.0, "step": 1465 }, { "epoch": 2.8697355533790403, "grad_norm": 0.08300366668369293, "learning_rate": 4.0611009641723615e-06, "loss": 0.4197, "num_tokens": 1389346663.0, "step": 1466 }, { "epoch": 2.871694417238002, "grad_norm": 0.08600928440720594, "learning_rate": 4.0548755151113824e-06, "loss": 0.4198, "num_tokens": 1390300507.0, "step": 1467 }, { "epoch": 2.8736532810969635, "grad_norm": 0.08502182035386478, "learning_rate": 4.048651585149473e-06, "loss": 0.4164, "num_tokens": 1391204890.0, "step": 1468 }, { "epoch": 2.8756121449559258, "grad_norm": 0.08195440822126236, "learning_rate": 4.042429184290357e-06, "loss": 0.4112, "num_tokens": 1392185859.0, "step": 1469 }, { "epoch": 2.877571008814887, "grad_norm": 0.090892916068915, "learning_rate": 4.036208322535304e-06, "loss": 0.4295, "num_tokens": 1393131912.0, "step": 1470 }, { "epoch": 2.8795298726738494, "grad_norm": 0.0862667451758762, "learning_rate": 4.0299890098831096e-06, "loss": 0.4252, "num_tokens": 1394114081.0, "step": 1471 }, { "epoch": 2.881488736532811, "grad_norm": 0.08393258626423705, "learning_rate": 4.023771256330075e-06, "loss": 0.4228, "num_tokens": 1395089769.0, "step": 1472 }, { "epoch": 2.8834476003917726, "grad_norm": 0.08272992398126203, "learning_rate": 4.01755507187e-06, "loss": 0.4136, "num_tokens": 1396066601.0, "step": 1473 }, { "epoch": 2.8854064642507344, "grad_norm": 0.08551055948053857, "learning_rate": 4.011340466494162e-06, "loss": 0.4243, "num_tokens": 1397041867.0, "step": 1474 }, { "epoch": 2.8873653281096963, "grad_norm": 0.0873784242015668, "learning_rate": 4.005127450191299e-06, "loss": 0.4314, "num_tokens": 1398009067.0, "step": 1475 }, { "epoch": 2.889324191968658, "grad_norm": 0.08921150252123267, "learning_rate": 3.998916032947594e-06, "loss": 0.426, "num_tokens": 1398960877.0, "step": 1476 }, { "epoch": 2.89128305582762, "grad_norm": 0.08657560218415009, "learning_rate": 3.9927062247466625e-06, "loss": 0.4223, "num_tokens": 1399911107.0, "step": 1477 }, { "epoch": 2.8932419196865817, "grad_norm": 0.08431648518237164, "learning_rate": 3.986498035569533e-06, "loss": 0.422, "num_tokens": 1400868445.0, "step": 1478 }, { "epoch": 2.8952007835455436, "grad_norm": 0.08964906628252636, "learning_rate": 3.980291475394629e-06, "loss": 0.421, "num_tokens": 1401832152.0, "step": 1479 }, { "epoch": 2.8971596474045054, "grad_norm": 0.08505233137308539, "learning_rate": 3.974086554197759e-06, "loss": 0.4335, "num_tokens": 1402819847.0, "step": 1480 }, { "epoch": 2.899118511263467, "grad_norm": 0.08384794297954219, "learning_rate": 3.967883281952098e-06, "loss": 0.4249, "num_tokens": 1403772945.0, "step": 1481 }, { "epoch": 2.901077375122429, "grad_norm": 0.08397983241421397, "learning_rate": 3.9616816686281636e-06, "loss": 0.4198, "num_tokens": 1404751662.0, "step": 1482 }, { "epoch": 2.903036238981391, "grad_norm": 0.08733913341135212, "learning_rate": 3.955481724193817e-06, "loss": 0.4197, "num_tokens": 1405696873.0, "step": 1483 }, { "epoch": 2.9049951028403527, "grad_norm": 0.08910912116182969, "learning_rate": 3.9492834586142306e-06, "loss": 0.4219, "num_tokens": 1406591142.0, "step": 1484 }, { "epoch": 2.9069539666993145, "grad_norm": 0.09212989650364477, "learning_rate": 3.9430868818518786e-06, "loss": 0.413, "num_tokens": 1407541320.0, "step": 1485 }, { "epoch": 2.9089128305582763, "grad_norm": 0.08293600214295944, "learning_rate": 3.936892003866523e-06, "loss": 0.4002, "num_tokens": 1408502771.0, "step": 1486 }, { "epoch": 2.910871694417238, "grad_norm": 0.08590134879504807, "learning_rate": 3.930698834615195e-06, "loss": 0.4142, "num_tokens": 1409425939.0, "step": 1487 }, { "epoch": 2.9128305582762, "grad_norm": 0.08500131829727482, "learning_rate": 3.924507384052177e-06, "loss": 0.4266, "num_tokens": 1410401535.0, "step": 1488 }, { "epoch": 2.9147894221351613, "grad_norm": 0.08585076216249231, "learning_rate": 3.918317662128994e-06, "loss": 0.4295, "num_tokens": 1411329453.0, "step": 1489 }, { "epoch": 2.9167482859941236, "grad_norm": 0.08610441103438081, "learning_rate": 3.912129678794384e-06, "loss": 0.3994, "num_tokens": 1412308096.0, "step": 1490 }, { "epoch": 2.918707149853085, "grad_norm": 0.08848155507666867, "learning_rate": 3.905943443994299e-06, "loss": 0.4258, "num_tokens": 1413260722.0, "step": 1491 }, { "epoch": 2.9206660137120473, "grad_norm": 0.08558103907972713, "learning_rate": 3.899758967671879e-06, "loss": 0.4266, "num_tokens": 1414194902.0, "step": 1492 }, { "epoch": 2.9226248775710086, "grad_norm": 0.08695765787369268, "learning_rate": 3.893576259767431e-06, "loss": 0.4224, "num_tokens": 1415138861.0, "step": 1493 }, { "epoch": 2.9245837414299705, "grad_norm": 0.08563015648204321, "learning_rate": 3.887395330218429e-06, "loss": 0.4224, "num_tokens": 1416099459.0, "step": 1494 }, { "epoch": 2.9265426052889323, "grad_norm": 0.08342369327522459, "learning_rate": 3.8812161889594826e-06, "loss": 0.4152, "num_tokens": 1417054600.0, "step": 1495 }, { "epoch": 2.928501469147894, "grad_norm": 0.08796149298875809, "learning_rate": 3.875038845922329e-06, "loss": 0.4304, "num_tokens": 1418009232.0, "step": 1496 }, { "epoch": 2.930460333006856, "grad_norm": 0.08731540375781419, "learning_rate": 3.868863311035814e-06, "loss": 0.4346, "num_tokens": 1418974959.0, "step": 1497 }, { "epoch": 2.9324191968658178, "grad_norm": 0.08397040254879076, "learning_rate": 3.8626895942258804e-06, "loss": 0.4242, "num_tokens": 1419928020.0, "step": 1498 }, { "epoch": 2.9343780607247796, "grad_norm": 0.08829494871058116, "learning_rate": 3.856517705415543e-06, "loss": 0.4378, "num_tokens": 1420887581.0, "step": 1499 }, { "epoch": 2.9363369245837414, "grad_norm": 0.09057604720036475, "learning_rate": 3.850347654524884e-06, "loss": 0.4177, "num_tokens": 1421844392.0, "step": 1500 }, { "epoch": 2.9382957884427032, "grad_norm": 0.08520793043642402, "learning_rate": 3.84417945147103e-06, "loss": 0.4254, "num_tokens": 1422766353.0, "step": 1501 }, { "epoch": 2.940254652301665, "grad_norm": 0.08427816943750237, "learning_rate": 3.838013106168134e-06, "loss": 0.4078, "num_tokens": 1423742891.0, "step": 1502 }, { "epoch": 2.942213516160627, "grad_norm": 0.08386793424652861, "learning_rate": 3.831848628527368e-06, "loss": 0.4109, "num_tokens": 1424671790.0, "step": 1503 }, { "epoch": 2.9441723800195887, "grad_norm": 0.08416905709344, "learning_rate": 3.825686028456901e-06, "loss": 0.4178, "num_tokens": 1425628029.0, "step": 1504 }, { "epoch": 2.9461312438785505, "grad_norm": 0.08322343356214816, "learning_rate": 3.8195253158618815e-06, "loss": 0.4131, "num_tokens": 1426597089.0, "step": 1505 }, { "epoch": 2.9480901077375123, "grad_norm": 0.08331145269417914, "learning_rate": 3.813366500644426e-06, "loss": 0.4221, "num_tokens": 1427529099.0, "step": 1506 }, { "epoch": 2.950048971596474, "grad_norm": 0.08572693265651272, "learning_rate": 3.8072095927036034e-06, "loss": 0.434, "num_tokens": 1428501980.0, "step": 1507 }, { "epoch": 2.952007835455436, "grad_norm": 0.08641632406249106, "learning_rate": 3.8010546019354122e-06, "loss": 0.4251, "num_tokens": 1429428197.0, "step": 1508 }, { "epoch": 2.953966699314398, "grad_norm": 0.08316652705389987, "learning_rate": 3.7949015382327737e-06, "loss": 0.4088, "num_tokens": 1430395899.0, "step": 1509 }, { "epoch": 2.955925563173359, "grad_norm": 0.08947342170867507, "learning_rate": 3.788750411485511e-06, "loss": 0.4276, "num_tokens": 1431329285.0, "step": 1510 }, { "epoch": 2.9578844270323215, "grad_norm": 0.08295971476456522, "learning_rate": 3.782601231580332e-06, "loss": 0.4341, "num_tokens": 1432276277.0, "step": 1511 }, { "epoch": 2.959843290891283, "grad_norm": 0.08161559699458866, "learning_rate": 3.7764540084008166e-06, "loss": 0.4237, "num_tokens": 1433245547.0, "step": 1512 }, { "epoch": 2.961802154750245, "grad_norm": 0.09079736480700806, "learning_rate": 3.770308751827402e-06, "loss": 0.4183, "num_tokens": 1434187255.0, "step": 1513 }, { "epoch": 2.9637610186092065, "grad_norm": 0.0842775167541979, "learning_rate": 3.764165471737359e-06, "loss": 0.4121, "num_tokens": 1435139934.0, "step": 1514 }, { "epoch": 2.9657198824681683, "grad_norm": 0.0871417251165915, "learning_rate": 3.758024178004789e-06, "loss": 0.4232, "num_tokens": 1436085191.0, "step": 1515 }, { "epoch": 2.96767874632713, "grad_norm": 0.08558891087797421, "learning_rate": 3.751884880500591e-06, "loss": 0.4135, "num_tokens": 1437010923.0, "step": 1516 }, { "epoch": 2.969637610186092, "grad_norm": 0.08392466431363356, "learning_rate": 3.7457475890924656e-06, "loss": 0.4119, "num_tokens": 1437971337.0, "step": 1517 }, { "epoch": 2.971596474045054, "grad_norm": 0.0854219137398358, "learning_rate": 3.7396123136448824e-06, "loss": 0.4339, "num_tokens": 1438928174.0, "step": 1518 }, { "epoch": 2.9735553379040156, "grad_norm": 0.08432553105531877, "learning_rate": 3.7334790640190733e-06, "loss": 0.4185, "num_tokens": 1439879746.0, "step": 1519 }, { "epoch": 2.9755142017629774, "grad_norm": 0.0853445665439681, "learning_rate": 3.727347850073012e-06, "loss": 0.4119, "num_tokens": 1440799387.0, "step": 1520 }, { "epoch": 2.9774730656219393, "grad_norm": 0.0832484637801818, "learning_rate": 3.721218681661406e-06, "loss": 0.4194, "num_tokens": 1441766089.0, "step": 1521 }, { "epoch": 2.979431929480901, "grad_norm": 0.08445308558444874, "learning_rate": 3.7150915686356657e-06, "loss": 0.4244, "num_tokens": 1442732128.0, "step": 1522 }, { "epoch": 2.981390793339863, "grad_norm": 0.08667681178857112, "learning_rate": 3.708966520843906e-06, "loss": 0.4144, "num_tokens": 1443674826.0, "step": 1523 }, { "epoch": 2.9833496571988247, "grad_norm": 0.09038005129679207, "learning_rate": 3.70284354813092e-06, "loss": 0.4268, "num_tokens": 1444668576.0, "step": 1524 }, { "epoch": 2.9853085210577865, "grad_norm": 0.08268635260702975, "learning_rate": 3.6967226603381624e-06, "loss": 0.4137, "num_tokens": 1445635950.0, "step": 1525 }, { "epoch": 2.9872673849167484, "grad_norm": 0.0842139646863267, "learning_rate": 3.690603867303741e-06, "loss": 0.4133, "num_tokens": 1446543169.0, "step": 1526 }, { "epoch": 2.98922624877571, "grad_norm": 0.08709311275860508, "learning_rate": 3.6844871788623946e-06, "loss": 0.4116, "num_tokens": 1447487974.0, "step": 1527 }, { "epoch": 2.991185112634672, "grad_norm": 0.0857642104307539, "learning_rate": 3.6783726048454794e-06, "loss": 0.4199, "num_tokens": 1448428128.0, "step": 1528 }, { "epoch": 2.993143976493634, "grad_norm": 0.08473377933023256, "learning_rate": 3.6722601550809534e-06, "loss": 0.429, "num_tokens": 1449374978.0, "step": 1529 }, { "epoch": 2.9951028403525957, "grad_norm": 0.08998178379263423, "learning_rate": 3.6661498393933612e-06, "loss": 0.438, "num_tokens": 1450341064.0, "step": 1530 }, { "epoch": 2.997061704211557, "grad_norm": 0.08697424506888644, "learning_rate": 3.6600416676038144e-06, "loss": 0.4365, "num_tokens": 1451292835.0, "step": 1531 }, { "epoch": 2.9990205680705193, "grad_norm": 0.08657942646985065, "learning_rate": 3.6539356495299816e-06, "loss": 0.4194, "num_tokens": 1452234989.0, "step": 1532 }, { "epoch": 3.0, "grad_norm": 0.08657942646985065, "learning_rate": 3.6478317949860696e-06, "loss": 0.4189, "num_tokens": 1452695717.0, "step": 1533 }, { "epoch": 3.001958863858962, "grad_norm": 0.13984079896830842, "learning_rate": 3.641730113782807e-06, "loss": 0.4149, "num_tokens": 1453658103.0, "step": 1534 }, { "epoch": 3.0039177277179236, "grad_norm": 0.08377869426784214, "learning_rate": 3.6356306157274274e-06, "loss": 0.412, "num_tokens": 1454618540.0, "step": 1535 }, { "epoch": 3.0058765915768855, "grad_norm": 0.08664720922269122, "learning_rate": 3.629533310623658e-06, "loss": 0.4126, "num_tokens": 1455545489.0, "step": 1536 }, { "epoch": 3.0078354554358473, "grad_norm": 0.08623098846156026, "learning_rate": 3.623438208271701e-06, "loss": 0.4137, "num_tokens": 1456513429.0, "step": 1537 }, { "epoch": 3.009794319294809, "grad_norm": 0.08688930262599263, "learning_rate": 3.6173453184682173e-06, "loss": 0.4174, "num_tokens": 1457483318.0, "step": 1538 }, { "epoch": 3.011753183153771, "grad_norm": 0.08597988636751401, "learning_rate": 3.611254651006311e-06, "loss": 0.424, "num_tokens": 1458412653.0, "step": 1539 }, { "epoch": 3.0137120470127328, "grad_norm": 0.08696752958556553, "learning_rate": 3.605166215675516e-06, "loss": 0.4008, "num_tokens": 1459381365.0, "step": 1540 }, { "epoch": 3.0156709108716946, "grad_norm": 0.08261014918185572, "learning_rate": 3.5990800222617774e-06, "loss": 0.425, "num_tokens": 1460343126.0, "step": 1541 }, { "epoch": 3.0176297747306564, "grad_norm": 0.08413938130327642, "learning_rate": 3.5929960805474386e-06, "loss": 0.4107, "num_tokens": 1461260689.0, "step": 1542 }, { "epoch": 3.0195886385896182, "grad_norm": 0.08383172821306109, "learning_rate": 3.5869144003112177e-06, "loss": 0.414, "num_tokens": 1462227916.0, "step": 1543 }, { "epoch": 3.0215475024485796, "grad_norm": 0.08419459918821942, "learning_rate": 3.580834991328206e-06, "loss": 0.4231, "num_tokens": 1463209714.0, "step": 1544 }, { "epoch": 3.0235063663075414, "grad_norm": 0.08149417035919176, "learning_rate": 3.5747578633698394e-06, "loss": 0.4307, "num_tokens": 1464181564.0, "step": 1545 }, { "epoch": 3.0254652301665033, "grad_norm": 0.0883918014012131, "learning_rate": 3.568683026203889e-06, "loss": 0.4338, "num_tokens": 1465174778.0, "step": 1546 }, { "epoch": 3.027424094025465, "grad_norm": 0.08517759734458666, "learning_rate": 3.5626104895944437e-06, "loss": 0.4053, "num_tokens": 1466129922.0, "step": 1547 }, { "epoch": 3.029382957884427, "grad_norm": 0.08446013585267594, "learning_rate": 3.5565402633018963e-06, "loss": 0.4182, "num_tokens": 1467089461.0, "step": 1548 }, { "epoch": 3.0313418217433887, "grad_norm": 0.08533340445757492, "learning_rate": 3.550472357082922e-06, "loss": 0.4122, "num_tokens": 1468046930.0, "step": 1549 }, { "epoch": 3.0333006856023506, "grad_norm": 0.09021042288751242, "learning_rate": 3.5444067806904703e-06, "loss": 0.4213, "num_tokens": 1468982281.0, "step": 1550 }, { "epoch": 3.0352595494613124, "grad_norm": 0.08618935150857403, "learning_rate": 3.538343543873748e-06, "loss": 0.4125, "num_tokens": 1469934810.0, "step": 1551 }, { "epoch": 3.037218413320274, "grad_norm": 0.0822883606080526, "learning_rate": 3.532282656378194e-06, "loss": 0.4208, "num_tokens": 1470853320.0, "step": 1552 }, { "epoch": 3.039177277179236, "grad_norm": 0.08475966762698743, "learning_rate": 3.526224127945479e-06, "loss": 0.3981, "num_tokens": 1471815045.0, "step": 1553 }, { "epoch": 3.041136141038198, "grad_norm": 0.0833098974921207, "learning_rate": 3.5201679683134793e-06, "loss": 0.4108, "num_tokens": 1472765711.0, "step": 1554 }, { "epoch": 3.0430950048971597, "grad_norm": 0.08204783615357081, "learning_rate": 3.5141141872162613e-06, "loss": 0.4309, "num_tokens": 1473756856.0, "step": 1555 }, { "epoch": 3.0450538687561215, "grad_norm": 0.08622732916217471, "learning_rate": 3.5080627943840717e-06, "loss": 0.4187, "num_tokens": 1474700106.0, "step": 1556 }, { "epoch": 3.0470127326150833, "grad_norm": 0.08823584351791991, "learning_rate": 3.5020137995433177e-06, "loss": 0.4175, "num_tokens": 1475676036.0, "step": 1557 }, { "epoch": 3.048971596474045, "grad_norm": 0.08673325416372975, "learning_rate": 3.49596721241655e-06, "loss": 0.423, "num_tokens": 1476615144.0, "step": 1558 }, { "epoch": 3.050930460333007, "grad_norm": 0.08522920122710141, "learning_rate": 3.489923042722453e-06, "loss": 0.4171, "num_tokens": 1477560825.0, "step": 1559 }, { "epoch": 3.052889324191969, "grad_norm": 0.08287045771927945, "learning_rate": 3.483881300175823e-06, "loss": 0.4105, "num_tokens": 1478493290.0, "step": 1560 }, { "epoch": 3.0548481880509306, "grad_norm": 0.08657532938138, "learning_rate": 3.4778419944875556e-06, "loss": 0.4117, "num_tokens": 1479436488.0, "step": 1561 }, { "epoch": 3.0568070519098924, "grad_norm": 0.08885440023003051, "learning_rate": 3.4718051353646304e-06, "loss": 0.4056, "num_tokens": 1480380043.0, "step": 1562 }, { "epoch": 3.0587659157688543, "grad_norm": 0.08556954522511925, "learning_rate": 3.465770732510095e-06, "loss": 0.4083, "num_tokens": 1481352426.0, "step": 1563 }, { "epoch": 3.060724779627816, "grad_norm": 0.0820891592346816, "learning_rate": 3.459738795623046e-06, "loss": 0.4106, "num_tokens": 1482296566.0, "step": 1564 }, { "epoch": 3.0626836434867775, "grad_norm": 0.08517211749613793, "learning_rate": 3.4537093343986207e-06, "loss": 0.4252, "num_tokens": 1483246603.0, "step": 1565 }, { "epoch": 3.0646425073457393, "grad_norm": 0.09074356856063315, "learning_rate": 3.4476823585279745e-06, "loss": 0.4251, "num_tokens": 1484209872.0, "step": 1566 }, { "epoch": 3.066601371204701, "grad_norm": 0.08704176577376241, "learning_rate": 3.4416578776982677e-06, "loss": 0.4231, "num_tokens": 1485204372.0, "step": 1567 }, { "epoch": 3.068560235063663, "grad_norm": 0.08499616024048211, "learning_rate": 3.4356359015926553e-06, "loss": 0.4249, "num_tokens": 1486151469.0, "step": 1568 }, { "epoch": 3.0705190989226248, "grad_norm": 0.08380466790436383, "learning_rate": 3.4296164398902576e-06, "loss": 0.4138, "num_tokens": 1487089176.0, "step": 1569 }, { "epoch": 3.0724779627815866, "grad_norm": 0.0866388761711634, "learning_rate": 3.42359950226616e-06, "loss": 0.4193, "num_tokens": 1488013711.0, "step": 1570 }, { "epoch": 3.0744368266405484, "grad_norm": 0.09210749037221054, "learning_rate": 3.4175850983913915e-06, "loss": 0.4262, "num_tokens": 1488958800.0, "step": 1571 }, { "epoch": 3.0763956904995102, "grad_norm": 0.09040379822716957, "learning_rate": 3.4115732379329038e-06, "loss": 0.4061, "num_tokens": 1489904078.0, "step": 1572 }, { "epoch": 3.078354554358472, "grad_norm": 0.08889792152673466, "learning_rate": 3.4055639305535647e-06, "loss": 0.417, "num_tokens": 1490873655.0, "step": 1573 }, { "epoch": 3.080313418217434, "grad_norm": 0.08498848149221166, "learning_rate": 3.399557185912138e-06, "loss": 0.4236, "num_tokens": 1491813490.0, "step": 1574 }, { "epoch": 3.0822722820763957, "grad_norm": 0.08783321336207509, "learning_rate": 3.393553013663265e-06, "loss": 0.4316, "num_tokens": 1492776079.0, "step": 1575 }, { "epoch": 3.0842311459353575, "grad_norm": 0.08725093162154747, "learning_rate": 3.387551423457456e-06, "loss": 0.433, "num_tokens": 1493699310.0, "step": 1576 }, { "epoch": 3.0861900097943193, "grad_norm": 0.0867956181889245, "learning_rate": 3.3815524249410716e-06, "loss": 0.4038, "num_tokens": 1494647913.0, "step": 1577 }, { "epoch": 3.088148873653281, "grad_norm": 0.08521664492400116, "learning_rate": 3.3755560277563028e-06, "loss": 0.4229, "num_tokens": 1495616948.0, "step": 1578 }, { "epoch": 3.090107737512243, "grad_norm": 0.0848634149347137, "learning_rate": 3.3695622415411626e-06, "loss": 0.4096, "num_tokens": 1496583967.0, "step": 1579 }, { "epoch": 3.092066601371205, "grad_norm": 0.08183021108472953, "learning_rate": 3.3635710759294684e-06, "loss": 0.4115, "num_tokens": 1497544251.0, "step": 1580 }, { "epoch": 3.0940254652301666, "grad_norm": 0.08651099819700418, "learning_rate": 3.3575825405508213e-06, "loss": 0.4123, "num_tokens": 1498477038.0, "step": 1581 }, { "epoch": 3.0959843290891285, "grad_norm": 0.08605916461647833, "learning_rate": 3.351596645030597e-06, "loss": 0.4135, "num_tokens": 1499429051.0, "step": 1582 }, { "epoch": 3.0979431929480903, "grad_norm": 0.08691192888813512, "learning_rate": 3.345613398989932e-06, "loss": 0.419, "num_tokens": 1500372240.0, "step": 1583 }, { "epoch": 3.099902056807052, "grad_norm": 0.0863879801250081, "learning_rate": 3.3396328120456968e-06, "loss": 0.4202, "num_tokens": 1501297993.0, "step": 1584 }, { "epoch": 3.101860920666014, "grad_norm": 0.08680344056610552, "learning_rate": 3.333654893810493e-06, "loss": 0.4152, "num_tokens": 1502237843.0, "step": 1585 }, { "epoch": 3.1038197845249753, "grad_norm": 0.09001976183098781, "learning_rate": 3.3276796538926335e-06, "loss": 0.4108, "num_tokens": 1503139935.0, "step": 1586 }, { "epoch": 3.105778648383937, "grad_norm": 0.0892083435236935, "learning_rate": 3.321707101896122e-06, "loss": 0.431, "num_tokens": 1504067449.0, "step": 1587 }, { "epoch": 3.107737512242899, "grad_norm": 0.08401671867988442, "learning_rate": 3.3157372474206463e-06, "loss": 0.4116, "num_tokens": 1505019240.0, "step": 1588 }, { "epoch": 3.109696376101861, "grad_norm": 0.08402281265217668, "learning_rate": 3.3097701000615543e-06, "loss": 0.396, "num_tokens": 1505980621.0, "step": 1589 }, { "epoch": 3.1116552399608226, "grad_norm": 0.08693116821436273, "learning_rate": 3.3038056694098485e-06, "loss": 0.4052, "num_tokens": 1506935827.0, "step": 1590 }, { "epoch": 3.1136141038197844, "grad_norm": 0.08520733780574463, "learning_rate": 3.2978439650521583e-06, "loss": 0.4238, "num_tokens": 1507907052.0, "step": 1591 }, { "epoch": 3.1155729676787463, "grad_norm": 0.08299873349532465, "learning_rate": 3.2918849965707346e-06, "loss": 0.4194, "num_tokens": 1508855217.0, "step": 1592 }, { "epoch": 3.117531831537708, "grad_norm": 0.08560173771938759, "learning_rate": 3.285928773543432e-06, "loss": 0.4118, "num_tokens": 1509765480.0, "step": 1593 }, { "epoch": 3.11949069539667, "grad_norm": 0.08741343363260318, "learning_rate": 3.279975305543691e-06, "loss": 0.4253, "num_tokens": 1510726718.0, "step": 1594 }, { "epoch": 3.1214495592556317, "grad_norm": 0.08673928707032301, "learning_rate": 3.274024602140521e-06, "loss": 0.403, "num_tokens": 1511680781.0, "step": 1595 }, { "epoch": 3.1234084231145935, "grad_norm": 0.08440194225050086, "learning_rate": 3.268076672898492e-06, "loss": 0.417, "num_tokens": 1512595531.0, "step": 1596 }, { "epoch": 3.1253672869735554, "grad_norm": 0.08530355178102447, "learning_rate": 3.262131527377715e-06, "loss": 0.414, "num_tokens": 1513537053.0, "step": 1597 }, { "epoch": 3.127326150832517, "grad_norm": 0.08542535879459515, "learning_rate": 3.2561891751338232e-06, "loss": 0.412, "num_tokens": 1514439201.0, "step": 1598 }, { "epoch": 3.129285014691479, "grad_norm": 0.08603236893444292, "learning_rate": 3.250249625717964e-06, "loss": 0.4107, "num_tokens": 1515384871.0, "step": 1599 }, { "epoch": 3.131243878550441, "grad_norm": 0.08403329310434217, "learning_rate": 3.2443128886767782e-06, "loss": 0.3974, "num_tokens": 1516352640.0, "step": 1600 }, { "epoch": 3.1332027424094027, "grad_norm": 0.08207199919198398, "learning_rate": 3.238378973552385e-06, "loss": 0.4253, "num_tokens": 1517325804.0, "step": 1601 }, { "epoch": 3.1351616062683645, "grad_norm": 0.0855836165023854, "learning_rate": 3.232447889882371e-06, "loss": 0.4163, "num_tokens": 1518280093.0, "step": 1602 }, { "epoch": 3.1371204701273263, "grad_norm": 0.08669293489065007, "learning_rate": 3.2265196471997682e-06, "loss": 0.4396, "num_tokens": 1519242000.0, "step": 1603 }, { "epoch": 3.139079333986288, "grad_norm": 0.08885090340405069, "learning_rate": 3.220594255033046e-06, "loss": 0.421, "num_tokens": 1520192835.0, "step": 1604 }, { "epoch": 3.14103819784525, "grad_norm": 0.08467497009528231, "learning_rate": 3.2146717229060885e-06, "loss": 0.4158, "num_tokens": 1521134188.0, "step": 1605 }, { "epoch": 3.142997061704212, "grad_norm": 0.0853642353840204, "learning_rate": 3.208752060338186e-06, "loss": 0.4115, "num_tokens": 1522125329.0, "step": 1606 }, { "epoch": 3.144955925563173, "grad_norm": 0.08515957490504761, "learning_rate": 3.202835276844015e-06, "loss": 0.4197, "num_tokens": 1523073841.0, "step": 1607 }, { "epoch": 3.146914789422135, "grad_norm": 0.08709305426130894, "learning_rate": 3.196921381933624e-06, "loss": 0.4091, "num_tokens": 1524011872.0, "step": 1608 }, { "epoch": 3.148873653281097, "grad_norm": 0.08384257861940272, "learning_rate": 3.1910103851124185e-06, "loss": 0.4121, "num_tokens": 1524958136.0, "step": 1609 }, { "epoch": 3.1508325171400586, "grad_norm": 0.09376876734480932, "learning_rate": 3.1851022958811495e-06, "loss": 0.416, "num_tokens": 1525942578.0, "step": 1610 }, { "epoch": 3.1527913809990205, "grad_norm": 0.08724936921017105, "learning_rate": 3.1791971237358893e-06, "loss": 0.4165, "num_tokens": 1526879385.0, "step": 1611 }, { "epoch": 3.1547502448579823, "grad_norm": 0.08507190978735882, "learning_rate": 3.173294878168025e-06, "loss": 0.4183, "num_tokens": 1527823497.0, "step": 1612 }, { "epoch": 3.156709108716944, "grad_norm": 0.08579655874793927, "learning_rate": 3.16739556866424e-06, "loss": 0.404, "num_tokens": 1528777994.0, "step": 1613 }, { "epoch": 3.158667972575906, "grad_norm": 0.08500964605985802, "learning_rate": 3.1614992047064947e-06, "loss": 0.413, "num_tokens": 1529718455.0, "step": 1614 }, { "epoch": 3.1606268364348677, "grad_norm": 0.0866587425423897, "learning_rate": 3.15560579577202e-06, "loss": 0.4246, "num_tokens": 1530618167.0, "step": 1615 }, { "epoch": 3.1625857002938296, "grad_norm": 0.0861475105612104, "learning_rate": 3.1497153513332956e-06, "loss": 0.4005, "num_tokens": 1531582422.0, "step": 1616 }, { "epoch": 3.1645445641527914, "grad_norm": 0.08225270518432634, "learning_rate": 3.1438278808580336e-06, "loss": 0.416, "num_tokens": 1532529952.0, "step": 1617 }, { "epoch": 3.166503428011753, "grad_norm": 0.09168438965808968, "learning_rate": 3.1379433938091695e-06, "loss": 0.428, "num_tokens": 1533478518.0, "step": 1618 }, { "epoch": 3.168462291870715, "grad_norm": 0.08600848156463216, "learning_rate": 3.132061899644845e-06, "loss": 0.4062, "num_tokens": 1534431461.0, "step": 1619 }, { "epoch": 3.170421155729677, "grad_norm": 0.0843262406890434, "learning_rate": 3.126183407818384e-06, "loss": 0.417, "num_tokens": 1535382785.0, "step": 1620 }, { "epoch": 3.1723800195886387, "grad_norm": 0.08220724066091184, "learning_rate": 3.1203079277782933e-06, "loss": 0.4228, "num_tokens": 1536337194.0, "step": 1621 }, { "epoch": 3.1743388834476005, "grad_norm": 0.08780076873904627, "learning_rate": 3.114435468968232e-06, "loss": 0.4177, "num_tokens": 1537278965.0, "step": 1622 }, { "epoch": 3.1762977473065623, "grad_norm": 0.09380512161834306, "learning_rate": 3.1085660408270072e-06, "loss": 0.423, "num_tokens": 1538242134.0, "step": 1623 }, { "epoch": 3.178256611165524, "grad_norm": 0.08714251180503718, "learning_rate": 3.1026996527885555e-06, "loss": 0.4224, "num_tokens": 1539190656.0, "step": 1624 }, { "epoch": 3.180215475024486, "grad_norm": 0.09076483906215926, "learning_rate": 3.0968363142819226e-06, "loss": 0.4038, "num_tokens": 1540168234.0, "step": 1625 }, { "epoch": 3.182174338883448, "grad_norm": 0.0853473473221615, "learning_rate": 3.090976034731257e-06, "loss": 0.415, "num_tokens": 1541115357.0, "step": 1626 }, { "epoch": 3.1841332027424096, "grad_norm": 0.08322543501652875, "learning_rate": 3.0851188235557892e-06, "loss": 0.4213, "num_tokens": 1542091792.0, "step": 1627 }, { "epoch": 3.186092066601371, "grad_norm": 0.08464197460586767, "learning_rate": 3.0792646901698165e-06, "loss": 0.4249, "num_tokens": 1543035468.0, "step": 1628 }, { "epoch": 3.188050930460333, "grad_norm": 0.08382929766448245, "learning_rate": 3.0734136439826913e-06, "loss": 0.3976, "num_tokens": 1543979369.0, "step": 1629 }, { "epoch": 3.1900097943192947, "grad_norm": 0.08628998152935578, "learning_rate": 3.0675656943988046e-06, "loss": 0.4146, "num_tokens": 1544914493.0, "step": 1630 }, { "epoch": 3.1919686581782565, "grad_norm": 0.0875463347687337, "learning_rate": 3.0617208508175662e-06, "loss": 0.4117, "num_tokens": 1545843104.0, "step": 1631 }, { "epoch": 3.1939275220372183, "grad_norm": 0.08789314380866718, "learning_rate": 3.0558791226333974e-06, "loss": 0.4157, "num_tokens": 1546744401.0, "step": 1632 }, { "epoch": 3.19588638589618, "grad_norm": 0.08655557771658778, "learning_rate": 3.050040519235711e-06, "loss": 0.4084, "num_tokens": 1547674396.0, "step": 1633 }, { "epoch": 3.197845249755142, "grad_norm": 0.08735170082717181, "learning_rate": 3.044205050008897e-06, "loss": 0.4233, "num_tokens": 1548641233.0, "step": 1634 }, { "epoch": 3.1998041136141038, "grad_norm": 0.08637476375081056, "learning_rate": 3.038372724332308e-06, "loss": 0.4184, "num_tokens": 1549614441.0, "step": 1635 }, { "epoch": 3.2017629774730656, "grad_norm": 0.0810595463890678, "learning_rate": 3.032543551580245e-06, "loss": 0.4113, "num_tokens": 1550594878.0, "step": 1636 }, { "epoch": 3.2037218413320274, "grad_norm": 0.08498515772322175, "learning_rate": 3.0267175411219386e-06, "loss": 0.4228, "num_tokens": 1551538367.0, "step": 1637 }, { "epoch": 3.2056807051909892, "grad_norm": 0.08391917564635133, "learning_rate": 3.020894702321539e-06, "loss": 0.4148, "num_tokens": 1552526924.0, "step": 1638 }, { "epoch": 3.207639569049951, "grad_norm": 0.08624378179678088, "learning_rate": 3.0150750445380995e-06, "loss": 0.419, "num_tokens": 1553490942.0, "step": 1639 }, { "epoch": 3.209598432908913, "grad_norm": 0.08507409916984543, "learning_rate": 3.0092585771255567e-06, "loss": 0.411, "num_tokens": 1554417219.0, "step": 1640 }, { "epoch": 3.2115572967678747, "grad_norm": 0.0921606250869294, "learning_rate": 3.003445309432723e-06, "loss": 0.4153, "num_tokens": 1555337275.0, "step": 1641 }, { "epoch": 3.2135161606268365, "grad_norm": 0.08743945172958979, "learning_rate": 2.997635250803268e-06, "loss": 0.4189, "num_tokens": 1556308265.0, "step": 1642 }, { "epoch": 3.2154750244857984, "grad_norm": 0.08537878053467987, "learning_rate": 2.991828410575698e-06, "loss": 0.4171, "num_tokens": 1557238231.0, "step": 1643 }, { "epoch": 3.21743388834476, "grad_norm": 0.08723290879714155, "learning_rate": 2.9860247980833534e-06, "loss": 0.4122, "num_tokens": 1558165499.0, "step": 1644 }, { "epoch": 3.219392752203722, "grad_norm": 0.08372142734691139, "learning_rate": 2.980224422654382e-06, "loss": 0.402, "num_tokens": 1559110342.0, "step": 1645 }, { "epoch": 3.221351616062684, "grad_norm": 0.08551788909129061, "learning_rate": 2.9744272936117323e-06, "loss": 0.413, "num_tokens": 1560057661.0, "step": 1646 }, { "epoch": 3.223310479921645, "grad_norm": 0.08613788402152842, "learning_rate": 2.9686334202731325e-06, "loss": 0.4307, "num_tokens": 1560992014.0, "step": 1647 }, { "epoch": 3.2252693437806075, "grad_norm": 0.08584891156901349, "learning_rate": 2.9628428119510742e-06, "loss": 0.4006, "num_tokens": 1561930651.0, "step": 1648 }, { "epoch": 3.227228207639569, "grad_norm": 0.084163946886269, "learning_rate": 2.9570554779528094e-06, "loss": 0.4209, "num_tokens": 1562892101.0, "step": 1649 }, { "epoch": 3.2291870714985307, "grad_norm": 0.08666807475053581, "learning_rate": 2.951271427580321e-06, "loss": 0.4087, "num_tokens": 1563818257.0, "step": 1650 }, { "epoch": 3.2311459353574925, "grad_norm": 0.08238976907237429, "learning_rate": 2.9454906701303167e-06, "loss": 0.4058, "num_tokens": 1564769030.0, "step": 1651 }, { "epoch": 3.2331047992164543, "grad_norm": 0.08506864633911439, "learning_rate": 2.9397132148942104e-06, "loss": 0.4039, "num_tokens": 1565715324.0, "step": 1652 }, { "epoch": 3.235063663075416, "grad_norm": 0.08222389882512811, "learning_rate": 2.9339390711581105e-06, "loss": 0.4202, "num_tokens": 1566677591.0, "step": 1653 }, { "epoch": 3.237022526934378, "grad_norm": 0.08610362353178959, "learning_rate": 2.928168248202799e-06, "loss": 0.4081, "num_tokens": 1567616036.0, "step": 1654 }, { "epoch": 3.23898139079334, "grad_norm": 0.0882402206039739, "learning_rate": 2.9224007553037236e-06, "loss": 0.3962, "num_tokens": 1568558614.0, "step": 1655 }, { "epoch": 3.2409402546523016, "grad_norm": 0.08387583936887746, "learning_rate": 2.91663660173098e-06, "loss": 0.4095, "num_tokens": 1569507348.0, "step": 1656 }, { "epoch": 3.2428991185112634, "grad_norm": 0.08395318958639249, "learning_rate": 2.9108757967492907e-06, "loss": 0.4125, "num_tokens": 1570467818.0, "step": 1657 }, { "epoch": 3.2448579823702253, "grad_norm": 0.08448289400753382, "learning_rate": 2.905118349618007e-06, "loss": 0.4128, "num_tokens": 1571390817.0, "step": 1658 }, { "epoch": 3.246816846229187, "grad_norm": 0.08247992022481007, "learning_rate": 2.899364269591072e-06, "loss": 0.4162, "num_tokens": 1572356564.0, "step": 1659 }, { "epoch": 3.248775710088149, "grad_norm": 0.09887974786405178, "learning_rate": 2.8936135659170217e-06, "loss": 0.4206, "num_tokens": 1573328765.0, "step": 1660 }, { "epoch": 3.2507345739471107, "grad_norm": 0.08430893870506403, "learning_rate": 2.8878662478389675e-06, "loss": 0.4043, "num_tokens": 1574270584.0, "step": 1661 }, { "epoch": 3.2526934378060726, "grad_norm": 0.08518091959222462, "learning_rate": 2.882122324594575e-06, "loss": 0.4209, "num_tokens": 1575234867.0, "step": 1662 }, { "epoch": 3.2546523016650344, "grad_norm": 0.08476071734482449, "learning_rate": 2.8763818054160538e-06, "loss": 0.4241, "num_tokens": 1576187296.0, "step": 1663 }, { "epoch": 3.256611165523996, "grad_norm": 0.08657136311691385, "learning_rate": 2.870644699530146e-06, "loss": 0.4269, "num_tokens": 1577180261.0, "step": 1664 }, { "epoch": 3.258570029382958, "grad_norm": 0.09207416692508284, "learning_rate": 2.8649110161581047e-06, "loss": 0.4084, "num_tokens": 1578094376.0, "step": 1665 }, { "epoch": 3.26052889324192, "grad_norm": 0.08785370437827834, "learning_rate": 2.859180764515678e-06, "loss": 0.4239, "num_tokens": 1579024168.0, "step": 1666 }, { "epoch": 3.2624877571008817, "grad_norm": 0.0852265339374843, "learning_rate": 2.853453953813108e-06, "loss": 0.4171, "num_tokens": 1579965477.0, "step": 1667 }, { "epoch": 3.264446620959843, "grad_norm": 0.0871308803527743, "learning_rate": 2.847730593255097e-06, "loss": 0.4339, "num_tokens": 1580915159.0, "step": 1668 }, { "epoch": 3.2664054848188053, "grad_norm": 0.08373682216483416, "learning_rate": 2.842010692040805e-06, "loss": 0.4171, "num_tokens": 1581839703.0, "step": 1669 }, { "epoch": 3.2683643486777667, "grad_norm": 0.08507930380153848, "learning_rate": 2.8362942593638344e-06, "loss": 0.4177, "num_tokens": 1582754801.0, "step": 1670 }, { "epoch": 3.2703232125367285, "grad_norm": 0.08766349076289924, "learning_rate": 2.83058130441221e-06, "loss": 0.4196, "num_tokens": 1583723619.0, "step": 1671 }, { "epoch": 3.2722820763956904, "grad_norm": 0.08387823426246532, "learning_rate": 2.824871836368364e-06, "loss": 0.4225, "num_tokens": 1584702117.0, "step": 1672 }, { "epoch": 3.274240940254652, "grad_norm": 0.08405211840878263, "learning_rate": 2.819165864409134e-06, "loss": 0.4155, "num_tokens": 1585632848.0, "step": 1673 }, { "epoch": 3.276199804113614, "grad_norm": 0.08611604714213371, "learning_rate": 2.8134633977057236e-06, "loss": 0.4352, "num_tokens": 1586592050.0, "step": 1674 }, { "epoch": 3.278158667972576, "grad_norm": 0.08485664401177756, "learning_rate": 2.8077644454237165e-06, "loss": 0.4179, "num_tokens": 1587529986.0, "step": 1675 }, { "epoch": 3.2801175318315376, "grad_norm": 0.08895927311901823, "learning_rate": 2.8020690167230384e-06, "loss": 0.4235, "num_tokens": 1588475204.0, "step": 1676 }, { "epoch": 3.2820763956904995, "grad_norm": 0.08252937131680212, "learning_rate": 2.7963771207579543e-06, "loss": 0.4155, "num_tokens": 1589437322.0, "step": 1677 }, { "epoch": 3.2840352595494613, "grad_norm": 0.08561585560174582, "learning_rate": 2.7906887666770554e-06, "loss": 0.4209, "num_tokens": 1590379994.0, "step": 1678 }, { "epoch": 3.285994123408423, "grad_norm": 0.08471052529676973, "learning_rate": 2.785003963623233e-06, "loss": 0.4077, "num_tokens": 1591316557.0, "step": 1679 }, { "epoch": 3.287952987267385, "grad_norm": 0.0837155669110085, "learning_rate": 2.7793227207336733e-06, "loss": 0.4112, "num_tokens": 1592285008.0, "step": 1680 }, { "epoch": 3.2899118511263468, "grad_norm": 0.08396722946953317, "learning_rate": 2.7736450471398435e-06, "loss": 0.4083, "num_tokens": 1593231866.0, "step": 1681 }, { "epoch": 3.2918707149853086, "grad_norm": 0.08267500220429273, "learning_rate": 2.7679709519674715e-06, "loss": 0.4092, "num_tokens": 1594187614.0, "step": 1682 }, { "epoch": 3.2938295788442704, "grad_norm": 0.08835444863536938, "learning_rate": 2.762300444336529e-06, "loss": 0.4347, "num_tokens": 1595121078.0, "step": 1683 }, { "epoch": 3.2957884427032322, "grad_norm": 0.08270484625014247, "learning_rate": 2.7566335333612303e-06, "loss": 0.4093, "num_tokens": 1596066499.0, "step": 1684 }, { "epoch": 3.297747306562194, "grad_norm": 0.08730471579599326, "learning_rate": 2.7509702281500028e-06, "loss": 0.4409, "num_tokens": 1597027924.0, "step": 1685 }, { "epoch": 3.299706170421156, "grad_norm": 0.08451779069624428, "learning_rate": 2.745310537805479e-06, "loss": 0.4092, "num_tokens": 1597997298.0, "step": 1686 }, { "epoch": 3.3016650342801177, "grad_norm": 0.08395958861025746, "learning_rate": 2.7396544714244837e-06, "loss": 0.4207, "num_tokens": 1598925723.0, "step": 1687 }, { "epoch": 3.3036238981390795, "grad_norm": 0.093120802790401, "learning_rate": 2.734002038098015e-06, "loss": 0.4162, "num_tokens": 1599886896.0, "step": 1688 }, { "epoch": 3.305582761998041, "grad_norm": 0.08465686828727743, "learning_rate": 2.7283532469112296e-06, "loss": 0.4085, "num_tokens": 1600897103.0, "step": 1689 }, { "epoch": 3.307541625857003, "grad_norm": 0.08666605127840357, "learning_rate": 2.7227081069434356e-06, "loss": 0.4164, "num_tokens": 1601870206.0, "step": 1690 }, { "epoch": 3.3095004897159646, "grad_norm": 0.08503362608005334, "learning_rate": 2.717066627268069e-06, "loss": 0.4161, "num_tokens": 1602797175.0, "step": 1691 }, { "epoch": 3.3114593535749264, "grad_norm": 0.08925765101820528, "learning_rate": 2.711428816952679e-06, "loss": 0.4113, "num_tokens": 1603727650.0, "step": 1692 }, { "epoch": 3.313418217433888, "grad_norm": 0.08134885457766461, "learning_rate": 2.705794685058927e-06, "loss": 0.4029, "num_tokens": 1604692227.0, "step": 1693 }, { "epoch": 3.31537708129285, "grad_norm": 0.08399004334986025, "learning_rate": 2.7001642406425523e-06, "loss": 0.4128, "num_tokens": 1605650385.0, "step": 1694 }, { "epoch": 3.317335945151812, "grad_norm": 0.08948639907925239, "learning_rate": 2.69453749275337e-06, "loss": 0.4103, "num_tokens": 1606585046.0, "step": 1695 }, { "epoch": 3.3192948090107737, "grad_norm": 0.08556440329567387, "learning_rate": 2.6889144504352592e-06, "loss": 0.4225, "num_tokens": 1607526201.0, "step": 1696 }, { "epoch": 3.3212536728697355, "grad_norm": 0.08999851006315714, "learning_rate": 2.683295122726136e-06, "loss": 0.4155, "num_tokens": 1608475502.0, "step": 1697 }, { "epoch": 3.3232125367286973, "grad_norm": 0.09137548845171062, "learning_rate": 2.6776795186579466e-06, "loss": 0.4229, "num_tokens": 1609438064.0, "step": 1698 }, { "epoch": 3.325171400587659, "grad_norm": 0.08318486747108624, "learning_rate": 2.6720676472566593e-06, "loss": 0.4069, "num_tokens": 1610394954.0, "step": 1699 }, { "epoch": 3.327130264446621, "grad_norm": 0.08257993944770921, "learning_rate": 2.6664595175422347e-06, "loss": 0.4225, "num_tokens": 1611363403.0, "step": 1700 }, { "epoch": 3.329089128305583, "grad_norm": 0.08548244142024326, "learning_rate": 2.6608551385286234e-06, "loss": 0.4245, "num_tokens": 1612312837.0, "step": 1701 }, { "epoch": 3.3310479921645446, "grad_norm": 0.08400158509048183, "learning_rate": 2.655254519223746e-06, "loss": 0.415, "num_tokens": 1613268041.0, "step": 1702 }, { "epoch": 3.3330068560235064, "grad_norm": 0.0869609313137376, "learning_rate": 2.6496576686294793e-06, "loss": 0.4091, "num_tokens": 1614232079.0, "step": 1703 }, { "epoch": 3.3349657198824683, "grad_norm": 0.08581379027992089, "learning_rate": 2.6440645957416483e-06, "loss": 0.4181, "num_tokens": 1615131466.0, "step": 1704 }, { "epoch": 3.33692458374143, "grad_norm": 0.08580102241383081, "learning_rate": 2.638475309549999e-06, "loss": 0.4153, "num_tokens": 1616107363.0, "step": 1705 }, { "epoch": 3.338883447600392, "grad_norm": 0.08371222806481414, "learning_rate": 2.6328898190381934e-06, "loss": 0.4162, "num_tokens": 1617034866.0, "step": 1706 }, { "epoch": 3.3408423114593537, "grad_norm": 0.08969662627288895, "learning_rate": 2.6273081331837956e-06, "loss": 0.4084, "num_tokens": 1617983797.0, "step": 1707 }, { "epoch": 3.3428011753183156, "grad_norm": 0.08618927633672457, "learning_rate": 2.6217302609582512e-06, "loss": 0.4187, "num_tokens": 1618963558.0, "step": 1708 }, { "epoch": 3.3447600391772774, "grad_norm": 0.08731475338949538, "learning_rate": 2.616156211326875e-06, "loss": 0.4184, "num_tokens": 1619928523.0, "step": 1709 }, { "epoch": 3.3467189030362388, "grad_norm": 0.08528902861499998, "learning_rate": 2.610585993248843e-06, "loss": 0.4256, "num_tokens": 1620886650.0, "step": 1710 }, { "epoch": 3.348677766895201, "grad_norm": 0.08460023583505598, "learning_rate": 2.605019615677169e-06, "loss": 0.4083, "num_tokens": 1621808402.0, "step": 1711 }, { "epoch": 3.3506366307541624, "grad_norm": 0.08335951649806546, "learning_rate": 2.599457087558691e-06, "loss": 0.4041, "num_tokens": 1622768658.0, "step": 1712 }, { "epoch": 3.3525954946131242, "grad_norm": 0.0847151441908554, "learning_rate": 2.5938984178340686e-06, "loss": 0.406, "num_tokens": 1623691927.0, "step": 1713 }, { "epoch": 3.354554358472086, "grad_norm": 0.08641642780825894, "learning_rate": 2.588343615437752e-06, "loss": 0.4106, "num_tokens": 1624642137.0, "step": 1714 }, { "epoch": 3.356513222331048, "grad_norm": 0.08299067286267417, "learning_rate": 2.582792689297975e-06, "loss": 0.4235, "num_tokens": 1625588285.0, "step": 1715 }, { "epoch": 3.3584720861900097, "grad_norm": 0.08682643569946621, "learning_rate": 2.57724564833675e-06, "loss": 0.4127, "num_tokens": 1626542713.0, "step": 1716 }, { "epoch": 3.3604309500489715, "grad_norm": 0.08258307177948966, "learning_rate": 2.5717025014698347e-06, "loss": 0.4092, "num_tokens": 1627518098.0, "step": 1717 }, { "epoch": 3.3623898139079333, "grad_norm": 0.08623905493619788, "learning_rate": 2.5661632576067315e-06, "loss": 0.4152, "num_tokens": 1628468117.0, "step": 1718 }, { "epoch": 3.364348677766895, "grad_norm": 0.08295413915630635, "learning_rate": 2.5606279256506726e-06, "loss": 0.4286, "num_tokens": 1629417735.0, "step": 1719 }, { "epoch": 3.366307541625857, "grad_norm": 0.08725054043735624, "learning_rate": 2.5550965144985993e-06, "loss": 0.4082, "num_tokens": 1630334417.0, "step": 1720 }, { "epoch": 3.368266405484819, "grad_norm": 0.08490228890645754, "learning_rate": 2.5495690330411488e-06, "loss": 0.4119, "num_tokens": 1631291605.0, "step": 1721 }, { "epoch": 3.3702252693437806, "grad_norm": 0.08891319542355251, "learning_rate": 2.5440454901626487e-06, "loss": 0.4205, "num_tokens": 1632218898.0, "step": 1722 }, { "epoch": 3.3721841332027425, "grad_norm": 0.08969720499224697, "learning_rate": 2.5385258947410908e-06, "loss": 0.4218, "num_tokens": 1633173091.0, "step": 1723 }, { "epoch": 3.3741429970617043, "grad_norm": 0.088167415198806, "learning_rate": 2.5330102556481206e-06, "loss": 0.4174, "num_tokens": 1634100421.0, "step": 1724 }, { "epoch": 3.376101860920666, "grad_norm": 0.08340958418579227, "learning_rate": 2.5274985817490327e-06, "loss": 0.4183, "num_tokens": 1635065644.0, "step": 1725 }, { "epoch": 3.378060724779628, "grad_norm": 0.09041616739164081, "learning_rate": 2.5219908819027415e-06, "loss": 0.4315, "num_tokens": 1636000356.0, "step": 1726 }, { "epoch": 3.3800195886385898, "grad_norm": 0.08300502331043028, "learning_rate": 2.516487164961775e-06, "loss": 0.4051, "num_tokens": 1636952582.0, "step": 1727 }, { "epoch": 3.3819784524975516, "grad_norm": 0.08426560137119644, "learning_rate": 2.510987439772261e-06, "loss": 0.4105, "num_tokens": 1637916269.0, "step": 1728 }, { "epoch": 3.3839373163565134, "grad_norm": 0.08522213583301294, "learning_rate": 2.505491715173908e-06, "loss": 0.4174, "num_tokens": 1638871893.0, "step": 1729 }, { "epoch": 3.3858961802154752, "grad_norm": 0.08801999823760176, "learning_rate": 2.5000000000000015e-06, "loss": 0.4127, "num_tokens": 1639846947.0, "step": 1730 }, { "epoch": 3.3878550440744366, "grad_norm": 0.08574862503642781, "learning_rate": 2.4945123030773755e-06, "loss": 0.404, "num_tokens": 1640807461.0, "step": 1731 }, { "epoch": 3.389813907933399, "grad_norm": 0.08277495860589412, "learning_rate": 2.489028633226406e-06, "loss": 0.4186, "num_tokens": 1641778118.0, "step": 1732 }, { "epoch": 3.3917727717923603, "grad_norm": 0.08805180798992134, "learning_rate": 2.4835489992610035e-06, "loss": 0.4125, "num_tokens": 1642692304.0, "step": 1733 }, { "epoch": 3.393731635651322, "grad_norm": 0.08482598218207796, "learning_rate": 2.478073409988583e-06, "loss": 0.406, "num_tokens": 1643641294.0, "step": 1734 }, { "epoch": 3.395690499510284, "grad_norm": 0.08397140001054886, "learning_rate": 2.4726018742100617e-06, "loss": 0.4282, "num_tokens": 1644637554.0, "step": 1735 }, { "epoch": 3.3976493633692457, "grad_norm": 0.08735993915989847, "learning_rate": 2.467134400719844e-06, "loss": 0.4122, "num_tokens": 1645572744.0, "step": 1736 }, { "epoch": 3.3996082272282075, "grad_norm": 0.08421742427245288, "learning_rate": 2.461670998305802e-06, "loss": 0.409, "num_tokens": 1646522141.0, "step": 1737 }, { "epoch": 3.4015670910871694, "grad_norm": 0.08333993516563225, "learning_rate": 2.4562116757492625e-06, "loss": 0.4205, "num_tokens": 1647474167.0, "step": 1738 }, { "epoch": 3.403525954946131, "grad_norm": 0.08824166242437516, "learning_rate": 2.4507564418250022e-06, "loss": 0.4139, "num_tokens": 1648409454.0, "step": 1739 }, { "epoch": 3.405484818805093, "grad_norm": 0.08423873827597493, "learning_rate": 2.4453053053012187e-06, "loss": 0.4078, "num_tokens": 1649366647.0, "step": 1740 }, { "epoch": 3.407443682664055, "grad_norm": 0.08439380435889429, "learning_rate": 2.439858274939525e-06, "loss": 0.4098, "num_tokens": 1650335154.0, "step": 1741 }, { "epoch": 3.4094025465230167, "grad_norm": 0.08659772005961898, "learning_rate": 2.43441535949494e-06, "loss": 0.4186, "num_tokens": 1651290121.0, "step": 1742 }, { "epoch": 3.4113614103819785, "grad_norm": 0.08613026733417209, "learning_rate": 2.4289765677158616e-06, "loss": 0.4192, "num_tokens": 1652203049.0, "step": 1743 }, { "epoch": 3.4133202742409403, "grad_norm": 0.08919702377634635, "learning_rate": 2.4235419083440615e-06, "loss": 0.4178, "num_tokens": 1653161862.0, "step": 1744 }, { "epoch": 3.415279138099902, "grad_norm": 0.08359942286714173, "learning_rate": 2.4181113901146737e-06, "loss": 0.4081, "num_tokens": 1654102435.0, "step": 1745 }, { "epoch": 3.417238001958864, "grad_norm": 0.08656263278449895, "learning_rate": 2.41268502175617e-06, "loss": 0.4124, "num_tokens": 1655048320.0, "step": 1746 }, { "epoch": 3.419196865817826, "grad_norm": 0.08764171546401789, "learning_rate": 2.4072628119903574e-06, "loss": 0.4097, "num_tokens": 1655975517.0, "step": 1747 }, { "epoch": 3.4211557296767876, "grad_norm": 0.08564032809521967, "learning_rate": 2.401844769532356e-06, "loss": 0.3934, "num_tokens": 1656923442.0, "step": 1748 }, { "epoch": 3.4231145935357494, "grad_norm": 0.08613560589077948, "learning_rate": 2.396430903090586e-06, "loss": 0.4071, "num_tokens": 1657886144.0, "step": 1749 }, { "epoch": 3.4250734573947113, "grad_norm": 0.08215056845240856, "learning_rate": 2.3910212213667605e-06, "loss": 0.4204, "num_tokens": 1658819601.0, "step": 1750 }, { "epoch": 3.427032321253673, "grad_norm": 0.08838797102360238, "learning_rate": 2.3856157330558625e-06, "loss": 0.4225, "num_tokens": 1659756549.0, "step": 1751 }, { "epoch": 3.4289911851126345, "grad_norm": 0.08737452904307148, "learning_rate": 2.380214446846137e-06, "loss": 0.4312, "num_tokens": 1660735494.0, "step": 1752 }, { "epoch": 3.4309500489715967, "grad_norm": 0.08352062693110998, "learning_rate": 2.3748173714190726e-06, "loss": 0.4055, "num_tokens": 1661703052.0, "step": 1753 }, { "epoch": 3.432908912830558, "grad_norm": 0.08615588980744507, "learning_rate": 2.3694245154493927e-06, "loss": 0.4248, "num_tokens": 1662666836.0, "step": 1754 }, { "epoch": 3.43486777668952, "grad_norm": 0.08368970100744642, "learning_rate": 2.3640358876050344e-06, "loss": 0.3958, "num_tokens": 1663611928.0, "step": 1755 }, { "epoch": 3.4368266405484817, "grad_norm": 0.08264730673412893, "learning_rate": 2.3586514965471462e-06, "loss": 0.4053, "num_tokens": 1664537902.0, "step": 1756 }, { "epoch": 3.4387855044074436, "grad_norm": 0.08571500674580414, "learning_rate": 2.3532713509300613e-06, "loss": 0.419, "num_tokens": 1665468709.0, "step": 1757 }, { "epoch": 3.4407443682664054, "grad_norm": 0.08610498491933342, "learning_rate": 2.3478954594012884e-06, "loss": 0.4087, "num_tokens": 1666395205.0, "step": 1758 }, { "epoch": 3.442703232125367, "grad_norm": 0.08877814131108148, "learning_rate": 2.3425238306015034e-06, "loss": 0.4179, "num_tokens": 1667309425.0, "step": 1759 }, { "epoch": 3.444662095984329, "grad_norm": 0.08596822518274606, "learning_rate": 2.337156473164526e-06, "loss": 0.4061, "num_tokens": 1668244302.0, "step": 1760 }, { "epoch": 3.446620959843291, "grad_norm": 0.08492643582989387, "learning_rate": 2.3317933957173146e-06, "loss": 0.4098, "num_tokens": 1669211039.0, "step": 1761 }, { "epoch": 3.4485798237022527, "grad_norm": 0.08455790693486658, "learning_rate": 2.3264346068799455e-06, "loss": 0.428, "num_tokens": 1670166151.0, "step": 1762 }, { "epoch": 3.4505386875612145, "grad_norm": 0.08526852184816038, "learning_rate": 2.321080115265601e-06, "loss": 0.4201, "num_tokens": 1671118037.0, "step": 1763 }, { "epoch": 3.4524975514201763, "grad_norm": 0.08569161667306968, "learning_rate": 2.3157299294805613e-06, "loss": 0.4136, "num_tokens": 1672063474.0, "step": 1764 }, { "epoch": 3.454456415279138, "grad_norm": 0.08450316742982002, "learning_rate": 2.310384058124181e-06, "loss": 0.4173, "num_tokens": 1673036663.0, "step": 1765 }, { "epoch": 3.4564152791381, "grad_norm": 0.0856099935556782, "learning_rate": 2.3050425097888802e-06, "loss": 0.4173, "num_tokens": 1673968432.0, "step": 1766 }, { "epoch": 3.458374142997062, "grad_norm": 0.08462805783740393, "learning_rate": 2.299705293060136e-06, "loss": 0.4144, "num_tokens": 1674934091.0, "step": 1767 }, { "epoch": 3.4603330068560236, "grad_norm": 0.0845664978809549, "learning_rate": 2.2943724165164583e-06, "loss": 0.4144, "num_tokens": 1675887098.0, "step": 1768 }, { "epoch": 3.4622918707149855, "grad_norm": 0.08952609770725463, "learning_rate": 2.289043888729379e-06, "loss": 0.4056, "num_tokens": 1676869567.0, "step": 1769 }, { "epoch": 3.4642507345739473, "grad_norm": 0.08254427905437188, "learning_rate": 2.2837197182634484e-06, "loss": 0.4281, "num_tokens": 1677838703.0, "step": 1770 }, { "epoch": 3.466209598432909, "grad_norm": 0.09066133696819692, "learning_rate": 2.2783999136762064e-06, "loss": 0.411, "num_tokens": 1678757313.0, "step": 1771 }, { "epoch": 3.468168462291871, "grad_norm": 0.08416745354032049, "learning_rate": 2.273084483518176e-06, "loss": 0.4241, "num_tokens": 1679689579.0, "step": 1772 }, { "epoch": 3.4701273261508323, "grad_norm": 0.09237198051598337, "learning_rate": 2.2677734363328534e-06, "loss": 0.4134, "num_tokens": 1680648641.0, "step": 1773 }, { "epoch": 3.472086190009794, "grad_norm": 0.08633902308442941, "learning_rate": 2.262466780656687e-06, "loss": 0.4256, "num_tokens": 1681548281.0, "step": 1774 }, { "epoch": 3.474045053868756, "grad_norm": 0.08747874225822623, "learning_rate": 2.2571645250190643e-06, "loss": 0.4254, "num_tokens": 1682526137.0, "step": 1775 }, { "epoch": 3.4760039177277178, "grad_norm": 0.0838620168697465, "learning_rate": 2.2518666779423078e-06, "loss": 0.411, "num_tokens": 1683479152.0, "step": 1776 }, { "epoch": 3.4779627815866796, "grad_norm": 0.08879126737134915, "learning_rate": 2.2465732479416476e-06, "loss": 0.4053, "num_tokens": 1684424051.0, "step": 1777 }, { "epoch": 3.4799216454456414, "grad_norm": 0.08211408450445824, "learning_rate": 2.241284243525215e-06, "loss": 0.4131, "num_tokens": 1685334537.0, "step": 1778 }, { "epoch": 3.4818805093046032, "grad_norm": 0.09247793438793202, "learning_rate": 2.2359996731940348e-06, "loss": 0.4113, "num_tokens": 1686285600.0, "step": 1779 }, { "epoch": 3.483839373163565, "grad_norm": 0.08267329357229923, "learning_rate": 2.230719545441992e-06, "loss": 0.4134, "num_tokens": 1687219733.0, "step": 1780 }, { "epoch": 3.485798237022527, "grad_norm": 0.08535859600841918, "learning_rate": 2.225443868755845e-06, "loss": 0.4141, "num_tokens": 1688156805.0, "step": 1781 }, { "epoch": 3.4877571008814887, "grad_norm": 0.08737140079566375, "learning_rate": 2.2201726516151885e-06, "loss": 0.4089, "num_tokens": 1689080953.0, "step": 1782 }, { "epoch": 3.4897159647404505, "grad_norm": 0.08292150516674358, "learning_rate": 2.214905902492452e-06, "loss": 0.4135, "num_tokens": 1690064045.0, "step": 1783 }, { "epoch": 3.4916748285994124, "grad_norm": 0.09017231440685045, "learning_rate": 2.2096436298528865e-06, "loss": 0.4166, "num_tokens": 1691013524.0, "step": 1784 }, { "epoch": 3.493633692458374, "grad_norm": 0.08898438261625373, "learning_rate": 2.204385842154545e-06, "loss": 0.4224, "num_tokens": 1691950030.0, "step": 1785 }, { "epoch": 3.495592556317336, "grad_norm": 0.092535999074826, "learning_rate": 2.1991325478482695e-06, "loss": 0.4252, "num_tokens": 1692853484.0, "step": 1786 }, { "epoch": 3.497551420176298, "grad_norm": 0.08892908936281196, "learning_rate": 2.1938837553776872e-06, "loss": 0.4145, "num_tokens": 1693797005.0, "step": 1787 }, { "epoch": 3.4995102840352597, "grad_norm": 0.08770780187605762, "learning_rate": 2.1886394731791814e-06, "loss": 0.4125, "num_tokens": 1694718544.0, "step": 1788 }, { "epoch": 3.5014691478942215, "grad_norm": 0.08925655869184221, "learning_rate": 2.1833997096818897e-06, "loss": 0.4255, "num_tokens": 1695664993.0, "step": 1789 }, { "epoch": 3.5034280117531833, "grad_norm": 0.0825690667279722, "learning_rate": 2.1781644733076897e-06, "loss": 0.4099, "num_tokens": 1696629356.0, "step": 1790 }, { "epoch": 3.505386875612145, "grad_norm": 0.08589096944740693, "learning_rate": 2.172933772471177e-06, "loss": 0.4028, "num_tokens": 1697588346.0, "step": 1791 }, { "epoch": 3.5073457394711065, "grad_norm": 0.087846075951044, "learning_rate": 2.1677076155796583e-06, "loss": 0.418, "num_tokens": 1698542320.0, "step": 1792 }, { "epoch": 3.5093046033300688, "grad_norm": 0.08342957076022849, "learning_rate": 2.162486011033142e-06, "loss": 0.4307, "num_tokens": 1699526083.0, "step": 1793 }, { "epoch": 3.51126346718903, "grad_norm": 0.08745366835845464, "learning_rate": 2.157268967224314e-06, "loss": 0.4268, "num_tokens": 1700468996.0, "step": 1794 }, { "epoch": 3.5132223310479924, "grad_norm": 0.09869232946554855, "learning_rate": 2.1520564925385294e-06, "loss": 0.4213, "num_tokens": 1701421419.0, "step": 1795 }, { "epoch": 3.515181194906954, "grad_norm": 0.08292798367719592, "learning_rate": 2.1468485953538054e-06, "loss": 0.4086, "num_tokens": 1702323652.0, "step": 1796 }, { "epoch": 3.5171400587659156, "grad_norm": 0.08837785187933994, "learning_rate": 2.141645284040796e-06, "loss": 0.4245, "num_tokens": 1703259414.0, "step": 1797 }, { "epoch": 3.5190989226248774, "grad_norm": 0.08503007192272861, "learning_rate": 2.136446566962784e-06, "loss": 0.4231, "num_tokens": 1704203270.0, "step": 1798 }, { "epoch": 3.5210577864838393, "grad_norm": 0.08523179691963496, "learning_rate": 2.1312524524756735e-06, "loss": 0.4128, "num_tokens": 1705165703.0, "step": 1799 }, { "epoch": 3.523016650342801, "grad_norm": 0.08678239735374162, "learning_rate": 2.1260629489279662e-06, "loss": 0.4208, "num_tokens": 1706076773.0, "step": 1800 }, { "epoch": 3.524975514201763, "grad_norm": 0.08584504515243888, "learning_rate": 2.1208780646607517e-06, "loss": 0.4174, "num_tokens": 1707027549.0, "step": 1801 }, { "epoch": 3.5269343780607247, "grad_norm": 0.08735485016531032, "learning_rate": 2.115697808007701e-06, "loss": 0.4231, "num_tokens": 1707936544.0, "step": 1802 }, { "epoch": 3.5288932419196866, "grad_norm": 0.09003271743520458, "learning_rate": 2.1105221872950403e-06, "loss": 0.4226, "num_tokens": 1708874517.0, "step": 1803 }, { "epoch": 3.5308521057786484, "grad_norm": 0.08535192791935681, "learning_rate": 2.1053512108415476e-06, "loss": 0.4063, "num_tokens": 1709823687.0, "step": 1804 }, { "epoch": 3.53281096963761, "grad_norm": 0.0829088964822582, "learning_rate": 2.1001848869585396e-06, "loss": 0.4083, "num_tokens": 1710757139.0, "step": 1805 }, { "epoch": 3.534769833496572, "grad_norm": 0.09425316059515856, "learning_rate": 2.095023223949845e-06, "loss": 0.4138, "num_tokens": 1711714145.0, "step": 1806 }, { "epoch": 3.536728697355534, "grad_norm": 0.08363603545769085, "learning_rate": 2.089866230111813e-06, "loss": 0.4084, "num_tokens": 1712644160.0, "step": 1807 }, { "epoch": 3.5386875612144957, "grad_norm": 0.08797244156608434, "learning_rate": 2.0847139137332795e-06, "loss": 0.4233, "num_tokens": 1713581770.0, "step": 1808 }, { "epoch": 3.5406464250734575, "grad_norm": 0.08429236686683463, "learning_rate": 2.079566283095565e-06, "loss": 0.4052, "num_tokens": 1714528173.0, "step": 1809 }, { "epoch": 3.5426052889324193, "grad_norm": 0.08511651079994069, "learning_rate": 2.074423346472461e-06, "loss": 0.4275, "num_tokens": 1715502773.0, "step": 1810 }, { "epoch": 3.544564152791381, "grad_norm": 0.081576130787359, "learning_rate": 2.0692851121302114e-06, "loss": 0.4255, "num_tokens": 1716463993.0, "step": 1811 }, { "epoch": 3.546523016650343, "grad_norm": 0.08356993766841732, "learning_rate": 2.064151588327501e-06, "loss": 0.4219, "num_tokens": 1717433634.0, "step": 1812 }, { "epoch": 3.5484818805093044, "grad_norm": 0.09094645398636986, "learning_rate": 2.0590227833154485e-06, "loss": 0.4293, "num_tokens": 1718382354.0, "step": 1813 }, { "epoch": 3.5504407443682666, "grad_norm": 0.08261497622365453, "learning_rate": 2.053898705337583e-06, "loss": 0.4125, "num_tokens": 1719353516.0, "step": 1814 }, { "epoch": 3.552399608227228, "grad_norm": 0.08463768612829826, "learning_rate": 2.0487793626298364e-06, "loss": 0.4137, "num_tokens": 1720312136.0, "step": 1815 }, { "epoch": 3.5543584720861903, "grad_norm": 0.08495475667173523, "learning_rate": 2.043664763420534e-06, "loss": 0.41, "num_tokens": 1721287673.0, "step": 1816 }, { "epoch": 3.5563173359451516, "grad_norm": 0.08244986328912285, "learning_rate": 2.0385549159303716e-06, "loss": 0.413, "num_tokens": 1722203009.0, "step": 1817 }, { "epoch": 3.5582761998041135, "grad_norm": 0.08699776816484583, "learning_rate": 2.033449828372408e-06, "loss": 0.4114, "num_tokens": 1723169179.0, "step": 1818 }, { "epoch": 3.5602350636630753, "grad_norm": 0.08655242975299911, "learning_rate": 2.028349508952057e-06, "loss": 0.4149, "num_tokens": 1724074552.0, "step": 1819 }, { "epoch": 3.562193927522037, "grad_norm": 0.08471053153555338, "learning_rate": 2.023253965867063e-06, "loss": 0.4231, "num_tokens": 1725034811.0, "step": 1820 }, { "epoch": 3.564152791380999, "grad_norm": 0.08479434667529531, "learning_rate": 2.0181632073074925e-06, "loss": 0.4239, "num_tokens": 1726000629.0, "step": 1821 }, { "epoch": 3.5661116552399608, "grad_norm": 0.08532687672569757, "learning_rate": 2.013077241455729e-06, "loss": 0.4114, "num_tokens": 1726961708.0, "step": 1822 }, { "epoch": 3.5680705190989226, "grad_norm": 0.0833140131608997, "learning_rate": 2.0079960764864453e-06, "loss": 0.4153, "num_tokens": 1727917436.0, "step": 1823 }, { "epoch": 3.5700293829578844, "grad_norm": 0.08153647040277169, "learning_rate": 2.002919720566599e-06, "loss": 0.4118, "num_tokens": 1728906407.0, "step": 1824 }, { "epoch": 3.5719882468168462, "grad_norm": 0.08754391286573099, "learning_rate": 1.997848181855423e-06, "loss": 0.4165, "num_tokens": 1729857227.0, "step": 1825 }, { "epoch": 3.573947110675808, "grad_norm": 0.08388968292243014, "learning_rate": 1.992781468504402e-06, "loss": 0.4054, "num_tokens": 1730804975.0, "step": 1826 }, { "epoch": 3.57590597453477, "grad_norm": 0.08525964255363742, "learning_rate": 1.9877195886572666e-06, "loss": 0.4117, "num_tokens": 1731781764.0, "step": 1827 }, { "epoch": 3.5778648383937317, "grad_norm": 0.08351103254975643, "learning_rate": 1.9826625504499807e-06, "loss": 0.4321, "num_tokens": 1732770357.0, "step": 1828 }, { "epoch": 3.5798237022526935, "grad_norm": 0.08216601091778715, "learning_rate": 1.9776103620107235e-06, "loss": 0.4188, "num_tokens": 1733702202.0, "step": 1829 }, { "epoch": 3.5817825661116554, "grad_norm": 0.08917446966353505, "learning_rate": 1.972563031459878e-06, "loss": 0.4361, "num_tokens": 1734625040.0, "step": 1830 }, { "epoch": 3.583741429970617, "grad_norm": 0.08677643703346584, "learning_rate": 1.967520566910025e-06, "loss": 0.4193, "num_tokens": 1735583871.0, "step": 1831 }, { "epoch": 3.585700293829579, "grad_norm": 0.08713642480637289, "learning_rate": 1.962482976465918e-06, "loss": 0.4135, "num_tokens": 1736537854.0, "step": 1832 }, { "epoch": 3.587659157688541, "grad_norm": 0.08973041504995834, "learning_rate": 1.9574502682244793e-06, "loss": 0.4196, "num_tokens": 1737479499.0, "step": 1833 }, { "epoch": 3.589618021547502, "grad_norm": 0.08471327244617989, "learning_rate": 1.952422450274784e-06, "loss": 0.4111, "num_tokens": 1738430189.0, "step": 1834 }, { "epoch": 3.5915768854064645, "grad_norm": 0.08432918776696068, "learning_rate": 1.947399530698043e-06, "loss": 0.4072, "num_tokens": 1739410135.0, "step": 1835 }, { "epoch": 3.593535749265426, "grad_norm": 0.08421464213829939, "learning_rate": 1.9423815175676027e-06, "loss": 0.4166, "num_tokens": 1740341982.0, "step": 1836 }, { "epoch": 3.595494613124388, "grad_norm": 0.08687752015803595, "learning_rate": 1.9373684189489156e-06, "loss": 0.4239, "num_tokens": 1741273342.0, "step": 1837 }, { "epoch": 3.5974534769833495, "grad_norm": 0.09046963770564236, "learning_rate": 1.9323602428995358e-06, "loss": 0.4207, "num_tokens": 1742235920.0, "step": 1838 }, { "epoch": 3.5994123408423113, "grad_norm": 0.08623590989277806, "learning_rate": 1.9273569974691107e-06, "loss": 0.427, "num_tokens": 1743203172.0, "step": 1839 }, { "epoch": 3.601371204701273, "grad_norm": 0.08598574944302184, "learning_rate": 1.9223586906993586e-06, "loss": 0.3957, "num_tokens": 1744130229.0, "step": 1840 }, { "epoch": 3.603330068560235, "grad_norm": 0.08426701231885612, "learning_rate": 1.917365330624057e-06, "loss": 0.4152, "num_tokens": 1745108939.0, "step": 1841 }, { "epoch": 3.605288932419197, "grad_norm": 0.08791378402919472, "learning_rate": 1.912376925269041e-06, "loss": 0.4241, "num_tokens": 1746050426.0, "step": 1842 }, { "epoch": 3.6072477962781586, "grad_norm": 0.08707336976847711, "learning_rate": 1.907393482652175e-06, "loss": 0.4153, "num_tokens": 1747061149.0, "step": 1843 }, { "epoch": 3.6092066601371204, "grad_norm": 0.08072513353149141, "learning_rate": 1.9024150107833473e-06, "loss": 0.4046, "num_tokens": 1747983306.0, "step": 1844 }, { "epoch": 3.6111655239960823, "grad_norm": 0.08520789486374623, "learning_rate": 1.8974415176644607e-06, "loss": 0.4198, "num_tokens": 1748924913.0, "step": 1845 }, { "epoch": 3.613124387855044, "grad_norm": 0.08932671544709427, "learning_rate": 1.8924730112894129e-06, "loss": 0.4189, "num_tokens": 1749899433.0, "step": 1846 }, { "epoch": 3.615083251714006, "grad_norm": 0.08653129735499375, "learning_rate": 1.887509499644084e-06, "loss": 0.4166, "num_tokens": 1750830578.0, "step": 1847 }, { "epoch": 3.6170421155729677, "grad_norm": 0.0872920067808658, "learning_rate": 1.8825509907063328e-06, "loss": 0.4284, "num_tokens": 1751755284.0, "step": 1848 }, { "epoch": 3.6190009794319296, "grad_norm": 0.08568505102634366, "learning_rate": 1.8775974924459716e-06, "loss": 0.4106, "num_tokens": 1752721987.0, "step": 1849 }, { "epoch": 3.6209598432908914, "grad_norm": 0.08651794932163524, "learning_rate": 1.872649012824758e-06, "loss": 0.4339, "num_tokens": 1753661497.0, "step": 1850 }, { "epoch": 3.622918707149853, "grad_norm": 0.08521313183514503, "learning_rate": 1.867705559796391e-06, "loss": 0.3973, "num_tokens": 1754612562.0, "step": 1851 }, { "epoch": 3.624877571008815, "grad_norm": 0.0826221539214511, "learning_rate": 1.8627671413064825e-06, "loss": 0.4198, "num_tokens": 1755555673.0, "step": 1852 }, { "epoch": 3.626836434867777, "grad_norm": 0.08246182928242993, "learning_rate": 1.8578337652925527e-06, "loss": 0.4093, "num_tokens": 1756497461.0, "step": 1853 }, { "epoch": 3.6287952987267387, "grad_norm": 0.08469517615317272, "learning_rate": 1.8529054396840234e-06, "loss": 0.4028, "num_tokens": 1757467410.0, "step": 1854 }, { "epoch": 3.6307541625857, "grad_norm": 0.08550398418039426, "learning_rate": 1.847982172402194e-06, "loss": 0.4088, "num_tokens": 1758414079.0, "step": 1855 }, { "epoch": 3.6327130264446623, "grad_norm": 0.08689088625548672, "learning_rate": 1.8430639713602317e-06, "loss": 0.4148, "num_tokens": 1759353247.0, "step": 1856 }, { "epoch": 3.6346718903036237, "grad_norm": 0.08363548236902174, "learning_rate": 1.8381508444631674e-06, "loss": 0.4135, "num_tokens": 1760292625.0, "step": 1857 }, { "epoch": 3.636630754162586, "grad_norm": 0.08520877105783718, "learning_rate": 1.8332427996078705e-06, "loss": 0.4119, "num_tokens": 1761257171.0, "step": 1858 }, { "epoch": 3.6385896180215473, "grad_norm": 0.08438989396775987, "learning_rate": 1.8283398446830442e-06, "loss": 0.4143, "num_tokens": 1762209289.0, "step": 1859 }, { "epoch": 3.640548481880509, "grad_norm": 0.08423480034920917, "learning_rate": 1.8234419875692105e-06, "loss": 0.4182, "num_tokens": 1763123626.0, "step": 1860 }, { "epoch": 3.642507345739471, "grad_norm": 0.084598886311569, "learning_rate": 1.8185492361386958e-06, "loss": 0.4093, "num_tokens": 1764058676.0, "step": 1861 }, { "epoch": 3.644466209598433, "grad_norm": 0.08399823936656207, "learning_rate": 1.8136615982556256e-06, "loss": 0.4009, "num_tokens": 1765020172.0, "step": 1862 }, { "epoch": 3.6464250734573946, "grad_norm": 0.08963877149509505, "learning_rate": 1.808779081775901e-06, "loss": 0.4216, "num_tokens": 1765959369.0, "step": 1863 }, { "epoch": 3.6483839373163565, "grad_norm": 0.08594458469416463, "learning_rate": 1.8039016945471915e-06, "loss": 0.4131, "num_tokens": 1766895719.0, "step": 1864 }, { "epoch": 3.6503428011753183, "grad_norm": 0.08596876907518203, "learning_rate": 1.799029444408928e-06, "loss": 0.4243, "num_tokens": 1767825589.0, "step": 1865 }, { "epoch": 3.65230166503428, "grad_norm": 0.08255312354050273, "learning_rate": 1.7941623391922775e-06, "loss": 0.4012, "num_tokens": 1768765820.0, "step": 1866 }, { "epoch": 3.654260528893242, "grad_norm": 0.08435650099899289, "learning_rate": 1.789300386720141e-06, "loss": 0.4177, "num_tokens": 1769725106.0, "step": 1867 }, { "epoch": 3.6562193927522038, "grad_norm": 0.08343230599318063, "learning_rate": 1.7844435948071387e-06, "loss": 0.4209, "num_tokens": 1770653757.0, "step": 1868 }, { "epoch": 3.6581782566111656, "grad_norm": 0.08452327308623443, "learning_rate": 1.7795919712595943e-06, "loss": 0.4277, "num_tokens": 1771637772.0, "step": 1869 }, { "epoch": 3.6601371204701274, "grad_norm": 0.0841760619426903, "learning_rate": 1.7747455238755223e-06, "loss": 0.4372, "num_tokens": 1772582387.0, "step": 1870 }, { "epoch": 3.6620959843290892, "grad_norm": 0.08574535958399185, "learning_rate": 1.7699042604446243e-06, "loss": 0.41, "num_tokens": 1773544097.0, "step": 1871 }, { "epoch": 3.664054848188051, "grad_norm": 0.08557968582975219, "learning_rate": 1.7650681887482628e-06, "loss": 0.421, "num_tokens": 1774479049.0, "step": 1872 }, { "epoch": 3.666013712047013, "grad_norm": 0.08368340503368406, "learning_rate": 1.7602373165594578e-06, "loss": 0.4137, "num_tokens": 1775401619.0, "step": 1873 }, { "epoch": 3.6679725759059747, "grad_norm": 0.08569041934847592, "learning_rate": 1.7554116516428753e-06, "loss": 0.4128, "num_tokens": 1776373602.0, "step": 1874 }, { "epoch": 3.6699314397649365, "grad_norm": 0.0847805857155678, "learning_rate": 1.7505912017548054e-06, "loss": 0.4331, "num_tokens": 1777351845.0, "step": 1875 }, { "epoch": 3.671890303623898, "grad_norm": 0.0849496557044052, "learning_rate": 1.7457759746431635e-06, "loss": 0.4131, "num_tokens": 1778286086.0, "step": 1876 }, { "epoch": 3.67384916748286, "grad_norm": 0.0859433881590437, "learning_rate": 1.7409659780474652e-06, "loss": 0.4101, "num_tokens": 1779223864.0, "step": 1877 }, { "epoch": 3.6758080313418215, "grad_norm": 0.08534200625136383, "learning_rate": 1.7361612196988177e-06, "loss": 0.4214, "num_tokens": 1780182672.0, "step": 1878 }, { "epoch": 3.677766895200784, "grad_norm": 0.08665887542010337, "learning_rate": 1.7313617073199163e-06, "loss": 0.417, "num_tokens": 1781173761.0, "step": 1879 }, { "epoch": 3.679725759059745, "grad_norm": 0.09091683884933376, "learning_rate": 1.7265674486250167e-06, "loss": 0.4146, "num_tokens": 1782087403.0, "step": 1880 }, { "epoch": 3.681684622918707, "grad_norm": 0.08603234926712564, "learning_rate": 1.7217784513199327e-06, "loss": 0.4231, "num_tokens": 1783062564.0, "step": 1881 }, { "epoch": 3.683643486777669, "grad_norm": 0.08683918881574754, "learning_rate": 1.7169947231020252e-06, "loss": 0.4184, "num_tokens": 1784022301.0, "step": 1882 }, { "epoch": 3.6856023506366307, "grad_norm": 0.08470340031448806, "learning_rate": 1.7122162716601808e-06, "loss": 0.4156, "num_tokens": 1784929460.0, "step": 1883 }, { "epoch": 3.6875612144955925, "grad_norm": 0.08464888603138751, "learning_rate": 1.7074431046748075e-06, "loss": 0.4234, "num_tokens": 1785858673.0, "step": 1884 }, { "epoch": 3.6895200783545543, "grad_norm": 0.09102148220171614, "learning_rate": 1.702675229817819e-06, "loss": 0.4066, "num_tokens": 1786793243.0, "step": 1885 }, { "epoch": 3.691478942213516, "grad_norm": 0.08619754730701117, "learning_rate": 1.6979126547526231e-06, "loss": 0.4183, "num_tokens": 1787739701.0, "step": 1886 }, { "epoch": 3.693437806072478, "grad_norm": 0.0834667381778445, "learning_rate": 1.6931553871341071e-06, "loss": 0.4213, "num_tokens": 1788684718.0, "step": 1887 }, { "epoch": 3.69539666993144, "grad_norm": 0.08408279900173775, "learning_rate": 1.6884034346086332e-06, "loss": 0.4225, "num_tokens": 1789637707.0, "step": 1888 }, { "epoch": 3.6973555337904016, "grad_norm": 0.08608790200130456, "learning_rate": 1.683656804814014e-06, "loss": 0.4242, "num_tokens": 1790633967.0, "step": 1889 }, { "epoch": 3.6993143976493634, "grad_norm": 0.0879383245546899, "learning_rate": 1.678915505379513e-06, "loss": 0.4284, "num_tokens": 1791628570.0, "step": 1890 }, { "epoch": 3.7012732615083253, "grad_norm": 0.0859372868559316, "learning_rate": 1.6741795439258218e-06, "loss": 0.4057, "num_tokens": 1792559909.0, "step": 1891 }, { "epoch": 3.703232125367287, "grad_norm": 0.08537582725539315, "learning_rate": 1.6694489280650516e-06, "loss": 0.4165, "num_tokens": 1793498999.0, "step": 1892 }, { "epoch": 3.705190989226249, "grad_norm": 0.08325153911763906, "learning_rate": 1.6647236654007276e-06, "loss": 0.4157, "num_tokens": 1794425533.0, "step": 1893 }, { "epoch": 3.7071498530852107, "grad_norm": 0.08411595431604199, "learning_rate": 1.660003763527765e-06, "loss": 0.4125, "num_tokens": 1795386101.0, "step": 1894 }, { "epoch": 3.7091087169441725, "grad_norm": 0.08360335887700575, "learning_rate": 1.6552892300324625e-06, "loss": 0.4143, "num_tokens": 1796324039.0, "step": 1895 }, { "epoch": 3.7110675808031344, "grad_norm": 0.08355029162210738, "learning_rate": 1.650580072492496e-06, "loss": 0.4267, "num_tokens": 1797278534.0, "step": 1896 }, { "epoch": 3.7130264446620957, "grad_norm": 0.08461827441161335, "learning_rate": 1.645876298476895e-06, "loss": 0.4347, "num_tokens": 1798197706.0, "step": 1897 }, { "epoch": 3.714985308521058, "grad_norm": 0.08811499346200773, "learning_rate": 1.641177915546036e-06, "loss": 0.3977, "num_tokens": 1799139831.0, "step": 1898 }, { "epoch": 3.7169441723800194, "grad_norm": 0.08492856828316475, "learning_rate": 1.6364849312516357e-06, "loss": 0.4243, "num_tokens": 1800099569.0, "step": 1899 }, { "epoch": 3.7189030362389817, "grad_norm": 0.08481567469199112, "learning_rate": 1.6317973531367293e-06, "loss": 0.41, "num_tokens": 1801061165.0, "step": 1900 }, { "epoch": 3.720861900097943, "grad_norm": 0.08403856147686903, "learning_rate": 1.6271151887356607e-06, "loss": 0.4062, "num_tokens": 1802007153.0, "step": 1901 }, { "epoch": 3.722820763956905, "grad_norm": 0.08631686762180507, "learning_rate": 1.6224384455740788e-06, "loss": 0.4212, "num_tokens": 1802930843.0, "step": 1902 }, { "epoch": 3.7247796278158667, "grad_norm": 0.09229068743186988, "learning_rate": 1.6177671311689141e-06, "loss": 0.4218, "num_tokens": 1803861097.0, "step": 1903 }, { "epoch": 3.7267384916748285, "grad_norm": 0.08531139766353613, "learning_rate": 1.6131012530283702e-06, "loss": 0.4215, "num_tokens": 1804762220.0, "step": 1904 }, { "epoch": 3.7286973555337903, "grad_norm": 0.0889313467619071, "learning_rate": 1.6084408186519195e-06, "loss": 0.4096, "num_tokens": 1805687334.0, "step": 1905 }, { "epoch": 3.730656219392752, "grad_norm": 0.08106353324046786, "learning_rate": 1.6037858355302788e-06, "loss": 0.4213, "num_tokens": 1806646527.0, "step": 1906 }, { "epoch": 3.732615083251714, "grad_norm": 0.08259892785556008, "learning_rate": 1.5991363111454023e-06, "loss": 0.4057, "num_tokens": 1807608616.0, "step": 1907 }, { "epoch": 3.734573947110676, "grad_norm": 0.0835525455556785, "learning_rate": 1.5944922529704776e-06, "loss": 0.4158, "num_tokens": 1808543058.0, "step": 1908 }, { "epoch": 3.7365328109696376, "grad_norm": 0.08231090323783154, "learning_rate": 1.589853668469899e-06, "loss": 0.4226, "num_tokens": 1809496623.0, "step": 1909 }, { "epoch": 3.7384916748285995, "grad_norm": 0.08512469678523701, "learning_rate": 1.5852205650992647e-06, "loss": 0.4038, "num_tokens": 1810466934.0, "step": 1910 }, { "epoch": 3.7404505386875613, "grad_norm": 0.08421443630211962, "learning_rate": 1.5805929503053685e-06, "loss": 0.4116, "num_tokens": 1811456339.0, "step": 1911 }, { "epoch": 3.742409402546523, "grad_norm": 0.0821152272671918, "learning_rate": 1.5759708315261724e-06, "loss": 0.4027, "num_tokens": 1812398210.0, "step": 1912 }, { "epoch": 3.744368266405485, "grad_norm": 0.09023731565749514, "learning_rate": 1.5713542161908141e-06, "loss": 0.4162, "num_tokens": 1813340179.0, "step": 1913 }, { "epoch": 3.7463271302644467, "grad_norm": 0.08504933339836979, "learning_rate": 1.5667431117195814e-06, "loss": 0.4253, "num_tokens": 1814310043.0, "step": 1914 }, { "epoch": 3.7482859941234086, "grad_norm": 0.08492851530814112, "learning_rate": 1.5621375255239018e-06, "loss": 0.4225, "num_tokens": 1815298802.0, "step": 1915 }, { "epoch": 3.75024485798237, "grad_norm": 0.08512399191992572, "learning_rate": 1.5575374650063406e-06, "loss": 0.4247, "num_tokens": 1816212697.0, "step": 1916 }, { "epoch": 3.752203721841332, "grad_norm": 0.08554790074590062, "learning_rate": 1.552942937560576e-06, "loss": 0.4228, "num_tokens": 1817152179.0, "step": 1917 }, { "epoch": 3.7541625857002936, "grad_norm": 0.08659548668907796, "learning_rate": 1.5483539505713918e-06, "loss": 0.4104, "num_tokens": 1818083399.0, "step": 1918 }, { "epoch": 3.756121449559256, "grad_norm": 0.08176585901436256, "learning_rate": 1.5437705114146735e-06, "loss": 0.4158, "num_tokens": 1819025814.0, "step": 1919 }, { "epoch": 3.7580803134182172, "grad_norm": 0.08507563429865035, "learning_rate": 1.539192627457382e-06, "loss": 0.4331, "num_tokens": 1819938914.0, "step": 1920 }, { "epoch": 3.7600391772771795, "grad_norm": 0.08605313690692393, "learning_rate": 1.5346203060575526e-06, "loss": 0.3975, "num_tokens": 1820911956.0, "step": 1921 }, { "epoch": 3.761998041136141, "grad_norm": 0.08198911838135789, "learning_rate": 1.530053554564282e-06, "loss": 0.4206, "num_tokens": 1821846063.0, "step": 1922 }, { "epoch": 3.7639569049951027, "grad_norm": 0.08800163657224465, "learning_rate": 1.5254923803177108e-06, "loss": 0.4188, "num_tokens": 1822752847.0, "step": 1923 }, { "epoch": 3.7659157688540645, "grad_norm": 0.0859162604766064, "learning_rate": 1.5209367906490152e-06, "loss": 0.4199, "num_tokens": 1823708565.0, "step": 1924 }, { "epoch": 3.7678746327130264, "grad_norm": 0.08619344356928228, "learning_rate": 1.5163867928803998e-06, "loss": 0.4169, "num_tokens": 1824668568.0, "step": 1925 }, { "epoch": 3.769833496571988, "grad_norm": 0.08501854812802791, "learning_rate": 1.511842394325077e-06, "loss": 0.4156, "num_tokens": 1825619257.0, "step": 1926 }, { "epoch": 3.77179236043095, "grad_norm": 0.08621020723153473, "learning_rate": 1.50730360228726e-06, "loss": 0.405, "num_tokens": 1826588277.0, "step": 1927 }, { "epoch": 3.773751224289912, "grad_norm": 0.0829383462089837, "learning_rate": 1.5027704240621533e-06, "loss": 0.4126, "num_tokens": 1827554644.0, "step": 1928 }, { "epoch": 3.7757100881488737, "grad_norm": 0.08857959937499182, "learning_rate": 1.4982428669359362e-06, "loss": 0.4133, "num_tokens": 1828492139.0, "step": 1929 }, { "epoch": 3.7776689520078355, "grad_norm": 0.08508540757727298, "learning_rate": 1.4937209381857509e-06, "loss": 0.4252, "num_tokens": 1829464363.0, "step": 1930 }, { "epoch": 3.7796278158667973, "grad_norm": 0.08805253930443299, "learning_rate": 1.4892046450797004e-06, "loss": 0.428, "num_tokens": 1830400969.0, "step": 1931 }, { "epoch": 3.781586679725759, "grad_norm": 0.0869356895237708, "learning_rate": 1.4846939948768218e-06, "loss": 0.4174, "num_tokens": 1831327031.0, "step": 1932 }, { "epoch": 3.783545543584721, "grad_norm": 0.08251678849294589, "learning_rate": 1.4801889948270852e-06, "loss": 0.4138, "num_tokens": 1832263846.0, "step": 1933 }, { "epoch": 3.7855044074436828, "grad_norm": 0.08346726500453072, "learning_rate": 1.4756896521713821e-06, "loss": 0.4145, "num_tokens": 1833211972.0, "step": 1934 }, { "epoch": 3.7874632713026446, "grad_norm": 0.08893905266831544, "learning_rate": 1.471195974141506e-06, "loss": 0.4193, "num_tokens": 1834160367.0, "step": 1935 }, { "epoch": 3.7894221351616064, "grad_norm": 0.08547630574089961, "learning_rate": 1.4667079679601475e-06, "loss": 0.4255, "num_tokens": 1835113199.0, "step": 1936 }, { "epoch": 3.791380999020568, "grad_norm": 0.08520224604626843, "learning_rate": 1.4622256408408852e-06, "loss": 0.4119, "num_tokens": 1836063274.0, "step": 1937 }, { "epoch": 3.79333986287953, "grad_norm": 0.09052160754255766, "learning_rate": 1.45774899998816e-06, "loss": 0.414, "num_tokens": 1837004423.0, "step": 1938 }, { "epoch": 3.7952987267384914, "grad_norm": 0.08285027997712183, "learning_rate": 1.4532780525972833e-06, "loss": 0.4152, "num_tokens": 1837949051.0, "step": 1939 }, { "epoch": 3.7972575905974537, "grad_norm": 0.08712884894974125, "learning_rate": 1.44881280585441e-06, "loss": 0.4155, "num_tokens": 1838888101.0, "step": 1940 }, { "epoch": 3.799216454456415, "grad_norm": 0.08826969607410523, "learning_rate": 1.4443532669365317e-06, "loss": 0.4223, "num_tokens": 1839861647.0, "step": 1941 }, { "epoch": 3.801175318315377, "grad_norm": 0.08673165545294591, "learning_rate": 1.4398994430114705e-06, "loss": 0.4163, "num_tokens": 1840829994.0, "step": 1942 }, { "epoch": 3.8031341821743387, "grad_norm": 0.08121303740640588, "learning_rate": 1.4354513412378586e-06, "loss": 0.4123, "num_tokens": 1841761720.0, "step": 1943 }, { "epoch": 3.8050930460333006, "grad_norm": 0.08931443056419079, "learning_rate": 1.4310089687651302e-06, "loss": 0.4283, "num_tokens": 1842697162.0, "step": 1944 }, { "epoch": 3.8070519098922624, "grad_norm": 0.08514268990515005, "learning_rate": 1.4265723327335162e-06, "loss": 0.4261, "num_tokens": 1843667675.0, "step": 1945 }, { "epoch": 3.809010773751224, "grad_norm": 0.08903059963344348, "learning_rate": 1.422141440274023e-06, "loss": 0.4224, "num_tokens": 1844619081.0, "step": 1946 }, { "epoch": 3.810969637610186, "grad_norm": 0.08401625713340918, "learning_rate": 1.4177162985084242e-06, "loss": 0.425, "num_tokens": 1845560968.0, "step": 1947 }, { "epoch": 3.812928501469148, "grad_norm": 0.0868968883943069, "learning_rate": 1.4132969145492559e-06, "loss": 0.4054, "num_tokens": 1846504685.0, "step": 1948 }, { "epoch": 3.8148873653281097, "grad_norm": 0.08180514482927043, "learning_rate": 1.4088832954997944e-06, "loss": 0.3966, "num_tokens": 1847433339.0, "step": 1949 }, { "epoch": 3.8168462291870715, "grad_norm": 0.0856552429613838, "learning_rate": 1.40447544845405e-06, "loss": 0.4307, "num_tokens": 1848386888.0, "step": 1950 }, { "epoch": 3.8188050930460333, "grad_norm": 0.08634696578800498, "learning_rate": 1.4000733804967608e-06, "loss": 0.4104, "num_tokens": 1849327866.0, "step": 1951 }, { "epoch": 3.820763956904995, "grad_norm": 0.08414651487379428, "learning_rate": 1.3956770987033719e-06, "loss": 0.4137, "num_tokens": 1850271679.0, "step": 1952 }, { "epoch": 3.822722820763957, "grad_norm": 0.08419625942296331, "learning_rate": 1.3912866101400275e-06, "loss": 0.417, "num_tokens": 1851203767.0, "step": 1953 }, { "epoch": 3.824681684622919, "grad_norm": 0.08390891064909364, "learning_rate": 1.3869019218635644e-06, "loss": 0.417, "num_tokens": 1852158239.0, "step": 1954 }, { "epoch": 3.8266405484818806, "grad_norm": 0.08567211789364376, "learning_rate": 1.3825230409214935e-06, "loss": 0.4113, "num_tokens": 1853119345.0, "step": 1955 }, { "epoch": 3.8285994123408424, "grad_norm": 0.08187069477773441, "learning_rate": 1.3781499743519911e-06, "loss": 0.4149, "num_tokens": 1854064557.0, "step": 1956 }, { "epoch": 3.8305582761998043, "grad_norm": 0.08442954503231062, "learning_rate": 1.3737827291838917e-06, "loss": 0.4119, "num_tokens": 1855003717.0, "step": 1957 }, { "epoch": 3.8325171400587656, "grad_norm": 0.08178202625417966, "learning_rate": 1.3694213124366696e-06, "loss": 0.4068, "num_tokens": 1855937892.0, "step": 1958 }, { "epoch": 3.834476003917728, "grad_norm": 0.08601072930203271, "learning_rate": 1.3650657311204307e-06, "loss": 0.428, "num_tokens": 1856862078.0, "step": 1959 }, { "epoch": 3.8364348677766893, "grad_norm": 0.08980409225787742, "learning_rate": 1.3607159922359058e-06, "loss": 0.4161, "num_tokens": 1857814343.0, "step": 1960 }, { "epoch": 3.8383937316356516, "grad_norm": 0.0881534624814249, "learning_rate": 1.3563721027744309e-06, "loss": 0.4172, "num_tokens": 1858760105.0, "step": 1961 }, { "epoch": 3.840352595494613, "grad_norm": 0.08242921726026739, "learning_rate": 1.3520340697179406e-06, "loss": 0.4187, "num_tokens": 1859705486.0, "step": 1962 }, { "epoch": 3.8423114593535748, "grad_norm": 0.08460184858489149, "learning_rate": 1.3477019000389624e-06, "loss": 0.4083, "num_tokens": 1860626769.0, "step": 1963 }, { "epoch": 3.8442703232125366, "grad_norm": 0.08800797682634066, "learning_rate": 1.3433756007005877e-06, "loss": 0.4187, "num_tokens": 1861580538.0, "step": 1964 }, { "epoch": 3.8462291870714984, "grad_norm": 0.08286833733684613, "learning_rate": 1.3390551786564853e-06, "loss": 0.414, "num_tokens": 1862536042.0, "step": 1965 }, { "epoch": 3.8481880509304602, "grad_norm": 0.08442314687884295, "learning_rate": 1.3347406408508695e-06, "loss": 0.4187, "num_tokens": 1863496597.0, "step": 1966 }, { "epoch": 3.850146914789422, "grad_norm": 0.08348039787032789, "learning_rate": 1.3304319942184985e-06, "loss": 0.4196, "num_tokens": 1864411280.0, "step": 1967 }, { "epoch": 3.852105778648384, "grad_norm": 0.08363557741924119, "learning_rate": 1.3261292456846648e-06, "loss": 0.4096, "num_tokens": 1865331381.0, "step": 1968 }, { "epoch": 3.8540646425073457, "grad_norm": 0.08820754563153728, "learning_rate": 1.3218324021651763e-06, "loss": 0.4234, "num_tokens": 1866285303.0, "step": 1969 }, { "epoch": 3.8560235063663075, "grad_norm": 0.08275129689993459, "learning_rate": 1.3175414705663508e-06, "loss": 0.4012, "num_tokens": 1867241446.0, "step": 1970 }, { "epoch": 3.8579823702252694, "grad_norm": 0.08405493005731622, "learning_rate": 1.3132564577850076e-06, "loss": 0.4108, "num_tokens": 1868209880.0, "step": 1971 }, { "epoch": 3.859941234084231, "grad_norm": 0.09041932226250768, "learning_rate": 1.3089773707084492e-06, "loss": 0.4313, "num_tokens": 1869153236.0, "step": 1972 }, { "epoch": 3.861900097943193, "grad_norm": 0.08449472039851497, "learning_rate": 1.3047042162144518e-06, "loss": 0.4224, "num_tokens": 1870126698.0, "step": 1973 }, { "epoch": 3.863858961802155, "grad_norm": 0.08411268105576536, "learning_rate": 1.3004370011712624e-06, "loss": 0.4139, "num_tokens": 1871077323.0, "step": 1974 }, { "epoch": 3.8658178256611166, "grad_norm": 0.0827968826755833, "learning_rate": 1.2961757324375768e-06, "loss": 0.4078, "num_tokens": 1872038384.0, "step": 1975 }, { "epoch": 3.8677766895200785, "grad_norm": 0.0845075442611009, "learning_rate": 1.2919204168625322e-06, "loss": 0.434, "num_tokens": 1873051905.0, "step": 1976 }, { "epoch": 3.8697355533790403, "grad_norm": 0.08462865608635815, "learning_rate": 1.2876710612857029e-06, "loss": 0.4223, "num_tokens": 1873979415.0, "step": 1977 }, { "epoch": 3.871694417238002, "grad_norm": 0.08460278437532708, "learning_rate": 1.2834276725370787e-06, "loss": 0.4137, "num_tokens": 1874950456.0, "step": 1978 }, { "epoch": 3.8736532810969635, "grad_norm": 0.08238758783440217, "learning_rate": 1.2791902574370591e-06, "loss": 0.4033, "num_tokens": 1875891822.0, "step": 1979 }, { "epoch": 3.8756121449559258, "grad_norm": 0.08342045939138873, "learning_rate": 1.2749588227964466e-06, "loss": 0.4222, "num_tokens": 1876851239.0, "step": 1980 }, { "epoch": 3.877571008814887, "grad_norm": 0.08437609123813213, "learning_rate": 1.2707333754164264e-06, "loss": 0.4162, "num_tokens": 1877805057.0, "step": 1981 }, { "epoch": 3.8795298726738494, "grad_norm": 0.08583678185088015, "learning_rate": 1.2665139220885615e-06, "loss": 0.4154, "num_tokens": 1878744460.0, "step": 1982 }, { "epoch": 3.881488736532811, "grad_norm": 0.08687076318955812, "learning_rate": 1.2623004695947832e-06, "loss": 0.4144, "num_tokens": 1879691675.0, "step": 1983 }, { "epoch": 3.8834476003917726, "grad_norm": 0.08710363190478325, "learning_rate": 1.2580930247073752e-06, "loss": 0.4163, "num_tokens": 1880638800.0, "step": 1984 }, { "epoch": 3.8854064642507344, "grad_norm": 0.08231975046148307, "learning_rate": 1.2538915941889646e-06, "loss": 0.4251, "num_tokens": 1881590715.0, "step": 1985 }, { "epoch": 3.8873653281096963, "grad_norm": 0.08920842056600325, "learning_rate": 1.2496961847925154e-06, "loss": 0.4257, "num_tokens": 1882517573.0, "step": 1986 }, { "epoch": 3.889324191968658, "grad_norm": 0.08646749713217497, "learning_rate": 1.2455068032613077e-06, "loss": 0.4185, "num_tokens": 1883463626.0, "step": 1987 }, { "epoch": 3.89128305582762, "grad_norm": 0.08299093958222617, "learning_rate": 1.2413234563289406e-06, "loss": 0.4042, "num_tokens": 1884403020.0, "step": 1988 }, { "epoch": 3.8932419196865817, "grad_norm": 0.07815162636046777, "learning_rate": 1.2371461507193077e-06, "loss": 0.4208, "num_tokens": 1885354725.0, "step": 1989 }, { "epoch": 3.8952007835455436, "grad_norm": 0.08675301159428482, "learning_rate": 1.2329748931465945e-06, "loss": 0.4108, "num_tokens": 1886282535.0, "step": 1990 }, { "epoch": 3.8971596474045054, "grad_norm": 0.084792093805616, "learning_rate": 1.2288096903152652e-06, "loss": 0.4041, "num_tokens": 1887247894.0, "step": 1991 }, { "epoch": 3.899118511263467, "grad_norm": 0.08283453301958113, "learning_rate": 1.2246505489200534e-06, "loss": 0.4058, "num_tokens": 1888196825.0, "step": 1992 }, { "epoch": 3.901077375122429, "grad_norm": 0.08651976409073864, "learning_rate": 1.2204974756459465e-06, "loss": 0.4234, "num_tokens": 1889162984.0, "step": 1993 }, { "epoch": 3.903036238981391, "grad_norm": 0.0844438315601082, "learning_rate": 1.216350477168185e-06, "loss": 0.4203, "num_tokens": 1890103421.0, "step": 1994 }, { "epoch": 3.9049951028403527, "grad_norm": 0.08637547521440192, "learning_rate": 1.2122095601522404e-06, "loss": 0.419, "num_tokens": 1891081047.0, "step": 1995 }, { "epoch": 3.9069539666993145, "grad_norm": 0.08730163040571967, "learning_rate": 1.2080747312538082e-06, "loss": 0.4268, "num_tokens": 1891995891.0, "step": 1996 }, { "epoch": 3.9089128305582763, "grad_norm": 0.08548199716073168, "learning_rate": 1.203945997118805e-06, "loss": 0.4124, "num_tokens": 1892946143.0, "step": 1997 }, { "epoch": 3.910871694417238, "grad_norm": 0.08191612584010766, "learning_rate": 1.1998233643833457e-06, "loss": 0.4058, "num_tokens": 1893888836.0, "step": 1998 }, { "epoch": 3.9128305582762, "grad_norm": 0.08330786533493431, "learning_rate": 1.195706839673738e-06, "loss": 0.4065, "num_tokens": 1894847818.0, "step": 1999 }, { "epoch": 3.9147894221351613, "grad_norm": 0.0824831852880592, "learning_rate": 1.1915964296064775e-06, "loss": 0.3953, "num_tokens": 1895811476.0, "step": 2000 }, { "epoch": 3.9167482859941236, "grad_norm": 0.08122688319719282, "learning_rate": 1.187492140788225e-06, "loss": 0.4158, "num_tokens": 1896787472.0, "step": 2001 }, { "epoch": 3.918707149853085, "grad_norm": 0.08136197637442369, "learning_rate": 1.1833939798158095e-06, "loss": 0.4195, "num_tokens": 1897705197.0, "step": 2002 }, { "epoch": 3.9206660137120473, "grad_norm": 0.08546440048957472, "learning_rate": 1.1793019532762057e-06, "loss": 0.4258, "num_tokens": 1898637706.0, "step": 2003 }, { "epoch": 3.9226248775710086, "grad_norm": 0.08585210376684785, "learning_rate": 1.1752160677465286e-06, "loss": 0.4117, "num_tokens": 1899588114.0, "step": 2004 }, { "epoch": 3.9245837414299705, "grad_norm": 0.0835230568809389, "learning_rate": 1.1711363297940265e-06, "loss": 0.414, "num_tokens": 1900483221.0, "step": 2005 }, { "epoch": 3.9265426052889323, "grad_norm": 0.08529726758501822, "learning_rate": 1.1670627459760637e-06, "loss": 0.4259, "num_tokens": 1901419892.0, "step": 2006 }, { "epoch": 3.928501469147894, "grad_norm": 0.09140885115656842, "learning_rate": 1.1629953228401109e-06, "loss": 0.4225, "num_tokens": 1902376792.0, "step": 2007 }, { "epoch": 3.930460333006856, "grad_norm": 0.08248469579214886, "learning_rate": 1.1589340669237432e-06, "loss": 0.4238, "num_tokens": 1903326764.0, "step": 2008 }, { "epoch": 3.9324191968658178, "grad_norm": 0.08597793447176653, "learning_rate": 1.1548789847546178e-06, "loss": 0.4076, "num_tokens": 1904290512.0, "step": 2009 }, { "epoch": 3.9343780607247796, "grad_norm": 0.08251527638436337, "learning_rate": 1.1508300828504682e-06, "loss": 0.4131, "num_tokens": 1905236568.0, "step": 2010 }, { "epoch": 3.9363369245837414, "grad_norm": 0.08440021634504866, "learning_rate": 1.1467873677191005e-06, "loss": 0.4179, "num_tokens": 1906158419.0, "step": 2011 }, { "epoch": 3.9382957884427032, "grad_norm": 0.08234066151314694, "learning_rate": 1.1427508458583703e-06, "loss": 0.4048, "num_tokens": 1907064319.0, "step": 2012 }, { "epoch": 3.940254652301665, "grad_norm": 0.0829201170387118, "learning_rate": 1.1387205237561804e-06, "loss": 0.4064, "num_tokens": 1908029968.0, "step": 2013 }, { "epoch": 3.942213516160627, "grad_norm": 0.08394438675727865, "learning_rate": 1.1346964078904727e-06, "loss": 0.4117, "num_tokens": 1908945871.0, "step": 2014 }, { "epoch": 3.9441723800195887, "grad_norm": 0.08613624973812307, "learning_rate": 1.1306785047292085e-06, "loss": 0.4318, "num_tokens": 1909891675.0, "step": 2015 }, { "epoch": 3.9461312438785505, "grad_norm": 0.08654788565549938, "learning_rate": 1.126666820730366e-06, "loss": 0.4198, "num_tokens": 1910827674.0, "step": 2016 }, { "epoch": 3.9480901077375123, "grad_norm": 0.08265006031279491, "learning_rate": 1.122661362341927e-06, "loss": 0.415, "num_tokens": 1911775363.0, "step": 2017 }, { "epoch": 3.950048971596474, "grad_norm": 0.08506299568498547, "learning_rate": 1.1186621360018656e-06, "loss": 0.416, "num_tokens": 1912745495.0, "step": 2018 }, { "epoch": 3.952007835455436, "grad_norm": 0.0862167601754671, "learning_rate": 1.1146691481381428e-06, "loss": 0.4165, "num_tokens": 1913679035.0, "step": 2019 }, { "epoch": 3.953966699314398, "grad_norm": 0.08483214721743973, "learning_rate": 1.1106824051686888e-06, "loss": 0.4114, "num_tokens": 1914583470.0, "step": 2020 }, { "epoch": 3.955925563173359, "grad_norm": 0.0844595520583096, "learning_rate": 1.1067019135013956e-06, "loss": 0.4318, "num_tokens": 1915529721.0, "step": 2021 }, { "epoch": 3.9578844270323215, "grad_norm": 0.08529514135489234, "learning_rate": 1.1027276795341135e-06, "loss": 0.4147, "num_tokens": 1916478178.0, "step": 2022 }, { "epoch": 3.959843290891283, "grad_norm": 0.08299401156766298, "learning_rate": 1.0987597096546288e-06, "loss": 0.4062, "num_tokens": 1917430861.0, "step": 2023 }, { "epoch": 3.961802154750245, "grad_norm": 0.08306066471753731, "learning_rate": 1.0947980102406597e-06, "loss": 0.4154, "num_tokens": 1918405975.0, "step": 2024 }, { "epoch": 3.9637610186092065, "grad_norm": 0.08160808801541311, "learning_rate": 1.0908425876598512e-06, "loss": 0.4112, "num_tokens": 1919405602.0, "step": 2025 }, { "epoch": 3.9657198824681683, "grad_norm": 0.08148928490862165, "learning_rate": 1.0868934482697547e-06, "loss": 0.4079, "num_tokens": 1920351831.0, "step": 2026 }, { "epoch": 3.96767874632713, "grad_norm": 0.0838535117640875, "learning_rate": 1.0829505984178212e-06, "loss": 0.4107, "num_tokens": 1921300601.0, "step": 2027 }, { "epoch": 3.969637610186092, "grad_norm": 0.08061820241323737, "learning_rate": 1.0790140444414e-06, "loss": 0.4098, "num_tokens": 1922253262.0, "step": 2028 }, { "epoch": 3.971596474045054, "grad_norm": 0.08164752940757682, "learning_rate": 1.0750837926677133e-06, "loss": 0.4127, "num_tokens": 1923196761.0, "step": 2029 }, { "epoch": 3.9735553379040156, "grad_norm": 0.08457874528495596, "learning_rate": 1.0711598494138564e-06, "loss": 0.4289, "num_tokens": 1924139440.0, "step": 2030 }, { "epoch": 3.9755142017629774, "grad_norm": 0.08340730751985082, "learning_rate": 1.0672422209867879e-06, "loss": 0.4147, "num_tokens": 1925118595.0, "step": 2031 }, { "epoch": 3.9774730656219393, "grad_norm": 0.08543135447738082, "learning_rate": 1.063330913683312e-06, "loss": 0.4201, "num_tokens": 1926054293.0, "step": 2032 }, { "epoch": 3.979431929480901, "grad_norm": 0.08344031544175283, "learning_rate": 1.0594259337900748e-06, "loss": 0.4042, "num_tokens": 1927014860.0, "step": 2033 }, { "epoch": 3.981390793339863, "grad_norm": 0.08259028738825627, "learning_rate": 1.0555272875835537e-06, "loss": 0.4175, "num_tokens": 1927959244.0, "step": 2034 }, { "epoch": 3.9833496571988247, "grad_norm": 0.08450279727167037, "learning_rate": 1.0516349813300447e-06, "loss": 0.4056, "num_tokens": 1928904971.0, "step": 2035 }, { "epoch": 3.9853085210577865, "grad_norm": 0.08607922460115165, "learning_rate": 1.047749021285651e-06, "loss": 0.4151, "num_tokens": 1929856382.0, "step": 2036 }, { "epoch": 3.9872673849167484, "grad_norm": 0.08731108771114565, "learning_rate": 1.0438694136962824e-06, "loss": 0.4262, "num_tokens": 1930766918.0, "step": 2037 }, { "epoch": 3.98922624877571, "grad_norm": 0.08561238047874237, "learning_rate": 1.0399961647976315e-06, "loss": 0.4198, "num_tokens": 1931709023.0, "step": 2038 }, { "epoch": 3.991185112634672, "grad_norm": 0.0840692465053406, "learning_rate": 1.0361292808151725e-06, "loss": 0.406, "num_tokens": 1932656941.0, "step": 2039 }, { "epoch": 3.993143976493634, "grad_norm": 0.08523835969256632, "learning_rate": 1.0322687679641523e-06, "loss": 0.4336, "num_tokens": 1933632222.0, "step": 2040 }, { "epoch": 3.9951028403525957, "grad_norm": 0.08560297472644865, "learning_rate": 1.0284146324495747e-06, "loss": 0.4214, "num_tokens": 1934570746.0, "step": 2041 }, { "epoch": 3.997061704211557, "grad_norm": 0.08318935314236181, "learning_rate": 1.024566880466193e-06, "loss": 0.4269, "num_tokens": 1935528915.0, "step": 2042 }, { "epoch": 3.9990205680705193, "grad_norm": 0.08849211155747533, "learning_rate": 1.020725518198501e-06, "loss": 0.4303, "num_tokens": 1936453215.0, "step": 2043 }, { "epoch": 4.0, "grad_norm": 0.1312278322783195, "learning_rate": 1.0168905518207206e-06, "loss": 0.4084, "num_tokens": 1936926192.0, "step": 2044 }, { "epoch": 4.001958863858961, "grad_norm": 0.08502193677350244, "learning_rate": 1.0130619874967983e-06, "loss": 0.4221, "num_tokens": 1937887261.0, "step": 2045 }, { "epoch": 4.003917727717924, "grad_norm": 0.08475491122092788, "learning_rate": 1.0092398313803864e-06, "loss": 0.4101, "num_tokens": 1938852537.0, "step": 2046 }, { "epoch": 4.005876591576885, "grad_norm": 0.08689762084378919, "learning_rate": 1.0054240896148364e-06, "loss": 0.4083, "num_tokens": 1939788905.0, "step": 2047 }, { "epoch": 4.007835455435847, "grad_norm": 0.08260209940359746, "learning_rate": 1.0016147683331955e-06, "loss": 0.4237, "num_tokens": 1940726418.0, "step": 2048 }, { "epoch": 4.009794319294809, "grad_norm": 0.08399095900727253, "learning_rate": 9.97811873658186e-07, "loss": 0.4279, "num_tokens": 1941663266.0, "step": 2049 }, { "epoch": 4.011753183153771, "grad_norm": 0.08421254211310467, "learning_rate": 9.940154117022005e-07, "loss": 0.4148, "num_tokens": 1942599604.0, "step": 2050 }, { "epoch": 4.013712047012732, "grad_norm": 0.08420527051018095, "learning_rate": 9.90225388567298e-07, "loss": 0.4098, "num_tokens": 1943527663.0, "step": 2051 }, { "epoch": 4.015670910871695, "grad_norm": 0.08317858749531441, "learning_rate": 9.86441810345183e-07, "loss": 0.4097, "num_tokens": 1944461056.0, "step": 2052 }, { "epoch": 4.017629774730656, "grad_norm": 0.08725770282762309, "learning_rate": 9.826646831172004e-07, "loss": 0.4267, "num_tokens": 1945386344.0, "step": 2053 }, { "epoch": 4.019588638589618, "grad_norm": 0.08222310778997291, "learning_rate": 9.788940129543323e-07, "loss": 0.4174, "num_tokens": 1946354473.0, "step": 2054 }, { "epoch": 4.02154750244858, "grad_norm": 0.08337402373949127, "learning_rate": 9.751298059171777e-07, "loss": 0.4104, "num_tokens": 1947324057.0, "step": 2055 }, { "epoch": 4.023506366307542, "grad_norm": 0.08333568851478758, "learning_rate": 9.713720680559463e-07, "loss": 0.3997, "num_tokens": 1948282104.0, "step": 2056 }, { "epoch": 4.025465230166503, "grad_norm": 0.08270716784118884, "learning_rate": 9.676208054104553e-07, "loss": 0.4138, "num_tokens": 1949279239.0, "step": 2057 }, { "epoch": 4.0274240940254655, "grad_norm": 0.08202636274056062, "learning_rate": 9.638760240101102e-07, "loss": 0.4014, "num_tokens": 1950240230.0, "step": 2058 }, { "epoch": 4.029382957884427, "grad_norm": 0.08458357494517589, "learning_rate": 9.60137729873898e-07, "loss": 0.4011, "num_tokens": 1951192910.0, "step": 2059 }, { "epoch": 4.031341821743389, "grad_norm": 0.08314252647160143, "learning_rate": 9.564059290103832e-07, "loss": 0.4162, "num_tokens": 1952150853.0, "step": 2060 }, { "epoch": 4.0333006856023506, "grad_norm": 0.08943419961461954, "learning_rate": 9.526806274176903e-07, "loss": 0.4166, "num_tokens": 1953085933.0, "step": 2061 }, { "epoch": 4.035259549461313, "grad_norm": 0.08656132443725874, "learning_rate": 9.489618310834975e-07, "loss": 0.4071, "num_tokens": 1954020911.0, "step": 2062 }, { "epoch": 4.037218413320274, "grad_norm": 0.08238655532832571, "learning_rate": 9.452495459850303e-07, "loss": 0.4095, "num_tokens": 1954980124.0, "step": 2063 }, { "epoch": 4.0391772771792365, "grad_norm": 0.08482853005158912, "learning_rate": 9.415437780890452e-07, "loss": 0.4153, "num_tokens": 1955937348.0, "step": 2064 }, { "epoch": 4.041136141038198, "grad_norm": 0.08503822379426298, "learning_rate": 9.378445333518238e-07, "loss": 0.408, "num_tokens": 1956877866.0, "step": 2065 }, { "epoch": 4.043095004897159, "grad_norm": 0.08649059908764865, "learning_rate": 9.34151817719166e-07, "loss": 0.401, "num_tokens": 1957796291.0, "step": 2066 }, { "epoch": 4.0450538687561215, "grad_norm": 0.08540282899796471, "learning_rate": 9.304656371263749e-07, "loss": 0.4129, "num_tokens": 1958743695.0, "step": 2067 }, { "epoch": 4.047012732615083, "grad_norm": 0.08589684990372319, "learning_rate": 9.26785997498249e-07, "loss": 0.4292, "num_tokens": 1959686627.0, "step": 2068 }, { "epoch": 4.048971596474045, "grad_norm": 0.08169380819766643, "learning_rate": 9.231129047490784e-07, "loss": 0.4122, "num_tokens": 1960669441.0, "step": 2069 }, { "epoch": 4.0509304603330065, "grad_norm": 0.08231001210924904, "learning_rate": 9.194463647826224e-07, "loss": 0.4121, "num_tokens": 1961638587.0, "step": 2070 }, { "epoch": 4.052889324191969, "grad_norm": 0.08227276084835788, "learning_rate": 9.157863834921155e-07, "loss": 0.4116, "num_tokens": 1962579088.0, "step": 2071 }, { "epoch": 4.05484818805093, "grad_norm": 0.08299856022370741, "learning_rate": 9.121329667602474e-07, "loss": 0.3987, "num_tokens": 1963493283.0, "step": 2072 }, { "epoch": 4.056807051909892, "grad_norm": 0.08386846753592783, "learning_rate": 9.08486120459155e-07, "loss": 0.417, "num_tokens": 1964424620.0, "step": 2073 }, { "epoch": 4.058765915768854, "grad_norm": 0.08497730471137754, "learning_rate": 9.048458504504181e-07, "loss": 0.4238, "num_tokens": 1965374010.0, "step": 2074 }, { "epoch": 4.060724779627816, "grad_norm": 0.08193153364959628, "learning_rate": 9.012121625850445e-07, "loss": 0.4058, "num_tokens": 1966341859.0, "step": 2075 }, { "epoch": 4.0626836434867775, "grad_norm": 0.08406692485203614, "learning_rate": 8.975850627034605e-07, "loss": 0.4098, "num_tokens": 1967288695.0, "step": 2076 }, { "epoch": 4.06464250734574, "grad_norm": 0.08458426120522519, "learning_rate": 8.939645566355082e-07, "loss": 0.4097, "num_tokens": 1968206492.0, "step": 2077 }, { "epoch": 4.066601371204701, "grad_norm": 0.08268780425945656, "learning_rate": 8.903506502004272e-07, "loss": 0.4073, "num_tokens": 1969156152.0, "step": 2078 }, { "epoch": 4.068560235063663, "grad_norm": 0.08509860396084373, "learning_rate": 8.867433492068506e-07, "loss": 0.4178, "num_tokens": 1970076813.0, "step": 2079 }, { "epoch": 4.070519098922625, "grad_norm": 0.08410925852511572, "learning_rate": 8.831426594527976e-07, "loss": 0.4091, "num_tokens": 1971000534.0, "step": 2080 }, { "epoch": 4.072477962781587, "grad_norm": 0.08214244051387433, "learning_rate": 8.79548586725657e-07, "loss": 0.4086, "num_tokens": 1971963735.0, "step": 2081 }, { "epoch": 4.074436826640548, "grad_norm": 0.08320219656143128, "learning_rate": 8.759611368021831e-07, "loss": 0.412, "num_tokens": 1972949208.0, "step": 2082 }, { "epoch": 4.076395690499511, "grad_norm": 0.08087905369433049, "learning_rate": 8.723803154484878e-07, "loss": 0.4048, "num_tokens": 1973900628.0, "step": 2083 }, { "epoch": 4.078354554358472, "grad_norm": 0.0839633275807735, "learning_rate": 8.688061284200266e-07, "loss": 0.4165, "num_tokens": 1974821386.0, "step": 2084 }, { "epoch": 4.080313418217434, "grad_norm": 0.08577869269252363, "learning_rate": 8.652385814615899e-07, "loss": 0.4085, "num_tokens": 1975746775.0, "step": 2085 }, { "epoch": 4.082272282076396, "grad_norm": 0.0858937256665813, "learning_rate": 8.616776803073013e-07, "loss": 0.4227, "num_tokens": 1976704407.0, "step": 2086 }, { "epoch": 4.084231145935357, "grad_norm": 0.08591592861928278, "learning_rate": 8.581234306805969e-07, "loss": 0.4153, "num_tokens": 1977661539.0, "step": 2087 }, { "epoch": 4.086190009794319, "grad_norm": 0.08264970198212282, "learning_rate": 8.545758382942232e-07, "loss": 0.4205, "num_tokens": 1978625050.0, "step": 2088 }, { "epoch": 4.088148873653281, "grad_norm": 0.08333784229963259, "learning_rate": 8.510349088502295e-07, "loss": 0.4277, "num_tokens": 1979620090.0, "step": 2089 }, { "epoch": 4.090107737512243, "grad_norm": 0.08789058389774676, "learning_rate": 8.475006480399528e-07, "loss": 0.4268, "num_tokens": 1980562137.0, "step": 2090 }, { "epoch": 4.092066601371204, "grad_norm": 0.0873505236627856, "learning_rate": 8.439730615440106e-07, "loss": 0.4108, "num_tokens": 1981474359.0, "step": 2091 }, { "epoch": 4.094025465230167, "grad_norm": 0.08388104843650347, "learning_rate": 8.404521550322975e-07, "loss": 0.4236, "num_tokens": 1982430028.0, "step": 2092 }, { "epoch": 4.095984329089128, "grad_norm": 0.08239524474820561, "learning_rate": 8.36937934163966e-07, "loss": 0.4132, "num_tokens": 1983395209.0, "step": 2093 }, { "epoch": 4.09794319294809, "grad_norm": 0.08362559051183868, "learning_rate": 8.334304045874248e-07, "loss": 0.4031, "num_tokens": 1984310413.0, "step": 2094 }, { "epoch": 4.099902056807052, "grad_norm": 0.08625867872649809, "learning_rate": 8.299295719403311e-07, "loss": 0.424, "num_tokens": 1985247194.0, "step": 2095 }, { "epoch": 4.101860920666014, "grad_norm": 0.08665866705409384, "learning_rate": 8.264354418495707e-07, "loss": 0.4172, "num_tokens": 1986214184.0, "step": 2096 }, { "epoch": 4.103819784524975, "grad_norm": 0.08319286746801058, "learning_rate": 8.229480199312645e-07, "loss": 0.4133, "num_tokens": 1987152093.0, "step": 2097 }, { "epoch": 4.105778648383938, "grad_norm": 0.08289013502815501, "learning_rate": 8.194673117907453e-07, "loss": 0.4112, "num_tokens": 1988115357.0, "step": 2098 }, { "epoch": 4.107737512242899, "grad_norm": 0.08516964773454462, "learning_rate": 8.159933230225564e-07, "loss": 0.425, "num_tokens": 1989081236.0, "step": 2099 }, { "epoch": 4.109696376101861, "grad_norm": 0.08260738635978855, "learning_rate": 8.125260592104445e-07, "loss": 0.4007, "num_tokens": 1990018687.0, "step": 2100 }, { "epoch": 4.111655239960823, "grad_norm": 0.08321095732955454, "learning_rate": 8.090655259273428e-07, "loss": 0.4162, "num_tokens": 1990965805.0, "step": 2101 }, { "epoch": 4.113614103819785, "grad_norm": 0.08618368115956755, "learning_rate": 8.05611728735367e-07, "loss": 0.4324, "num_tokens": 1991956498.0, "step": 2102 }, { "epoch": 4.115572967678746, "grad_norm": 0.08264899564453558, "learning_rate": 8.021646731858101e-07, "loss": 0.4065, "num_tokens": 1992912450.0, "step": 2103 }, { "epoch": 4.1175318315377085, "grad_norm": 0.08288342381673579, "learning_rate": 7.987243648191245e-07, "loss": 0.4015, "num_tokens": 1993820685.0, "step": 2104 }, { "epoch": 4.11949069539667, "grad_norm": 0.08393247390987402, "learning_rate": 7.952908091649186e-07, "loss": 0.4063, "num_tokens": 1994778014.0, "step": 2105 }, { "epoch": 4.121449559255632, "grad_norm": 0.08110493139268171, "learning_rate": 7.918640117419507e-07, "loss": 0.4023, "num_tokens": 1995745587.0, "step": 2106 }, { "epoch": 4.1234084231145935, "grad_norm": 0.08074313592907365, "learning_rate": 7.884439780581127e-07, "loss": 0.3961, "num_tokens": 1996736368.0, "step": 2107 }, { "epoch": 4.125367286973555, "grad_norm": 0.08225551126841155, "learning_rate": 7.850307136104246e-07, "loss": 0.4151, "num_tokens": 1997718180.0, "step": 2108 }, { "epoch": 4.127326150832517, "grad_norm": 0.08182874299364075, "learning_rate": 7.816242238850319e-07, "loss": 0.4091, "num_tokens": 1998709445.0, "step": 2109 }, { "epoch": 4.129285014691479, "grad_norm": 0.08526759626891284, "learning_rate": 7.782245143571848e-07, "loss": 0.4189, "num_tokens": 1999647519.0, "step": 2110 }, { "epoch": 4.131243878550441, "grad_norm": 0.08521423913269954, "learning_rate": 7.748315904912379e-07, "loss": 0.4089, "num_tokens": 2000565274.0, "step": 2111 }, { "epoch": 4.133202742409402, "grad_norm": 0.08322162936888877, "learning_rate": 7.71445457740641e-07, "loss": 0.4125, "num_tokens": 2001500032.0, "step": 2112 }, { "epoch": 4.1351616062683645, "grad_norm": 0.08482170959481423, "learning_rate": 7.680661215479268e-07, "loss": 0.4095, "num_tokens": 2002437137.0, "step": 2113 }, { "epoch": 4.137120470127326, "grad_norm": 0.08342381084957223, "learning_rate": 7.64693587344702e-07, "loss": 0.4075, "num_tokens": 2003378228.0, "step": 2114 }, { "epoch": 4.139079333986288, "grad_norm": 0.08461529304023764, "learning_rate": 7.613278605516455e-07, "loss": 0.4145, "num_tokens": 2004289444.0, "step": 2115 }, { "epoch": 4.1410381978452495, "grad_norm": 0.08102920812569286, "learning_rate": 7.57968946578489e-07, "loss": 0.4157, "num_tokens": 2005277377.0, "step": 2116 }, { "epoch": 4.142997061704212, "grad_norm": 0.08494344523553249, "learning_rate": 7.546168508240192e-07, "loss": 0.4159, "num_tokens": 2006212370.0, "step": 2117 }, { "epoch": 4.144955925563173, "grad_norm": 0.08396960848853971, "learning_rate": 7.512715786760604e-07, "loss": 0.425, "num_tokens": 2007166347.0, "step": 2118 }, { "epoch": 4.146914789422135, "grad_norm": 0.08594146064778084, "learning_rate": 7.479331355114682e-07, "loss": 0.4105, "num_tokens": 2008098904.0, "step": 2119 }, { "epoch": 4.148873653281097, "grad_norm": 0.08450597034766295, "learning_rate": 7.446015266961276e-07, "loss": 0.412, "num_tokens": 2009022591.0, "step": 2120 }, { "epoch": 4.150832517140059, "grad_norm": 0.08474797510859908, "learning_rate": 7.412767575849328e-07, "loss": 0.4148, "num_tokens": 2009954740.0, "step": 2121 }, { "epoch": 4.1527913809990205, "grad_norm": 0.08188769206637259, "learning_rate": 7.379588335217875e-07, "loss": 0.411, "num_tokens": 2010913111.0, "step": 2122 }, { "epoch": 4.154750244857983, "grad_norm": 0.08255063659786951, "learning_rate": 7.346477598395929e-07, "loss": 0.4195, "num_tokens": 2011914672.0, "step": 2123 }, { "epoch": 4.156709108716944, "grad_norm": 0.08454163666818436, "learning_rate": 7.313435418602388e-07, "loss": 0.4105, "num_tokens": 2012852391.0, "step": 2124 }, { "epoch": 4.158667972575906, "grad_norm": 0.08416398576314223, "learning_rate": 7.280461848945957e-07, "loss": 0.4219, "num_tokens": 2013781590.0, "step": 2125 }, { "epoch": 4.160626836434868, "grad_norm": 0.08449507440365876, "learning_rate": 7.247556942425094e-07, "loss": 0.4053, "num_tokens": 2014724385.0, "step": 2126 }, { "epoch": 4.16258570029383, "grad_norm": 0.08487978542152061, "learning_rate": 7.214720751927862e-07, "loss": 0.4163, "num_tokens": 2015687056.0, "step": 2127 }, { "epoch": 4.164544564152791, "grad_norm": 0.08553567224887644, "learning_rate": 7.181953330231878e-07, "loss": 0.4099, "num_tokens": 2016620091.0, "step": 2128 }, { "epoch": 4.166503428011753, "grad_norm": 0.0853119108817595, "learning_rate": 7.149254730004246e-07, "loss": 0.4157, "num_tokens": 2017554377.0, "step": 2129 }, { "epoch": 4.168462291870715, "grad_norm": 0.08093729368936112, "learning_rate": 7.116625003801436e-07, "loss": 0.3992, "num_tokens": 2018515914.0, "step": 2130 }, { "epoch": 4.170421155729676, "grad_norm": 0.08568561410916128, "learning_rate": 7.084064204069236e-07, "loss": 0.396, "num_tokens": 2019453253.0, "step": 2131 }, { "epoch": 4.172380019588639, "grad_norm": 0.0849945814766733, "learning_rate": 7.05157238314263e-07, "loss": 0.4296, "num_tokens": 2020398435.0, "step": 2132 }, { "epoch": 4.1743388834476, "grad_norm": 0.08783575006158219, "learning_rate": 7.019149593245722e-07, "loss": 0.4089, "num_tokens": 2021307637.0, "step": 2133 }, { "epoch": 4.176297747306562, "grad_norm": 0.0843997620643785, "learning_rate": 6.986795886491704e-07, "loss": 0.4212, "num_tokens": 2022262326.0, "step": 2134 }, { "epoch": 4.178256611165524, "grad_norm": 0.08199608429243559, "learning_rate": 6.954511314882695e-07, "loss": 0.4037, "num_tokens": 2023222076.0, "step": 2135 }, { "epoch": 4.180215475024486, "grad_norm": 0.08402598227606085, "learning_rate": 6.922295930309691e-07, "loss": 0.4227, "num_tokens": 2024171135.0, "step": 2136 }, { "epoch": 4.182174338883447, "grad_norm": 0.08157805519585351, "learning_rate": 6.890149784552519e-07, "loss": 0.4116, "num_tokens": 2025145287.0, "step": 2137 }, { "epoch": 4.18413320274241, "grad_norm": 0.08690398372361305, "learning_rate": 6.858072929279691e-07, "loss": 0.429, "num_tokens": 2026067710.0, "step": 2138 }, { "epoch": 4.186092066601371, "grad_norm": 0.08265160839080408, "learning_rate": 6.826065416048327e-07, "loss": 0.4135, "num_tokens": 2027026586.0, "step": 2139 }, { "epoch": 4.188050930460333, "grad_norm": 0.08346277029597797, "learning_rate": 6.794127296304154e-07, "loss": 0.4175, "num_tokens": 2028018740.0, "step": 2140 }, { "epoch": 4.190009794319295, "grad_norm": 0.0834534109184271, "learning_rate": 6.762258621381313e-07, "loss": 0.4191, "num_tokens": 2028994550.0, "step": 2141 }, { "epoch": 4.191968658178257, "grad_norm": 0.08465385376506786, "learning_rate": 6.73045944250233e-07, "loss": 0.4133, "num_tokens": 2029923918.0, "step": 2142 }, { "epoch": 4.193927522037218, "grad_norm": 0.08619047456406939, "learning_rate": 6.698729810778065e-07, "loss": 0.4306, "num_tokens": 2030894928.0, "step": 2143 }, { "epoch": 4.195886385896181, "grad_norm": 0.08419196548036316, "learning_rate": 6.667069777207563e-07, "loss": 0.41, "num_tokens": 2031844940.0, "step": 2144 }, { "epoch": 4.197845249755142, "grad_norm": 0.08361444455541482, "learning_rate": 6.63547939267799e-07, "loss": 0.4019, "num_tokens": 2032801382.0, "step": 2145 }, { "epoch": 4.199804113614104, "grad_norm": 0.08317461888285774, "learning_rate": 6.603958707964626e-07, "loss": 0.4127, "num_tokens": 2033772314.0, "step": 2146 }, { "epoch": 4.201762977473066, "grad_norm": 0.0848943280652552, "learning_rate": 6.572507773730657e-07, "loss": 0.412, "num_tokens": 2034723023.0, "step": 2147 }, { "epoch": 4.203721841332028, "grad_norm": 0.08437657318695536, "learning_rate": 6.541126640527195e-07, "loss": 0.4054, "num_tokens": 2035677290.0, "step": 2148 }, { "epoch": 4.205680705190989, "grad_norm": 0.08478795354565644, "learning_rate": 6.509815358793148e-07, "loss": 0.4207, "num_tokens": 2036642296.0, "step": 2149 }, { "epoch": 4.207639569049951, "grad_norm": 0.08779979141689355, "learning_rate": 6.478573978855146e-07, "loss": 0.4113, "num_tokens": 2037548272.0, "step": 2150 }, { "epoch": 4.209598432908913, "grad_norm": 0.08180382264066516, "learning_rate": 6.447402550927495e-07, "loss": 0.415, "num_tokens": 2038495772.0, "step": 2151 }, { "epoch": 4.211557296767874, "grad_norm": 0.08371757404507543, "learning_rate": 6.416301125112034e-07, "loss": 0.4111, "num_tokens": 2039428219.0, "step": 2152 }, { "epoch": 4.2135161606268365, "grad_norm": 0.08185201788840223, "learning_rate": 6.385269751398099e-07, "loss": 0.4062, "num_tokens": 2040362205.0, "step": 2153 }, { "epoch": 4.215475024485798, "grad_norm": 0.08487072656465926, "learning_rate": 6.354308479662447e-07, "loss": 0.4075, "num_tokens": 2041311062.0, "step": 2154 }, { "epoch": 4.21743388834476, "grad_norm": 0.08625274537260617, "learning_rate": 6.323417359669132e-07, "loss": 0.4073, "num_tokens": 2042225442.0, "step": 2155 }, { "epoch": 4.219392752203722, "grad_norm": 0.08221294186841123, "learning_rate": 6.292596441069465e-07, "loss": 0.4023, "num_tokens": 2043189469.0, "step": 2156 }, { "epoch": 4.221351616062684, "grad_norm": 0.08339468501248067, "learning_rate": 6.261845773401936e-07, "loss": 0.4097, "num_tokens": 2044130437.0, "step": 2157 }, { "epoch": 4.223310479921645, "grad_norm": 0.08487069018364485, "learning_rate": 6.23116540609211e-07, "loss": 0.4136, "num_tokens": 2045048907.0, "step": 2158 }, { "epoch": 4.2252693437806075, "grad_norm": 0.08453487202604214, "learning_rate": 6.200555388452539e-07, "loss": 0.427, "num_tokens": 2046006203.0, "step": 2159 }, { "epoch": 4.227228207639569, "grad_norm": 0.08761268157203629, "learning_rate": 6.170015769682742e-07, "loss": 0.4231, "num_tokens": 2046896363.0, "step": 2160 }, { "epoch": 4.229187071498531, "grad_norm": 0.08754788294493071, "learning_rate": 6.13954659886905e-07, "loss": 0.4117, "num_tokens": 2047831879.0, "step": 2161 }, { "epoch": 4.2311459353574925, "grad_norm": 0.08407342007603844, "learning_rate": 6.109147924984576e-07, "loss": 0.4114, "num_tokens": 2048792085.0, "step": 2162 }, { "epoch": 4.233104799216455, "grad_norm": 0.08328412407219687, "learning_rate": 6.078819796889135e-07, "loss": 0.4194, "num_tokens": 2049720176.0, "step": 2163 }, { "epoch": 4.235063663075416, "grad_norm": 0.08622209159468973, "learning_rate": 6.048562263329139e-07, "loss": 0.4161, "num_tokens": 2050659907.0, "step": 2164 }, { "epoch": 4.237022526934378, "grad_norm": 0.08357107207721784, "learning_rate": 6.018375372937524e-07, "loss": 0.4305, "num_tokens": 2051624536.0, "step": 2165 }, { "epoch": 4.23898139079334, "grad_norm": 0.08622754908099019, "learning_rate": 5.988259174233713e-07, "loss": 0.406, "num_tokens": 2052521740.0, "step": 2166 }, { "epoch": 4.240940254652302, "grad_norm": 0.08163031176710157, "learning_rate": 5.958213715623479e-07, "loss": 0.4083, "num_tokens": 2053504048.0, "step": 2167 }, { "epoch": 4.2428991185112634, "grad_norm": 0.08585538701462192, "learning_rate": 5.928239045398898e-07, "loss": 0.4179, "num_tokens": 2054424204.0, "step": 2168 }, { "epoch": 4.244857982370226, "grad_norm": 0.08322893752348214, "learning_rate": 5.898335211738287e-07, "loss": 0.4209, "num_tokens": 2055386498.0, "step": 2169 }, { "epoch": 4.246816846229187, "grad_norm": 0.0836286720621953, "learning_rate": 5.868502262706083e-07, "loss": 0.4166, "num_tokens": 2056326300.0, "step": 2170 }, { "epoch": 4.2487757100881485, "grad_norm": 0.08553714321910702, "learning_rate": 5.838740246252794e-07, "loss": 0.4254, "num_tokens": 2057227482.0, "step": 2171 }, { "epoch": 4.250734573947111, "grad_norm": 0.08630438171310291, "learning_rate": 5.80904921021494e-07, "loss": 0.4164, "num_tokens": 2058127330.0, "step": 2172 }, { "epoch": 4.252693437806072, "grad_norm": 0.0824815008477906, "learning_rate": 5.779429202314929e-07, "loss": 0.4168, "num_tokens": 2059096919.0, "step": 2173 }, { "epoch": 4.254652301665034, "grad_norm": 0.08562295305796984, "learning_rate": 5.749880270161024e-07, "loss": 0.4093, "num_tokens": 2060003289.0, "step": 2174 }, { "epoch": 4.256611165523996, "grad_norm": 0.08357240470701881, "learning_rate": 5.720402461247221e-07, "loss": 0.4029, "num_tokens": 2060957367.0, "step": 2175 }, { "epoch": 4.258570029382958, "grad_norm": 0.08382743661617692, "learning_rate": 5.690995822953226e-07, "loss": 0.4111, "num_tokens": 2061942571.0, "step": 2176 }, { "epoch": 4.260528893241919, "grad_norm": 0.08196817106127785, "learning_rate": 5.66166040254435e-07, "loss": 0.4078, "num_tokens": 2062894641.0, "step": 2177 }, { "epoch": 4.262487757100882, "grad_norm": 0.0853492542610442, "learning_rate": 5.632396247171429e-07, "loss": 0.415, "num_tokens": 2063836749.0, "step": 2178 }, { "epoch": 4.264446620959843, "grad_norm": 0.08622616065735546, "learning_rate": 5.603203403870739e-07, "loss": 0.4008, "num_tokens": 2064731613.0, "step": 2179 }, { "epoch": 4.266405484818805, "grad_norm": 0.08526980675529142, "learning_rate": 5.574081919563973e-07, "loss": 0.4228, "num_tokens": 2065718089.0, "step": 2180 }, { "epoch": 4.268364348677767, "grad_norm": 0.08631245593373647, "learning_rate": 5.545031841058101e-07, "loss": 0.4268, "num_tokens": 2066648903.0, "step": 2181 }, { "epoch": 4.270323212536729, "grad_norm": 0.08397979015459724, "learning_rate": 5.516053215045314e-07, "loss": 0.4056, "num_tokens": 2067601088.0, "step": 2182 }, { "epoch": 4.27228207639569, "grad_norm": 0.08476284187754984, "learning_rate": 5.487146088102995e-07, "loss": 0.4072, "num_tokens": 2068570800.0, "step": 2183 }, { "epoch": 4.274240940254653, "grad_norm": 0.082501835170805, "learning_rate": 5.45831050669357e-07, "loss": 0.4139, "num_tokens": 2069560541.0, "step": 2184 }, { "epoch": 4.276199804113614, "grad_norm": 0.08472218411987088, "learning_rate": 5.429546517164486e-07, "loss": 0.4236, "num_tokens": 2070500169.0, "step": 2185 }, { "epoch": 4.278158667972576, "grad_norm": 0.08415131804090892, "learning_rate": 5.400854165748126e-07, "loss": 0.4208, "num_tokens": 2071451080.0, "step": 2186 }, { "epoch": 4.280117531831538, "grad_norm": 0.08524541923262709, "learning_rate": 5.372233498561713e-07, "loss": 0.4186, "num_tokens": 2072377052.0, "step": 2187 }, { "epoch": 4.2820763956905, "grad_norm": 0.08498805380670119, "learning_rate": 5.343684561607249e-07, "loss": 0.4199, "num_tokens": 2073338734.0, "step": 2188 }, { "epoch": 4.284035259549461, "grad_norm": 0.08155953449800538, "learning_rate": 5.315207400771483e-07, "loss": 0.4129, "num_tokens": 2074298989.0, "step": 2189 }, { "epoch": 4.285994123408424, "grad_norm": 0.08224383241962627, "learning_rate": 5.286802061825752e-07, "loss": 0.4097, "num_tokens": 2075272934.0, "step": 2190 }, { "epoch": 4.287952987267385, "grad_norm": 0.08258556595270888, "learning_rate": 5.258468590425969e-07, "loss": 0.4188, "num_tokens": 2076247593.0, "step": 2191 }, { "epoch": 4.289911851126346, "grad_norm": 0.08684278617350298, "learning_rate": 5.230207032112549e-07, "loss": 0.4098, "num_tokens": 2077159368.0, "step": 2192 }, { "epoch": 4.291870714985309, "grad_norm": 0.08136552409319969, "learning_rate": 5.202017432310302e-07, "loss": 0.4165, "num_tokens": 2078142112.0, "step": 2193 }, { "epoch": 4.29382957884427, "grad_norm": 0.0829622828213875, "learning_rate": 5.173899836328383e-07, "loss": 0.4147, "num_tokens": 2079116049.0, "step": 2194 }, { "epoch": 4.295788442703232, "grad_norm": 0.08520556202686495, "learning_rate": 5.145854289360224e-07, "loss": 0.4275, "num_tokens": 2080079397.0, "step": 2195 }, { "epoch": 4.297747306562194, "grad_norm": 0.08559487717274157, "learning_rate": 5.117880836483452e-07, "loss": 0.4218, "num_tokens": 2081033883.0, "step": 2196 }, { "epoch": 4.299706170421156, "grad_norm": 0.08186257263284755, "learning_rate": 5.089979522659799e-07, "loss": 0.389, "num_tokens": 2081955163.0, "step": 2197 }, { "epoch": 4.301665034280117, "grad_norm": 0.08937897689299613, "learning_rate": 5.062150392735071e-07, "loss": 0.4127, "num_tokens": 2082866119.0, "step": 2198 }, { "epoch": 4.3036238981390795, "grad_norm": 0.08526523692463407, "learning_rate": 5.034393491439044e-07, "loss": 0.4102, "num_tokens": 2083846571.0, "step": 2199 }, { "epoch": 4.305582761998041, "grad_norm": 0.08259604272892286, "learning_rate": 5.006708863385379e-07, "loss": 0.4068, "num_tokens": 2084800432.0, "step": 2200 }, { "epoch": 4.307541625857003, "grad_norm": 0.08492654876361953, "learning_rate": 4.979096553071638e-07, "loss": 0.4294, "num_tokens": 2085782308.0, "step": 2201 }, { "epoch": 4.3095004897159646, "grad_norm": 0.08687307163056773, "learning_rate": 4.951556604879049e-07, "loss": 0.4122, "num_tokens": 2086736289.0, "step": 2202 }, { "epoch": 4.311459353574927, "grad_norm": 0.08451750896812836, "learning_rate": 4.924089063072613e-07, "loss": 0.4145, "num_tokens": 2087672786.0, "step": 2203 }, { "epoch": 4.313418217433888, "grad_norm": 0.08381739276592902, "learning_rate": 4.896693971800925e-07, "loss": 0.4216, "num_tokens": 2088630583.0, "step": 2204 }, { "epoch": 4.3153770812928505, "grad_norm": 0.08897984130235548, "learning_rate": 4.869371375096115e-07, "loss": 0.4133, "num_tokens": 2089555353.0, "step": 2205 }, { "epoch": 4.317335945151812, "grad_norm": 0.08334445269234497, "learning_rate": 4.842121316873821e-07, "loss": 0.3994, "num_tokens": 2090483402.0, "step": 2206 }, { "epoch": 4.319294809010774, "grad_norm": 0.08202820823655337, "learning_rate": 4.814943840933073e-07, "loss": 0.4025, "num_tokens": 2091426935.0, "step": 2207 }, { "epoch": 4.3212536728697355, "grad_norm": 0.08770868215761991, "learning_rate": 4.787838990956228e-07, "loss": 0.4196, "num_tokens": 2092364121.0, "step": 2208 }, { "epoch": 4.323212536728698, "grad_norm": 0.08734326654192257, "learning_rate": 4.76080681050895e-07, "loss": 0.4121, "num_tokens": 2093306474.0, "step": 2209 }, { "epoch": 4.325171400587659, "grad_norm": 0.08315994682821065, "learning_rate": 4.73384734304006e-07, "loss": 0.4115, "num_tokens": 2094259529.0, "step": 2210 }, { "epoch": 4.327130264446621, "grad_norm": 0.08058722757886908, "learning_rate": 4.7069606318815133e-07, "loss": 0.3994, "num_tokens": 2095200979.0, "step": 2211 }, { "epoch": 4.329089128305583, "grad_norm": 0.08591503600597408, "learning_rate": 4.680146720248363e-07, "loss": 0.4242, "num_tokens": 2096145206.0, "step": 2212 }, { "epoch": 4.331047992164544, "grad_norm": 0.08200645306346635, "learning_rate": 4.653405651238607e-07, "loss": 0.4078, "num_tokens": 2097099830.0, "step": 2213 }, { "epoch": 4.333006856023506, "grad_norm": 0.082613043339824, "learning_rate": 4.62673746783317e-07, "loss": 0.4056, "num_tokens": 2098086966.0, "step": 2214 }, { "epoch": 4.334965719882468, "grad_norm": 0.08304868702514229, "learning_rate": 4.600142212895853e-07, "loss": 0.4122, "num_tokens": 2099028865.0, "step": 2215 }, { "epoch": 4.33692458374143, "grad_norm": 0.08005491049253148, "learning_rate": 4.5736199291732174e-07, "loss": 0.4126, "num_tokens": 2100028776.0, "step": 2216 }, { "epoch": 4.3388834476003915, "grad_norm": 0.08318850806063391, "learning_rate": 4.547170659294536e-07, "loss": 0.4123, "num_tokens": 2100966982.0, "step": 2217 }, { "epoch": 4.340842311459354, "grad_norm": 0.08207739567563224, "learning_rate": 4.5207944457717477e-07, "loss": 0.4183, "num_tokens": 2101944250.0, "step": 2218 }, { "epoch": 4.342801175318315, "grad_norm": 0.08152074304049574, "learning_rate": 4.494491330999351e-07, "loss": 0.4055, "num_tokens": 2102886073.0, "step": 2219 }, { "epoch": 4.344760039177277, "grad_norm": 0.08474302020174879, "learning_rate": 4.468261357254339e-07, "loss": 0.4154, "num_tokens": 2103819573.0, "step": 2220 }, { "epoch": 4.346718903036239, "grad_norm": 0.08638245066452833, "learning_rate": 4.442104566696187e-07, "loss": 0.4304, "num_tokens": 2104761432.0, "step": 2221 }, { "epoch": 4.348677766895201, "grad_norm": 0.08386037076332054, "learning_rate": 4.416021001366699e-07, "loss": 0.4183, "num_tokens": 2105697909.0, "step": 2222 }, { "epoch": 4.350636630754162, "grad_norm": 0.08448003629016722, "learning_rate": 4.390010703190006e-07, "loss": 0.3955, "num_tokens": 2106637338.0, "step": 2223 }, { "epoch": 4.352595494613125, "grad_norm": 0.084315991238533, "learning_rate": 4.364073713972489e-07, "loss": 0.406, "num_tokens": 2107559331.0, "step": 2224 }, { "epoch": 4.354554358472086, "grad_norm": 0.08402430619713995, "learning_rate": 4.338210075402666e-07, "loss": 0.4289, "num_tokens": 2108527970.0, "step": 2225 }, { "epoch": 4.356513222331048, "grad_norm": 0.08512842873957444, "learning_rate": 4.312419829051173e-07, "loss": 0.4143, "num_tokens": 2109475714.0, "step": 2226 }, { "epoch": 4.35847208619001, "grad_norm": 0.08384900087958944, "learning_rate": 4.286703016370719e-07, "loss": 0.4208, "num_tokens": 2110460226.0, "step": 2227 }, { "epoch": 4.360430950048972, "grad_norm": 0.08390734204509748, "learning_rate": 4.261059678695906e-07, "loss": 0.4197, "num_tokens": 2111415213.0, "step": 2228 }, { "epoch": 4.362389813907933, "grad_norm": 0.08199699894461897, "learning_rate": 4.2354898572433136e-07, "loss": 0.4022, "num_tokens": 2112335020.0, "step": 2229 }, { "epoch": 4.364348677766896, "grad_norm": 0.08426215459972695, "learning_rate": 4.209993593111317e-07, "loss": 0.4065, "num_tokens": 2113274626.0, "step": 2230 }, { "epoch": 4.366307541625857, "grad_norm": 0.08147690133629366, "learning_rate": 4.184570927280074e-07, "loss": 0.4006, "num_tokens": 2114245201.0, "step": 2231 }, { "epoch": 4.368266405484819, "grad_norm": 0.08622121832133177, "learning_rate": 4.1592219006114597e-07, "loss": 0.4256, "num_tokens": 2115197965.0, "step": 2232 }, { "epoch": 4.370225269343781, "grad_norm": 0.08048327534010842, "learning_rate": 4.1339465538489666e-07, "loss": 0.4002, "num_tokens": 2116157469.0, "step": 2233 }, { "epoch": 4.372184133202742, "grad_norm": 0.08493546129868577, "learning_rate": 4.108744927617669e-07, "loss": 0.4058, "num_tokens": 2117128306.0, "step": 2234 }, { "epoch": 4.374142997061704, "grad_norm": 0.08182394768372599, "learning_rate": 4.083617062424172e-07, "loss": 0.4226, "num_tokens": 2118104858.0, "step": 2235 }, { "epoch": 4.376101860920666, "grad_norm": 0.08122698124721504, "learning_rate": 4.0585629986564954e-07, "loss": 0.4094, "num_tokens": 2119080292.0, "step": 2236 }, { "epoch": 4.378060724779628, "grad_norm": 0.08298022529596868, "learning_rate": 4.033582776584033e-07, "loss": 0.4056, "num_tokens": 2120028954.0, "step": 2237 }, { "epoch": 4.380019588638589, "grad_norm": 0.0846494002618381, "learning_rate": 4.008676436357539e-07, "loss": 0.405, "num_tokens": 2120966719.0, "step": 2238 }, { "epoch": 4.381978452497552, "grad_norm": 0.08372569010082824, "learning_rate": 3.98384401800897e-07, "loss": 0.4167, "num_tokens": 2121904322.0, "step": 2239 }, { "epoch": 4.383937316356513, "grad_norm": 0.08581288762711979, "learning_rate": 3.9590855614514757e-07, "loss": 0.4311, "num_tokens": 2122858061.0, "step": 2240 }, { "epoch": 4.385896180215475, "grad_norm": 0.08413316811852717, "learning_rate": 3.934401106479352e-07, "loss": 0.4107, "num_tokens": 2123804370.0, "step": 2241 }, { "epoch": 4.387855044074437, "grad_norm": 0.08331964490849027, "learning_rate": 3.909790692767934e-07, "loss": 0.403, "num_tokens": 2124702332.0, "step": 2242 }, { "epoch": 4.389813907933399, "grad_norm": 0.08498461246276752, "learning_rate": 3.8852543598735413e-07, "loss": 0.4057, "num_tokens": 2125605612.0, "step": 2243 }, { "epoch": 4.39177277179236, "grad_norm": 0.08226870123121025, "learning_rate": 3.86079214723345e-07, "loss": 0.4101, "num_tokens": 2126590824.0, "step": 2244 }, { "epoch": 4.3937316356513225, "grad_norm": 0.08362359404104432, "learning_rate": 3.8364040941657685e-07, "loss": 0.4187, "num_tokens": 2127552800.0, "step": 2245 }, { "epoch": 4.395690499510284, "grad_norm": 0.08441545341310425, "learning_rate": 3.812090239869448e-07, "loss": 0.4343, "num_tokens": 2128515212.0, "step": 2246 }, { "epoch": 4.397649363369246, "grad_norm": 0.08384512349331089, "learning_rate": 3.78785062342415e-07, "loss": 0.3941, "num_tokens": 2129442558.0, "step": 2247 }, { "epoch": 4.3996082272282075, "grad_norm": 0.08129428902959814, "learning_rate": 3.763685283790208e-07, "loss": 0.4083, "num_tokens": 2130423219.0, "step": 2248 }, { "epoch": 4.40156709108717, "grad_norm": 0.08373615458913583, "learning_rate": 3.73959425980861e-07, "loss": 0.422, "num_tokens": 2131383809.0, "step": 2249 }, { "epoch": 4.403525954946131, "grad_norm": 0.08493424193543722, "learning_rate": 3.7155775902008527e-07, "loss": 0.4104, "num_tokens": 2132363826.0, "step": 2250 }, { "epoch": 4.4054848188050935, "grad_norm": 0.08591869997551338, "learning_rate": 3.691635313568942e-07, "loss": 0.4136, "num_tokens": 2133338319.0, "step": 2251 }, { "epoch": 4.407443682664055, "grad_norm": 0.08332966368334885, "learning_rate": 3.667767468395328e-07, "loss": 0.4201, "num_tokens": 2134278623.0, "step": 2252 }, { "epoch": 4.409402546523017, "grad_norm": 0.08221686813731542, "learning_rate": 3.643974093042807e-07, "loss": 0.3929, "num_tokens": 2135231101.0, "step": 2253 }, { "epoch": 4.4113614103819785, "grad_norm": 0.0836099528841157, "learning_rate": 3.620255225754454e-07, "loss": 0.3987, "num_tokens": 2136147587.0, "step": 2254 }, { "epoch": 4.41332027424094, "grad_norm": 0.08010666325081656, "learning_rate": 3.596610904653652e-07, "loss": 0.3988, "num_tokens": 2137099188.0, "step": 2255 }, { "epoch": 4.415279138099902, "grad_norm": 0.08403197010040314, "learning_rate": 3.5730411677439125e-07, "loss": 0.4124, "num_tokens": 2138041070.0, "step": 2256 }, { "epoch": 4.4172380019588635, "grad_norm": 0.0852261359677142, "learning_rate": 3.5495460529088865e-07, "loss": 0.4135, "num_tokens": 2138959695.0, "step": 2257 }, { "epoch": 4.419196865817826, "grad_norm": 0.08243881441915217, "learning_rate": 3.5261255979122946e-07, "loss": 0.4095, "num_tokens": 2139883279.0, "step": 2258 }, { "epoch": 4.421155729676787, "grad_norm": 0.08835912267629452, "learning_rate": 3.5027798403978294e-07, "loss": 0.4239, "num_tokens": 2140831527.0, "step": 2259 }, { "epoch": 4.423114593535749, "grad_norm": 0.08537049208908011, "learning_rate": 3.4795088178891543e-07, "loss": 0.4229, "num_tokens": 2141779156.0, "step": 2260 }, { "epoch": 4.425073457394711, "grad_norm": 0.08880943437711142, "learning_rate": 3.4563125677897936e-07, "loss": 0.4103, "num_tokens": 2142725642.0, "step": 2261 }, { "epoch": 4.427032321253673, "grad_norm": 0.0833425437987292, "learning_rate": 3.433191127383079e-07, "loss": 0.4218, "num_tokens": 2143677235.0, "step": 2262 }, { "epoch": 4.4289911851126345, "grad_norm": 0.0846244693188937, "learning_rate": 3.410144533832127e-07, "loss": 0.4131, "num_tokens": 2144611510.0, "step": 2263 }, { "epoch": 4.430950048971597, "grad_norm": 0.08527690690387682, "learning_rate": 3.3871728241797406e-07, "loss": 0.4273, "num_tokens": 2145590702.0, "step": 2264 }, { "epoch": 4.432908912830558, "grad_norm": 0.08344372448433705, "learning_rate": 3.364276035348346e-07, "loss": 0.4171, "num_tokens": 2146540219.0, "step": 2265 }, { "epoch": 4.43486777668952, "grad_norm": 0.08558048600760117, "learning_rate": 3.3414542041399764e-07, "loss": 0.4152, "num_tokens": 2147496479.0, "step": 2266 }, { "epoch": 4.436826640548482, "grad_norm": 0.08380365344074671, "learning_rate": 3.318707367236174e-07, "loss": 0.4077, "num_tokens": 2148462954.0, "step": 2267 }, { "epoch": 4.438785504407444, "grad_norm": 0.08100272244534662, "learning_rate": 3.2960355611979245e-07, "loss": 0.4093, "num_tokens": 2149440780.0, "step": 2268 }, { "epoch": 4.440744368266405, "grad_norm": 0.08705743733780545, "learning_rate": 3.2734388224656575e-07, "loss": 0.418, "num_tokens": 2150369881.0, "step": 2269 }, { "epoch": 4.442703232125368, "grad_norm": 0.08330266108193204, "learning_rate": 3.2509171873591095e-07, "loss": 0.4, "num_tokens": 2151281705.0, "step": 2270 }, { "epoch": 4.444662095984329, "grad_norm": 0.08065185209022314, "learning_rate": 3.2284706920773067e-07, "loss": 0.4111, "num_tokens": 2152244910.0, "step": 2271 }, { "epoch": 4.44662095984329, "grad_norm": 0.0852183829272433, "learning_rate": 3.206099372698529e-07, "loss": 0.4036, "num_tokens": 2153198567.0, "step": 2272 }, { "epoch": 4.448579823702253, "grad_norm": 0.08618367096331993, "learning_rate": 3.183803265180208e-07, "loss": 0.4262, "num_tokens": 2154141710.0, "step": 2273 }, { "epoch": 4.450538687561215, "grad_norm": 0.08260727856664073, "learning_rate": 3.1615824053588764e-07, "loss": 0.4168, "num_tokens": 2155127523.0, "step": 2274 }, { "epoch": 4.452497551420176, "grad_norm": 0.08570730391592203, "learning_rate": 3.139436828950143e-07, "loss": 0.4113, "num_tokens": 2156076271.0, "step": 2275 }, { "epoch": 4.454456415279138, "grad_norm": 0.086741957208345, "learning_rate": 3.1173665715486076e-07, "loss": 0.4305, "num_tokens": 2157004084.0, "step": 2276 }, { "epoch": 4.4564152791381, "grad_norm": 0.08418931419252824, "learning_rate": 3.0953716686277944e-07, "loss": 0.3968, "num_tokens": 2157937532.0, "step": 2277 }, { "epoch": 4.458374142997061, "grad_norm": 0.08326658418956387, "learning_rate": 3.073452155540135e-07, "loss": 0.4109, "num_tokens": 2158874784.0, "step": 2278 }, { "epoch": 4.460333006856024, "grad_norm": 0.08141615131737735, "learning_rate": 3.0516080675168645e-07, "loss": 0.4004, "num_tokens": 2159841810.0, "step": 2279 }, { "epoch": 4.462291870714985, "grad_norm": 0.08284872071901574, "learning_rate": 3.029839439668003e-07, "loss": 0.4135, "num_tokens": 2160783954.0, "step": 2280 }, { "epoch": 4.464250734573947, "grad_norm": 0.08508399315997507, "learning_rate": 3.008146306982274e-07, "loss": 0.4084, "num_tokens": 2161701970.0, "step": 2281 }, { "epoch": 4.466209598432909, "grad_norm": 0.08563041485398272, "learning_rate": 2.9865287043270517e-07, "loss": 0.4097, "num_tokens": 2162627195.0, "step": 2282 }, { "epoch": 4.468168462291871, "grad_norm": 0.08574590565029692, "learning_rate": 2.9649866664483387e-07, "loss": 0.4021, "num_tokens": 2163552297.0, "step": 2283 }, { "epoch": 4.470127326150832, "grad_norm": 0.08449605980534906, "learning_rate": 2.943520227970653e-07, "loss": 0.4216, "num_tokens": 2164506134.0, "step": 2284 }, { "epoch": 4.472086190009795, "grad_norm": 0.08781311090859446, "learning_rate": 2.922129423397008e-07, "loss": 0.4009, "num_tokens": 2165440702.0, "step": 2285 }, { "epoch": 4.474045053868756, "grad_norm": 0.08384345960497813, "learning_rate": 2.9008142871088665e-07, "loss": 0.4124, "num_tokens": 2166396224.0, "step": 2286 }, { "epoch": 4.476003917727718, "grad_norm": 0.0826491722653405, "learning_rate": 2.8795748533660515e-07, "loss": 0.3996, "num_tokens": 2167356792.0, "step": 2287 }, { "epoch": 4.47796278158668, "grad_norm": 0.08388780111752804, "learning_rate": 2.8584111563067065e-07, "loss": 0.4069, "num_tokens": 2168322722.0, "step": 2288 }, { "epoch": 4.479921645445642, "grad_norm": 0.08150606305818417, "learning_rate": 2.83732322994727e-07, "loss": 0.3975, "num_tokens": 2169253064.0, "step": 2289 }, { "epoch": 4.481880509304603, "grad_norm": 0.08481783368686523, "learning_rate": 2.816311108182368e-07, "loss": 0.4199, "num_tokens": 2170195849.0, "step": 2290 }, { "epoch": 4.4838393731635655, "grad_norm": 0.08559890570711427, "learning_rate": 2.795374824784791e-07, "loss": 0.4134, "num_tokens": 2171151219.0, "step": 2291 }, { "epoch": 4.485798237022527, "grad_norm": 0.08538626033193797, "learning_rate": 2.774514413405444e-07, "loss": 0.4113, "num_tokens": 2172080606.0, "step": 2292 }, { "epoch": 4.487757100881488, "grad_norm": 0.08300910596689294, "learning_rate": 2.7537299075732824e-07, "loss": 0.4043, "num_tokens": 2173073159.0, "step": 2293 }, { "epoch": 4.4897159647404505, "grad_norm": 0.08618134206206447, "learning_rate": 2.7330213406952357e-07, "loss": 0.412, "num_tokens": 2174037855.0, "step": 2294 }, { "epoch": 4.491674828599413, "grad_norm": 0.09072514236070624, "learning_rate": 2.71238874605621e-07, "loss": 0.4361, "num_tokens": 2174993777.0, "step": 2295 }, { "epoch": 4.493633692458374, "grad_norm": 0.08157708889530797, "learning_rate": 2.69183215681898e-07, "loss": 0.4031, "num_tokens": 2175966634.0, "step": 2296 }, { "epoch": 4.495592556317336, "grad_norm": 0.08577225461564279, "learning_rate": 2.671351606024153e-07, "loss": 0.4098, "num_tokens": 2176900109.0, "step": 2297 }, { "epoch": 4.497551420176298, "grad_norm": 0.08731063558403147, "learning_rate": 2.6509471265901476e-07, "loss": 0.4135, "num_tokens": 2177793712.0, "step": 2298 }, { "epoch": 4.499510284035259, "grad_norm": 0.08477879481994724, "learning_rate": 2.6306187513130853e-07, "loss": 0.411, "num_tokens": 2178711336.0, "step": 2299 }, { "epoch": 4.5014691478942215, "grad_norm": 0.0847002053644622, "learning_rate": 2.6103665128667633e-07, "loss": 0.4204, "num_tokens": 2179653814.0, "step": 2300 }, { "epoch": 4.503428011753183, "grad_norm": 0.08441303701627194, "learning_rate": 2.590190443802626e-07, "loss": 0.4099, "num_tokens": 2180594507.0, "step": 2301 }, { "epoch": 4.505386875612145, "grad_norm": 0.08581206332258794, "learning_rate": 2.570090576549683e-07, "loss": 0.4142, "num_tokens": 2181528961.0, "step": 2302 }, { "epoch": 4.5073457394711065, "grad_norm": 0.08619673806619452, "learning_rate": 2.550066943414453e-07, "loss": 0.4108, "num_tokens": 2182439074.0, "step": 2303 }, { "epoch": 4.509304603330069, "grad_norm": 0.08664944180446546, "learning_rate": 2.530119576580936e-07, "loss": 0.4313, "num_tokens": 2183381993.0, "step": 2304 }, { "epoch": 4.51126346718903, "grad_norm": 0.0892073424588054, "learning_rate": 2.5102485081105464e-07, "loss": 0.4191, "num_tokens": 2184321837.0, "step": 2305 }, { "epoch": 4.513222331047992, "grad_norm": 0.08470427006361182, "learning_rate": 2.4904537699420585e-07, "loss": 0.4167, "num_tokens": 2185269117.0, "step": 2306 }, { "epoch": 4.515181194906954, "grad_norm": 0.08263284273956638, "learning_rate": 2.4707353938915656e-07, "loss": 0.4, "num_tokens": 2186215907.0, "step": 2307 }, { "epoch": 4.517140058765916, "grad_norm": 0.08322266205259499, "learning_rate": 2.4510934116524167e-07, "loss": 0.4045, "num_tokens": 2187156847.0, "step": 2308 }, { "epoch": 4.5190989226248774, "grad_norm": 0.08479976261062598, "learning_rate": 2.431527854795196e-07, "loss": 0.421, "num_tokens": 2188122917.0, "step": 2309 }, { "epoch": 4.52105778648384, "grad_norm": 0.08043269562909104, "learning_rate": 2.412038754767626e-07, "loss": 0.4051, "num_tokens": 2189076512.0, "step": 2310 }, { "epoch": 4.523016650342801, "grad_norm": 0.08492742322181865, "learning_rate": 2.3926261428945386e-07, "loss": 0.4235, "num_tokens": 2190025165.0, "step": 2311 }, { "epoch": 4.524975514201763, "grad_norm": 0.08560696506075864, "learning_rate": 2.3732900503778523e-07, "loss": 0.4143, "num_tokens": 2190951086.0, "step": 2312 }, { "epoch": 4.526934378060725, "grad_norm": 0.08009458542404425, "learning_rate": 2.354030508296462e-07, "loss": 0.3956, "num_tokens": 2191901846.0, "step": 2313 }, { "epoch": 4.528893241919686, "grad_norm": 0.0847903675690526, "learning_rate": 2.3348475476062448e-07, "loss": 0.4034, "num_tokens": 2192855624.0, "step": 2314 }, { "epoch": 4.530852105778648, "grad_norm": 0.08344768867181616, "learning_rate": 2.3157411991399925e-07, "loss": 0.4002, "num_tokens": 2193794408.0, "step": 2315 }, { "epoch": 4.532810969637611, "grad_norm": 0.08278655922588328, "learning_rate": 2.2967114936073342e-07, "loss": 0.4308, "num_tokens": 2194760213.0, "step": 2316 }, { "epoch": 4.534769833496572, "grad_norm": 0.08429760750186942, "learning_rate": 2.2777584615947313e-07, "loss": 0.4286, "num_tokens": 2195732735.0, "step": 2317 }, { "epoch": 4.536728697355533, "grad_norm": 0.0826348218518181, "learning_rate": 2.2588821335654044e-07, "loss": 0.4083, "num_tokens": 2196684662.0, "step": 2318 }, { "epoch": 4.538687561214496, "grad_norm": 0.08613819047594629, "learning_rate": 2.240082539859284e-07, "loss": 0.4262, "num_tokens": 2197653734.0, "step": 2319 }, { "epoch": 4.540646425073457, "grad_norm": 0.08738738991907348, "learning_rate": 2.2213597106929608e-07, "loss": 0.4253, "num_tokens": 2198584761.0, "step": 2320 }, { "epoch": 4.542605288932419, "grad_norm": 0.08283335755329545, "learning_rate": 2.2027136761596623e-07, "loss": 0.411, "num_tokens": 2199518797.0, "step": 2321 }, { "epoch": 4.544564152791381, "grad_norm": 0.08574179247280905, "learning_rate": 2.1841444662291543e-07, "loss": 0.4085, "num_tokens": 2200465168.0, "step": 2322 }, { "epoch": 4.546523016650343, "grad_norm": 0.08510442520869327, "learning_rate": 2.16565211074774e-07, "loss": 0.4178, "num_tokens": 2201420313.0, "step": 2323 }, { "epoch": 4.548481880509304, "grad_norm": 0.0827048706537416, "learning_rate": 2.147236639438205e-07, "loss": 0.406, "num_tokens": 2202345114.0, "step": 2324 }, { "epoch": 4.550440744368267, "grad_norm": 0.08470254714454753, "learning_rate": 2.1288980818997272e-07, "loss": 0.4162, "num_tokens": 2203283922.0, "step": 2325 }, { "epoch": 4.552399608227228, "grad_norm": 0.08173116438802071, "learning_rate": 2.1106364676078906e-07, "loss": 0.3965, "num_tokens": 2204239649.0, "step": 2326 }, { "epoch": 4.55435847208619, "grad_norm": 0.0815631082705351, "learning_rate": 2.0924518259145933e-07, "loss": 0.4143, "num_tokens": 2205198101.0, "step": 2327 }, { "epoch": 4.556317335945152, "grad_norm": 0.08418789645376833, "learning_rate": 2.0743441860480218e-07, "loss": 0.4087, "num_tokens": 2206132206.0, "step": 2328 }, { "epoch": 4.558276199804114, "grad_norm": 0.08308236583244617, "learning_rate": 2.0563135771125896e-07, "loss": 0.4166, "num_tokens": 2207092223.0, "step": 2329 }, { "epoch": 4.560235063663075, "grad_norm": 0.08685073165635805, "learning_rate": 2.0383600280889148e-07, "loss": 0.4254, "num_tokens": 2208040243.0, "step": 2330 }, { "epoch": 4.562193927522038, "grad_norm": 0.0816330532045113, "learning_rate": 2.0204835678337363e-07, "loss": 0.4104, "num_tokens": 2208988026.0, "step": 2331 }, { "epoch": 4.564152791380999, "grad_norm": 0.08342726520601144, "learning_rate": 2.0026842250799038e-07, "loss": 0.414, "num_tokens": 2209933323.0, "step": 2332 }, { "epoch": 4.566111655239961, "grad_norm": 0.08594110751750678, "learning_rate": 1.9849620284363047e-07, "loss": 0.4214, "num_tokens": 2210879277.0, "step": 2333 }, { "epoch": 4.568070519098923, "grad_norm": 0.08322421671137203, "learning_rate": 1.967317006387831e-07, "loss": 0.4018, "num_tokens": 2211792338.0, "step": 2334 }, { "epoch": 4.570029382957884, "grad_norm": 0.08366428902197062, "learning_rate": 1.9497491872953466e-07, "loss": 0.406, "num_tokens": 2212749293.0, "step": 2335 }, { "epoch": 4.571988246816846, "grad_norm": 0.08728341413641615, "learning_rate": 1.9322585993956145e-07, "loss": 0.4187, "num_tokens": 2213676954.0, "step": 2336 }, { "epoch": 4.5739471106758085, "grad_norm": 0.07969672172446281, "learning_rate": 1.9148452708012522e-07, "loss": 0.4113, "num_tokens": 2214654180.0, "step": 2337 }, { "epoch": 4.57590597453477, "grad_norm": 0.08440667566458711, "learning_rate": 1.897509229500727e-07, "loss": 0.4115, "num_tokens": 2215566546.0, "step": 2338 }, { "epoch": 4.577864838393731, "grad_norm": 0.08328162617540631, "learning_rate": 1.8802505033582608e-07, "loss": 0.388, "num_tokens": 2216506180.0, "step": 2339 }, { "epoch": 4.5798237022526935, "grad_norm": 0.08060675956140442, "learning_rate": 1.863069120113814e-07, "loss": 0.4117, "num_tokens": 2217488576.0, "step": 2340 }, { "epoch": 4.581782566111655, "grad_norm": 0.0814430156731195, "learning_rate": 1.8459651073830297e-07, "loss": 0.4115, "num_tokens": 2218451065.0, "step": 2341 }, { "epoch": 4.583741429970617, "grad_norm": 0.08540439417185391, "learning_rate": 1.8289384926572062e-07, "loss": 0.4227, "num_tokens": 2219388730.0, "step": 2342 }, { "epoch": 4.5857002938295786, "grad_norm": 0.08222524864127444, "learning_rate": 1.8119893033032132e-07, "loss": 0.4024, "num_tokens": 2220360189.0, "step": 2343 }, { "epoch": 4.587659157688541, "grad_norm": 0.0853117061101539, "learning_rate": 1.795117566563509e-07, "loss": 0.4223, "num_tokens": 2221316945.0, "step": 2344 }, { "epoch": 4.589618021547502, "grad_norm": 0.0867948644050997, "learning_rate": 1.778323309556035e-07, "loss": 0.438, "num_tokens": 2222244470.0, "step": 2345 }, { "epoch": 4.5915768854064645, "grad_norm": 0.0847031120690201, "learning_rate": 1.7616065592742038e-07, "loss": 0.4095, "num_tokens": 2223206358.0, "step": 2346 }, { "epoch": 4.593535749265426, "grad_norm": 0.08238375704959286, "learning_rate": 1.7449673425868674e-07, "loss": 0.4082, "num_tokens": 2224167450.0, "step": 2347 }, { "epoch": 4.595494613124388, "grad_norm": 0.08226070434458777, "learning_rate": 1.7284056862382492e-07, "loss": 0.3996, "num_tokens": 2225119088.0, "step": 2348 }, { "epoch": 4.5974534769833495, "grad_norm": 0.08492545584995367, "learning_rate": 1.7119216168478947e-07, "loss": 0.4193, "num_tokens": 2226073864.0, "step": 2349 }, { "epoch": 4.599412340842312, "grad_norm": 0.08158618882047468, "learning_rate": 1.6955151609106713e-07, "loss": 0.4164, "num_tokens": 2227067448.0, "step": 2350 }, { "epoch": 4.601371204701273, "grad_norm": 0.08423025376913923, "learning_rate": 1.6791863447966738e-07, "loss": 0.4263, "num_tokens": 2227998850.0, "step": 2351 }, { "epoch": 4.603330068560235, "grad_norm": 0.08110131092092367, "learning_rate": 1.6629351947512195e-07, "loss": 0.4043, "num_tokens": 2228982717.0, "step": 2352 }, { "epoch": 4.605288932419197, "grad_norm": 0.08481134528549475, "learning_rate": 1.6467617368947918e-07, "loss": 0.4204, "num_tokens": 2229924297.0, "step": 2353 }, { "epoch": 4.607247796278159, "grad_norm": 0.08910601063308517, "learning_rate": 1.6306659972230021e-07, "loss": 0.4131, "num_tokens": 2230849377.0, "step": 2354 }, { "epoch": 4.60920666013712, "grad_norm": 0.08470823640302579, "learning_rate": 1.614648001606528e-07, "loss": 0.4192, "num_tokens": 2231804698.0, "step": 2355 }, { "epoch": 4.611165523996082, "grad_norm": 0.07968500335160754, "learning_rate": 1.598707775791114e-07, "loss": 0.3972, "num_tokens": 2232778503.0, "step": 2356 }, { "epoch": 4.613124387855044, "grad_norm": 0.08441112155069348, "learning_rate": 1.582845345397488e-07, "loss": 0.406, "num_tokens": 2233701567.0, "step": 2357 }, { "epoch": 4.615083251714006, "grad_norm": 0.08406791439153873, "learning_rate": 1.5670607359213442e-07, "loss": 0.4166, "num_tokens": 2234655517.0, "step": 2358 }, { "epoch": 4.617042115572968, "grad_norm": 0.08634533203708583, "learning_rate": 1.551353972733305e-07, "loss": 0.4026, "num_tokens": 2235561182.0, "step": 2359 }, { "epoch": 4.619000979431929, "grad_norm": 0.08036240529465088, "learning_rate": 1.5357250810788316e-07, "loss": 0.4112, "num_tokens": 2236524351.0, "step": 2360 }, { "epoch": 4.620959843290891, "grad_norm": 0.08539634637474039, "learning_rate": 1.5201740860782687e-07, "loss": 0.4179, "num_tokens": 2237468329.0, "step": 2361 }, { "epoch": 4.622918707149853, "grad_norm": 0.08293143545477717, "learning_rate": 1.5047010127267393e-07, "loss": 0.4017, "num_tokens": 2238427138.0, "step": 2362 }, { "epoch": 4.624877571008815, "grad_norm": 0.07958122971564409, "learning_rate": 1.48930588589411e-07, "loss": 0.4081, "num_tokens": 2239422421.0, "step": 2363 }, { "epoch": 4.626836434867776, "grad_norm": 0.08192479917068426, "learning_rate": 1.4739887303249877e-07, "loss": 0.4127, "num_tokens": 2240381325.0, "step": 2364 }, { "epoch": 4.628795298726739, "grad_norm": 0.0851374889618941, "learning_rate": 1.45874957063864e-07, "loss": 0.4108, "num_tokens": 2241344982.0, "step": 2365 }, { "epoch": 4.6307541625857, "grad_norm": 0.08630612690419566, "learning_rate": 1.443588431328974e-07, "loss": 0.4118, "num_tokens": 2242308682.0, "step": 2366 }, { "epoch": 4.632713026444662, "grad_norm": 0.08219955862988715, "learning_rate": 1.4285053367645074e-07, "loss": 0.4091, "num_tokens": 2243249460.0, "step": 2367 }, { "epoch": 4.634671890303624, "grad_norm": 0.08206520215294191, "learning_rate": 1.4135003111882928e-07, "loss": 0.4262, "num_tokens": 2244236315.0, "step": 2368 }, { "epoch": 4.636630754162586, "grad_norm": 0.0843696440191487, "learning_rate": 1.3985733787179157e-07, "loss": 0.411, "num_tokens": 2245164535.0, "step": 2369 }, { "epoch": 4.638589618021547, "grad_norm": 0.0822541471251733, "learning_rate": 1.3837245633454512e-07, "loss": 0.4066, "num_tokens": 2246109231.0, "step": 2370 }, { "epoch": 4.64054848188051, "grad_norm": 0.0788509196853584, "learning_rate": 1.3689538889373965e-07, "loss": 0.3823, "num_tokens": 2247063965.0, "step": 2371 }, { "epoch": 4.642507345739471, "grad_norm": 0.08516640770529209, "learning_rate": 1.354261379234678e-07, "loss": 0.4042, "num_tokens": 2247990221.0, "step": 2372 }, { "epoch": 4.644466209598433, "grad_norm": 0.0846260946119953, "learning_rate": 1.3396470578525723e-07, "loss": 0.4203, "num_tokens": 2248905364.0, "step": 2373 }, { "epoch": 4.646425073457395, "grad_norm": 0.08562926935925291, "learning_rate": 1.3251109482806667e-07, "loss": 0.4071, "num_tokens": 2249826876.0, "step": 2374 }, { "epoch": 4.648383937316357, "grad_norm": 0.08280272498249136, "learning_rate": 1.3106530738828836e-07, "loss": 0.4088, "num_tokens": 2250778458.0, "step": 2375 }, { "epoch": 4.650342801175318, "grad_norm": 0.08534850115813652, "learning_rate": 1.2962734578973568e-07, "loss": 0.4254, "num_tokens": 2251720073.0, "step": 2376 }, { "epoch": 4.65230166503428, "grad_norm": 0.08661768596530278, "learning_rate": 1.2819721234364478e-07, "loss": 0.4202, "num_tokens": 2252622073.0, "step": 2377 }, { "epoch": 4.654260528893242, "grad_norm": 0.08497972478587038, "learning_rate": 1.2677490934867088e-07, "loss": 0.4124, "num_tokens": 2253561372.0, "step": 2378 }, { "epoch": 4.656219392752204, "grad_norm": 0.08358185604686996, "learning_rate": 1.253604390908819e-07, "loss": 0.4159, "num_tokens": 2254515196.0, "step": 2379 }, { "epoch": 4.658178256611166, "grad_norm": 0.08438690612513523, "learning_rate": 1.2395380384375654e-07, "loss": 0.4284, "num_tokens": 2255447919.0, "step": 2380 }, { "epoch": 4.660137120470127, "grad_norm": 0.08694154918463079, "learning_rate": 1.2255500586818015e-07, "loss": 0.4162, "num_tokens": 2256387955.0, "step": 2381 }, { "epoch": 4.662095984329089, "grad_norm": 0.0817943680700282, "learning_rate": 1.2116404741244204e-07, "loss": 0.4059, "num_tokens": 2257337818.0, "step": 2382 }, { "epoch": 4.664054848188051, "grad_norm": 0.08703181023937295, "learning_rate": 1.1978093071222995e-07, "loss": 0.4249, "num_tokens": 2258296586.0, "step": 2383 }, { "epoch": 4.666013712047013, "grad_norm": 0.08609012031456915, "learning_rate": 1.1840565799062843e-07, "loss": 0.4176, "num_tokens": 2259223903.0, "step": 2384 }, { "epoch": 4.667972575905974, "grad_norm": 0.08490192424131472, "learning_rate": 1.1703823145811422e-07, "loss": 0.4164, "num_tokens": 2260185068.0, "step": 2385 }, { "epoch": 4.6699314397649365, "grad_norm": 0.08470856216398916, "learning_rate": 1.1567865331255091e-07, "loss": 0.4233, "num_tokens": 2261120302.0, "step": 2386 }, { "epoch": 4.671890303623898, "grad_norm": 0.08384829499901271, "learning_rate": 1.1432692573919158e-07, "loss": 0.4372, "num_tokens": 2262131176.0, "step": 2387 }, { "epoch": 4.67384916748286, "grad_norm": 0.0868669309071135, "learning_rate": 1.1298305091066664e-07, "loss": 0.4049, "num_tokens": 2263079735.0, "step": 2388 }, { "epoch": 4.6758080313418215, "grad_norm": 0.083428730942915, "learning_rate": 1.1164703098698882e-07, "loss": 0.4097, "num_tokens": 2264024547.0, "step": 2389 }, { "epoch": 4.677766895200784, "grad_norm": 0.08185316461580272, "learning_rate": 1.1031886811554204e-07, "loss": 0.4084, "num_tokens": 2264994194.0, "step": 2390 }, { "epoch": 4.679725759059745, "grad_norm": 0.08459364324175946, "learning_rate": 1.0899856443108314e-07, "loss": 0.4031, "num_tokens": 2265923989.0, "step": 2391 }, { "epoch": 4.6816846229187075, "grad_norm": 0.08432846109620747, "learning_rate": 1.0768612205573791e-07, "loss": 0.4028, "num_tokens": 2266848216.0, "step": 2392 }, { "epoch": 4.683643486777669, "grad_norm": 0.0837381766860101, "learning_rate": 1.0638154309899562e-07, "loss": 0.4321, "num_tokens": 2267822961.0, "step": 2393 }, { "epoch": 4.685602350636631, "grad_norm": 0.08297454594420871, "learning_rate": 1.0508482965770506e-07, "loss": 0.4112, "num_tokens": 2268766814.0, "step": 2394 }, { "epoch": 4.6875612144955925, "grad_norm": 0.0880545235876429, "learning_rate": 1.0379598381607681e-07, "loss": 0.4264, "num_tokens": 2269698980.0, "step": 2395 }, { "epoch": 4.689520078354555, "grad_norm": 0.0828724427906624, "learning_rate": 1.0251500764567158e-07, "loss": 0.4116, "num_tokens": 2270630265.0, "step": 2396 }, { "epoch": 4.691478942213516, "grad_norm": 0.08548570246469113, "learning_rate": 1.012419032054035e-07, "loss": 0.4145, "num_tokens": 2271552753.0, "step": 2397 }, { "epoch": 4.6934378060724775, "grad_norm": 0.08550232441760104, "learning_rate": 9.997667254153464e-08, "loss": 0.4154, "num_tokens": 2272485419.0, "step": 2398 }, { "epoch": 4.69539666993144, "grad_norm": 0.0818702276118547, "learning_rate": 9.871931768767051e-08, "loss": 0.4239, "num_tokens": 2273461524.0, "step": 2399 }, { "epoch": 4.697355533790402, "grad_norm": 0.0831329454801517, "learning_rate": 9.746984066475728e-08, "loss": 0.4171, "num_tokens": 2274444871.0, "step": 2400 }, { "epoch": 4.699314397649363, "grad_norm": 0.08695654103675411, "learning_rate": 9.622824348108184e-08, "loss": 0.4365, "num_tokens": 2275392464.0, "step": 2401 }, { "epoch": 4.701273261508325, "grad_norm": 0.08276098441690038, "learning_rate": 9.499452813226284e-08, "loss": 0.4099, "num_tokens": 2276353262.0, "step": 2402 }, { "epoch": 4.703232125367287, "grad_norm": 0.08547505038028375, "learning_rate": 9.376869660125077e-08, "loss": 0.4178, "num_tokens": 2277284565.0, "step": 2403 }, { "epoch": 4.7051909892262485, "grad_norm": 0.08178310517058496, "learning_rate": 9.255075085832732e-08, "loss": 0.4089, "num_tokens": 2278245515.0, "step": 2404 }, { "epoch": 4.707149853085211, "grad_norm": 0.08547233860776729, "learning_rate": 9.134069286109604e-08, "loss": 0.421, "num_tokens": 2279198953.0, "step": 2405 }, { "epoch": 4.709108716944172, "grad_norm": 0.08344008488366243, "learning_rate": 9.013852455448335e-08, "loss": 0.4032, "num_tokens": 2280143860.0, "step": 2406 }, { "epoch": 4.711067580803134, "grad_norm": 0.08252842516593287, "learning_rate": 8.894424787073641e-08, "loss": 0.4172, "num_tokens": 2281116255.0, "step": 2407 }, { "epoch": 4.713026444662096, "grad_norm": 0.08361282068989813, "learning_rate": 8.775786472941528e-08, "loss": 0.4172, "num_tokens": 2282098453.0, "step": 2408 }, { "epoch": 4.714985308521058, "grad_norm": 0.0827717543188308, "learning_rate": 8.657937703739516e-08, "loss": 0.4299, "num_tokens": 2283055930.0, "step": 2409 }, { "epoch": 4.716944172380019, "grad_norm": 0.08230883377703196, "learning_rate": 8.540878668885977e-08, "loss": 0.4092, "num_tokens": 2284026427.0, "step": 2410 }, { "epoch": 4.718903036238982, "grad_norm": 0.09009461696717451, "learning_rate": 8.424609556529905e-08, "loss": 0.4155, "num_tokens": 2284956250.0, "step": 2411 }, { "epoch": 4.720861900097943, "grad_norm": 0.08096454719821111, "learning_rate": 8.309130553550815e-08, "loss": 0.404, "num_tokens": 2285916220.0, "step": 2412 }, { "epoch": 4.722820763956905, "grad_norm": 0.08291039517018342, "learning_rate": 8.19444184555801e-08, "loss": 0.4135, "num_tokens": 2286845304.0, "step": 2413 }, { "epoch": 4.724779627815867, "grad_norm": 0.0827014311360937, "learning_rate": 8.080543616890812e-08, "loss": 0.4009, "num_tokens": 2287812536.0, "step": 2414 }, { "epoch": 4.726738491674829, "grad_norm": 0.09025428355380949, "learning_rate": 7.967436050617893e-08, "loss": 0.4096, "num_tokens": 2288731826.0, "step": 2415 }, { "epoch": 4.72869735553379, "grad_norm": 0.0844809939820929, "learning_rate": 7.855119328537109e-08, "loss": 0.4264, "num_tokens": 2289698350.0, "step": 2416 }, { "epoch": 4.730656219392753, "grad_norm": 0.08579543628342719, "learning_rate": 7.743593631175106e-08, "loss": 0.4328, "num_tokens": 2290642899.0, "step": 2417 }, { "epoch": 4.732615083251714, "grad_norm": 0.08294522877285475, "learning_rate": 7.632859137787329e-08, "loss": 0.4104, "num_tokens": 2291575195.0, "step": 2418 }, { "epoch": 4.734573947110675, "grad_norm": 0.0838627781343406, "learning_rate": 7.522916026357297e-08, "loss": 0.4187, "num_tokens": 2292520282.0, "step": 2419 }, { "epoch": 4.736532810969638, "grad_norm": 0.08319567204494978, "learning_rate": 7.413764473596597e-08, "loss": 0.4165, "num_tokens": 2293445706.0, "step": 2420 }, { "epoch": 4.7384916748286, "grad_norm": 0.08012182889960814, "learning_rate": 7.30540465494467e-08, "loss": 0.3993, "num_tokens": 2294430713.0, "step": 2421 }, { "epoch": 4.740450538687561, "grad_norm": 0.08407500721762665, "learning_rate": 7.197836744568254e-08, "loss": 0.4075, "num_tokens": 2295408537.0, "step": 2422 }, { "epoch": 4.742409402546523, "grad_norm": 0.08365778938700633, "learning_rate": 7.09106091536127e-08, "loss": 0.4198, "num_tokens": 2296358794.0, "step": 2423 }, { "epoch": 4.744368266405485, "grad_norm": 0.08414292406344226, "learning_rate": 6.985077338944656e-08, "loss": 0.4173, "num_tokens": 2297315698.0, "step": 2424 }, { "epoch": 4.746327130264446, "grad_norm": 0.08603132400390481, "learning_rate": 6.879886185665818e-08, "loss": 0.4145, "num_tokens": 2298226505.0, "step": 2425 }, { "epoch": 4.748285994123409, "grad_norm": 0.08153954602319959, "learning_rate": 6.775487624598509e-08, "loss": 0.4012, "num_tokens": 2299201729.0, "step": 2426 }, { "epoch": 4.75024485798237, "grad_norm": 0.08304431063848801, "learning_rate": 6.67188182354267e-08, "loss": 0.4131, "num_tokens": 2300150837.0, "step": 2427 }, { "epoch": 4.752203721841332, "grad_norm": 0.08479816275786761, "learning_rate": 6.569068949023983e-08, "loss": 0.4105, "num_tokens": 2301073301.0, "step": 2428 }, { "epoch": 4.754162585700294, "grad_norm": 0.08213140338211498, "learning_rate": 6.467049166293483e-08, "loss": 0.4083, "num_tokens": 2302069644.0, "step": 2429 }, { "epoch": 4.756121449559256, "grad_norm": 0.08392926153014255, "learning_rate": 6.365822639327724e-08, "loss": 0.4025, "num_tokens": 2303024104.0, "step": 2430 }, { "epoch": 4.758080313418217, "grad_norm": 0.0844957988978168, "learning_rate": 6.26538953082817e-08, "loss": 0.4135, "num_tokens": 2303970182.0, "step": 2431 }, { "epoch": 4.7600391772771795, "grad_norm": 0.08504677682528261, "learning_rate": 6.165750002220916e-08, "loss": 0.4284, "num_tokens": 2304931055.0, "step": 2432 }, { "epoch": 4.761998041136141, "grad_norm": 0.08527646674148694, "learning_rate": 6.066904213656743e-08, "loss": 0.4101, "num_tokens": 2305861689.0, "step": 2433 }, { "epoch": 4.763956904995103, "grad_norm": 0.08725237255778938, "learning_rate": 5.968852324010454e-08, "loss": 0.4124, "num_tokens": 2306817684.0, "step": 2434 }, { "epoch": 4.7659157688540645, "grad_norm": 0.0824015101165882, "learning_rate": 5.8715944908809273e-08, "loss": 0.402, "num_tokens": 2307755058.0, "step": 2435 }, { "epoch": 4.767874632713027, "grad_norm": 0.08310793982906324, "learning_rate": 5.775130870590784e-08, "loss": 0.417, "num_tokens": 2308712133.0, "step": 2436 }, { "epoch": 4.769833496571988, "grad_norm": 0.08377938395008862, "learning_rate": 5.679461618185944e-08, "loss": 0.4167, "num_tokens": 2309670577.0, "step": 2437 }, { "epoch": 4.7717923604309505, "grad_norm": 0.08540182526090144, "learning_rate": 5.584586887435739e-08, "loss": 0.422, "num_tokens": 2310610225.0, "step": 2438 }, { "epoch": 4.773751224289912, "grad_norm": 0.08384391855385634, "learning_rate": 5.4905068308323536e-08, "loss": 0.4174, "num_tokens": 2311547266.0, "step": 2439 }, { "epoch": 4.775710088148873, "grad_norm": 0.08547930312731497, "learning_rate": 5.3972215995906076e-08, "loss": 0.4233, "num_tokens": 2312485067.0, "step": 2440 }, { "epoch": 4.7776689520078355, "grad_norm": 0.08399921760551363, "learning_rate": 5.3047313436480064e-08, "loss": 0.4145, "num_tokens": 2313432284.0, "step": 2441 }, { "epoch": 4.779627815866798, "grad_norm": 0.08206044911979275, "learning_rate": 5.213036211664191e-08, "loss": 0.4064, "num_tokens": 2314389898.0, "step": 2442 }, { "epoch": 4.781586679725759, "grad_norm": 0.08406160504043352, "learning_rate": 5.122136351020768e-08, "loss": 0.4017, "num_tokens": 2315315259.0, "step": 2443 }, { "epoch": 4.7835455435847205, "grad_norm": 0.08229692806506131, "learning_rate": 5.032031907821089e-08, "loss": 0.4073, "num_tokens": 2316294564.0, "step": 2444 }, { "epoch": 4.785504407443683, "grad_norm": 0.08599759770030403, "learning_rate": 4.942723026890139e-08, "loss": 0.4151, "num_tokens": 2317249683.0, "step": 2445 }, { "epoch": 4.787463271302644, "grad_norm": 0.08603985999117829, "learning_rate": 4.8542098517740945e-08, "loss": 0.4155, "num_tokens": 2318177699.0, "step": 2446 }, { "epoch": 4.789422135161606, "grad_norm": 0.08455414914704969, "learning_rate": 4.7664925247402074e-08, "loss": 0.4175, "num_tokens": 2319139001.0, "step": 2447 }, { "epoch": 4.791380999020568, "grad_norm": 0.08653806792097062, "learning_rate": 4.6795711867766436e-08, "loss": 0.4177, "num_tokens": 2320066572.0, "step": 2448 }, { "epoch": 4.79333986287953, "grad_norm": 0.0837413334352232, "learning_rate": 4.593445977592037e-08, "loss": 0.416, "num_tokens": 2321004533.0, "step": 2449 }, { "epoch": 4.7952987267384914, "grad_norm": 0.08591481470046484, "learning_rate": 4.5081170356156e-08, "loss": 0.4216, "num_tokens": 2321922365.0, "step": 2450 }, { "epoch": 4.797257590597454, "grad_norm": 0.08217979132395248, "learning_rate": 4.423584497996458e-08, "loss": 0.4087, "num_tokens": 2322892155.0, "step": 2451 }, { "epoch": 4.799216454456415, "grad_norm": 0.08410357083151844, "learning_rate": 4.339848500603816e-08, "loss": 0.3923, "num_tokens": 2323833595.0, "step": 2452 }, { "epoch": 4.801175318315377, "grad_norm": 0.08064405021012844, "learning_rate": 4.2569091780266245e-08, "loss": 0.4099, "num_tokens": 2324820336.0, "step": 2453 }, { "epoch": 4.803134182174339, "grad_norm": 0.08041802184099593, "learning_rate": 4.1747666635733594e-08, "loss": 0.4032, "num_tokens": 2325796312.0, "step": 2454 }, { "epoch": 4.805093046033301, "grad_norm": 0.08052566702220959, "learning_rate": 4.0934210892715765e-08, "loss": 0.4073, "num_tokens": 2326777747.0, "step": 2455 }, { "epoch": 4.807051909892262, "grad_norm": 0.08670948808326147, "learning_rate": 4.0128725858681884e-08, "loss": 0.4057, "num_tokens": 2327696411.0, "step": 2456 }, { "epoch": 4.809010773751225, "grad_norm": 0.08491674895829152, "learning_rate": 3.9331212828288e-08, "loss": 0.4243, "num_tokens": 2328622491.0, "step": 2457 }, { "epoch": 4.810969637610186, "grad_norm": 0.08322728782997706, "learning_rate": 3.8541673083377086e-08, "loss": 0.4024, "num_tokens": 2329553106.0, "step": 2458 }, { "epoch": 4.812928501469148, "grad_norm": 0.08174093923633531, "learning_rate": 3.776010789297735e-08, "loss": 0.4048, "num_tokens": 2330524979.0, "step": 2459 }, { "epoch": 4.81488736532811, "grad_norm": 0.08617383854563468, "learning_rate": 3.698651851329838e-08, "loss": 0.42, "num_tokens": 2331461473.0, "step": 2460 }, { "epoch": 4.816846229187071, "grad_norm": 0.08586259180754897, "learning_rate": 3.622090618773055e-08, "loss": 0.4211, "num_tokens": 2332400035.0, "step": 2461 }, { "epoch": 4.818805093046033, "grad_norm": 0.0838908968026586, "learning_rate": 3.546327214684342e-08, "loss": 0.4208, "num_tokens": 2333336979.0, "step": 2462 }, { "epoch": 4.820763956904996, "grad_norm": 0.08750160307910682, "learning_rate": 3.4713617608382324e-08, "loss": 0.4153, "num_tokens": 2334252618.0, "step": 2463 }, { "epoch": 4.822722820763957, "grad_norm": 0.08104017291582989, "learning_rate": 3.3971943777267866e-08, "loss": 0.4113, "num_tokens": 2335224929.0, "step": 2464 }, { "epoch": 4.824681684622918, "grad_norm": 0.08173541218523228, "learning_rate": 3.323825184559204e-08, "loss": 0.4079, "num_tokens": 2336185985.0, "step": 2465 }, { "epoch": 4.826640548481881, "grad_norm": 0.08300795903661226, "learning_rate": 3.251254299261875e-08, "loss": 0.4178, "num_tokens": 2337147503.0, "step": 2466 }, { "epoch": 4.828599412340842, "grad_norm": 0.08226662942561731, "learning_rate": 3.179481838477993e-08, "loss": 0.4053, "num_tokens": 2338153505.0, "step": 2467 }, { "epoch": 4.830558276199804, "grad_norm": 0.0848585446696675, "learning_rate": 3.108507917567505e-08, "loss": 0.4151, "num_tokens": 2339063128.0, "step": 2468 }, { "epoch": 4.832517140058766, "grad_norm": 0.08274741489835595, "learning_rate": 3.038332650606823e-08, "loss": 0.4069, "num_tokens": 2340015294.0, "step": 2469 }, { "epoch": 4.834476003917728, "grad_norm": 0.08290719552926558, "learning_rate": 2.9689561503886687e-08, "loss": 0.4069, "num_tokens": 2340986036.0, "step": 2470 }, { "epoch": 4.836434867776689, "grad_norm": 0.08749408764584028, "learning_rate": 2.9003785284218988e-08, "loss": 0.422, "num_tokens": 2341897246.0, "step": 2471 }, { "epoch": 4.838393731635652, "grad_norm": 0.08267423004795207, "learning_rate": 2.8325998949314536e-08, "loss": 0.4091, "num_tokens": 2342853731.0, "step": 2472 }, { "epoch": 4.840352595494613, "grad_norm": 0.08247784544045712, "learning_rate": 2.7656203588578566e-08, "loss": 0.4232, "num_tokens": 2343837974.0, "step": 2473 }, { "epoch": 4.842311459353575, "grad_norm": 0.0812281692364607, "learning_rate": 2.699440027857436e-08, "loss": 0.4074, "num_tokens": 2344803189.0, "step": 2474 }, { "epoch": 4.844270323212537, "grad_norm": 0.08427135044126924, "learning_rate": 2.6340590083018257e-08, "loss": 0.416, "num_tokens": 2345773157.0, "step": 2475 }, { "epoch": 4.846229187071499, "grad_norm": 0.08151715111615959, "learning_rate": 2.56947740527802e-08, "loss": 0.4202, "num_tokens": 2346741251.0, "step": 2476 }, { "epoch": 4.84818805093046, "grad_norm": 0.08897622249235938, "learning_rate": 2.505695322588042e-08, "loss": 0.4204, "num_tokens": 2347693106.0, "step": 2477 }, { "epoch": 4.8501469147894225, "grad_norm": 0.08461225830789412, "learning_rate": 2.442712862748775e-08, "loss": 0.4144, "num_tokens": 2348644307.0, "step": 2478 }, { "epoch": 4.852105778648384, "grad_norm": 0.08340120410325955, "learning_rate": 2.3805301269920754e-08, "loss": 0.4246, "num_tokens": 2349610681.0, "step": 2479 }, { "epoch": 4.854064642507346, "grad_norm": 0.0812242686859867, "learning_rate": 2.3191472152642726e-08, "loss": 0.4128, "num_tokens": 2350563195.0, "step": 2480 }, { "epoch": 4.8560235063663075, "grad_norm": 0.08351170707632818, "learning_rate": 2.2585642262260564e-08, "loss": 0.4143, "num_tokens": 2351510197.0, "step": 2481 }, { "epoch": 4.857982370225269, "grad_norm": 0.08392497822042437, "learning_rate": 2.1987812572526468e-08, "loss": 0.4245, "num_tokens": 2352457727.0, "step": 2482 }, { "epoch": 4.859941234084231, "grad_norm": 0.08431120254635958, "learning_rate": 2.1397984044331245e-08, "loss": 0.4261, "num_tokens": 2353403769.0, "step": 2483 }, { "epoch": 4.861900097943193, "grad_norm": 0.08552350387254533, "learning_rate": 2.0816157625706547e-08, "loss": 0.424, "num_tokens": 2354357230.0, "step": 2484 }, { "epoch": 4.863858961802155, "grad_norm": 0.0841283006789614, "learning_rate": 2.0242334251823204e-08, "loss": 0.4196, "num_tokens": 2355268335.0, "step": 2485 }, { "epoch": 4.865817825661116, "grad_norm": 0.08306796049433385, "learning_rate": 1.9676514844987338e-08, "loss": 0.4155, "num_tokens": 2356224375.0, "step": 2486 }, { "epoch": 4.8677766895200785, "grad_norm": 0.08321395206329488, "learning_rate": 1.911870031464036e-08, "loss": 0.4119, "num_tokens": 2357167198.0, "step": 2487 }, { "epoch": 4.86973555337904, "grad_norm": 0.08205135289045662, "learning_rate": 1.8568891557358413e-08, "loss": 0.4097, "num_tokens": 2358116578.0, "step": 2488 }, { "epoch": 4.871694417238002, "grad_norm": 0.08661824822951616, "learning_rate": 1.8027089456849055e-08, "loss": 0.4199, "num_tokens": 2359037560.0, "step": 2489 }, { "epoch": 4.8736532810969635, "grad_norm": 0.08594177887736316, "learning_rate": 1.749329488395124e-08, "loss": 0.4144, "num_tokens": 2359935151.0, "step": 2490 }, { "epoch": 4.875612144955926, "grad_norm": 0.08208582658409447, "learning_rate": 1.696750869663366e-08, "loss": 0.4114, "num_tokens": 2360896401.0, "step": 2491 }, { "epoch": 4.877571008814887, "grad_norm": 0.08144251104442016, "learning_rate": 1.6449731739991427e-08, "loss": 0.4018, "num_tokens": 2361843206.0, "step": 2492 }, { "epoch": 4.879529872673849, "grad_norm": 0.08323250894735935, "learning_rate": 1.593996484624938e-08, "loss": 0.418, "num_tokens": 2362816297.0, "step": 2493 }, { "epoch": 4.881488736532811, "grad_norm": 0.08389123523792696, "learning_rate": 1.5438208834754886e-08, "loss": 0.41, "num_tokens": 2363731796.0, "step": 2494 }, { "epoch": 4.883447600391773, "grad_norm": 0.08312659665297858, "learning_rate": 1.494446451198117e-08, "loss": 0.4108, "num_tokens": 2364729829.0, "step": 2495 }, { "epoch": 4.885406464250734, "grad_norm": 0.08208683931587697, "learning_rate": 1.4458732671523978e-08, "loss": 0.413, "num_tokens": 2365707631.0, "step": 2496 }, { "epoch": 4.887365328109697, "grad_norm": 0.08494231721901108, "learning_rate": 1.3981014094099354e-08, "loss": 0.4071, "num_tokens": 2366670586.0, "step": 2497 }, { "epoch": 4.889324191968658, "grad_norm": 0.0843058471808589, "learning_rate": 1.3511309547545315e-08, "loss": 0.4114, "num_tokens": 2367617080.0, "step": 2498 }, { "epoch": 4.89128305582762, "grad_norm": 0.08392439725095673, "learning_rate": 1.3049619786818512e-08, "loss": 0.4098, "num_tokens": 2368552710.0, "step": 2499 }, { "epoch": 4.893241919686582, "grad_norm": 0.08198489445557527, "learning_rate": 1.2595945553992572e-08, "loss": 0.4275, "num_tokens": 2369511071.0, "step": 2500 }, { "epoch": 4.895200783545544, "grad_norm": 0.08313640551389809, "learning_rate": 1.2150287578258646e-08, "loss": 0.4059, "num_tokens": 2370492381.0, "step": 2501 }, { "epoch": 4.897159647404505, "grad_norm": 0.08540445698858405, "learning_rate": 1.1712646575922637e-08, "loss": 0.4263, "num_tokens": 2371465884.0, "step": 2502 }, { "epoch": 4.899118511263467, "grad_norm": 0.08418418866658998, "learning_rate": 1.1283023250405756e-08, "loss": 0.4152, "num_tokens": 2372398758.0, "step": 2503 }, { "epoch": 4.901077375122429, "grad_norm": 0.0869585142660311, "learning_rate": 1.0861418292241188e-08, "loss": 0.4047, "num_tokens": 2373352246.0, "step": 2504 }, { "epoch": 4.903036238981391, "grad_norm": 0.08038143774441894, "learning_rate": 1.0447832379075761e-08, "loss": 0.4014, "num_tokens": 2374305208.0, "step": 2505 }, { "epoch": 4.904995102840353, "grad_norm": 0.08275289811279075, "learning_rate": 1.004226617566495e-08, "loss": 0.3972, "num_tokens": 2375248809.0, "step": 2506 }, { "epoch": 4.906953966699314, "grad_norm": 0.08178303547444857, "learning_rate": 9.6447203338762e-09, "loss": 0.3989, "num_tokens": 2376186884.0, "step": 2507 }, { "epoch": 4.908912830558276, "grad_norm": 0.08298956919319314, "learning_rate": 9.25519549268561e-09, "loss": 0.4182, "num_tokens": 2377153282.0, "step": 2508 }, { "epoch": 4.910871694417238, "grad_norm": 0.08168679032964059, "learning_rate": 8.873692278175693e-09, "loss": 0.3997, "num_tokens": 2378084196.0, "step": 2509 }, { "epoch": 4.9128305582762, "grad_norm": 0.08099350949175751, "learning_rate": 8.50021130353762e-09, "loss": 0.3998, "num_tokens": 2379064006.0, "step": 2510 }, { "epoch": 4.914789422135161, "grad_norm": 0.08448643905441347, "learning_rate": 8.134753169066756e-09, "loss": 0.4211, "num_tokens": 2380029423.0, "step": 2511 }, { "epoch": 4.916748285994124, "grad_norm": 0.08276948389547982, "learning_rate": 7.777318462164896e-09, "loss": 0.4025, "num_tokens": 2380955867.0, "step": 2512 }, { "epoch": 4.918707149853085, "grad_norm": 0.08353943599442933, "learning_rate": 7.427907757336927e-09, "loss": 0.412, "num_tokens": 2381873014.0, "step": 2513 }, { "epoch": 4.920666013712047, "grad_norm": 0.08724914833683893, "learning_rate": 7.0865216161902785e-09, "loss": 0.4237, "num_tokens": 2382796451.0, "step": 2514 }, { "epoch": 4.922624877571009, "grad_norm": 0.08241738671060188, "learning_rate": 6.75316058743547e-09, "loss": 0.4144, "num_tokens": 2383776478.0, "step": 2515 }, { "epoch": 4.924583741429971, "grad_norm": 0.08257681575010505, "learning_rate": 6.427825206883897e-09, "loss": 0.4087, "num_tokens": 2384725210.0, "step": 2516 }, { "epoch": 4.926542605288932, "grad_norm": 0.08247321302321747, "learning_rate": 6.110515997447275e-09, "loss": 0.4153, "num_tokens": 2385658811.0, "step": 2517 }, { "epoch": 4.9285014691478946, "grad_norm": 0.08527485644494695, "learning_rate": 5.80123346913708e-09, "loss": 0.4164, "num_tokens": 2386572645.0, "step": 2518 }, { "epoch": 4.930460333006856, "grad_norm": 0.0854930459702537, "learning_rate": 5.499978119062888e-09, "loss": 0.425, "num_tokens": 2387495071.0, "step": 2519 }, { "epoch": 4.932419196865818, "grad_norm": 0.08244657338826553, "learning_rate": 5.2067504314323725e-09, "loss": 0.4135, "num_tokens": 2388441374.0, "step": 2520 }, { "epoch": 4.93437806072478, "grad_norm": 0.08579930375525753, "learning_rate": 4.921550877550752e-09, "loss": 0.4094, "num_tokens": 2389388179.0, "step": 2521 }, { "epoch": 4.936336924583742, "grad_norm": 0.08239029943044571, "learning_rate": 4.644379915819674e-09, "loss": 0.4019, "num_tokens": 2390322786.0, "step": 2522 }, { "epoch": 4.938295788442703, "grad_norm": 0.08218263979258253, "learning_rate": 4.375237991736114e-09, "loss": 0.3947, "num_tokens": 2391252574.0, "step": 2523 }, { "epoch": 4.940254652301665, "grad_norm": 0.08460511024119019, "learning_rate": 4.114125537891811e-09, "loss": 0.4163, "num_tokens": 2392190407.0, "step": 2524 }, { "epoch": 4.942213516160627, "grad_norm": 0.08482161989770623, "learning_rate": 3.861042973973273e-09, "loss": 0.4197, "num_tokens": 2393133906.0, "step": 2525 }, { "epoch": 4.944172380019588, "grad_norm": 0.08367047310031385, "learning_rate": 3.6159907067601086e-09, "loss": 0.4013, "num_tokens": 2394067417.0, "step": 2526 }, { "epoch": 4.9461312438785505, "grad_norm": 0.08636739382428231, "learning_rate": 3.3789691301244766e-09, "loss": 0.4281, "num_tokens": 2395000779.0, "step": 2527 }, { "epoch": 4.948090107737512, "grad_norm": 0.08298122665582304, "learning_rate": 3.1499786250321904e-09, "loss": 0.417, "num_tokens": 2395987864.0, "step": 2528 }, { "epoch": 4.950048971596474, "grad_norm": 0.0872332731712304, "learning_rate": 2.9290195595388373e-09, "loss": 0.4263, "num_tokens": 2396948089.0, "step": 2529 }, { "epoch": 4.9520078354554355, "grad_norm": 0.08505589645696036, "learning_rate": 2.716092288792549e-09, "loss": 0.4138, "num_tokens": 2397876177.0, "step": 2530 }, { "epoch": 4.953966699314398, "grad_norm": 0.08201258823432063, "learning_rate": 2.511197155031231e-09, "loss": 0.4078, "num_tokens": 2398817796.0, "step": 2531 }, { "epoch": 4.955925563173359, "grad_norm": 0.0863739072094779, "learning_rate": 2.314334487583114e-09, "loss": 0.4171, "num_tokens": 2399760770.0, "step": 2532 }, { "epoch": 4.9578844270323215, "grad_norm": 0.08543826022418272, "learning_rate": 2.125504602866202e-09, "loss": 0.4112, "num_tokens": 2400685381.0, "step": 2533 }, { "epoch": 4.959843290891283, "grad_norm": 0.08517092811786608, "learning_rate": 1.944707804385493e-09, "loss": 0.4142, "num_tokens": 2401588105.0, "step": 2534 }, { "epoch": 4.961802154750245, "grad_norm": 0.08232903288303334, "learning_rate": 1.7719443827368677e-09, "loss": 0.4015, "num_tokens": 2402542987.0, "step": 2535 }, { "epoch": 4.9637610186092065, "grad_norm": 0.08489238637251854, "learning_rate": 1.6072146156032031e-09, "loss": 0.414, "num_tokens": 2403450808.0, "step": 2536 }, { "epoch": 4.965719882468169, "grad_norm": 0.08420075838921016, "learning_rate": 1.4505187677538169e-09, "loss": 0.4169, "num_tokens": 2404385191.0, "step": 2537 }, { "epoch": 4.96767874632713, "grad_norm": 0.08344347881043974, "learning_rate": 1.3018570910466876e-09, "loss": 0.4292, "num_tokens": 2405368652.0, "step": 2538 }, { "epoch": 4.969637610186092, "grad_norm": 0.084912657172186, "learning_rate": 1.1612298244256803e-09, "loss": 0.4253, "num_tokens": 2406298399.0, "step": 2539 }, { "epoch": 4.971596474045054, "grad_norm": 0.08283567224097792, "learning_rate": 1.0286371939205453e-09, "loss": 0.4155, "num_tokens": 2407276087.0, "step": 2540 }, { "epoch": 4.973555337904016, "grad_norm": 0.07901068391647909, "learning_rate": 9.040794126485841e-10, "loss": 0.4054, "num_tokens": 2408268769.0, "step": 2541 }, { "epoch": 4.975514201762977, "grad_norm": 0.08076771523672151, "learning_rate": 7.875566808107638e-10, "loss": 0.4196, "num_tokens": 2409263019.0, "step": 2542 }, { "epoch": 4.97747306562194, "grad_norm": 0.08504240350042797, "learning_rate": 6.790691856939369e-10, "loss": 0.418, "num_tokens": 2410207195.0, "step": 2543 }, { "epoch": 4.979431929480901, "grad_norm": 0.0836494206211676, "learning_rate": 5.78617101670842e-10, "loss": 0.4246, "num_tokens": 2411172709.0, "step": 2544 }, { "epoch": 4.9813907933398625, "grad_norm": 0.08370225143841217, "learning_rate": 4.862005901978828e-10, "loss": 0.4132, "num_tokens": 2412106660.0, "step": 2545 }, { "epoch": 4.983349657198825, "grad_norm": 0.08273121213895504, "learning_rate": 4.0181979981568364e-10, "loss": 0.4178, "num_tokens": 2413069391.0, "step": 2546 }, { "epoch": 4.985308521057786, "grad_norm": 0.08335935364884406, "learning_rate": 3.254748661507545e-10, "loss": 0.4155, "num_tokens": 2413994838.0, "step": 2547 }, { "epoch": 4.987267384916748, "grad_norm": 0.081987188110208, "learning_rate": 2.5716591191105034e-10, "loss": 0.4092, "num_tokens": 2414962822.0, "step": 2548 }, { "epoch": 4.98922624877571, "grad_norm": 0.08342080693098328, "learning_rate": 1.9689304688985667e-10, "loss": 0.4094, "num_tokens": 2415932912.0, "step": 2549 }, { "epoch": 4.991185112634672, "grad_norm": 0.08624338152957356, "learning_rate": 1.4465636796412442e-10, "loss": 0.4315, "num_tokens": 2416882851.0, "step": 2550 }, { "epoch": 4.993143976493633, "grad_norm": 0.0808935459746632, "learning_rate": 1.004559590939147e-10, "loss": 0.4055, "num_tokens": 2417834083.0, "step": 2551 }, { "epoch": 4.995102840352596, "grad_norm": 0.08319657490590793, "learning_rate": 6.429189132239888e-11, "loss": 0.4108, "num_tokens": 2418779500.0, "step": 2552 }, { "epoch": 4.997061704211557, "grad_norm": 0.08226287009167223, "learning_rate": 3.6164222776413626e-11, "loss": 0.401, "num_tokens": 2419700676.0, "step": 2553 }, { "epoch": 4.999020568070519, "grad_norm": 0.0838174095017364, "learning_rate": 1.6072998664795615e-11, "loss": 0.4207, "num_tokens": 2420659085.0, "step": 2554 }, { "epoch": 5.0, "grad_norm": 0.0838174095017364, "learning_rate": 4.018251280601959e-12, "loss": 0.4132, "num_tokens": 2421162947.0, "step": 2555 }, { "epoch": 5.0, "step": 2555, "total_flos": 2.4054064625943183e+18, "train_loss": 0.42931601253507656, "train_runtime": 13617.0429, "train_samples_per_second": 47.968, "train_steps_per_second": 0.188 } ], "logging_steps": 1, "max_steps": 2555, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.4054064625943183e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }