{ "best_global_step": 1000, "best_metric": 0.16221453249454498, "best_model_checkpoint": "saves/prompt-tuning/gemma-3-1b-it/train_cb_1745950309/checkpoint-1000", "epoch": 701.7610619469026, "eval_steps": 200, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08849557522123894, "grad_norm": 30.857913970947266, "learning_rate": 0.29999999259779675, "loss": 11.664, "num_input_tokens_seen": 3008, "step": 5 }, { "epoch": 0.17699115044247787, "grad_norm": 15.412628173828125, "learning_rate": 0.29999996252634736, "loss": 11.926, "num_input_tokens_seen": 6128, "step": 10 }, { "epoch": 0.26548672566371684, "grad_norm": 4.846696853637695, "learning_rate": 0.2999999093230187, "loss": 11.5005, "num_input_tokens_seen": 9056, "step": 15 }, { "epoch": 0.35398230088495575, "grad_norm": 3.7165660858154297, "learning_rate": 0.299999832987819, "loss": 9.2555, "num_input_tokens_seen": 12656, "step": 20 }, { "epoch": 0.4424778761061947, "grad_norm": 3.7266926765441895, "learning_rate": 0.29999973352076004, "loss": 6.9613, "num_input_tokens_seen": 15808, "step": 25 }, { "epoch": 0.5309734513274337, "grad_norm": 2.0266427993774414, "learning_rate": 0.2999996109218572, "loss": 6.1477, "num_input_tokens_seen": 18304, "step": 30 }, { "epoch": 0.6194690265486725, "grad_norm": 1.4981653690338135, "learning_rate": 0.2999994651911293, "loss": 3.1662, "num_input_tokens_seen": 21712, "step": 35 }, { "epoch": 0.7079646017699115, "grad_norm": 1.5509648323059082, "learning_rate": 0.2999992963285989, "loss": 2.0894, "num_input_tokens_seen": 24576, "step": 40 }, { "epoch": 0.7964601769911505, "grad_norm": 1.0404917001724243, "learning_rate": 0.29999910433429194, "loss": 1.941, "num_input_tokens_seen": 27088, "step": 45 }, { "epoch": 0.8849557522123894, "grad_norm": 0.5678565502166748, "learning_rate": 0.29999888920823814, "loss": 1.32, "num_input_tokens_seen": 29648, "step": 50 }, { "epoch": 0.9734513274336283, "grad_norm": 0.6404495239257812, "learning_rate": 0.29999865095047057, "loss": 1.5291, "num_input_tokens_seen": 32032, "step": 55 }, { "epoch": 1.0530973451327434, "grad_norm": 0.6864966750144958, "learning_rate": 0.29999838956102604, "loss": 0.7341, "num_input_tokens_seen": 34624, "step": 60 }, { "epoch": 1.1415929203539823, "grad_norm": 0.5049331188201904, "learning_rate": 0.29999810503994484, "loss": 0.5649, "num_input_tokens_seen": 37600, "step": 65 }, { "epoch": 1.2300884955752212, "grad_norm": 0.3618448078632355, "learning_rate": 0.29999779738727084, "loss": 0.8966, "num_input_tokens_seen": 40256, "step": 70 }, { "epoch": 1.3185840707964602, "grad_norm": 0.6455984711647034, "learning_rate": 0.29999746660305154, "loss": 0.7875, "num_input_tokens_seen": 43440, "step": 75 }, { "epoch": 1.407079646017699, "grad_norm": 0.5885500311851501, "learning_rate": 0.2999971126873379, "loss": 0.6253, "num_input_tokens_seen": 46192, "step": 80 }, { "epoch": 1.495575221238938, "grad_norm": 0.17574167251586914, "learning_rate": 0.2999967356401845, "loss": 0.6007, "num_input_tokens_seen": 49360, "step": 85 }, { "epoch": 1.584070796460177, "grad_norm": 0.34683212637901306, "learning_rate": 0.29999633546164944, "loss": 0.5341, "num_input_tokens_seen": 51840, "step": 90 }, { "epoch": 1.672566371681416, "grad_norm": 0.2354772537946701, "learning_rate": 0.29999591215179444, "loss": 0.3807, "num_input_tokens_seen": 54960, "step": 95 }, { "epoch": 1.7610619469026547, "grad_norm": 0.4168570041656494, "learning_rate": 0.2999954657106849, "loss": 0.3344, "num_input_tokens_seen": 57776, "step": 100 }, { "epoch": 1.8495575221238938, "grad_norm": 0.17382696270942688, "learning_rate": 0.2999949961383896, "loss": 0.3279, "num_input_tokens_seen": 60624, "step": 105 }, { "epoch": 1.9380530973451329, "grad_norm": 0.23753246665000916, "learning_rate": 0.2999945034349809, "loss": 0.309, "num_input_tokens_seen": 63424, "step": 110 }, { "epoch": 2.017699115044248, "grad_norm": 0.07051464915275574, "learning_rate": 0.2999939876005348, "loss": 0.3785, "num_input_tokens_seen": 65544, "step": 115 }, { "epoch": 2.106194690265487, "grad_norm": 0.11237034201622009, "learning_rate": 0.29999344863513094, "loss": 0.3453, "num_input_tokens_seen": 68552, "step": 120 }, { "epoch": 2.1946902654867255, "grad_norm": 0.05134290084242821, "learning_rate": 0.2999928865388523, "loss": 0.2252, "num_input_tokens_seen": 71256, "step": 125 }, { "epoch": 2.2831858407079646, "grad_norm": 0.3033003509044647, "learning_rate": 0.29999230131178567, "loss": 0.3022, "num_input_tokens_seen": 74200, "step": 130 }, { "epoch": 2.3716814159292037, "grad_norm": 0.07960475981235504, "learning_rate": 0.2999916929540212, "loss": 0.3919, "num_input_tokens_seen": 76696, "step": 135 }, { "epoch": 2.4601769911504423, "grad_norm": 0.12232175469398499, "learning_rate": 0.29999106146565285, "loss": 0.3556, "num_input_tokens_seen": 79640, "step": 140 }, { "epoch": 2.5486725663716814, "grad_norm": 0.08066463470458984, "learning_rate": 0.29999040684677786, "loss": 0.3411, "num_input_tokens_seen": 82632, "step": 145 }, { "epoch": 2.6371681415929205, "grad_norm": 0.12378513067960739, "learning_rate": 0.2999897290974972, "loss": 0.248, "num_input_tokens_seen": 85496, "step": 150 }, { "epoch": 2.725663716814159, "grad_norm": 0.07555761933326721, "learning_rate": 0.2999890282179155, "loss": 0.159, "num_input_tokens_seen": 89128, "step": 155 }, { "epoch": 2.814159292035398, "grad_norm": 0.20620428025722504, "learning_rate": 0.29998830420814077, "loss": 0.3356, "num_input_tokens_seen": 92152, "step": 160 }, { "epoch": 2.9026548672566372, "grad_norm": 0.06485848128795624, "learning_rate": 0.2999875570682846, "loss": 0.3261, "num_input_tokens_seen": 94632, "step": 165 }, { "epoch": 2.991150442477876, "grad_norm": 0.03192218020558357, "learning_rate": 0.2999867867984623, "loss": 0.1706, "num_input_tokens_seen": 97544, "step": 170 }, { "epoch": 3.0707964601769913, "grad_norm": 0.0249206330627203, "learning_rate": 0.29998599339879267, "loss": 0.1368, "num_input_tokens_seen": 99992, "step": 175 }, { "epoch": 3.15929203539823, "grad_norm": 0.08197808265686035, "learning_rate": 0.29998517686939796, "loss": 0.1791, "num_input_tokens_seen": 102824, "step": 180 }, { "epoch": 3.247787610619469, "grad_norm": 0.12062375247478485, "learning_rate": 0.29998433721040413, "loss": 0.2248, "num_input_tokens_seen": 105608, "step": 185 }, { "epoch": 3.336283185840708, "grad_norm": 0.08413828164339066, "learning_rate": 0.29998347442194073, "loss": 0.1702, "num_input_tokens_seen": 108712, "step": 190 }, { "epoch": 3.4247787610619467, "grad_norm": 0.012988623231649399, "learning_rate": 0.2999825885041407, "loss": 0.1231, "num_input_tokens_seen": 111608, "step": 195 }, { "epoch": 3.5132743362831858, "grad_norm": 0.07738474011421204, "learning_rate": 0.29998167945714077, "loss": 0.388, "num_input_tokens_seen": 114504, "step": 200 }, { "epoch": 3.5132743362831858, "eval_loss": 0.26136693358421326, "eval_runtime": 0.9276, "eval_samples_per_second": 26.95, "eval_steps_per_second": 14.014, "num_input_tokens_seen": 114504, "step": 200 }, { "epoch": 3.601769911504425, "grad_norm": 0.09463059902191162, "learning_rate": 0.2999807472810811, "loss": 0.3094, "num_input_tokens_seen": 117592, "step": 205 }, { "epoch": 3.6902654867256635, "grad_norm": 0.017677707597613335, "learning_rate": 0.29997979197610536, "loss": 0.1854, "num_input_tokens_seen": 120216, "step": 210 }, { "epoch": 3.7787610619469025, "grad_norm": 0.021596524864435196, "learning_rate": 0.299978813542361, "loss": 0.3197, "num_input_tokens_seen": 122904, "step": 215 }, { "epoch": 3.8672566371681416, "grad_norm": 0.05760117247700691, "learning_rate": 0.2999778119799988, "loss": 0.186, "num_input_tokens_seen": 125832, "step": 220 }, { "epoch": 3.9557522123893807, "grad_norm": 0.047069959342479706, "learning_rate": 0.29997678728917326, "loss": 0.2049, "num_input_tokens_seen": 129512, "step": 225 }, { "epoch": 4.035398230088496, "grad_norm": 0.020536065101623535, "learning_rate": 0.2999757394700424, "loss": 0.1653, "num_input_tokens_seen": 131872, "step": 230 }, { "epoch": 4.123893805309734, "grad_norm": 0.04338550567626953, "learning_rate": 0.29997466852276783, "loss": 0.1195, "num_input_tokens_seen": 134624, "step": 235 }, { "epoch": 4.212389380530974, "grad_norm": 0.022253643721342087, "learning_rate": 0.29997357444751466, "loss": 0.1374, "num_input_tokens_seen": 137168, "step": 240 }, { "epoch": 4.300884955752212, "grad_norm": 0.03547932952642441, "learning_rate": 0.2999724572444516, "loss": 0.1834, "num_input_tokens_seen": 139904, "step": 245 }, { "epoch": 4.389380530973451, "grad_norm": 0.04908934235572815, "learning_rate": 0.29997131691375095, "loss": 0.1778, "num_input_tokens_seen": 142496, "step": 250 }, { "epoch": 4.477876106194691, "grad_norm": 0.018861019983887672, "learning_rate": 0.2999701534555886, "loss": 0.1685, "num_input_tokens_seen": 145632, "step": 255 }, { "epoch": 4.566371681415929, "grad_norm": 0.02899417281150818, "learning_rate": 0.2999689668701439, "loss": 0.1218, "num_input_tokens_seen": 148224, "step": 260 }, { "epoch": 4.654867256637168, "grad_norm": 0.027406131848692894, "learning_rate": 0.29996775715759993, "loss": 0.1349, "num_input_tokens_seen": 150928, "step": 265 }, { "epoch": 4.743362831858407, "grad_norm": 0.04806069657206535, "learning_rate": 0.2999665243181432, "loss": 0.1437, "num_input_tokens_seen": 153808, "step": 270 }, { "epoch": 4.831858407079646, "grad_norm": 0.032632045447826385, "learning_rate": 0.2999652683519638, "loss": 0.2409, "num_input_tokens_seen": 157248, "step": 275 }, { "epoch": 4.920353982300885, "grad_norm": 0.02714240550994873, "learning_rate": 0.29996398925925544, "loss": 0.1812, "num_input_tokens_seen": 160352, "step": 280 }, { "epoch": 5.0, "grad_norm": 0.1032564714550972, "learning_rate": 0.2999626870402154, "loss": 0.1153, "num_input_tokens_seen": 162728, "step": 285 }, { "epoch": 5.088495575221239, "grad_norm": 0.05542123317718506, "learning_rate": 0.29996136169504445, "loss": 0.2365, "num_input_tokens_seen": 165368, "step": 290 }, { "epoch": 5.176991150442478, "grad_norm": 0.04795343056321144, "learning_rate": 0.29996001322394694, "loss": 0.1683, "num_input_tokens_seen": 168456, "step": 295 }, { "epoch": 5.265486725663717, "grad_norm": 0.06972891837358475, "learning_rate": 0.29995864162713093, "loss": 0.1462, "num_input_tokens_seen": 171512, "step": 300 }, { "epoch": 5.353982300884955, "grad_norm": 0.05509583279490471, "learning_rate": 0.2999572469048079, "loss": 0.181, "num_input_tokens_seen": 174920, "step": 305 }, { "epoch": 5.442477876106195, "grad_norm": 0.03771742060780525, "learning_rate": 0.29995582905719287, "loss": 0.131, "num_input_tokens_seen": 177496, "step": 310 }, { "epoch": 5.530973451327434, "grad_norm": 0.026887349784374237, "learning_rate": 0.2999543880845046, "loss": 0.0887, "num_input_tokens_seen": 180328, "step": 315 }, { "epoch": 5.619469026548672, "grad_norm": 0.04695015028119087, "learning_rate": 0.2999529239869652, "loss": 0.1749, "num_input_tokens_seen": 183000, "step": 320 }, { "epoch": 5.707964601769912, "grad_norm": 0.037778183817863464, "learning_rate": 0.2999514367648005, "loss": 0.1148, "num_input_tokens_seen": 185512, "step": 325 }, { "epoch": 5.79646017699115, "grad_norm": 0.039306797087192535, "learning_rate": 0.29994992641823987, "loss": 0.1924, "num_input_tokens_seen": 188776, "step": 330 }, { "epoch": 5.88495575221239, "grad_norm": 0.07070355862379074, "learning_rate": 0.29994839294751613, "loss": 0.1748, "num_input_tokens_seen": 191816, "step": 335 }, { "epoch": 5.9734513274336285, "grad_norm": 0.06912440806627274, "learning_rate": 0.29994683635286584, "loss": 0.1692, "num_input_tokens_seen": 194344, "step": 340 }, { "epoch": 6.053097345132743, "grad_norm": 0.013760467059910297, "learning_rate": 0.2999452566345291, "loss": 0.0577, "num_input_tokens_seen": 197000, "step": 345 }, { "epoch": 6.1415929203539825, "grad_norm": 0.05414609983563423, "learning_rate": 0.2999436537927494, "loss": 0.2424, "num_input_tokens_seen": 199992, "step": 350 }, { "epoch": 6.230088495575221, "grad_norm": 0.04373975098133087, "learning_rate": 0.299942027827774, "loss": 0.0888, "num_input_tokens_seen": 203208, "step": 355 }, { "epoch": 6.31858407079646, "grad_norm": 0.07380948215723038, "learning_rate": 0.29994037873985363, "loss": 0.169, "num_input_tokens_seen": 205976, "step": 360 }, { "epoch": 6.407079646017699, "grad_norm": 0.02306806482374668, "learning_rate": 0.29993870652924254, "loss": 0.1052, "num_input_tokens_seen": 209176, "step": 365 }, { "epoch": 6.495575221238938, "grad_norm": 0.010759019292891026, "learning_rate": 0.29993701119619876, "loss": 0.1041, "num_input_tokens_seen": 211896, "step": 370 }, { "epoch": 6.584070796460177, "grad_norm": 0.07841109484434128, "learning_rate": 0.2999352927409835, "loss": 0.1436, "num_input_tokens_seen": 214824, "step": 375 }, { "epoch": 6.672566371681416, "grad_norm": 0.09694568067789078, "learning_rate": 0.29993355116386194, "loss": 0.1655, "num_input_tokens_seen": 217192, "step": 380 }, { "epoch": 6.761061946902655, "grad_norm": 0.04355792701244354, "learning_rate": 0.29993178646510266, "loss": 0.1346, "num_input_tokens_seen": 219816, "step": 385 }, { "epoch": 6.849557522123893, "grad_norm": 0.03253980726003647, "learning_rate": 0.2999299986449777, "loss": 0.1362, "num_input_tokens_seen": 222360, "step": 390 }, { "epoch": 6.938053097345133, "grad_norm": 0.03275546059012413, "learning_rate": 0.29992818770376284, "loss": 0.0809, "num_input_tokens_seen": 225864, "step": 395 }, { "epoch": 7.017699115044247, "grad_norm": 0.004386965651065111, "learning_rate": 0.29992635364173725, "loss": 0.1626, "num_input_tokens_seen": 228504, "step": 400 }, { "epoch": 7.017699115044247, "eval_loss": 0.26157596707344055, "eval_runtime": 0.9127, "eval_samples_per_second": 27.39, "eval_steps_per_second": 14.243, "num_input_tokens_seen": 228504, "step": 400 }, { "epoch": 7.106194690265487, "grad_norm": 0.02609449438750744, "learning_rate": 0.2999244964591839, "loss": 0.1058, "num_input_tokens_seen": 231416, "step": 405 }, { "epoch": 7.1946902654867255, "grad_norm": 0.04712270200252533, "learning_rate": 0.2999226161563891, "loss": 0.071, "num_input_tokens_seen": 233832, "step": 410 }, { "epoch": 7.283185840707965, "grad_norm": 0.1328945904970169, "learning_rate": 0.2999207127336429, "loss": 0.3077, "num_input_tokens_seen": 236776, "step": 415 }, { "epoch": 7.371681415929204, "grad_norm": 0.1069122925400734, "learning_rate": 0.2999187861912387, "loss": 0.303, "num_input_tokens_seen": 239720, "step": 420 }, { "epoch": 7.460176991150442, "grad_norm": 0.014653612859547138, "learning_rate": 0.2999168365294737, "loss": 0.2254, "num_input_tokens_seen": 242344, "step": 425 }, { "epoch": 7.548672566371682, "grad_norm": 0.0725458487868309, "learning_rate": 0.29991486374864856, "loss": 0.2119, "num_input_tokens_seen": 245208, "step": 430 }, { "epoch": 7.6371681415929205, "grad_norm": 0.17669686675071716, "learning_rate": 0.29991286784906745, "loss": 0.2821, "num_input_tokens_seen": 248232, "step": 435 }, { "epoch": 7.725663716814159, "grad_norm": 0.03473649173974991, "learning_rate": 0.2999108488310382, "loss": 0.2033, "num_input_tokens_seen": 251080, "step": 440 }, { "epoch": 7.814159292035399, "grad_norm": 0.1072806790471077, "learning_rate": 0.29990880669487213, "loss": 0.1306, "num_input_tokens_seen": 253576, "step": 445 }, { "epoch": 7.902654867256637, "grad_norm": 0.07879234850406647, "learning_rate": 0.29990674144088425, "loss": 0.287, "num_input_tokens_seen": 256616, "step": 450 }, { "epoch": 7.991150442477876, "grad_norm": 0.010200544260442257, "learning_rate": 0.299904653069393, "loss": 0.1301, "num_input_tokens_seen": 259544, "step": 455 }, { "epoch": 8.070796460176991, "grad_norm": 0.05468774959445, "learning_rate": 0.29990254158072044, "loss": 0.107, "num_input_tokens_seen": 262232, "step": 460 }, { "epoch": 8.15929203539823, "grad_norm": 0.050852928310632706, "learning_rate": 0.2999004069751921, "loss": 0.2407, "num_input_tokens_seen": 265048, "step": 465 }, { "epoch": 8.247787610619469, "grad_norm": 0.048885371536016464, "learning_rate": 0.2998982492531373, "loss": 0.1074, "num_input_tokens_seen": 268008, "step": 470 }, { "epoch": 8.336283185840708, "grad_norm": 0.01333150826394558, "learning_rate": 0.2998960684148887, "loss": 0.1284, "num_input_tokens_seen": 270568, "step": 475 }, { "epoch": 8.424778761061948, "grad_norm": 0.061909109354019165, "learning_rate": 0.29989386446078264, "loss": 0.1909, "num_input_tokens_seen": 273224, "step": 480 }, { "epoch": 8.513274336283185, "grad_norm": 0.04440929368138313, "learning_rate": 0.299891637391159, "loss": 0.2364, "num_input_tokens_seen": 276472, "step": 485 }, { "epoch": 8.601769911504425, "grad_norm": 0.015465333126485348, "learning_rate": 0.2998893872063612, "loss": 0.1328, "num_input_tokens_seen": 279432, "step": 490 }, { "epoch": 8.690265486725664, "grad_norm": 0.021732686087489128, "learning_rate": 0.2998871139067363, "loss": 0.159, "num_input_tokens_seen": 282232, "step": 495 }, { "epoch": 8.778761061946902, "grad_norm": 0.0326734259724617, "learning_rate": 0.2998848174926348, "loss": 0.1221, "num_input_tokens_seen": 284776, "step": 500 }, { "epoch": 8.867256637168142, "grad_norm": 0.048362698405981064, "learning_rate": 0.2998824979644109, "loss": 0.1715, "num_input_tokens_seen": 287864, "step": 505 }, { "epoch": 8.955752212389381, "grad_norm": 0.03577310964465141, "learning_rate": 0.29988015532242224, "loss": 0.1428, "num_input_tokens_seen": 290712, "step": 510 }, { "epoch": 9.035398230088495, "grad_norm": 0.03865543380379677, "learning_rate": 0.29987778956703015, "loss": 0.0928, "num_input_tokens_seen": 293160, "step": 515 }, { "epoch": 9.123893805309734, "grad_norm": 0.009994987398386002, "learning_rate": 0.2998754006985994, "loss": 0.0693, "num_input_tokens_seen": 296472, "step": 520 }, { "epoch": 9.212389380530974, "grad_norm": 0.05774105340242386, "learning_rate": 0.29987298871749846, "loss": 0.1607, "num_input_tokens_seen": 299112, "step": 525 }, { "epoch": 9.300884955752213, "grad_norm": 0.04266613349318504, "learning_rate": 0.2998705536240992, "loss": 0.1653, "num_input_tokens_seen": 302024, "step": 530 }, { "epoch": 9.389380530973451, "grad_norm": 0.04025699943304062, "learning_rate": 0.2998680954187772, "loss": 0.1146, "num_input_tokens_seen": 304872, "step": 535 }, { "epoch": 9.47787610619469, "grad_norm": 0.08113261312246323, "learning_rate": 0.2998656141019115, "loss": 0.1603, "num_input_tokens_seen": 307560, "step": 540 }, { "epoch": 9.56637168141593, "grad_norm": 0.06283559650182724, "learning_rate": 0.2998631096738848, "loss": 0.0989, "num_input_tokens_seen": 310840, "step": 545 }, { "epoch": 9.654867256637168, "grad_norm": 0.045276716351509094, "learning_rate": 0.29986058213508326, "loss": 0.1774, "num_input_tokens_seen": 313800, "step": 550 }, { "epoch": 9.743362831858407, "grad_norm": 0.02669377438724041, "learning_rate": 0.29985803148589674, "loss": 0.1324, "num_input_tokens_seen": 316664, "step": 555 }, { "epoch": 9.831858407079647, "grad_norm": 0.06914814561605453, "learning_rate": 0.2998554577267185, "loss": 0.2518, "num_input_tokens_seen": 319656, "step": 560 }, { "epoch": 9.920353982300885, "grad_norm": 0.050139423459768295, "learning_rate": 0.2998528608579455, "loss": 0.1795, "num_input_tokens_seen": 322536, "step": 565 }, { "epoch": 10.0, "grad_norm": 0.0408666767179966, "learning_rate": 0.2998502408799781, "loss": 0.2118, "num_input_tokens_seen": 324624, "step": 570 }, { "epoch": 10.08849557522124, "grad_norm": 0.07781104743480682, "learning_rate": 0.2998475977932205, "loss": 0.2035, "num_input_tokens_seen": 327488, "step": 575 }, { "epoch": 10.176991150442477, "grad_norm": 0.05363497510552406, "learning_rate": 0.29984493159808023, "loss": 0.1568, "num_input_tokens_seen": 330240, "step": 580 }, { "epoch": 10.265486725663717, "grad_norm": 0.06371191143989563, "learning_rate": 0.29984224229496836, "loss": 0.1581, "num_input_tokens_seen": 332976, "step": 585 }, { "epoch": 10.353982300884956, "grad_norm": 0.030843645334243774, "learning_rate": 0.2998395298842998, "loss": 0.1529, "num_input_tokens_seen": 335504, "step": 590 }, { "epoch": 10.442477876106194, "grad_norm": 0.06816186755895615, "learning_rate": 0.29983679436649263, "loss": 0.1245, "num_input_tokens_seen": 338512, "step": 595 }, { "epoch": 10.530973451327434, "grad_norm": 0.038672056049108505, "learning_rate": 0.2998340357419689, "loss": 0.1683, "num_input_tokens_seen": 341136, "step": 600 }, { "epoch": 10.530973451327434, "eval_loss": 0.18268337845802307, "eval_runtime": 0.9301, "eval_samples_per_second": 26.88, "eval_steps_per_second": 13.977, "num_input_tokens_seen": 341136, "step": 600 }, { "epoch": 10.619469026548673, "grad_norm": 0.039640266448259354, "learning_rate": 0.29983125401115385, "loss": 0.094, "num_input_tokens_seen": 343664, "step": 605 }, { "epoch": 10.70796460176991, "grad_norm": 0.05746905133128166, "learning_rate": 0.29982844917447654, "loss": 0.1391, "num_input_tokens_seen": 346656, "step": 610 }, { "epoch": 10.79646017699115, "grad_norm": 0.06383468955755234, "learning_rate": 0.2998256212323695, "loss": 0.0731, "num_input_tokens_seen": 349712, "step": 615 }, { "epoch": 10.88495575221239, "grad_norm": 0.023070938885211945, "learning_rate": 0.29982277018526887, "loss": 0.1777, "num_input_tokens_seen": 352896, "step": 620 }, { "epoch": 10.973451327433628, "grad_norm": 0.01819828897714615, "learning_rate": 0.2998198960336143, "loss": 0.0921, "num_input_tokens_seen": 356016, "step": 625 }, { "epoch": 11.053097345132743, "grad_norm": 0.044772058725357056, "learning_rate": 0.299816998777849, "loss": 0.1844, "num_input_tokens_seen": 358432, "step": 630 }, { "epoch": 11.141592920353983, "grad_norm": 0.05431145057082176, "learning_rate": 0.2998140784184197, "loss": 0.1205, "num_input_tokens_seen": 361008, "step": 635 }, { "epoch": 11.230088495575222, "grad_norm": 0.025473183020949364, "learning_rate": 0.2998111349557769, "loss": 0.1439, "num_input_tokens_seen": 363728, "step": 640 }, { "epoch": 11.31858407079646, "grad_norm": 0.12129873782396317, "learning_rate": 0.29980816839037444, "loss": 0.1391, "num_input_tokens_seen": 367232, "step": 645 }, { "epoch": 11.4070796460177, "grad_norm": 0.07408632338047028, "learning_rate": 0.2998051787226698, "loss": 0.1066, "num_input_tokens_seen": 370448, "step": 650 }, { "epoch": 11.495575221238939, "grad_norm": 0.07822711020708084, "learning_rate": 0.29980216595312403, "loss": 0.138, "num_input_tokens_seen": 373504, "step": 655 }, { "epoch": 11.584070796460177, "grad_norm": 0.035831745713949203, "learning_rate": 0.29979913008220177, "loss": 0.1095, "num_input_tokens_seen": 376464, "step": 660 }, { "epoch": 11.672566371681416, "grad_norm": 0.05501428619027138, "learning_rate": 0.2997960711103711, "loss": 0.1716, "num_input_tokens_seen": 379216, "step": 665 }, { "epoch": 11.761061946902656, "grad_norm": 0.036521703004837036, "learning_rate": 0.29979298903810386, "loss": 0.0997, "num_input_tokens_seen": 381664, "step": 670 }, { "epoch": 11.849557522123893, "grad_norm": 0.017547767609357834, "learning_rate": 0.29978988386587524, "loss": 0.1861, "num_input_tokens_seen": 384432, "step": 675 }, { "epoch": 11.938053097345133, "grad_norm": 0.021192388609051704, "learning_rate": 0.2997867555941642, "loss": 0.0548, "num_input_tokens_seen": 387296, "step": 680 }, { "epoch": 12.017699115044248, "grad_norm": 0.0301345344632864, "learning_rate": 0.299783604223453, "loss": 0.1076, "num_input_tokens_seen": 390160, "step": 685 }, { "epoch": 12.106194690265486, "grad_norm": 0.09441268444061279, "learning_rate": 0.29978042975422786, "loss": 0.0864, "num_input_tokens_seen": 392944, "step": 690 }, { "epoch": 12.194690265486726, "grad_norm": 0.010798695497214794, "learning_rate": 0.29977723218697816, "loss": 0.0513, "num_input_tokens_seen": 395984, "step": 695 }, { "epoch": 12.283185840707965, "grad_norm": 0.07217500358819962, "learning_rate": 0.299774011522197, "loss": 0.1622, "num_input_tokens_seen": 398848, "step": 700 }, { "epoch": 12.371681415929203, "grad_norm": 0.01950668916106224, "learning_rate": 0.29977076776038114, "loss": 0.1213, "num_input_tokens_seen": 401536, "step": 705 }, { "epoch": 12.460176991150442, "grad_norm": 0.037151433527469635, "learning_rate": 0.2997675009020307, "loss": 0.0885, "num_input_tokens_seen": 404864, "step": 710 }, { "epoch": 12.548672566371682, "grad_norm": 0.03521841764450073, "learning_rate": 0.2997642109476496, "loss": 0.0841, "num_input_tokens_seen": 407744, "step": 715 }, { "epoch": 12.63716814159292, "grad_norm": 0.05878576263785362, "learning_rate": 0.299760897897745, "loss": 0.1851, "num_input_tokens_seen": 410416, "step": 720 }, { "epoch": 12.725663716814159, "grad_norm": 0.05503930523991585, "learning_rate": 0.29975756175282803, "loss": 0.154, "num_input_tokens_seen": 413488, "step": 725 }, { "epoch": 12.814159292035399, "grad_norm": 0.00581141235306859, "learning_rate": 0.29975420251341306, "loss": 0.143, "num_input_tokens_seen": 416048, "step": 730 }, { "epoch": 12.902654867256636, "grad_norm": 0.10581871122121811, "learning_rate": 0.29975082018001814, "loss": 0.2202, "num_input_tokens_seen": 418944, "step": 735 }, { "epoch": 12.991150442477876, "grad_norm": 0.04614975303411484, "learning_rate": 0.2997474147531648, "loss": 0.1573, "num_input_tokens_seen": 421632, "step": 740 }, { "epoch": 13.070796460176991, "grad_norm": 0.01075072307139635, "learning_rate": 0.29974398623337833, "loss": 0.0464, "num_input_tokens_seen": 424608, "step": 745 }, { "epoch": 13.15929203539823, "grad_norm": 0.017960282042622566, "learning_rate": 0.2997405346211873, "loss": 0.1037, "num_input_tokens_seen": 427088, "step": 750 }, { "epoch": 13.247787610619469, "grad_norm": 0.0385766439139843, "learning_rate": 0.2997370599171241, "loss": 0.1049, "num_input_tokens_seen": 429744, "step": 755 }, { "epoch": 13.336283185840708, "grad_norm": 0.05268830806016922, "learning_rate": 0.2997335621217246, "loss": 0.0809, "num_input_tokens_seen": 432752, "step": 760 }, { "epoch": 13.424778761061948, "grad_norm": 0.03978714719414711, "learning_rate": 0.29973004123552816, "loss": 0.1434, "num_input_tokens_seen": 435456, "step": 765 }, { "epoch": 13.513274336283185, "grad_norm": 0.0829022005200386, "learning_rate": 0.2997264972590777, "loss": 0.0889, "num_input_tokens_seen": 438608, "step": 770 }, { "epoch": 13.601769911504425, "grad_norm": 0.05147767812013626, "learning_rate": 0.29972293019291973, "loss": 0.1205, "num_input_tokens_seen": 441248, "step": 775 }, { "epoch": 13.690265486725664, "grad_norm": 0.02918502688407898, "learning_rate": 0.2997193400376045, "loss": 0.1391, "num_input_tokens_seen": 444160, "step": 780 }, { "epoch": 13.778761061946902, "grad_norm": 0.011774185113608837, "learning_rate": 0.2997157267936854, "loss": 0.0952, "num_input_tokens_seen": 447200, "step": 785 }, { "epoch": 13.867256637168142, "grad_norm": 0.04630854353308678, "learning_rate": 0.2997120904617199, "loss": 0.0895, "num_input_tokens_seen": 450320, "step": 790 }, { "epoch": 13.955752212389381, "grad_norm": 0.05750810354948044, "learning_rate": 0.29970843104226863, "loss": 0.0835, "num_input_tokens_seen": 453328, "step": 795 }, { "epoch": 14.035398230088495, "grad_norm": 0.05526066944003105, "learning_rate": 0.2997047485358959, "loss": 0.1169, "num_input_tokens_seen": 455488, "step": 800 }, { "epoch": 14.035398230088495, "eval_loss": 0.3692370653152466, "eval_runtime": 0.9282, "eval_samples_per_second": 26.933, "eval_steps_per_second": 14.005, "num_input_tokens_seen": 455488, "step": 800 }, { "epoch": 14.123893805309734, "grad_norm": 0.001955971820279956, "learning_rate": 0.2997010429431697, "loss": 0.068, "num_input_tokens_seen": 458400, "step": 805 }, { "epoch": 14.212389380530974, "grad_norm": 0.003783697262406349, "learning_rate": 0.29969731426466134, "loss": 0.0242, "num_input_tokens_seen": 461168, "step": 810 }, { "epoch": 14.300884955752213, "grad_norm": 0.049082037061452866, "learning_rate": 0.299693562500946, "loss": 0.1366, "num_input_tokens_seen": 463840, "step": 815 }, { "epoch": 14.389380530973451, "grad_norm": 0.044649045914411545, "learning_rate": 0.29968978765260207, "loss": 0.0568, "num_input_tokens_seen": 467008, "step": 820 }, { "epoch": 14.47787610619469, "grad_norm": 0.06274111568927765, "learning_rate": 0.2996859897202118, "loss": 0.0451, "num_input_tokens_seen": 469856, "step": 825 }, { "epoch": 14.56637168141593, "grad_norm": 0.008572902530431747, "learning_rate": 0.2996821687043609, "loss": 0.0694, "num_input_tokens_seen": 472672, "step": 830 }, { "epoch": 14.654867256637168, "grad_norm": 0.007443713489919901, "learning_rate": 0.2996783246056384, "loss": 0.0542, "num_input_tokens_seen": 475648, "step": 835 }, { "epoch": 14.743362831858407, "grad_norm": 0.04691655933856964, "learning_rate": 0.29967445742463744, "loss": 0.0625, "num_input_tokens_seen": 479216, "step": 840 }, { "epoch": 14.831858407079647, "grad_norm": 0.0310326237231493, "learning_rate": 0.29967056716195417, "loss": 0.1367, "num_input_tokens_seen": 481744, "step": 845 }, { "epoch": 14.920353982300885, "grad_norm": 0.019747117534279823, "learning_rate": 0.2996666538181885, "loss": 0.1892, "num_input_tokens_seen": 484640, "step": 850 }, { "epoch": 15.0, "grad_norm": 0.04650654271245003, "learning_rate": 0.29966271739394407, "loss": 0.1188, "num_input_tokens_seen": 486672, "step": 855 }, { "epoch": 15.08849557522124, "grad_norm": 0.039386965334415436, "learning_rate": 0.29965875788982776, "loss": 0.0739, "num_input_tokens_seen": 489680, "step": 860 }, { "epoch": 15.176991150442477, "grad_norm": 0.05197660252451897, "learning_rate": 0.2996547753064503, "loss": 0.2028, "num_input_tokens_seen": 492864, "step": 865 }, { "epoch": 15.265486725663717, "grad_norm": 0.04256889596581459, "learning_rate": 0.29965076964442583, "loss": 0.2029, "num_input_tokens_seen": 495568, "step": 870 }, { "epoch": 15.353982300884956, "grad_norm": 0.03237804397940636, "learning_rate": 0.299646740904372, "loss": 0.1158, "num_input_tokens_seen": 498544, "step": 875 }, { "epoch": 15.442477876106194, "grad_norm": 0.028797201812267303, "learning_rate": 0.29964268908691016, "loss": 0.129, "num_input_tokens_seen": 501472, "step": 880 }, { "epoch": 15.530973451327434, "grad_norm": 0.06987904012203217, "learning_rate": 0.29963861419266513, "loss": 0.1092, "num_input_tokens_seen": 504064, "step": 885 }, { "epoch": 15.619469026548673, "grad_norm": 0.103307344019413, "learning_rate": 0.29963451622226533, "loss": 0.1868, "num_input_tokens_seen": 506816, "step": 890 }, { "epoch": 15.70796460176991, "grad_norm": 0.044770486652851105, "learning_rate": 0.29963039517634277, "loss": 0.1244, "num_input_tokens_seen": 509552, "step": 895 }, { "epoch": 15.79646017699115, "grad_norm": 0.009984035976231098, "learning_rate": 0.2996262510555328, "loss": 0.0608, "num_input_tokens_seen": 512576, "step": 900 }, { "epoch": 15.88495575221239, "grad_norm": 0.0823654904961586, "learning_rate": 0.2996220838604746, "loss": 0.0569, "num_input_tokens_seen": 515664, "step": 905 }, { "epoch": 15.973451327433628, "grad_norm": 0.04599125683307648, "learning_rate": 0.29961789359181085, "loss": 0.0578, "num_input_tokens_seen": 518464, "step": 910 }, { "epoch": 16.053097345132745, "grad_norm": 0.05799437314271927, "learning_rate": 0.29961368025018764, "loss": 0.4042, "num_input_tokens_seen": 520584, "step": 915 }, { "epoch": 16.141592920353983, "grad_norm": 0.0686408057808876, "learning_rate": 0.2996094438362548, "loss": 0.1675, "num_input_tokens_seen": 524104, "step": 920 }, { "epoch": 16.23008849557522, "grad_norm": 0.06178189441561699, "learning_rate": 0.2996051843506657, "loss": 0.1185, "num_input_tokens_seen": 527352, "step": 925 }, { "epoch": 16.31858407079646, "grad_norm": 0.07655368000268936, "learning_rate": 0.299600901794077, "loss": 0.1685, "num_input_tokens_seen": 530328, "step": 930 }, { "epoch": 16.4070796460177, "grad_norm": 0.007738374173641205, "learning_rate": 0.29959659616714923, "loss": 0.1346, "num_input_tokens_seen": 532936, "step": 935 }, { "epoch": 16.495575221238937, "grad_norm": 0.01790127158164978, "learning_rate": 0.2995922674705464, "loss": 0.174, "num_input_tokens_seen": 536168, "step": 940 }, { "epoch": 16.58407079646018, "grad_norm": 0.10434616357088089, "learning_rate": 0.2995879157049361, "loss": 0.2692, "num_input_tokens_seen": 538728, "step": 945 }, { "epoch": 16.672566371681416, "grad_norm": 0.06209958344697952, "learning_rate": 0.2995835408709893, "loss": 0.2169, "num_input_tokens_seen": 541832, "step": 950 }, { "epoch": 16.761061946902654, "grad_norm": 0.03988637402653694, "learning_rate": 0.29957914296938076, "loss": 0.2327, "num_input_tokens_seen": 544440, "step": 955 }, { "epoch": 16.849557522123895, "grad_norm": 0.012704829685389996, "learning_rate": 0.2995747220007886, "loss": 0.118, "num_input_tokens_seen": 547256, "step": 960 }, { "epoch": 16.938053097345133, "grad_norm": 0.0582464300096035, "learning_rate": 0.2995702779658947, "loss": 0.1066, "num_input_tokens_seen": 550360, "step": 965 }, { "epoch": 17.01769911504425, "grad_norm": 0.02164345234632492, "learning_rate": 0.29956581086538425, "loss": 0.1278, "num_input_tokens_seen": 552464, "step": 970 }, { "epoch": 17.106194690265486, "grad_norm": 0.03588252514600754, "learning_rate": 0.2995613206999462, "loss": 0.1696, "num_input_tokens_seen": 555040, "step": 975 }, { "epoch": 17.194690265486727, "grad_norm": 0.045338165014982224, "learning_rate": 0.29955680747027297, "loss": 0.1647, "num_input_tokens_seen": 557968, "step": 980 }, { "epoch": 17.283185840707965, "grad_norm": 0.017174439504742622, "learning_rate": 0.2995522711770607, "loss": 0.2074, "num_input_tokens_seen": 560768, "step": 985 }, { "epoch": 17.371681415929203, "grad_norm": 0.04313628003001213, "learning_rate": 0.2995477118210087, "loss": 0.1886, "num_input_tokens_seen": 563424, "step": 990 }, { "epoch": 17.460176991150444, "grad_norm": 0.027376322075724602, "learning_rate": 0.29954312940282024, "loss": 0.0531, "num_input_tokens_seen": 566384, "step": 995 }, { "epoch": 17.548672566371682, "grad_norm": 0.07877122610807419, "learning_rate": 0.29953852392320196, "loss": 0.2206, "num_input_tokens_seen": 569504, "step": 1000 }, { "epoch": 17.548672566371682, "eval_loss": 0.16221453249454498, "eval_runtime": 0.9346, "eval_samples_per_second": 26.748, "eval_steps_per_second": 13.909, "num_input_tokens_seen": 569504, "step": 1000 }, { "epoch": 17.63716814159292, "grad_norm": 0.04619221016764641, "learning_rate": 0.2995338953828641, "loss": 0.1302, "num_input_tokens_seen": 572608, "step": 1005 }, { "epoch": 17.72566371681416, "grad_norm": 0.029165692627429962, "learning_rate": 0.2995292437825204, "loss": 0.1459, "num_input_tokens_seen": 575504, "step": 1010 }, { "epoch": 17.8141592920354, "grad_norm": 0.009039947763085365, "learning_rate": 0.29952456912288816, "loss": 0.1063, "num_input_tokens_seen": 578064, "step": 1015 }, { "epoch": 17.902654867256636, "grad_norm": 0.039725981652736664, "learning_rate": 0.2995198714046884, "loss": 0.1626, "num_input_tokens_seen": 581312, "step": 1020 }, { "epoch": 17.991150442477878, "grad_norm": 0.054699528962373734, "learning_rate": 0.2995151506286454, "loss": 0.1926, "num_input_tokens_seen": 584240, "step": 1025 }, { "epoch": 18.07079646017699, "grad_norm": 0.03119960054755211, "learning_rate": 0.2995104067954873, "loss": 0.1908, "num_input_tokens_seen": 586296, "step": 1030 }, { "epoch": 18.15929203539823, "grad_norm": 0.029157282784581184, "learning_rate": 0.2995056399059456, "loss": 0.3949, "num_input_tokens_seen": 588936, "step": 1035 }, { "epoch": 18.24778761061947, "grad_norm": 0.020410750061273575, "learning_rate": 0.2995008499607554, "loss": 0.1605, "num_input_tokens_seen": 591816, "step": 1040 }, { "epoch": 18.336283185840706, "grad_norm": 0.013808061368763447, "learning_rate": 0.2994960369606554, "loss": 0.1222, "num_input_tokens_seen": 595000, "step": 1045 }, { "epoch": 18.424778761061948, "grad_norm": 0.034983083605766296, "learning_rate": 0.2994912009063878, "loss": 0.0674, "num_input_tokens_seen": 597960, "step": 1050 }, { "epoch": 18.513274336283185, "grad_norm": 0.046385470777750015, "learning_rate": 0.29948634179869843, "loss": 0.0976, "num_input_tokens_seen": 600728, "step": 1055 }, { "epoch": 18.601769911504427, "grad_norm": 0.01408429816365242, "learning_rate": 0.29948145963833656, "loss": 0.0755, "num_input_tokens_seen": 603496, "step": 1060 }, { "epoch": 18.690265486725664, "grad_norm": 0.05294256657361984, "learning_rate": 0.29947655442605514, "loss": 0.1158, "num_input_tokens_seen": 606104, "step": 1065 }, { "epoch": 18.778761061946902, "grad_norm": 0.018804816529154778, "learning_rate": 0.2994716261626106, "loss": 0.0952, "num_input_tokens_seen": 609160, "step": 1070 }, { "epoch": 18.86725663716814, "grad_norm": 0.020481033250689507, "learning_rate": 0.2994666748487629, "loss": 0.0937, "num_input_tokens_seen": 612264, "step": 1075 }, { "epoch": 18.95575221238938, "grad_norm": 0.02373555861413479, "learning_rate": 0.2994617004852756, "loss": 0.0929, "num_input_tokens_seen": 615256, "step": 1080 }, { "epoch": 19.035398230088497, "grad_norm": 0.03828621283173561, "learning_rate": 0.2994567030729159, "loss": 0.0655, "num_input_tokens_seen": 617880, "step": 1085 }, { "epoch": 19.123893805309734, "grad_norm": 0.0347868986427784, "learning_rate": 0.29945168261245436, "loss": 0.1002, "num_input_tokens_seen": 620360, "step": 1090 }, { "epoch": 19.212389380530972, "grad_norm": 0.0012306994758546352, "learning_rate": 0.29944663910466524, "loss": 0.0101, "num_input_tokens_seen": 623224, "step": 1095 }, { "epoch": 19.300884955752213, "grad_norm": 0.01410099770873785, "learning_rate": 0.2994415725503263, "loss": 0.059, "num_input_tokens_seen": 625560, "step": 1100 }, { "epoch": 19.38938053097345, "grad_norm": 0.029469553381204605, "learning_rate": 0.29943648295021885, "loss": 0.0519, "num_input_tokens_seen": 628488, "step": 1105 }, { "epoch": 19.47787610619469, "grad_norm": 0.04847237095236778, "learning_rate": 0.2994313703051278, "loss": 0.145, "num_input_tokens_seen": 631400, "step": 1110 }, { "epoch": 19.56637168141593, "grad_norm": 0.007048665080219507, "learning_rate": 0.29942623461584156, "loss": 0.109, "num_input_tokens_seen": 634408, "step": 1115 }, { "epoch": 19.654867256637168, "grad_norm": 0.02925610914826393, "learning_rate": 0.29942107588315214, "loss": 0.0506, "num_input_tokens_seen": 637448, "step": 1120 }, { "epoch": 19.743362831858406, "grad_norm": 0.033245909959077835, "learning_rate": 0.29941589410785513, "loss": 0.0651, "num_input_tokens_seen": 640280, "step": 1125 }, { "epoch": 19.831858407079647, "grad_norm": 0.009714157320559025, "learning_rate": 0.29941068929074954, "loss": 0.14, "num_input_tokens_seen": 642984, "step": 1130 }, { "epoch": 19.920353982300885, "grad_norm": 0.01279335655272007, "learning_rate": 0.2994054614326381, "loss": 0.071, "num_input_tokens_seen": 646072, "step": 1135 }, { "epoch": 20.0, "grad_norm": 0.09708935767412186, "learning_rate": 0.29940021053432686, "loss": 0.1207, "num_input_tokens_seen": 648736, "step": 1140 }, { "epoch": 20.088495575221238, "grad_norm": 0.11073679476976395, "learning_rate": 0.29939493659662575, "loss": 0.1115, "num_input_tokens_seen": 651264, "step": 1145 }, { "epoch": 20.17699115044248, "grad_norm": 0.012537207454442978, "learning_rate": 0.299389639620348, "loss": 0.1204, "num_input_tokens_seen": 654352, "step": 1150 }, { "epoch": 20.265486725663717, "grad_norm": 0.03436347469687462, "learning_rate": 0.29938431960631046, "loss": 0.0668, "num_input_tokens_seen": 657088, "step": 1155 }, { "epoch": 20.353982300884955, "grad_norm": 0.03144063800573349, "learning_rate": 0.2993789765553335, "loss": 0.1168, "num_input_tokens_seen": 659968, "step": 1160 }, { "epoch": 20.442477876106196, "grad_norm": 0.0384085439145565, "learning_rate": 0.2993736104682412, "loss": 0.0787, "num_input_tokens_seen": 663152, "step": 1165 }, { "epoch": 20.530973451327434, "grad_norm": 0.07563851773738861, "learning_rate": 0.299368221345861, "loss": 0.076, "num_input_tokens_seen": 666240, "step": 1170 }, { "epoch": 20.61946902654867, "grad_norm": 0.04717668518424034, "learning_rate": 0.29936280918902397, "loss": 0.1242, "num_input_tokens_seen": 668896, "step": 1175 }, { "epoch": 20.707964601769913, "grad_norm": 0.08111153542995453, "learning_rate": 0.2993573739985648, "loss": 0.0807, "num_input_tokens_seen": 671376, "step": 1180 }, { "epoch": 20.79646017699115, "grad_norm": 0.01971960999071598, "learning_rate": 0.2993519157753216, "loss": 0.061, "num_input_tokens_seen": 674304, "step": 1185 }, { "epoch": 20.884955752212388, "grad_norm": 0.008201570250093937, "learning_rate": 0.2993464345201361, "loss": 0.1526, "num_input_tokens_seen": 677152, "step": 1190 }, { "epoch": 20.97345132743363, "grad_norm": 0.040368225425481796, "learning_rate": 0.2993409302338536, "loss": 0.0303, "num_input_tokens_seen": 679840, "step": 1195 }, { "epoch": 21.053097345132745, "grad_norm": 0.06068111211061478, "learning_rate": 0.2993354029173229, "loss": 0.0923, "num_input_tokens_seen": 682024, "step": 1200 }, { "epoch": 21.053097345132745, "eval_loss": 0.3018835783004761, "eval_runtime": 0.9297, "eval_samples_per_second": 26.891, "eval_steps_per_second": 13.983, "num_input_tokens_seen": 682024, "step": 1200 }, { "epoch": 21.141592920353983, "grad_norm": 0.10846542567014694, "learning_rate": 0.2993298525713965, "loss": 0.1178, "num_input_tokens_seen": 685320, "step": 1205 }, { "epoch": 21.23008849557522, "grad_norm": 0.0035468339920043945, "learning_rate": 0.29932427919693017, "loss": 0.0531, "num_input_tokens_seen": 688200, "step": 1210 }, { "epoch": 21.31858407079646, "grad_norm": 0.02589237317442894, "learning_rate": 0.2993186827947834, "loss": 0.0576, "num_input_tokens_seen": 691432, "step": 1215 }, { "epoch": 21.4070796460177, "grad_norm": 0.07809528708457947, "learning_rate": 0.2993130633658194, "loss": 0.0765, "num_input_tokens_seen": 694088, "step": 1220 }, { "epoch": 21.495575221238937, "grad_norm": 0.004746662452816963, "learning_rate": 0.29930742091090456, "loss": 0.0183, "num_input_tokens_seen": 696568, "step": 1225 }, { "epoch": 21.58407079646018, "grad_norm": 0.07144982367753983, "learning_rate": 0.29930175543090914, "loss": 0.1573, "num_input_tokens_seen": 699128, "step": 1230 }, { "epoch": 21.672566371681416, "grad_norm": 0.024281533434987068, "learning_rate": 0.2992960669267068, "loss": 0.1005, "num_input_tokens_seen": 702936, "step": 1235 }, { "epoch": 21.761061946902654, "grad_norm": 0.052919622510671616, "learning_rate": 0.29929035539917476, "loss": 0.0991, "num_input_tokens_seen": 705560, "step": 1240 }, { "epoch": 21.849557522123895, "grad_norm": 0.021938621997833252, "learning_rate": 0.2992846208491938, "loss": 0.0506, "num_input_tokens_seen": 708312, "step": 1245 }, { "epoch": 21.938053097345133, "grad_norm": 0.01015006098896265, "learning_rate": 0.2992788632776483, "loss": 0.0501, "num_input_tokens_seen": 711224, "step": 1250 }, { "epoch": 22.01769911504425, "grad_norm": 0.05136142298579216, "learning_rate": 0.29927308268542613, "loss": 0.0967, "num_input_tokens_seen": 713400, "step": 1255 }, { "epoch": 22.106194690265486, "grad_norm": 0.05894184857606888, "learning_rate": 0.2992672790734187, "loss": 0.053, "num_input_tokens_seen": 716440, "step": 1260 }, { "epoch": 22.194690265486727, "grad_norm": 0.0012867054902017117, "learning_rate": 0.299261452442521, "loss": 0.0791, "num_input_tokens_seen": 719112, "step": 1265 }, { "epoch": 22.283185840707965, "grad_norm": 0.06244965270161629, "learning_rate": 0.29925560279363167, "loss": 0.07, "num_input_tokens_seen": 722520, "step": 1270 }, { "epoch": 22.371681415929203, "grad_norm": 0.0011850915616378188, "learning_rate": 0.29924973012765266, "loss": 0.0368, "num_input_tokens_seen": 726040, "step": 1275 }, { "epoch": 22.460176991150444, "grad_norm": 0.005481795407831669, "learning_rate": 0.29924383444548974, "loss": 0.026, "num_input_tokens_seen": 728712, "step": 1280 }, { "epoch": 22.548672566371682, "grad_norm": 0.0618191659450531, "learning_rate": 0.299237915748052, "loss": 0.1531, "num_input_tokens_seen": 731848, "step": 1285 }, { "epoch": 22.63716814159292, "grad_norm": 0.006729009561240673, "learning_rate": 0.2992319740362522, "loss": 0.0592, "num_input_tokens_seen": 734504, "step": 1290 }, { "epoch": 22.72566371681416, "grad_norm": 0.036971136927604675, "learning_rate": 0.2992260093110066, "loss": 0.0356, "num_input_tokens_seen": 737240, "step": 1295 }, { "epoch": 22.8141592920354, "grad_norm": 0.008717183955013752, "learning_rate": 0.2992200215732352, "loss": 0.0241, "num_input_tokens_seen": 740024, "step": 1300 }, { "epoch": 22.902654867256636, "grad_norm": 0.005968763958662748, "learning_rate": 0.2992140108238611, "loss": 0.0138, "num_input_tokens_seen": 742568, "step": 1305 }, { "epoch": 22.991150442477878, "grad_norm": 0.009583886712789536, "learning_rate": 0.2992079770638115, "loss": 0.0277, "num_input_tokens_seen": 745224, "step": 1310 }, { "epoch": 23.07079646017699, "grad_norm": 0.0741044208407402, "learning_rate": 0.29920192029401677, "loss": 0.1856, "num_input_tokens_seen": 747824, "step": 1315 }, { "epoch": 23.15929203539823, "grad_norm": 0.00021991966059431434, "learning_rate": 0.2991958405154109, "loss": 0.0163, "num_input_tokens_seen": 750752, "step": 1320 }, { "epoch": 23.24778761061947, "grad_norm": 0.05521894618868828, "learning_rate": 0.29918973772893154, "loss": 0.0799, "num_input_tokens_seen": 753872, "step": 1325 }, { "epoch": 23.336283185840706, "grad_norm": 0.0023225238546729088, "learning_rate": 0.29918361193551973, "loss": 0.0335, "num_input_tokens_seen": 756752, "step": 1330 }, { "epoch": 23.424778761061948, "grad_norm": 0.0033994922414422035, "learning_rate": 0.29917746313612026, "loss": 0.0395, "num_input_tokens_seen": 759328, "step": 1335 }, { "epoch": 23.513274336283185, "grad_norm": 0.008613375015556812, "learning_rate": 0.29917129133168124, "loss": 0.0506, "num_input_tokens_seen": 762224, "step": 1340 }, { "epoch": 23.601769911504427, "grad_norm": 0.04974130913615227, "learning_rate": 0.2991650965231546, "loss": 0.0822, "num_input_tokens_seen": 765344, "step": 1345 }, { "epoch": 23.690265486725664, "grad_norm": 0.005501857027411461, "learning_rate": 0.29915887871149544, "loss": 0.0781, "num_input_tokens_seen": 768256, "step": 1350 }, { "epoch": 23.778761061946902, "grad_norm": 0.017547132447361946, "learning_rate": 0.2991526378976628, "loss": 0.0634, "num_input_tokens_seen": 770960, "step": 1355 }, { "epoch": 23.86725663716814, "grad_norm": 0.028280172497034073, "learning_rate": 0.29914637408261896, "loss": 0.1649, "num_input_tokens_seen": 773584, "step": 1360 }, { "epoch": 23.95575221238938, "grad_norm": 0.008519790135324001, "learning_rate": 0.29914008726733, "loss": 0.0458, "num_input_tokens_seen": 776848, "step": 1365 }, { "epoch": 24.035398230088497, "grad_norm": 0.004154284950345755, "learning_rate": 0.2991337774527653, "loss": 0.0702, "num_input_tokens_seen": 779304, "step": 1370 }, { "epoch": 24.123893805309734, "grad_norm": 0.0028321868740022182, "learning_rate": 0.2991274446398981, "loss": 0.0177, "num_input_tokens_seen": 782088, "step": 1375 }, { "epoch": 24.212389380530972, "grad_norm": 0.014611666090786457, "learning_rate": 0.29912108882970484, "loss": 0.0222, "num_input_tokens_seen": 785080, "step": 1380 }, { "epoch": 24.300884955752213, "grad_norm": 0.008415494114160538, "learning_rate": 0.2991147100231657, "loss": 0.0544, "num_input_tokens_seen": 787800, "step": 1385 }, { "epoch": 24.38938053097345, "grad_norm": 0.07670147716999054, "learning_rate": 0.2991083082212644, "loss": 0.0466, "num_input_tokens_seen": 790424, "step": 1390 }, { "epoch": 24.47787610619469, "grad_norm": 0.0016266423044726253, "learning_rate": 0.2991018834249881, "loss": 0.0114, "num_input_tokens_seen": 793032, "step": 1395 }, { "epoch": 24.56637168141593, "grad_norm": 0.07207012176513672, "learning_rate": 0.29909543563532764, "loss": 0.0482, "num_input_tokens_seen": 796328, "step": 1400 }, { "epoch": 24.56637168141593, "eval_loss": 0.20980682969093323, "eval_runtime": 0.9402, "eval_samples_per_second": 26.59, "eval_steps_per_second": 13.827, "num_input_tokens_seen": 796328, "step": 1400 }, { "epoch": 24.654867256637168, "grad_norm": 0.011100090108811855, "learning_rate": 0.29908896485327746, "loss": 0.0384, "num_input_tokens_seen": 799176, "step": 1405 }, { "epoch": 24.743362831858406, "grad_norm": 0.036893852055072784, "learning_rate": 0.29908247107983527, "loss": 0.0925, "num_input_tokens_seen": 802072, "step": 1410 }, { "epoch": 24.831858407079647, "grad_norm": 0.007857692427933216, "learning_rate": 0.29907595431600253, "loss": 0.0166, "num_input_tokens_seen": 804648, "step": 1415 }, { "epoch": 24.920353982300885, "grad_norm": 0.1398167908191681, "learning_rate": 0.29906941456278424, "loss": 0.0461, "num_input_tokens_seen": 807368, "step": 1420 }, { "epoch": 25.0, "grad_norm": 0.0017228537471964955, "learning_rate": 0.2990628518211889, "loss": 0.038, "num_input_tokens_seen": 810192, "step": 1425 }, { "epoch": 25.088495575221238, "grad_norm": 0.003742335131391883, "learning_rate": 0.2990562660922286, "loss": 0.0423, "num_input_tokens_seen": 812960, "step": 1430 }, { "epoch": 25.17699115044248, "grad_norm": 0.027222873643040657, "learning_rate": 0.2990496573769189, "loss": 0.067, "num_input_tokens_seen": 816256, "step": 1435 }, { "epoch": 25.265486725663717, "grad_norm": 0.0814589336514473, "learning_rate": 0.29904302567627894, "loss": 0.0506, "num_input_tokens_seen": 819232, "step": 1440 }, { "epoch": 25.353982300884955, "grad_norm": 0.005998051259666681, "learning_rate": 0.2990363709913314, "loss": 0.0522, "num_input_tokens_seen": 822080, "step": 1445 }, { "epoch": 25.442477876106196, "grad_norm": 0.06759881973266602, "learning_rate": 0.29902969332310264, "loss": 0.0548, "num_input_tokens_seen": 824688, "step": 1450 }, { "epoch": 25.530973451327434, "grad_norm": 0.03362750634551048, "learning_rate": 0.2990229926726223, "loss": 0.0222, "num_input_tokens_seen": 827360, "step": 1455 }, { "epoch": 25.61946902654867, "grad_norm": 0.059995878487825394, "learning_rate": 0.29901626904092365, "loss": 0.0337, "num_input_tokens_seen": 830848, "step": 1460 }, { "epoch": 25.707964601769913, "grad_norm": 0.008152305148541927, "learning_rate": 0.2990095224290438, "loss": 0.0441, "num_input_tokens_seen": 833264, "step": 1465 }, { "epoch": 25.79646017699115, "grad_norm": 0.028977930545806885, "learning_rate": 0.29900275283802297, "loss": 0.0138, "num_input_tokens_seen": 836480, "step": 1470 }, { "epoch": 25.884955752212388, "grad_norm": 0.0021785518620163202, "learning_rate": 0.2989959602689051, "loss": 0.0224, "num_input_tokens_seen": 839168, "step": 1475 }, { "epoch": 25.97345132743363, "grad_norm": 0.0018287242855876684, "learning_rate": 0.2989891447227379, "loss": 0.0495, "num_input_tokens_seen": 842160, "step": 1480 }, { "epoch": 26.053097345132745, "grad_norm": 0.022007077932357788, "learning_rate": 0.29898230620057215, "loss": 0.017, "num_input_tokens_seen": 844392, "step": 1485 }, { "epoch": 26.141592920353983, "grad_norm": 0.006985585205256939, "learning_rate": 0.2989754447034626, "loss": 0.007, "num_input_tokens_seen": 847464, "step": 1490 }, { "epoch": 26.23008849557522, "grad_norm": 0.027698179706931114, "learning_rate": 0.2989685602324673, "loss": 0.018, "num_input_tokens_seen": 849752, "step": 1495 }, { "epoch": 26.31858407079646, "grad_norm": 0.003572483779862523, "learning_rate": 0.298961652788648, "loss": 0.0022, "num_input_tokens_seen": 852632, "step": 1500 }, { "epoch": 26.4070796460177, "grad_norm": 0.0845576599240303, "learning_rate": 0.29895472237306986, "loss": 0.0479, "num_input_tokens_seen": 855640, "step": 1505 }, { "epoch": 26.495575221238937, "grad_norm": 0.0020605502650141716, "learning_rate": 0.29894776898680164, "loss": 0.0065, "num_input_tokens_seen": 858728, "step": 1510 }, { "epoch": 26.58407079646018, "grad_norm": 0.06568868458271027, "learning_rate": 0.29894079263091566, "loss": 0.0767, "num_input_tokens_seen": 861736, "step": 1515 }, { "epoch": 26.672566371681416, "grad_norm": 0.04822291433811188, "learning_rate": 0.2989337933064877, "loss": 0.0383, "num_input_tokens_seen": 864248, "step": 1520 }, { "epoch": 26.761061946902654, "grad_norm": 0.11739757657051086, "learning_rate": 0.29892677101459725, "loss": 0.1148, "num_input_tokens_seen": 867288, "step": 1525 }, { "epoch": 26.849557522123895, "grad_norm": 0.10341432690620422, "learning_rate": 0.2989197257563272, "loss": 0.1161, "num_input_tokens_seen": 870536, "step": 1530 }, { "epoch": 26.938053097345133, "grad_norm": 0.06631104648113251, "learning_rate": 0.2989126575327639, "loss": 0.0718, "num_input_tokens_seen": 873048, "step": 1535 }, { "epoch": 27.01769911504425, "grad_norm": 0.007914594374597073, "learning_rate": 0.29890556634499754, "loss": 0.073, "num_input_tokens_seen": 875304, "step": 1540 }, { "epoch": 27.106194690265486, "grad_norm": 0.004657643381506205, "learning_rate": 0.2988984521941216, "loss": 0.0566, "num_input_tokens_seen": 878312, "step": 1545 }, { "epoch": 27.194690265486727, "grad_norm": 0.04576535150408745, "learning_rate": 0.29889131508123307, "loss": 0.0811, "num_input_tokens_seen": 881192, "step": 1550 }, { "epoch": 27.283185840707965, "grad_norm": 0.005946827586740255, "learning_rate": 0.2988841550074327, "loss": 0.1027, "num_input_tokens_seen": 883896, "step": 1555 }, { "epoch": 27.371681415929203, "grad_norm": 0.09602929651737213, "learning_rate": 0.2988769719738246, "loss": 0.1841, "num_input_tokens_seen": 886376, "step": 1560 }, { "epoch": 27.460176991150444, "grad_norm": 0.005651470273733139, "learning_rate": 0.29886976598151666, "loss": 0.0983, "num_input_tokens_seen": 889384, "step": 1565 }, { "epoch": 27.548672566371682, "grad_norm": 0.006867815740406513, "learning_rate": 0.29886253703161986, "loss": 0.0125, "num_input_tokens_seen": 892344, "step": 1570 }, { "epoch": 27.63716814159292, "grad_norm": 0.020086800679564476, "learning_rate": 0.29885528512524917, "loss": 0.0948, "num_input_tokens_seen": 895048, "step": 1575 }, { "epoch": 27.72566371681416, "grad_norm": 0.005952614359557629, "learning_rate": 0.29884801026352287, "loss": 0.3111, "num_input_tokens_seen": 897992, "step": 1580 }, { "epoch": 27.8141592920354, "grad_norm": 0.010969198308885098, "learning_rate": 0.2988407124475629, "loss": 0.1454, "num_input_tokens_seen": 900888, "step": 1585 }, { "epoch": 27.902654867256636, "grad_norm": 0.008461329154670238, "learning_rate": 0.2988333916784945, "loss": 0.1315, "num_input_tokens_seen": 904072, "step": 1590 }, { "epoch": 27.991150442477878, "grad_norm": 0.09626653045415878, "learning_rate": 0.2988260479574468, "loss": 0.1699, "num_input_tokens_seen": 907016, "step": 1595 }, { "epoch": 28.07079646017699, "grad_norm": 0.02973833493888378, "learning_rate": 0.2988186812855523, "loss": 0.1297, "num_input_tokens_seen": 909320, "step": 1600 }, { "epoch": 28.07079646017699, "eval_loss": 0.1714715212583542, "eval_runtime": 0.9186, "eval_samples_per_second": 27.214, "eval_steps_per_second": 14.151, "num_input_tokens_seen": 909320, "step": 1600 }, { "epoch": 28.15929203539823, "grad_norm": 0.013810357078909874, "learning_rate": 0.29881129166394693, "loss": 0.0483, "num_input_tokens_seen": 912536, "step": 1605 }, { "epoch": 28.24778761061947, "grad_norm": 0.0178288035094738, "learning_rate": 0.29880387909377026, "loss": 0.178, "num_input_tokens_seen": 915480, "step": 1610 }, { "epoch": 28.336283185840706, "grad_norm": 0.10714978724718094, "learning_rate": 0.2987964435761655, "loss": 0.2568, "num_input_tokens_seen": 918344, "step": 1615 }, { "epoch": 28.424778761061948, "grad_norm": 0.013234782963991165, "learning_rate": 0.29878898511227925, "loss": 0.1044, "num_input_tokens_seen": 920808, "step": 1620 }, { "epoch": 28.513274336283185, "grad_norm": 0.04466245695948601, "learning_rate": 0.2987815037032617, "loss": 0.1365, "num_input_tokens_seen": 923976, "step": 1625 }, { "epoch": 28.601769911504427, "grad_norm": 0.014913058839738369, "learning_rate": 0.29877399935026655, "loss": 0.0472, "num_input_tokens_seen": 926568, "step": 1630 }, { "epoch": 28.690265486725664, "grad_norm": 0.03534584864974022, "learning_rate": 0.2987664720544511, "loss": 0.0901, "num_input_tokens_seen": 929512, "step": 1635 }, { "epoch": 28.778761061946902, "grad_norm": 0.0429089292883873, "learning_rate": 0.2987589218169761, "loss": 0.0837, "num_input_tokens_seen": 932424, "step": 1640 }, { "epoch": 28.86725663716814, "grad_norm": 0.04179130867123604, "learning_rate": 0.29875134863900604, "loss": 0.033, "num_input_tokens_seen": 935496, "step": 1645 }, { "epoch": 28.95575221238938, "grad_norm": 0.018635129556059837, "learning_rate": 0.29874375252170865, "loss": 0.0218, "num_input_tokens_seen": 938664, "step": 1650 }, { "epoch": 29.035398230088497, "grad_norm": 0.008180464617908001, "learning_rate": 0.2987361334662553, "loss": 0.0077, "num_input_tokens_seen": 940776, "step": 1655 }, { "epoch": 29.123893805309734, "grad_norm": 0.004545401781797409, "learning_rate": 0.29872849147382113, "loss": 0.005, "num_input_tokens_seen": 943624, "step": 1660 }, { "epoch": 29.212389380530972, "grad_norm": 0.04973456263542175, "learning_rate": 0.2987208265455845, "loss": 0.0639, "num_input_tokens_seen": 946728, "step": 1665 }, { "epoch": 29.300884955752213, "grad_norm": 0.009610278531908989, "learning_rate": 0.29871313868272753, "loss": 0.2009, "num_input_tokens_seen": 949880, "step": 1670 }, { "epoch": 29.38938053097345, "grad_norm": 0.02161199040710926, "learning_rate": 0.29870542788643567, "loss": 0.1236, "num_input_tokens_seen": 952376, "step": 1675 }, { "epoch": 29.47787610619469, "grad_norm": 0.00747729791328311, "learning_rate": 0.2986976941578981, "loss": 0.111, "num_input_tokens_seen": 955272, "step": 1680 }, { "epoch": 29.56637168141593, "grad_norm": 0.037067610770463943, "learning_rate": 0.29868993749830747, "loss": 0.0864, "num_input_tokens_seen": 958376, "step": 1685 }, { "epoch": 29.654867256637168, "grad_norm": 0.010913576930761337, "learning_rate": 0.2986821579088598, "loss": 0.122, "num_input_tokens_seen": 960952, "step": 1690 }, { "epoch": 29.743362831858406, "grad_norm": 0.017758822068572044, "learning_rate": 0.29867435539075504, "loss": 0.0474, "num_input_tokens_seen": 963752, "step": 1695 }, { "epoch": 29.831858407079647, "grad_norm": 0.002401971723884344, "learning_rate": 0.2986665299451963, "loss": 0.0371, "num_input_tokens_seen": 967032, "step": 1700 }, { "epoch": 29.920353982300885, "grad_norm": 0.01882217451930046, "learning_rate": 0.29865868157339037, "loss": 0.0414, "num_input_tokens_seen": 969672, "step": 1705 }, { "epoch": 30.0, "grad_norm": 0.00020096651860512793, "learning_rate": 0.2986508102765476, "loss": 0.1137, "num_input_tokens_seen": 972184, "step": 1710 }, { "epoch": 30.088495575221238, "grad_norm": 0.030124805867671967, "learning_rate": 0.2986429160558818, "loss": 0.0351, "num_input_tokens_seen": 974888, "step": 1715 }, { "epoch": 30.17699115044248, "grad_norm": 0.016149351373314857, "learning_rate": 0.2986349989126104, "loss": 0.0394, "num_input_tokens_seen": 977368, "step": 1720 }, { "epoch": 30.265486725663717, "grad_norm": 0.061579059809446335, "learning_rate": 0.29862705884795426, "loss": 0.0692, "num_input_tokens_seen": 980696, "step": 1725 }, { "epoch": 30.353982300884955, "grad_norm": 0.003947456367313862, "learning_rate": 0.2986190958631379, "loss": 0.0296, "num_input_tokens_seen": 983288, "step": 1730 }, { "epoch": 30.442477876106196, "grad_norm": 0.006744958460330963, "learning_rate": 0.29861110995938933, "loss": 0.0523, "num_input_tokens_seen": 986520, "step": 1735 }, { "epoch": 30.530973451327434, "grad_norm": 0.005091800820082426, "learning_rate": 0.29860310113794, "loss": 0.0182, "num_input_tokens_seen": 989496, "step": 1740 }, { "epoch": 30.61946902654867, "grad_norm": 0.04587063565850258, "learning_rate": 0.29859506940002506, "loss": 0.0408, "num_input_tokens_seen": 992552, "step": 1745 }, { "epoch": 30.707964601769913, "grad_norm": 0.0051162526942789555, "learning_rate": 0.298587014746883, "loss": 0.036, "num_input_tokens_seen": 995448, "step": 1750 }, { "epoch": 30.79646017699115, "grad_norm": 0.009663380682468414, "learning_rate": 0.298578937179756, "loss": 0.0337, "num_input_tokens_seen": 998504, "step": 1755 }, { "epoch": 30.884955752212388, "grad_norm": 0.002576402621343732, "learning_rate": 0.29857083669988976, "loss": 0.0551, "num_input_tokens_seen": 1001352, "step": 1760 }, { "epoch": 30.97345132743363, "grad_norm": 0.07329577207565308, "learning_rate": 0.29856271330853346, "loss": 0.0463, "num_input_tokens_seen": 1003896, "step": 1765 }, { "epoch": 31.053097345132745, "grad_norm": 0.001554815680719912, "learning_rate": 0.2985545670069398, "loss": 0.0156, "num_input_tokens_seen": 1006384, "step": 1770 }, { "epoch": 31.141592920353983, "grad_norm": 0.06041880324482918, "learning_rate": 0.29854639779636505, "loss": 0.0525, "num_input_tokens_seen": 1009600, "step": 1775 }, { "epoch": 31.23008849557522, "grad_norm": 0.03961880877614021, "learning_rate": 0.298538205678069, "loss": 0.0377, "num_input_tokens_seen": 1012736, "step": 1780 }, { "epoch": 31.31858407079646, "grad_norm": 0.0006600599735975266, "learning_rate": 0.298529990653315, "loss": 0.0034, "num_input_tokens_seen": 1015632, "step": 1785 }, { "epoch": 31.4070796460177, "grad_norm": 0.050408147275447845, "learning_rate": 0.29852175272336984, "loss": 0.0425, "num_input_tokens_seen": 1018624, "step": 1790 }, { "epoch": 31.495575221238937, "grad_norm": 0.019935214892029762, "learning_rate": 0.29851349188950405, "loss": 0.0381, "num_input_tokens_seen": 1021152, "step": 1795 }, { "epoch": 31.58407079646018, "grad_norm": 0.0744040235877037, "learning_rate": 0.2985052081529914, "loss": 0.0484, "num_input_tokens_seen": 1023696, "step": 1800 }, { "epoch": 31.58407079646018, "eval_loss": 0.28920552134513855, "eval_runtime": 0.9363, "eval_samples_per_second": 26.7, "eval_steps_per_second": 13.884, "num_input_tokens_seen": 1023696, "step": 1800 }, { "epoch": 31.672566371681416, "grad_norm": 0.001106122275814414, "learning_rate": 0.29849690151510944, "loss": 0.0156, "num_input_tokens_seen": 1026400, "step": 1805 }, { "epoch": 31.761061946902654, "grad_norm": 0.046785518527030945, "learning_rate": 0.2984885719771392, "loss": 0.122, "num_input_tokens_seen": 1029040, "step": 1810 }, { "epoch": 31.849557522123895, "grad_norm": 0.04179898276925087, "learning_rate": 0.2984802195403651, "loss": 0.0581, "num_input_tokens_seen": 1032032, "step": 1815 }, { "epoch": 31.938053097345133, "grad_norm": 0.00936127919703722, "learning_rate": 0.2984718442060752, "loss": 0.0284, "num_input_tokens_seen": 1034912, "step": 1820 }, { "epoch": 32.017699115044245, "grad_norm": 0.020806491374969482, "learning_rate": 0.2984634459755611, "loss": 0.0336, "num_input_tokens_seen": 1037416, "step": 1825 }, { "epoch": 32.10619469026549, "grad_norm": 0.041175976395606995, "learning_rate": 0.29845502485011793, "loss": 0.0165, "num_input_tokens_seen": 1040040, "step": 1830 }, { "epoch": 32.19469026548673, "grad_norm": 0.003511570394039154, "learning_rate": 0.2984465808310444, "loss": 0.0032, "num_input_tokens_seen": 1043288, "step": 1835 }, { "epoch": 32.283185840707965, "grad_norm": 0.0015427340986207128, "learning_rate": 0.29843811391964253, "loss": 0.0475, "num_input_tokens_seen": 1045896, "step": 1840 }, { "epoch": 32.3716814159292, "grad_norm": 0.011623822152614594, "learning_rate": 0.2984296241172182, "loss": 0.0094, "num_input_tokens_seen": 1048792, "step": 1845 }, { "epoch": 32.46017699115044, "grad_norm": 0.0001682483562035486, "learning_rate": 0.29842111142508043, "loss": 0.0068, "num_input_tokens_seen": 1051352, "step": 1850 }, { "epoch": 32.54867256637168, "grad_norm": 0.0014628011267632246, "learning_rate": 0.29841257584454217, "loss": 0.0116, "num_input_tokens_seen": 1054520, "step": 1855 }, { "epoch": 32.63716814159292, "grad_norm": 0.00035647553158923984, "learning_rate": 0.29840401737691963, "loss": 0.0049, "num_input_tokens_seen": 1057768, "step": 1860 }, { "epoch": 32.72566371681416, "grad_norm": 0.05992606282234192, "learning_rate": 0.29839543602353263, "loss": 0.1039, "num_input_tokens_seen": 1060936, "step": 1865 }, { "epoch": 32.8141592920354, "grad_norm": 0.015231838449835777, "learning_rate": 0.2983868317857046, "loss": 0.1211, "num_input_tokens_seen": 1063880, "step": 1870 }, { "epoch": 32.902654867256636, "grad_norm": 0.01562560722231865, "learning_rate": 0.2983782046647623, "loss": 0.0378, "num_input_tokens_seen": 1066616, "step": 1875 }, { "epoch": 32.991150442477874, "grad_norm": 0.008026563562452793, "learning_rate": 0.2983695546620362, "loss": 0.0457, "num_input_tokens_seen": 1069080, "step": 1880 }, { "epoch": 33.07079646017699, "grad_norm": 0.0028766130562871695, "learning_rate": 0.2983608817788603, "loss": 0.0156, "num_input_tokens_seen": 1071712, "step": 1885 }, { "epoch": 33.15929203539823, "grad_norm": 0.03110269270837307, "learning_rate": 0.29835218601657193, "loss": 0.0176, "num_input_tokens_seen": 1074608, "step": 1890 }, { "epoch": 33.24778761061947, "grad_norm": 0.0035526317078620195, "learning_rate": 0.29834346737651224, "loss": 0.0398, "num_input_tokens_seen": 1077168, "step": 1895 }, { "epoch": 33.336283185840706, "grad_norm": 0.09152396023273468, "learning_rate": 0.29833472586002563, "loss": 0.1019, "num_input_tokens_seen": 1079920, "step": 1900 }, { "epoch": 33.424778761061944, "grad_norm": 0.0025378770660609007, "learning_rate": 0.29832596146846024, "loss": 0.0171, "num_input_tokens_seen": 1082816, "step": 1905 }, { "epoch": 33.51327433628319, "grad_norm": 0.0006385207525454462, "learning_rate": 0.2983171742031676, "loss": 0.0375, "num_input_tokens_seen": 1085600, "step": 1910 }, { "epoch": 33.60176991150443, "grad_norm": 0.020058417692780495, "learning_rate": 0.2983083640655028, "loss": 0.0725, "num_input_tokens_seen": 1088464, "step": 1915 }, { "epoch": 33.690265486725664, "grad_norm": 0.03535565361380577, "learning_rate": 0.29829953105682455, "loss": 0.0217, "num_input_tokens_seen": 1091648, "step": 1920 }, { "epoch": 33.7787610619469, "grad_norm": 0.0005175524274818599, "learning_rate": 0.29829067517849495, "loss": 0.0181, "num_input_tokens_seen": 1094080, "step": 1925 }, { "epoch": 33.86725663716814, "grad_norm": 0.002039116807281971, "learning_rate": 0.2982817964318797, "loss": 0.0483, "num_input_tokens_seen": 1097344, "step": 1930 }, { "epoch": 33.95575221238938, "grad_norm": 0.019467122852802277, "learning_rate": 0.298272894818348, "loss": 0.0238, "num_input_tokens_seen": 1100032, "step": 1935 }, { "epoch": 34.0353982300885, "grad_norm": 0.01485920324921608, "learning_rate": 0.2982639703392726, "loss": 0.2296, "num_input_tokens_seen": 1102528, "step": 1940 }, { "epoch": 34.123893805309734, "grad_norm": 0.045919399708509445, "learning_rate": 0.29825502299602974, "loss": 0.0699, "num_input_tokens_seen": 1105648, "step": 1945 }, { "epoch": 34.21238938053097, "grad_norm": 0.053987715393304825, "learning_rate": 0.2982460527899993, "loss": 0.0452, "num_input_tokens_seen": 1108656, "step": 1950 }, { "epoch": 34.30088495575221, "grad_norm": 0.11219354718923569, "learning_rate": 0.29823705972256453, "loss": 0.1482, "num_input_tokens_seen": 1111504, "step": 1955 }, { "epoch": 34.389380530973455, "grad_norm": 0.008548084646463394, "learning_rate": 0.2982280437951123, "loss": 0.044, "num_input_tokens_seen": 1114448, "step": 1960 }, { "epoch": 34.47787610619469, "grad_norm": 0.07235093414783478, "learning_rate": 0.298219005009033, "loss": 0.2286, "num_input_tokens_seen": 1118144, "step": 1965 }, { "epoch": 34.56637168141593, "grad_norm": 0.25690484046936035, "learning_rate": 0.29820994336572043, "loss": 0.5325, "num_input_tokens_seen": 1120592, "step": 1970 }, { "epoch": 34.65486725663717, "grad_norm": 0.11676662415266037, "learning_rate": 0.2982008588665721, "loss": 0.3398, "num_input_tokens_seen": 1123024, "step": 1975 }, { "epoch": 34.743362831858406, "grad_norm": 0.07518859952688217, "learning_rate": 0.2981917515129889, "loss": 0.2636, "num_input_tokens_seen": 1126016, "step": 1980 }, { "epoch": 34.83185840707964, "grad_norm": 0.010378862731158733, "learning_rate": 0.2981826213063753, "loss": 0.0812, "num_input_tokens_seen": 1129056, "step": 1985 }, { "epoch": 34.92035398230089, "grad_norm": 0.052443962544202805, "learning_rate": 0.2981734682481394, "loss": 0.2229, "num_input_tokens_seen": 1131600, "step": 1990 }, { "epoch": 35.0, "grad_norm": 0.04272105544805527, "learning_rate": 0.29816429233969255, "loss": 0.1478, "num_input_tokens_seen": 1134032, "step": 1995 }, { "epoch": 35.08849557522124, "grad_norm": 0.06886915117502213, "learning_rate": 0.2981550935824499, "loss": 0.1897, "num_input_tokens_seen": 1137280, "step": 2000 }, { "epoch": 35.08849557522124, "eval_loss": 0.3682709038257599, "eval_runtime": 0.9332, "eval_samples_per_second": 26.79, "eval_steps_per_second": 13.931, "num_input_tokens_seen": 1137280, "step": 2000 }, { "epoch": 35.176991150442475, "grad_norm": 0.010127265006303787, "learning_rate": 0.29814587197783, "loss": 0.0925, "num_input_tokens_seen": 1139856, "step": 2005 }, { "epoch": 35.26548672566372, "grad_norm": 0.010259071364998817, "learning_rate": 0.29813662752725495, "loss": 0.2344, "num_input_tokens_seen": 1142800, "step": 2010 }, { "epoch": 35.35398230088496, "grad_norm": 0.03356510400772095, "learning_rate": 0.29812736023215025, "loss": 0.1673, "num_input_tokens_seen": 1145824, "step": 2015 }, { "epoch": 35.442477876106196, "grad_norm": 0.0005747402901761234, "learning_rate": 0.29811807009394514, "loss": 0.1207, "num_input_tokens_seen": 1148736, "step": 2020 }, { "epoch": 35.530973451327434, "grad_norm": 0.00039747063419781625, "learning_rate": 0.2981087571140723, "loss": 0.0546, "num_input_tokens_seen": 1151760, "step": 2025 }, { "epoch": 35.61946902654867, "grad_norm": 0.006995708215981722, "learning_rate": 0.2980994212939678, "loss": 0.285, "num_input_tokens_seen": 1154496, "step": 2030 }, { "epoch": 35.70796460176991, "grad_norm": 0.03026936948299408, "learning_rate": 0.2980900626350715, "loss": 0.1518, "num_input_tokens_seen": 1157088, "step": 2035 }, { "epoch": 35.796460176991154, "grad_norm": 0.012430732138454914, "learning_rate": 0.29808068113882646, "loss": 0.1007, "num_input_tokens_seen": 1159824, "step": 2040 }, { "epoch": 35.88495575221239, "grad_norm": 0.02682667039334774, "learning_rate": 0.2980712768066795, "loss": 0.1987, "num_input_tokens_seen": 1162432, "step": 2045 }, { "epoch": 35.97345132743363, "grad_norm": 0.005583827383816242, "learning_rate": 0.2980618496400809, "loss": 0.1475, "num_input_tokens_seen": 1165680, "step": 2050 }, { "epoch": 36.05309734513274, "grad_norm": 0.006933813448995352, "learning_rate": 0.2980523996404844, "loss": 0.0823, "num_input_tokens_seen": 1168136, "step": 2055 }, { "epoch": 36.14159292035398, "grad_norm": 0.012457326054573059, "learning_rate": 0.2980429268093473, "loss": 0.0938, "num_input_tokens_seen": 1171576, "step": 2060 }, { "epoch": 36.230088495575224, "grad_norm": 0.006737505551427603, "learning_rate": 0.29803343114813047, "loss": 0.0864, "num_input_tokens_seen": 1174360, "step": 2065 }, { "epoch": 36.31858407079646, "grad_norm": 0.006521584931761026, "learning_rate": 0.2980239126582983, "loss": 0.0472, "num_input_tokens_seen": 1177240, "step": 2070 }, { "epoch": 36.4070796460177, "grad_norm": 0.002053737174719572, "learning_rate": 0.2980143713413186, "loss": 0.0901, "num_input_tokens_seen": 1179896, "step": 2075 }, { "epoch": 36.49557522123894, "grad_norm": 0.0017744585638865829, "learning_rate": 0.29800480719866274, "loss": 0.0332, "num_input_tokens_seen": 1182664, "step": 2080 }, { "epoch": 36.584070796460175, "grad_norm": 0.021712226793169975, "learning_rate": 0.2979952202318057, "loss": 0.0669, "num_input_tokens_seen": 1185304, "step": 2085 }, { "epoch": 36.67256637168141, "grad_norm": 0.038005631417036057, "learning_rate": 0.2979856104422259, "loss": 0.1202, "num_input_tokens_seen": 1188536, "step": 2090 }, { "epoch": 36.76106194690266, "grad_norm": 0.008423271588981152, "learning_rate": 0.2979759778314052, "loss": 0.0335, "num_input_tokens_seen": 1191304, "step": 2095 }, { "epoch": 36.849557522123895, "grad_norm": 0.002938485937193036, "learning_rate": 0.2979663224008292, "loss": 0.0599, "num_input_tokens_seen": 1193864, "step": 2100 }, { "epoch": 36.93805309734513, "grad_norm": 0.01930067129433155, "learning_rate": 0.2979566441519868, "loss": 0.0655, "num_input_tokens_seen": 1197096, "step": 2105 }, { "epoch": 37.017699115044245, "grad_norm": 0.0013358413707464933, "learning_rate": 0.29794694308637054, "loss": 0.0734, "num_input_tokens_seen": 1199704, "step": 2110 }, { "epoch": 37.10619469026549, "grad_norm": 0.003562435507774353, "learning_rate": 0.2979372192054764, "loss": 0.0579, "num_input_tokens_seen": 1203080, "step": 2115 }, { "epoch": 37.19469026548673, "grad_norm": 0.03605637699365616, "learning_rate": 0.297927472510804, "loss": 0.0883, "num_input_tokens_seen": 1205912, "step": 2120 }, { "epoch": 37.283185840707965, "grad_norm": 0.040509190410375595, "learning_rate": 0.29791770300385634, "loss": 0.1017, "num_input_tokens_seen": 1208792, "step": 2125 }, { "epoch": 37.3716814159292, "grad_norm": 0.00545346038416028, "learning_rate": 0.29790791068614003, "loss": 0.0397, "num_input_tokens_seen": 1211752, "step": 2130 }, { "epoch": 37.46017699115044, "grad_norm": 0.01528411265462637, "learning_rate": 0.2978980955591652, "loss": 0.017, "num_input_tokens_seen": 1214312, "step": 2135 }, { "epoch": 37.54867256637168, "grad_norm": 0.034804169088602066, "learning_rate": 0.2978882576244454, "loss": 0.0881, "num_input_tokens_seen": 1216968, "step": 2140 }, { "epoch": 37.63716814159292, "grad_norm": 0.0058987257070839405, "learning_rate": 0.2978783968834978, "loss": 0.0278, "num_input_tokens_seen": 1220744, "step": 2145 }, { "epoch": 37.72566371681416, "grad_norm": 0.002121626166626811, "learning_rate": 0.29786851333784303, "loss": 0.0367, "num_input_tokens_seen": 1223352, "step": 2150 }, { "epoch": 37.8141592920354, "grad_norm": 0.04717940837144852, "learning_rate": 0.2978586069890053, "loss": 0.0835, "num_input_tokens_seen": 1226424, "step": 2155 }, { "epoch": 37.902654867256636, "grad_norm": 0.009007456712424755, "learning_rate": 0.29784867783851227, "loss": 0.0282, "num_input_tokens_seen": 1228920, "step": 2160 }, { "epoch": 37.991150442477874, "grad_norm": 0.020844202488660812, "learning_rate": 0.2978387258878951, "loss": 0.1106, "num_input_tokens_seen": 1231672, "step": 2165 }, { "epoch": 38.07079646017699, "grad_norm": 0.002085199113935232, "learning_rate": 0.29782875113868856, "loss": 0.0292, "num_input_tokens_seen": 1234104, "step": 2170 }, { "epoch": 38.15929203539823, "grad_norm": 0.0055747199803590775, "learning_rate": 0.2978187535924309, "loss": 0.1164, "num_input_tokens_seen": 1237080, "step": 2175 }, { "epoch": 38.24778761061947, "grad_norm": 0.006691089831292629, "learning_rate": 0.29780873325066376, "loss": 0.03, "num_input_tokens_seen": 1240248, "step": 2180 }, { "epoch": 38.336283185840706, "grad_norm": 0.007961523719131947, "learning_rate": 0.2977986901149325, "loss": 0.128, "num_input_tokens_seen": 1242936, "step": 2185 }, { "epoch": 38.424778761061944, "grad_norm": 0.01194468792527914, "learning_rate": 0.29778862418678587, "loss": 0.0533, "num_input_tokens_seen": 1245752, "step": 2190 }, { "epoch": 38.51327433628319, "grad_norm": 0.0014250905951485038, "learning_rate": 0.29777853546777616, "loss": 0.0486, "num_input_tokens_seen": 1248152, "step": 2195 }, { "epoch": 38.60176991150443, "grad_norm": 0.010688142850995064, "learning_rate": 0.2977684239594592, "loss": 0.0221, "num_input_tokens_seen": 1251592, "step": 2200 }, { "epoch": 38.60176991150443, "eval_loss": 0.2030499279499054, "eval_runtime": 0.9316, "eval_samples_per_second": 26.836, "eval_steps_per_second": 13.955, "num_input_tokens_seen": 1251592, "step": 2200 }, { "epoch": 38.690265486725664, "grad_norm": 0.03169664740562439, "learning_rate": 0.29775828966339424, "loss": 0.0425, "num_input_tokens_seen": 1254312, "step": 2205 }, { "epoch": 38.7787610619469, "grad_norm": 0.02540615387260914, "learning_rate": 0.29774813258114424, "loss": 0.0525, "num_input_tokens_seen": 1257096, "step": 2210 }, { "epoch": 38.86725663716814, "grad_norm": 0.0032327938824892044, "learning_rate": 0.29773795271427544, "loss": 0.0378, "num_input_tokens_seen": 1260184, "step": 2215 }, { "epoch": 38.95575221238938, "grad_norm": 0.00040652762982062995, "learning_rate": 0.2977277500643577, "loss": 0.0125, "num_input_tokens_seen": 1262888, "step": 2220 }, { "epoch": 39.0353982300885, "grad_norm": 0.00010131843009730801, "learning_rate": 0.29771752463296447, "loss": 0.0529, "num_input_tokens_seen": 1265464, "step": 2225 }, { "epoch": 39.123893805309734, "grad_norm": 0.02381172403693199, "learning_rate": 0.29770727642167266, "loss": 0.0156, "num_input_tokens_seen": 1268088, "step": 2230 }, { "epoch": 39.21238938053097, "grad_norm": 0.010098197497427464, "learning_rate": 0.29769700543206257, "loss": 0.0051, "num_input_tokens_seen": 1271352, "step": 2235 }, { "epoch": 39.30088495575221, "grad_norm": 0.0014061558758839965, "learning_rate": 0.2976867116657182, "loss": 0.0086, "num_input_tokens_seen": 1273736, "step": 2240 }, { "epoch": 39.389380530973455, "grad_norm": 0.00041998736560344696, "learning_rate": 0.2976763951242269, "loss": 0.0293, "num_input_tokens_seen": 1276600, "step": 2245 }, { "epoch": 39.47787610619469, "grad_norm": 0.0028859747108072042, "learning_rate": 0.29766605580917965, "loss": 0.002, "num_input_tokens_seen": 1279800, "step": 2250 }, { "epoch": 39.56637168141593, "grad_norm": 0.005030928645282984, "learning_rate": 0.29765569372217093, "loss": 0.0078, "num_input_tokens_seen": 1283144, "step": 2255 }, { "epoch": 39.65486725663717, "grad_norm": 0.02045232057571411, "learning_rate": 0.2976453088647987, "loss": 0.0579, "num_input_tokens_seen": 1285816, "step": 2260 }, { "epoch": 39.743362831858406, "grad_norm": 0.00452482420951128, "learning_rate": 0.2976349012386644, "loss": 0.0073, "num_input_tokens_seen": 1288856, "step": 2265 }, { "epoch": 39.83185840707964, "grad_norm": 0.040368787944316864, "learning_rate": 0.29762447084537297, "loss": 0.0859, "num_input_tokens_seen": 1291544, "step": 2270 }, { "epoch": 39.92035398230089, "grad_norm": 0.042311638593673706, "learning_rate": 0.29761401768653306, "loss": 0.0589, "num_input_tokens_seen": 1294296, "step": 2275 }, { "epoch": 40.0, "grad_norm": 0.0013129874132573605, "learning_rate": 0.29760354176375653, "loss": 0.0073, "num_input_tokens_seen": 1296696, "step": 2280 }, { "epoch": 40.08849557522124, "grad_norm": 0.04044976457953453, "learning_rate": 0.29759304307865897, "loss": 0.0196, "num_input_tokens_seen": 1299640, "step": 2285 }, { "epoch": 40.176991150442475, "grad_norm": 0.0229876097291708, "learning_rate": 0.2975825216328594, "loss": 0.0287, "num_input_tokens_seen": 1302520, "step": 2290 }, { "epoch": 40.26548672566372, "grad_norm": 0.1036461815237999, "learning_rate": 0.2975719774279804, "loss": 0.0405, "num_input_tokens_seen": 1305784, "step": 2295 }, { "epoch": 40.35398230088496, "grad_norm": 0.0316178984940052, "learning_rate": 0.29756141046564794, "loss": 0.018, "num_input_tokens_seen": 1309128, "step": 2300 }, { "epoch": 40.442477876106196, "grad_norm": 0.0034250549506396055, "learning_rate": 0.2975508207474916, "loss": 0.0103, "num_input_tokens_seen": 1311592, "step": 2305 }, { "epoch": 40.530973451327434, "grad_norm": 0.00023040662927087396, "learning_rate": 0.2975402082751445, "loss": 0.0054, "num_input_tokens_seen": 1314104, "step": 2310 }, { "epoch": 40.61946902654867, "grad_norm": 0.0020521068945527077, "learning_rate": 0.29752957305024313, "loss": 0.0113, "num_input_tokens_seen": 1316776, "step": 2315 }, { "epoch": 40.70796460176991, "grad_norm": 0.0021144552156329155, "learning_rate": 0.2975189150744277, "loss": 0.03, "num_input_tokens_seen": 1319400, "step": 2320 }, { "epoch": 40.796460176991154, "grad_norm": 0.0022206571884453297, "learning_rate": 0.29750823434934165, "loss": 0.0068, "num_input_tokens_seen": 1322408, "step": 2325 }, { "epoch": 40.88495575221239, "grad_norm": 0.002589950803667307, "learning_rate": 0.29749753087663217, "loss": 0.0053, "num_input_tokens_seen": 1325256, "step": 2330 }, { "epoch": 40.97345132743363, "grad_norm": 0.03549109399318695, "learning_rate": 0.29748680465794985, "loss": 0.0352, "num_input_tokens_seen": 1327976, "step": 2335 }, { "epoch": 41.05309734513274, "grad_norm": 0.005971208680421114, "learning_rate": 0.29747605569494884, "loss": 0.0062, "num_input_tokens_seen": 1330648, "step": 2340 }, { "epoch": 41.14159292035398, "grad_norm": 0.00230122241191566, "learning_rate": 0.29746528398928673, "loss": 0.0111, "num_input_tokens_seen": 1334088, "step": 2345 }, { "epoch": 41.230088495575224, "grad_norm": 0.0012742439284920692, "learning_rate": 0.2974544895426247, "loss": 0.0008, "num_input_tokens_seen": 1337000, "step": 2350 }, { "epoch": 41.31858407079646, "grad_norm": 0.0003652924788184464, "learning_rate": 0.29744367235662733, "loss": 0.0008, "num_input_tokens_seen": 1339976, "step": 2355 }, { "epoch": 41.4070796460177, "grad_norm": 0.00021368266607169062, "learning_rate": 0.29743283243296276, "loss": 0.0046, "num_input_tokens_seen": 1342792, "step": 2360 }, { "epoch": 41.49557522123894, "grad_norm": 0.0009084056364372373, "learning_rate": 0.29742196977330276, "loss": 0.0011, "num_input_tokens_seen": 1345480, "step": 2365 }, { "epoch": 41.584070796460175, "grad_norm": 0.0026350938715040684, "learning_rate": 0.2974110843793223, "loss": 0.006, "num_input_tokens_seen": 1348472, "step": 2370 }, { "epoch": 41.67256637168141, "grad_norm": 0.0006701347301714122, "learning_rate": 0.2974001762527002, "loss": 0.0008, "num_input_tokens_seen": 1351432, "step": 2375 }, { "epoch": 41.76106194690266, "grad_norm": 0.0020258172880858183, "learning_rate": 0.2973892453951186, "loss": 0.0083, "num_input_tokens_seen": 1354168, "step": 2380 }, { "epoch": 41.849557522123895, "grad_norm": 0.050072696059942245, "learning_rate": 0.2973782918082631, "loss": 0.0236, "num_input_tokens_seen": 1356952, "step": 2385 }, { "epoch": 41.93805309734513, "grad_norm": 0.00030286749824881554, "learning_rate": 0.29736731549382295, "loss": 0.0004, "num_input_tokens_seen": 1359304, "step": 2390 }, { "epoch": 42.017699115044245, "grad_norm": 0.00046257005305960774, "learning_rate": 0.2973563164534908, "loss": 0.0004, "num_input_tokens_seen": 1361752, "step": 2395 }, { "epoch": 42.10619469026549, "grad_norm": 0.00022960211208555847, "learning_rate": 0.29734529468896287, "loss": 0.0254, "num_input_tokens_seen": 1364312, "step": 2400 }, { "epoch": 42.10619469026549, "eval_loss": 0.32499754428863525, "eval_runtime": 1.3048, "eval_samples_per_second": 19.16, "eval_steps_per_second": 9.963, "num_input_tokens_seen": 1364312, "step": 2400 }, { "epoch": 42.19469026548673, "grad_norm": 0.000963477767072618, "learning_rate": 0.2973342502019388, "loss": 0.0009, "num_input_tokens_seen": 1367144, "step": 2405 }, { "epoch": 42.283185840707965, "grad_norm": 0.0012080012820661068, "learning_rate": 0.2973231829941219, "loss": 0.0006, "num_input_tokens_seen": 1369864, "step": 2410 }, { "epoch": 42.3716814159292, "grad_norm": 0.0013240133412182331, "learning_rate": 0.2973120930672188, "loss": 0.0027, "num_input_tokens_seen": 1373304, "step": 2415 }, { "epoch": 42.46017699115044, "grad_norm": 0.0076743527315557, "learning_rate": 0.2973009804229397, "loss": 0.0037, "num_input_tokens_seen": 1376168, "step": 2420 }, { "epoch": 42.54867256637168, "grad_norm": 0.0006759791867807508, "learning_rate": 0.29728984506299827, "loss": 0.0044, "num_input_tokens_seen": 1379080, "step": 2425 }, { "epoch": 42.63716814159292, "grad_norm": 0.005893242545425892, "learning_rate": 0.2972786869891118, "loss": 0.0034, "num_input_tokens_seen": 1382088, "step": 2430 }, { "epoch": 42.72566371681416, "grad_norm": 0.001658368855714798, "learning_rate": 0.29726750620300096, "loss": 0.0007, "num_input_tokens_seen": 1384600, "step": 2435 }, { "epoch": 42.8141592920354, "grad_norm": 0.009218432009220123, "learning_rate": 0.29725630270639003, "loss": 0.003, "num_input_tokens_seen": 1387128, "step": 2440 }, { "epoch": 42.902654867256636, "grad_norm": 0.024031465873122215, "learning_rate": 0.2972450765010067, "loss": 0.0053, "num_input_tokens_seen": 1389896, "step": 2445 }, { "epoch": 42.991150442477874, "grad_norm": 0.00016472725837957114, "learning_rate": 0.29723382758858213, "loss": 0.0039, "num_input_tokens_seen": 1393192, "step": 2450 }, { "epoch": 43.07079646017699, "grad_norm": 0.0019065671367570758, "learning_rate": 0.29722255597085107, "loss": 0.0033, "num_input_tokens_seen": 1395600, "step": 2455 }, { "epoch": 43.15929203539823, "grad_norm": 0.0070867761969566345, "learning_rate": 0.2972112616495518, "loss": 0.0059, "num_input_tokens_seen": 1398944, "step": 2460 }, { "epoch": 43.24778761061947, "grad_norm": 0.0006388761103153229, "learning_rate": 0.297199944626426, "loss": 0.0009, "num_input_tokens_seen": 1402160, "step": 2465 }, { "epoch": 43.336283185840706, "grad_norm": 1.3827519978804048e-05, "learning_rate": 0.2971886049032189, "loss": 0.0003, "num_input_tokens_seen": 1404784, "step": 2470 }, { "epoch": 43.424778761061944, "grad_norm": 7.785346679156646e-05, "learning_rate": 0.29717724248167926, "loss": 0.0001, "num_input_tokens_seen": 1407344, "step": 2475 }, { "epoch": 43.51327433628319, "grad_norm": 5.201085878070444e-05, "learning_rate": 0.29716585736355927, "loss": 0.0005, "num_input_tokens_seen": 1410656, "step": 2480 }, { "epoch": 43.60176991150443, "grad_norm": 0.007106730714440346, "learning_rate": 0.2971544495506147, "loss": 0.0017, "num_input_tokens_seen": 1413344, "step": 2485 }, { "epoch": 43.690265486725664, "grad_norm": 4.0393471863353625e-05, "learning_rate": 0.2971430190446048, "loss": 0.0007, "num_input_tokens_seen": 1415904, "step": 2490 }, { "epoch": 43.7787610619469, "grad_norm": 5.077809328213334e-05, "learning_rate": 0.2971315658472921, "loss": 0.0002, "num_input_tokens_seen": 1418384, "step": 2495 }, { "epoch": 43.86725663716814, "grad_norm": 0.0006421466241590679, "learning_rate": 0.2971200899604431, "loss": 0.0005, "num_input_tokens_seen": 1421328, "step": 2500 }, { "epoch": 43.95575221238938, "grad_norm": 0.0005047202575951815, "learning_rate": 0.29710859138582735, "loss": 0.0003, "num_input_tokens_seen": 1424304, "step": 2505 }, { "epoch": 44.0353982300885, "grad_norm": 0.00023062039690557867, "learning_rate": 0.29709707012521813, "loss": 0.0009, "num_input_tokens_seen": 1426608, "step": 2510 }, { "epoch": 44.123893805309734, "grad_norm": 3.449825817369856e-05, "learning_rate": 0.29708552618039213, "loss": 0.0002, "num_input_tokens_seen": 1429392, "step": 2515 }, { "epoch": 44.21238938053097, "grad_norm": 0.00013140607916284353, "learning_rate": 0.2970739595531296, "loss": 0.0002, "num_input_tokens_seen": 1432464, "step": 2520 }, { "epoch": 44.30088495575221, "grad_norm": 0.00016725629393476993, "learning_rate": 0.2970623702452143, "loss": 0.0002, "num_input_tokens_seen": 1435200, "step": 2525 }, { "epoch": 44.389380530973455, "grad_norm": 0.00014046099386177957, "learning_rate": 0.2970507582584334, "loss": 0.0001, "num_input_tokens_seen": 1438448, "step": 2530 }, { "epoch": 44.47787610619469, "grad_norm": 0.00039028681931085885, "learning_rate": 0.2970391235945776, "loss": 0.0002, "num_input_tokens_seen": 1440976, "step": 2535 }, { "epoch": 44.56637168141593, "grad_norm": 0.0005338029586710036, "learning_rate": 0.2970274662554412, "loss": 0.0002, "num_input_tokens_seen": 1444160, "step": 2540 }, { "epoch": 44.65486725663717, "grad_norm": 3.023855788342189e-05, "learning_rate": 0.2970157862428218, "loss": 0.0004, "num_input_tokens_seen": 1446944, "step": 2545 }, { "epoch": 44.743362831858406, "grad_norm": 0.00011754921433748677, "learning_rate": 0.2970040835585206, "loss": 0.0002, "num_input_tokens_seen": 1450336, "step": 2550 }, { "epoch": 44.83185840707964, "grad_norm": 0.0002539013512432575, "learning_rate": 0.2969923582043424, "loss": 0.0002, "num_input_tokens_seen": 1453328, "step": 2555 }, { "epoch": 44.92035398230089, "grad_norm": 2.270279401273001e-05, "learning_rate": 0.2969806101820953, "loss": 0.0001, "num_input_tokens_seen": 1456208, "step": 2560 }, { "epoch": 45.0, "grad_norm": 1.9186840290785767e-05, "learning_rate": 0.2969688394935911, "loss": 0.0003, "num_input_tokens_seen": 1458448, "step": 2565 }, { "epoch": 45.08849557522124, "grad_norm": 2.004735688387882e-05, "learning_rate": 0.2969570461406449, "loss": 0.0001, "num_input_tokens_seen": 1461616, "step": 2570 }, { "epoch": 45.176991150442475, "grad_norm": 0.00011274288408458233, "learning_rate": 0.29694523012507534, "loss": 0.0001, "num_input_tokens_seen": 1464352, "step": 2575 }, { "epoch": 45.26548672566372, "grad_norm": 0.00018203615036327392, "learning_rate": 0.2969333914487048, "loss": 0.0002, "num_input_tokens_seen": 1467232, "step": 2580 }, { "epoch": 45.35398230088496, "grad_norm": 0.00010410078539280221, "learning_rate": 0.2969215301133587, "loss": 0.0002, "num_input_tokens_seen": 1470144, "step": 2585 }, { "epoch": 45.442477876106196, "grad_norm": 3.477710561128333e-05, "learning_rate": 0.29690964612086634, "loss": 0.0001, "num_input_tokens_seen": 1473120, "step": 2590 }, { "epoch": 45.530973451327434, "grad_norm": 0.0003340657567605376, "learning_rate": 0.2968977394730604, "loss": 0.0001, "num_input_tokens_seen": 1475856, "step": 2595 }, { "epoch": 45.61946902654867, "grad_norm": 2.1355777789722197e-05, "learning_rate": 0.296885810171777, "loss": 0.0001, "num_input_tokens_seen": 1478704, "step": 2600 }, { "epoch": 45.61946902654867, "eval_loss": 0.3398699462413788, "eval_runtime": 0.9123, "eval_samples_per_second": 27.404, "eval_steps_per_second": 14.25, "num_input_tokens_seen": 1478704, "step": 2600 }, { "epoch": 45.70796460176991, "grad_norm": 3.7565023376373574e-06, "learning_rate": 0.2968738582188558, "loss": 0.0001, "num_input_tokens_seen": 1481120, "step": 2605 }, { "epoch": 45.796460176991154, "grad_norm": 8.239444287028164e-05, "learning_rate": 0.2968618836161399, "loss": 0.0001, "num_input_tokens_seen": 1484320, "step": 2610 }, { "epoch": 45.88495575221239, "grad_norm": 4.265425377525389e-05, "learning_rate": 0.296849886365476, "loss": 0.0001, "num_input_tokens_seen": 1487568, "step": 2615 }, { "epoch": 45.97345132743363, "grad_norm": 0.00030638196039944887, "learning_rate": 0.2968378664687142, "loss": 0.0002, "num_input_tokens_seen": 1490304, "step": 2620 }, { "epoch": 46.05309734513274, "grad_norm": 5.462783155962825e-05, "learning_rate": 0.296825823927708, "loss": 0.0001, "num_input_tokens_seen": 1492544, "step": 2625 }, { "epoch": 46.14159292035398, "grad_norm": 7.614289643242955e-05, "learning_rate": 0.29681375874431476, "loss": 0.0, "num_input_tokens_seen": 1495632, "step": 2630 }, { "epoch": 46.230088495575224, "grad_norm": 0.0001503842358943075, "learning_rate": 0.29680167092039483, "loss": 0.0002, "num_input_tokens_seen": 1498528, "step": 2635 }, { "epoch": 46.31858407079646, "grad_norm": 0.0001590990723343566, "learning_rate": 0.2967895604578125, "loss": 0.0001, "num_input_tokens_seen": 1501648, "step": 2640 }, { "epoch": 46.4070796460177, "grad_norm": 0.00012324508861638606, "learning_rate": 0.2967774273584352, "loss": 0.0001, "num_input_tokens_seen": 1504352, "step": 2645 }, { "epoch": 46.49557522123894, "grad_norm": 0.0002609076036605984, "learning_rate": 0.2967652716241342, "loss": 0.0001, "num_input_tokens_seen": 1507360, "step": 2650 }, { "epoch": 46.584070796460175, "grad_norm": 0.00014193008246365935, "learning_rate": 0.29675309325678384, "loss": 0.0001, "num_input_tokens_seen": 1510032, "step": 2655 }, { "epoch": 46.67256637168141, "grad_norm": 0.00013018070603720844, "learning_rate": 0.29674089225826233, "loss": 0.0001, "num_input_tokens_seen": 1513136, "step": 2660 }, { "epoch": 46.76106194690266, "grad_norm": 1.0219133400823921e-05, "learning_rate": 0.29672866863045116, "loss": 0.0001, "num_input_tokens_seen": 1515520, "step": 2665 }, { "epoch": 46.849557522123895, "grad_norm": 0.0002818087232299149, "learning_rate": 0.2967164223752354, "loss": 0.0002, "num_input_tokens_seen": 1518544, "step": 2670 }, { "epoch": 46.93805309734513, "grad_norm": 1.9716044334927574e-05, "learning_rate": 0.2967041534945035, "loss": 0.0001, "num_input_tokens_seen": 1521280, "step": 2675 }, { "epoch": 47.017699115044245, "grad_norm": 0.00020203158783260733, "learning_rate": 0.2966918619901476, "loss": 0.0001, "num_input_tokens_seen": 1523616, "step": 2680 }, { "epoch": 47.10619469026549, "grad_norm": 5.987444819766097e-05, "learning_rate": 0.2966795478640631, "loss": 0.0001, "num_input_tokens_seen": 1526816, "step": 2685 }, { "epoch": 47.19469026548673, "grad_norm": 4.1342242184327915e-05, "learning_rate": 0.29666721111814903, "loss": 0.0001, "num_input_tokens_seen": 1529776, "step": 2690 }, { "epoch": 47.283185840707965, "grad_norm": 0.00016613007755950093, "learning_rate": 0.2966548517543079, "loss": 0.0001, "num_input_tokens_seen": 1532624, "step": 2695 }, { "epoch": 47.3716814159292, "grad_norm": 1.1030122550437227e-05, "learning_rate": 0.29664246977444564, "loss": 0.0001, "num_input_tokens_seen": 1535440, "step": 2700 }, { "epoch": 47.46017699115044, "grad_norm": 1.6608530131634325e-05, "learning_rate": 0.2966300651804717, "loss": 0.0001, "num_input_tokens_seen": 1538336, "step": 2705 }, { "epoch": 47.54867256637168, "grad_norm": 8.151827933033928e-05, "learning_rate": 0.296617637974299, "loss": 0.0001, "num_input_tokens_seen": 1541024, "step": 2710 }, { "epoch": 47.63716814159292, "grad_norm": 8.813589374767616e-05, "learning_rate": 0.2966051881578441, "loss": 0.0001, "num_input_tokens_seen": 1544160, "step": 2715 }, { "epoch": 47.72566371681416, "grad_norm": 0.00020420108921825886, "learning_rate": 0.29659271573302676, "loss": 0.0001, "num_input_tokens_seen": 1546880, "step": 2720 }, { "epoch": 47.8141592920354, "grad_norm": 7.070146239129826e-05, "learning_rate": 0.2965802207017705, "loss": 0.0001, "num_input_tokens_seen": 1549216, "step": 2725 }, { "epoch": 47.902654867256636, "grad_norm": 8.066048030741513e-05, "learning_rate": 0.2965677030660021, "loss": 0.0001, "num_input_tokens_seen": 1551648, "step": 2730 }, { "epoch": 47.991150442477874, "grad_norm": 1.5294517652364448e-05, "learning_rate": 0.2965551628276521, "loss": 0.0001, "num_input_tokens_seen": 1554752, "step": 2735 }, { "epoch": 48.07079646017699, "grad_norm": 5.138834694662364e-06, "learning_rate": 0.29654259998865423, "loss": 0.0, "num_input_tokens_seen": 1557288, "step": 2740 }, { "epoch": 48.15929203539823, "grad_norm": 0.0001015152593026869, "learning_rate": 0.2965300145509458, "loss": 0.0001, "num_input_tokens_seen": 1560408, "step": 2745 }, { "epoch": 48.24778761061947, "grad_norm": 2.979358941956889e-05, "learning_rate": 0.2965174065164678, "loss": 0.0001, "num_input_tokens_seen": 1563160, "step": 2750 }, { "epoch": 48.336283185840706, "grad_norm": 0.00016515686002094299, "learning_rate": 0.2965047758871644, "loss": 0.0001, "num_input_tokens_seen": 1565896, "step": 2755 }, { "epoch": 48.424778761061944, "grad_norm": 2.683965431060642e-05, "learning_rate": 0.2964921226649835, "loss": 0.0001, "num_input_tokens_seen": 1568776, "step": 2760 }, { "epoch": 48.51327433628319, "grad_norm": 3.990148616139777e-05, "learning_rate": 0.2964794468518763, "loss": 0.0001, "num_input_tokens_seen": 1571736, "step": 2765 }, { "epoch": 48.60176991150443, "grad_norm": 2.5928427930921316e-05, "learning_rate": 0.2964667484497977, "loss": 0.0001, "num_input_tokens_seen": 1575016, "step": 2770 }, { "epoch": 48.690265486725664, "grad_norm": 7.753532554488629e-05, "learning_rate": 0.29645402746070587, "loss": 0.0001, "num_input_tokens_seen": 1577448, "step": 2775 }, { "epoch": 48.7787610619469, "grad_norm": 0.00011889307643286884, "learning_rate": 0.2964412838865625, "loss": 0.0001, "num_input_tokens_seen": 1580056, "step": 2780 }, { "epoch": 48.86725663716814, "grad_norm": 4.798822192242369e-05, "learning_rate": 0.29642851772933293, "loss": 0.0001, "num_input_tokens_seen": 1583096, "step": 2785 }, { "epoch": 48.95575221238938, "grad_norm": 3.1255076464731246e-05, "learning_rate": 0.29641572899098567, "loss": 0.0001, "num_input_tokens_seen": 1585864, "step": 2790 }, { "epoch": 49.0353982300885, "grad_norm": 3.93828668165952e-05, "learning_rate": 0.29640291767349314, "loss": 0.0001, "num_input_tokens_seen": 1588512, "step": 2795 }, { "epoch": 49.123893805309734, "grad_norm": 9.597477037459612e-05, "learning_rate": 0.2963900837788308, "loss": 0.0001, "num_input_tokens_seen": 1591424, "step": 2800 }, { "epoch": 49.123893805309734, "eval_loss": 0.34531939029693604, "eval_runtime": 0.927, "eval_samples_per_second": 26.968, "eval_steps_per_second": 14.023, "num_input_tokens_seen": 1591424, "step": 2800 }, { "epoch": 49.21238938053097, "grad_norm": 1.5117789189389441e-05, "learning_rate": 0.2963772273089779, "loss": 0.0001, "num_input_tokens_seen": 1593840, "step": 2805 }, { "epoch": 49.30088495575221, "grad_norm": 5.635102934320457e-05, "learning_rate": 0.2963643482659171, "loss": 0.0001, "num_input_tokens_seen": 1597232, "step": 2810 }, { "epoch": 49.389380530973455, "grad_norm": 0.0001008128238026984, "learning_rate": 0.2963514466516345, "loss": 0.0001, "num_input_tokens_seen": 1600144, "step": 2815 }, { "epoch": 49.47787610619469, "grad_norm": 5.571169458562508e-05, "learning_rate": 0.2963385224681196, "loss": 0.0, "num_input_tokens_seen": 1602800, "step": 2820 }, { "epoch": 49.56637168141593, "grad_norm": 1.1481000001367647e-05, "learning_rate": 0.29632557571736556, "loss": 0.0001, "num_input_tokens_seen": 1605872, "step": 2825 }, { "epoch": 49.65486725663717, "grad_norm": 5.6025917729130015e-05, "learning_rate": 0.2963126064013689, "loss": 0.0, "num_input_tokens_seen": 1608768, "step": 2830 }, { "epoch": 49.743362831858406, "grad_norm": 9.012265945784748e-05, "learning_rate": 0.29629961452212966, "loss": 0.0001, "num_input_tokens_seen": 1611488, "step": 2835 }, { "epoch": 49.83185840707964, "grad_norm": 6.681102968286723e-05, "learning_rate": 0.2962866000816513, "loss": 0.0001, "num_input_tokens_seen": 1614448, "step": 2840 }, { "epoch": 49.92035398230089, "grad_norm": 4.147098297835328e-05, "learning_rate": 0.2962735630819409, "loss": 0.0, "num_input_tokens_seen": 1616784, "step": 2845 }, { "epoch": 50.0, "grad_norm": 2.157991502826917e-06, "learning_rate": 0.2962605035250089, "loss": 0.0001, "num_input_tokens_seen": 1619360, "step": 2850 }, { "epoch": 50.08849557522124, "grad_norm": 3.0686205718666315e-05, "learning_rate": 0.29624742141286914, "loss": 0.0001, "num_input_tokens_seen": 1622448, "step": 2855 }, { "epoch": 50.176991150442475, "grad_norm": 0.0002142949670087546, "learning_rate": 0.29623431674753925, "loss": 0.0001, "num_input_tokens_seen": 1625536, "step": 2860 }, { "epoch": 50.26548672566372, "grad_norm": 3.992580332123907e-06, "learning_rate": 0.29622118953103993, "loss": 0.0001, "num_input_tokens_seen": 1628176, "step": 2865 }, { "epoch": 50.35398230088496, "grad_norm": 1.9366199921933003e-05, "learning_rate": 0.2962080397653957, "loss": 0.0001, "num_input_tokens_seen": 1630640, "step": 2870 }, { "epoch": 50.442477876106196, "grad_norm": 1.979381158889737e-05, "learning_rate": 0.29619486745263435, "loss": 0.0, "num_input_tokens_seen": 1633072, "step": 2875 }, { "epoch": 50.530973451327434, "grad_norm": 2.276863051520195e-05, "learning_rate": 0.2961816725947873, "loss": 0.0, "num_input_tokens_seen": 1635664, "step": 2880 }, { "epoch": 50.61946902654867, "grad_norm": 2.0678015062003396e-05, "learning_rate": 0.29616845519388924, "loss": 0.0001, "num_input_tokens_seen": 1638816, "step": 2885 }, { "epoch": 50.70796460176991, "grad_norm": 8.507293387083337e-05, "learning_rate": 0.2961552152519785, "loss": 0.0001, "num_input_tokens_seen": 1641968, "step": 2890 }, { "epoch": 50.796460176991154, "grad_norm": 9.362563105241861e-06, "learning_rate": 0.29614195277109695, "loss": 0.0, "num_input_tokens_seen": 1644960, "step": 2895 }, { "epoch": 50.88495575221239, "grad_norm": 0.00014587417535949498, "learning_rate": 0.2961286677532897, "loss": 0.0001, "num_input_tokens_seen": 1648272, "step": 2900 }, { "epoch": 50.97345132743363, "grad_norm": 1.0639868378348183e-05, "learning_rate": 0.2961153602006055, "loss": 0.0, "num_input_tokens_seen": 1651152, "step": 2905 }, { "epoch": 51.05309734513274, "grad_norm": 8.087149035418406e-05, "learning_rate": 0.29610203011509656, "loss": 0.0001, "num_input_tokens_seen": 1653504, "step": 2910 }, { "epoch": 51.14159292035398, "grad_norm": 2.990651955769863e-05, "learning_rate": 0.29608867749881856, "loss": 0.0001, "num_input_tokens_seen": 1656624, "step": 2915 }, { "epoch": 51.230088495575224, "grad_norm": 5.3029732953291386e-05, "learning_rate": 0.29607530235383067, "loss": 0.0, "num_input_tokens_seen": 1659424, "step": 2920 }, { "epoch": 51.31858407079646, "grad_norm": 5.0947433919645846e-05, "learning_rate": 0.2960619046821954, "loss": 0.0001, "num_input_tokens_seen": 1662528, "step": 2925 }, { "epoch": 51.4070796460177, "grad_norm": 7.634783105459064e-05, "learning_rate": 0.2960484844859789, "loss": 0.0, "num_input_tokens_seen": 1665632, "step": 2930 }, { "epoch": 51.49557522123894, "grad_norm": 0.00011019138764822856, "learning_rate": 0.29603504176725076, "loss": 0.0001, "num_input_tokens_seen": 1668544, "step": 2935 }, { "epoch": 51.584070796460175, "grad_norm": 5.928683094680309e-05, "learning_rate": 0.296021576528084, "loss": 0.0001, "num_input_tokens_seen": 1671184, "step": 2940 }, { "epoch": 51.67256637168141, "grad_norm": 9.8450500445324e-06, "learning_rate": 0.29600808877055507, "loss": 0.0001, "num_input_tokens_seen": 1674320, "step": 2945 }, { "epoch": 51.76106194690266, "grad_norm": 2.676615258678794e-05, "learning_rate": 0.29599457849674404, "loss": 0.0, "num_input_tokens_seen": 1677216, "step": 2950 }, { "epoch": 51.849557522123895, "grad_norm": 2.0211069568176754e-05, "learning_rate": 0.2959810457087343, "loss": 0.0, "num_input_tokens_seen": 1679968, "step": 2955 }, { "epoch": 51.93805309734513, "grad_norm": 0.00011889731831615791, "learning_rate": 0.2959674904086128, "loss": 0.0001, "num_input_tokens_seen": 1682480, "step": 2960 }, { "epoch": 52.017699115044245, "grad_norm": 4.294277459848672e-05, "learning_rate": 0.2959539125984699, "loss": 0.0, "num_input_tokens_seen": 1685352, "step": 2965 }, { "epoch": 52.10619469026549, "grad_norm": 4.894227095064707e-05, "learning_rate": 0.2959403122803996, "loss": 0.0001, "num_input_tokens_seen": 1687784, "step": 2970 }, { "epoch": 52.19469026548673, "grad_norm": 5.468092058436014e-05, "learning_rate": 0.2959266894564991, "loss": 0.0001, "num_input_tokens_seen": 1691416, "step": 2975 }, { "epoch": 52.283185840707965, "grad_norm": 0.00013957191549707204, "learning_rate": 0.2959130441288692, "loss": 0.0001, "num_input_tokens_seen": 1694344, "step": 2980 }, { "epoch": 52.3716814159292, "grad_norm": 6.133555143605918e-05, "learning_rate": 0.2958993762996143, "loss": 0.0001, "num_input_tokens_seen": 1696840, "step": 2985 }, { "epoch": 52.46017699115044, "grad_norm": 4.692682341556065e-05, "learning_rate": 0.2958856859708421, "loss": 0.0, "num_input_tokens_seen": 1699304, "step": 2990 }, { "epoch": 52.54867256637168, "grad_norm": 5.648274964187294e-06, "learning_rate": 0.2958719731446638, "loss": 0.0, "num_input_tokens_seen": 1701880, "step": 2995 }, { "epoch": 52.63716814159292, "grad_norm": 2.9879267458454706e-05, "learning_rate": 0.29585823782319404, "loss": 0.0, "num_input_tokens_seen": 1705000, "step": 3000 }, { "epoch": 52.63716814159292, "eval_loss": 0.3432482182979584, "eval_runtime": 0.9289, "eval_samples_per_second": 26.912, "eval_steps_per_second": 13.994, "num_input_tokens_seen": 1705000, "step": 3000 }, { "epoch": 52.72566371681416, "grad_norm": 2.4884155209292658e-05, "learning_rate": 0.2958444800085511, "loss": 0.0, "num_input_tokens_seen": 1707896, "step": 3005 }, { "epoch": 52.8141592920354, "grad_norm": 1.7843229215941392e-05, "learning_rate": 0.2958306997028565, "loss": 0.0, "num_input_tokens_seen": 1710840, "step": 3010 }, { "epoch": 52.902654867256636, "grad_norm": 0.00010329360520699993, "learning_rate": 0.2958168969082354, "loss": 0.0, "num_input_tokens_seen": 1713480, "step": 3015 }, { "epoch": 52.991150442477874, "grad_norm": 5.095766027807258e-05, "learning_rate": 0.2958030716268164, "loss": 0.0001, "num_input_tokens_seen": 1716696, "step": 3020 }, { "epoch": 53.07079646017699, "grad_norm": 3.9759801438776776e-05, "learning_rate": 0.2957892238607314, "loss": 0.0, "num_input_tokens_seen": 1719344, "step": 3025 }, { "epoch": 53.15929203539823, "grad_norm": 9.61593832471408e-05, "learning_rate": 0.2957753536121161, "loss": 0.0, "num_input_tokens_seen": 1722352, "step": 3030 }, { "epoch": 53.24778761061947, "grad_norm": 5.18625856784638e-05, "learning_rate": 0.29576146088310923, "loss": 0.0, "num_input_tokens_seen": 1725312, "step": 3035 }, { "epoch": 53.336283185840706, "grad_norm": 0.00011492300109239295, "learning_rate": 0.2957475456758533, "loss": 0.0001, "num_input_tokens_seen": 1728208, "step": 3040 }, { "epoch": 53.424778761061944, "grad_norm": 7.969270518515259e-05, "learning_rate": 0.2957336079924944, "loss": 0.0, "num_input_tokens_seen": 1731056, "step": 3045 }, { "epoch": 53.51327433628319, "grad_norm": 2.0095483705517836e-05, "learning_rate": 0.2957196478351816, "loss": 0.0, "num_input_tokens_seen": 1734304, "step": 3050 }, { "epoch": 53.60176991150443, "grad_norm": 5.5109999266278464e-06, "learning_rate": 0.295705665206068, "loss": 0.0, "num_input_tokens_seen": 1737168, "step": 3055 }, { "epoch": 53.690265486725664, "grad_norm": 2.2567943233298138e-05, "learning_rate": 0.2956916601073097, "loss": 0.0, "num_input_tokens_seen": 1740048, "step": 3060 }, { "epoch": 53.7787610619469, "grad_norm": 3.392180587979965e-05, "learning_rate": 0.29567763254106655, "loss": 0.0001, "num_input_tokens_seen": 1742960, "step": 3065 }, { "epoch": 53.86725663716814, "grad_norm": 0.00010556568304309621, "learning_rate": 0.29566358250950175, "loss": 0.0001, "num_input_tokens_seen": 1745376, "step": 3070 }, { "epoch": 53.95575221238938, "grad_norm": 3.916039349860512e-05, "learning_rate": 0.295649510014782, "loss": 0.0001, "num_input_tokens_seen": 1748160, "step": 3075 }, { "epoch": 54.0353982300885, "grad_norm": 2.8387836209731176e-05, "learning_rate": 0.2956354150590775, "loss": 0.0, "num_input_tokens_seen": 1750480, "step": 3080 }, { "epoch": 54.123893805309734, "grad_norm": 3.4690929169300944e-05, "learning_rate": 0.2956212976445618, "loss": 0.0, "num_input_tokens_seen": 1753504, "step": 3085 }, { "epoch": 54.21238938053097, "grad_norm": 6.044669135008007e-06, "learning_rate": 0.295607157773412, "loss": 0.0001, "num_input_tokens_seen": 1756128, "step": 3090 }, { "epoch": 54.30088495575221, "grad_norm": 6.607595423702151e-06, "learning_rate": 0.2955929954478087, "loss": 0.0, "num_input_tokens_seen": 1758800, "step": 3095 }, { "epoch": 54.389380530973455, "grad_norm": 4.2982086597476155e-05, "learning_rate": 0.29557881066993585, "loss": 0.0, "num_input_tokens_seen": 1761888, "step": 3100 }, { "epoch": 54.47787610619469, "grad_norm": 3.6731697036884725e-05, "learning_rate": 0.29556460344198093, "loss": 0.0, "num_input_tokens_seen": 1765200, "step": 3105 }, { "epoch": 54.56637168141593, "grad_norm": 2.8716565793729387e-05, "learning_rate": 0.29555037376613486, "loss": 0.0001, "num_input_tokens_seen": 1767712, "step": 3110 }, { "epoch": 54.65486725663717, "grad_norm": 2.0178968043182977e-05, "learning_rate": 0.29553612164459203, "loss": 0.0, "num_input_tokens_seen": 1769920, "step": 3115 }, { "epoch": 54.743362831858406, "grad_norm": 1.1766589523176663e-05, "learning_rate": 0.29552184707955037, "loss": 0.0, "num_input_tokens_seen": 1773312, "step": 3120 }, { "epoch": 54.83185840707964, "grad_norm": 3.3964772683248157e-06, "learning_rate": 0.29550755007321117, "loss": 0.0001, "num_input_tokens_seen": 1776128, "step": 3125 }, { "epoch": 54.92035398230089, "grad_norm": 8.562380389776081e-05, "learning_rate": 0.29549323062777916, "loss": 0.0001, "num_input_tokens_seen": 1779088, "step": 3130 }, { "epoch": 55.0, "grad_norm": 2.1120120436535217e-05, "learning_rate": 0.29547888874546263, "loss": 0.0, "num_input_tokens_seen": 1781944, "step": 3135 }, { "epoch": 55.08849557522124, "grad_norm": 7.86690870882012e-05, "learning_rate": 0.2954645244284732, "loss": 0.0, "num_input_tokens_seen": 1784568, "step": 3140 }, { "epoch": 55.176991150442475, "grad_norm": 4.128727232455276e-05, "learning_rate": 0.2954501376790261, "loss": 0.0001, "num_input_tokens_seen": 1787560, "step": 3145 }, { "epoch": 55.26548672566372, "grad_norm": 3.9662732888245955e-05, "learning_rate": 0.29543572849933997, "loss": 0.0, "num_input_tokens_seen": 1790824, "step": 3150 }, { "epoch": 55.35398230088496, "grad_norm": 5.2851195505354553e-05, "learning_rate": 0.2954212968916368, "loss": 0.0, "num_input_tokens_seen": 1793176, "step": 3155 }, { "epoch": 55.442477876106196, "grad_norm": 1.7206702978000976e-05, "learning_rate": 0.29540684285814217, "loss": 0.0, "num_input_tokens_seen": 1796008, "step": 3160 }, { "epoch": 55.530973451327434, "grad_norm": 2.7548794605536386e-05, "learning_rate": 0.2953923664010851, "loss": 0.0001, "num_input_tokens_seen": 1799096, "step": 3165 }, { "epoch": 55.61946902654867, "grad_norm": 5.025412974646315e-05, "learning_rate": 0.295377867522698, "loss": 0.0, "num_input_tokens_seen": 1802104, "step": 3170 }, { "epoch": 55.70796460176991, "grad_norm": 3.946249307773542e-06, "learning_rate": 0.2953633462252168, "loss": 0.0, "num_input_tokens_seen": 1804808, "step": 3175 }, { "epoch": 55.796460176991154, "grad_norm": 5.955912001809338e-06, "learning_rate": 0.2953488025108809, "loss": 0.0, "num_input_tokens_seen": 1807704, "step": 3180 }, { "epoch": 55.88495575221239, "grad_norm": 5.4806539992569014e-05, "learning_rate": 0.295334236381933, "loss": 0.0, "num_input_tokens_seen": 1810440, "step": 3185 }, { "epoch": 55.97345132743363, "grad_norm": 4.975424235453829e-05, "learning_rate": 0.29531964784061954, "loss": 0.0001, "num_input_tokens_seen": 1813272, "step": 3190 }, { "epoch": 56.05309734513274, "grad_norm": 2.4640861738589592e-05, "learning_rate": 0.2953050368891902, "loss": 0.0, "num_input_tokens_seen": 1815616, "step": 3195 }, { "epoch": 56.14159292035398, "grad_norm": 7.316016854019836e-05, "learning_rate": 0.29529040352989805, "loss": 0.0, "num_input_tokens_seen": 1818688, "step": 3200 }, { "epoch": 56.14159292035398, "eval_loss": 0.367011696100235, "eval_runtime": 0.9212, "eval_samples_per_second": 27.138, "eval_steps_per_second": 14.112, "num_input_tokens_seen": 1818688, "step": 3200 }, { "epoch": 56.230088495575224, "grad_norm": 1.605027682671789e-05, "learning_rate": 0.29527574776499993, "loss": 0.0, "num_input_tokens_seen": 1821104, "step": 3205 }, { "epoch": 56.31858407079646, "grad_norm": 5.4767086112406105e-05, "learning_rate": 0.2952610695967558, "loss": 0.0, "num_input_tokens_seen": 1824144, "step": 3210 }, { "epoch": 56.4070796460177, "grad_norm": 8.414339390583336e-05, "learning_rate": 0.29524636902742935, "loss": 0.0, "num_input_tokens_seen": 1827536, "step": 3215 }, { "epoch": 56.49557522123894, "grad_norm": 8.184802572941408e-05, "learning_rate": 0.2952316460592875, "loss": 0.0001, "num_input_tokens_seen": 1830624, "step": 3220 }, { "epoch": 56.584070796460175, "grad_norm": 4.687665932578966e-05, "learning_rate": 0.29521690069460066, "loss": 0.0, "num_input_tokens_seen": 1832992, "step": 3225 }, { "epoch": 56.67256637168141, "grad_norm": 2.7223957658861764e-05, "learning_rate": 0.29520213293564285, "loss": 0.0001, "num_input_tokens_seen": 1835936, "step": 3230 }, { "epoch": 56.76106194690266, "grad_norm": 2.9260703740874305e-05, "learning_rate": 0.29518734278469144, "loss": 0.0, "num_input_tokens_seen": 1838768, "step": 3235 }, { "epoch": 56.849557522123895, "grad_norm": 1.3085154932923615e-05, "learning_rate": 0.29517253024402723, "loss": 0.0, "num_input_tokens_seen": 1841488, "step": 3240 }, { "epoch": 56.93805309734513, "grad_norm": 3.1526549719274044e-05, "learning_rate": 0.2951576953159345, "loss": 0.0, "num_input_tokens_seen": 1844688, "step": 3245 }, { "epoch": 57.017699115044245, "grad_norm": 2.665295687620528e-05, "learning_rate": 0.29514283800270097, "loss": 0.0, "num_input_tokens_seen": 1847216, "step": 3250 }, { "epoch": 57.10619469026549, "grad_norm": 5.847301508765668e-05, "learning_rate": 0.2951279583066179, "loss": 0.0001, "num_input_tokens_seen": 1849856, "step": 3255 }, { "epoch": 57.19469026548673, "grad_norm": 6.715930067002773e-05, "learning_rate": 0.2951130562299798, "loss": 0.0001, "num_input_tokens_seen": 1852576, "step": 3260 }, { "epoch": 57.283185840707965, "grad_norm": 3.9144859329098836e-05, "learning_rate": 0.29509813177508487, "loss": 0.0, "num_input_tokens_seen": 1855872, "step": 3265 }, { "epoch": 57.3716814159292, "grad_norm": 4.464506673684809e-06, "learning_rate": 0.2950831849442346, "loss": 0.0, "num_input_tokens_seen": 1858416, "step": 3270 }, { "epoch": 57.46017699115044, "grad_norm": 3.8103535189293325e-05, "learning_rate": 0.2950682157397339, "loss": 0.0, "num_input_tokens_seen": 1861024, "step": 3275 }, { "epoch": 57.54867256637168, "grad_norm": 2.776885412458796e-05, "learning_rate": 0.2950532241638914, "loss": 0.0, "num_input_tokens_seen": 1863616, "step": 3280 }, { "epoch": 57.63716814159292, "grad_norm": 2.5324638045276515e-05, "learning_rate": 0.2950382102190188, "loss": 0.0, "num_input_tokens_seen": 1866672, "step": 3285 }, { "epoch": 57.72566371681416, "grad_norm": 4.904646266368218e-05, "learning_rate": 0.2950231739074316, "loss": 0.0, "num_input_tokens_seen": 1869456, "step": 3290 }, { "epoch": 57.8141592920354, "grad_norm": 1.1954310139117297e-05, "learning_rate": 0.29500811523144843, "loss": 0.0, "num_input_tokens_seen": 1872032, "step": 3295 }, { "epoch": 57.902654867256636, "grad_norm": 2.068079811579082e-05, "learning_rate": 0.2949930341933917, "loss": 0.0, "num_input_tokens_seen": 1875104, "step": 3300 }, { "epoch": 57.991150442477874, "grad_norm": 1.9430868633207865e-05, "learning_rate": 0.29497793079558693, "loss": 0.0, "num_input_tokens_seen": 1878672, "step": 3305 }, { "epoch": 58.07079646017699, "grad_norm": 1.1421545423218049e-05, "learning_rate": 0.2949628050403633, "loss": 0.0, "num_input_tokens_seen": 1881032, "step": 3310 }, { "epoch": 58.15929203539823, "grad_norm": 5.9051759308204055e-05, "learning_rate": 0.2949476569300535, "loss": 0.0, "num_input_tokens_seen": 1883480, "step": 3315 }, { "epoch": 58.24778761061947, "grad_norm": 2.4441131245112047e-05, "learning_rate": 0.29493248646699344, "loss": 0.0, "num_input_tokens_seen": 1886648, "step": 3320 }, { "epoch": 58.336283185840706, "grad_norm": 4.138241638429463e-05, "learning_rate": 0.29491729365352265, "loss": 0.0, "num_input_tokens_seen": 1890216, "step": 3325 }, { "epoch": 58.424778761061944, "grad_norm": 1.017398426483851e-05, "learning_rate": 0.29490207849198397, "loss": 0.0, "num_input_tokens_seen": 1892888, "step": 3330 }, { "epoch": 58.51327433628319, "grad_norm": 4.3009556975448504e-05, "learning_rate": 0.29488684098472384, "loss": 0.0, "num_input_tokens_seen": 1895944, "step": 3335 }, { "epoch": 58.60176991150443, "grad_norm": 2.5724417355377227e-05, "learning_rate": 0.2948715811340921, "loss": 0.0, "num_input_tokens_seen": 1898840, "step": 3340 }, { "epoch": 58.690265486725664, "grad_norm": 1.1311556590953842e-05, "learning_rate": 0.294856298942442, "loss": 0.0, "num_input_tokens_seen": 1901560, "step": 3345 }, { "epoch": 58.7787610619469, "grad_norm": 2.4466533432132564e-05, "learning_rate": 0.2948409944121302, "loss": 0.0, "num_input_tokens_seen": 1904248, "step": 3350 }, { "epoch": 58.86725663716814, "grad_norm": 3.420401844778098e-05, "learning_rate": 0.29482566754551687, "loss": 0.0, "num_input_tokens_seen": 1906904, "step": 3355 }, { "epoch": 58.95575221238938, "grad_norm": 9.09080117708072e-05, "learning_rate": 0.2948103183449656, "loss": 0.0001, "num_input_tokens_seen": 1909800, "step": 3360 }, { "epoch": 59.0353982300885, "grad_norm": 2.430028689559549e-05, "learning_rate": 0.2947949468128435, "loss": 0.0, "num_input_tokens_seen": 1912184, "step": 3365 }, { "epoch": 59.123893805309734, "grad_norm": 7.5930161074211355e-06, "learning_rate": 0.2947795529515209, "loss": 0.0, "num_input_tokens_seen": 1914504, "step": 3370 }, { "epoch": 59.21238938053097, "grad_norm": 2.669124114618171e-05, "learning_rate": 0.29476413676337193, "loss": 0.0, "num_input_tokens_seen": 1917192, "step": 3375 }, { "epoch": 59.30088495575221, "grad_norm": 3.4852207591029583e-06, "learning_rate": 0.2947486982507738, "loss": 0.0, "num_input_tokens_seen": 1920056, "step": 3380 }, { "epoch": 59.389380530973455, "grad_norm": 2.071005837933626e-05, "learning_rate": 0.29473323741610735, "loss": 0.0, "num_input_tokens_seen": 1923048, "step": 3385 }, { "epoch": 59.47787610619469, "grad_norm": 9.639786185289267e-06, "learning_rate": 0.2947177542617569, "loss": 0.0, "num_input_tokens_seen": 1926328, "step": 3390 }, { "epoch": 59.56637168141593, "grad_norm": 6.62473394186236e-05, "learning_rate": 0.2947022487901101, "loss": 0.0, "num_input_tokens_seen": 1929640, "step": 3395 }, { "epoch": 59.65486725663717, "grad_norm": 4.277366315363906e-05, "learning_rate": 0.2946867210035581, "loss": 0.0, "num_input_tokens_seen": 1932248, "step": 3400 }, { "epoch": 59.65486725663717, "eval_loss": 0.3640179932117462, "eval_runtime": 0.9173, "eval_samples_per_second": 27.253, "eval_steps_per_second": 14.172, "num_input_tokens_seen": 1932248, "step": 3400 }, { "epoch": 59.743362831858406, "grad_norm": 3.996547820861451e-05, "learning_rate": 0.2946711709044954, "loss": 0.0, "num_input_tokens_seen": 1935208, "step": 3405 }, { "epoch": 59.83185840707964, "grad_norm": 5.84391182201216e-06, "learning_rate": 0.2946555984953202, "loss": 0.0, "num_input_tokens_seen": 1938216, "step": 3410 }, { "epoch": 59.92035398230089, "grad_norm": 7.713926606811583e-06, "learning_rate": 0.2946400037784338, "loss": 0.0, "num_input_tokens_seen": 1941000, "step": 3415 }, { "epoch": 60.0, "grad_norm": 0.00011831547453766689, "learning_rate": 0.29462438675624114, "loss": 0.0001, "num_input_tokens_seen": 1943544, "step": 3420 }, { "epoch": 60.08849557522124, "grad_norm": 3.3156073186546564e-05, "learning_rate": 0.2946087474311506, "loss": 0.0, "num_input_tokens_seen": 1946152, "step": 3425 }, { "epoch": 60.176991150442475, "grad_norm": 1.1541232197487261e-05, "learning_rate": 0.294593085805574, "loss": 0.0, "num_input_tokens_seen": 1949240, "step": 3430 }, { "epoch": 60.26548672566372, "grad_norm": 7.341849413933232e-05, "learning_rate": 0.2945774018819264, "loss": 0.0, "num_input_tokens_seen": 1952248, "step": 3435 }, { "epoch": 60.35398230088496, "grad_norm": 4.056173929711804e-05, "learning_rate": 0.2945616956626266, "loss": 0.0, "num_input_tokens_seen": 1955064, "step": 3440 }, { "epoch": 60.442477876106196, "grad_norm": 1.5421097486978397e-05, "learning_rate": 0.2945459671500966, "loss": 0.0, "num_input_tokens_seen": 1957896, "step": 3445 }, { "epoch": 60.530973451327434, "grad_norm": 1.6032827261369675e-06, "learning_rate": 0.2945302163467621, "loss": 0.0, "num_input_tokens_seen": 1960632, "step": 3450 }, { "epoch": 60.61946902654867, "grad_norm": 4.69860497105401e-05, "learning_rate": 0.2945144432550519, "loss": 0.0, "num_input_tokens_seen": 1963416, "step": 3455 }, { "epoch": 60.70796460176991, "grad_norm": 1.8185739463660866e-05, "learning_rate": 0.29449864787739843, "loss": 0.0, "num_input_tokens_seen": 1966376, "step": 3460 }, { "epoch": 60.796460176991154, "grad_norm": 5.031113687437028e-05, "learning_rate": 0.2944828302162376, "loss": 0.0, "num_input_tokens_seen": 1969272, "step": 3465 }, { "epoch": 60.88495575221239, "grad_norm": 1.8644868760020472e-05, "learning_rate": 0.2944669902740087, "loss": 0.0, "num_input_tokens_seen": 1972168, "step": 3470 }, { "epoch": 60.97345132743363, "grad_norm": 1.5105496459000278e-05, "learning_rate": 0.2944511280531544, "loss": 0.0, "num_input_tokens_seen": 1975432, "step": 3475 }, { "epoch": 61.05309734513274, "grad_norm": 4.591782271745615e-06, "learning_rate": 0.29443524355612083, "loss": 0.0001, "num_input_tokens_seen": 1978104, "step": 3480 }, { "epoch": 61.14159292035398, "grad_norm": 3.4458971640560776e-05, "learning_rate": 0.29441933678535764, "loss": 0.0, "num_input_tokens_seen": 1980872, "step": 3485 }, { "epoch": 61.230088495575224, "grad_norm": 1.9792671082541347e-05, "learning_rate": 0.29440340774331786, "loss": 0.0, "num_input_tokens_seen": 1983864, "step": 3490 }, { "epoch": 61.31858407079646, "grad_norm": 2.2356160116032697e-05, "learning_rate": 0.2943874564324579, "loss": 0.0, "num_input_tokens_seen": 1986440, "step": 3495 }, { "epoch": 61.4070796460177, "grad_norm": 1.9416729628574103e-06, "learning_rate": 0.2943714828552376, "loss": 0.0, "num_input_tokens_seen": 1989336, "step": 3500 }, { "epoch": 61.49557522123894, "grad_norm": 2.1110237867105752e-05, "learning_rate": 0.29435548701412045, "loss": 0.0, "num_input_tokens_seen": 1991624, "step": 3505 }, { "epoch": 61.584070796460175, "grad_norm": 1.0600576388242189e-05, "learning_rate": 0.2943394689115731, "loss": 0.0, "num_input_tokens_seen": 1994440, "step": 3510 }, { "epoch": 61.67256637168141, "grad_norm": 1.8550312233855948e-05, "learning_rate": 0.29432342855006577, "loss": 0.0, "num_input_tokens_seen": 1997704, "step": 3515 }, { "epoch": 61.76106194690266, "grad_norm": 2.7752890673582442e-05, "learning_rate": 0.294307365932072, "loss": 0.0, "num_input_tokens_seen": 2000312, "step": 3520 }, { "epoch": 61.849557522123895, "grad_norm": 1.898999653349165e-05, "learning_rate": 0.294291281060069, "loss": 0.0, "num_input_tokens_seen": 2003096, "step": 3525 }, { "epoch": 61.93805309734513, "grad_norm": 2.5115748940152116e-05, "learning_rate": 0.29427517393653724, "loss": 0.0, "num_input_tokens_seen": 2006040, "step": 3530 }, { "epoch": 62.017699115044245, "grad_norm": 1.0225969163002446e-05, "learning_rate": 0.29425904456396046, "loss": 0.0, "num_input_tokens_seen": 2008712, "step": 3535 }, { "epoch": 62.10619469026549, "grad_norm": 8.17652380646905e-06, "learning_rate": 0.2942428929448262, "loss": 0.0, "num_input_tokens_seen": 2011672, "step": 3540 }, { "epoch": 62.19469026548673, "grad_norm": 3.286463106633164e-05, "learning_rate": 0.2942267190816252, "loss": 0.0, "num_input_tokens_seen": 2014808, "step": 3545 }, { "epoch": 62.283185840707965, "grad_norm": 1.4190652109391522e-05, "learning_rate": 0.2942105229768516, "loss": 0.0, "num_input_tokens_seen": 2018024, "step": 3550 }, { "epoch": 62.3716814159292, "grad_norm": 1.3379671145230532e-05, "learning_rate": 0.29419430463300306, "loss": 0.0, "num_input_tokens_seen": 2020360, "step": 3555 }, { "epoch": 62.46017699115044, "grad_norm": 7.918322808109224e-05, "learning_rate": 0.2941780640525808, "loss": 0.0, "num_input_tokens_seen": 2023064, "step": 3560 }, { "epoch": 62.54867256637168, "grad_norm": 2.0234196199453436e-05, "learning_rate": 0.2941618012380891, "loss": 0.0, "num_input_tokens_seen": 2026040, "step": 3565 }, { "epoch": 62.63716814159292, "grad_norm": 7.978651410667226e-06, "learning_rate": 0.29414551619203605, "loss": 0.0, "num_input_tokens_seen": 2028696, "step": 3570 }, { "epoch": 62.72566371681416, "grad_norm": 2.4655011657159775e-05, "learning_rate": 0.29412920891693295, "loss": 0.0, "num_input_tokens_seen": 2031720, "step": 3575 }, { "epoch": 62.8141592920354, "grad_norm": 1.907130535983015e-05, "learning_rate": 0.2941128794152946, "loss": 0.0, "num_input_tokens_seen": 2034760, "step": 3580 }, { "epoch": 62.902654867256636, "grad_norm": 6.197288894327357e-05, "learning_rate": 0.2940965276896392, "loss": 0.0, "num_input_tokens_seen": 2037736, "step": 3585 }, { "epoch": 62.991150442477874, "grad_norm": 3.361006383784115e-05, "learning_rate": 0.2940801537424884, "loss": 0.0, "num_input_tokens_seen": 2040008, "step": 3590 }, { "epoch": 63.07079646017699, "grad_norm": 6.9220732257235795e-06, "learning_rate": 0.2940637575763673, "loss": 0.0, "num_input_tokens_seen": 2042440, "step": 3595 }, { "epoch": 63.15929203539823, "grad_norm": 1.1588084817049094e-05, "learning_rate": 0.2940473391938043, "loss": 0.0, "num_input_tokens_seen": 2045464, "step": 3600 }, { "epoch": 63.15929203539823, "eval_loss": 0.36257556080818176, "eval_runtime": 0.9329, "eval_samples_per_second": 26.799, "eval_steps_per_second": 13.935, "num_input_tokens_seen": 2045464, "step": 3600 }, { "epoch": 63.24778761061947, "grad_norm": 1.743879693094641e-05, "learning_rate": 0.29403089859733145, "loss": 0.0, "num_input_tokens_seen": 2048200, "step": 3605 }, { "epoch": 63.336283185840706, "grad_norm": 2.00543745449977e-05, "learning_rate": 0.294014435789484, "loss": 0.0, "num_input_tokens_seen": 2051080, "step": 3610 }, { "epoch": 63.424778761061944, "grad_norm": 2.1424470105557702e-05, "learning_rate": 0.2939979507728007, "loss": 0.0, "num_input_tokens_seen": 2054040, "step": 3615 }, { "epoch": 63.51327433628319, "grad_norm": 3.447451308602467e-05, "learning_rate": 0.2939814435498239, "loss": 0.0, "num_input_tokens_seen": 2056968, "step": 3620 }, { "epoch": 63.60176991150443, "grad_norm": 1.3003301319258753e-05, "learning_rate": 0.29396491412309905, "loss": 0.0, "num_input_tokens_seen": 2059704, "step": 3625 }, { "epoch": 63.690265486725664, "grad_norm": 3.005195139849093e-06, "learning_rate": 0.2939483624951753, "loss": 0.0, "num_input_tokens_seen": 2062520, "step": 3630 }, { "epoch": 63.7787610619469, "grad_norm": 1.224400784849422e-05, "learning_rate": 0.2939317886686051, "loss": 0.0, "num_input_tokens_seen": 2065688, "step": 3635 }, { "epoch": 63.86725663716814, "grad_norm": 1.872412394732237e-05, "learning_rate": 0.2939151926459443, "loss": 0.0, "num_input_tokens_seen": 2068472, "step": 3640 }, { "epoch": 63.95575221238938, "grad_norm": 3.355348962941207e-05, "learning_rate": 0.2938985744297522, "loss": 0.0, "num_input_tokens_seen": 2071192, "step": 3645 }, { "epoch": 64.03539823008849, "grad_norm": 2.813566788972821e-05, "learning_rate": 0.29388193402259166, "loss": 0.0, "num_input_tokens_seen": 2073424, "step": 3650 }, { "epoch": 64.12389380530973, "grad_norm": 3.366770033608191e-05, "learning_rate": 0.29386527142702873, "loss": 0.0, "num_input_tokens_seen": 2076352, "step": 3655 }, { "epoch": 64.21238938053098, "grad_norm": 2.9681104933843017e-05, "learning_rate": 0.293848586645633, "loss": 0.0, "num_input_tokens_seen": 2078912, "step": 3660 }, { "epoch": 64.30088495575221, "grad_norm": 5.300247357808985e-05, "learning_rate": 0.2938318796809775, "loss": 0.0, "num_input_tokens_seen": 2081904, "step": 3665 }, { "epoch": 64.38938053097345, "grad_norm": 1.645704105612822e-05, "learning_rate": 0.29381515053563867, "loss": 0.0, "num_input_tokens_seen": 2084768, "step": 3670 }, { "epoch": 64.47787610619469, "grad_norm": 1.1793941666837782e-05, "learning_rate": 0.29379839921219636, "loss": 0.0, "num_input_tokens_seen": 2087792, "step": 3675 }, { "epoch": 64.56637168141593, "grad_norm": 2.8017246222589165e-05, "learning_rate": 0.2937816257132338, "loss": 0.0, "num_input_tokens_seen": 2090336, "step": 3680 }, { "epoch": 64.65486725663717, "grad_norm": 3.0241208150982857e-06, "learning_rate": 0.2937648300413376, "loss": 0.0, "num_input_tokens_seen": 2093264, "step": 3685 }, { "epoch": 64.7433628318584, "grad_norm": 5.729058102588169e-05, "learning_rate": 0.293748012199098, "loss": 0.0, "num_input_tokens_seen": 2095856, "step": 3690 }, { "epoch": 64.83185840707965, "grad_norm": 2.1401046979008242e-06, "learning_rate": 0.29373117218910844, "loss": 0.0, "num_input_tokens_seen": 2099568, "step": 3695 }, { "epoch": 64.92035398230088, "grad_norm": 1.6656838852213696e-05, "learning_rate": 0.2937143100139659, "loss": 0.0, "num_input_tokens_seen": 2102688, "step": 3700 }, { "epoch": 65.0, "grad_norm": 6.4887540247582365e-06, "learning_rate": 0.29369742567627083, "loss": 0.0, "num_input_tokens_seen": 2104960, "step": 3705 }, { "epoch": 65.08849557522124, "grad_norm": 9.477848834649194e-06, "learning_rate": 0.29368051917862675, "loss": 0.0, "num_input_tokens_seen": 2107680, "step": 3710 }, { "epoch": 65.17699115044248, "grad_norm": 1.1240840649406891e-05, "learning_rate": 0.2936635905236411, "loss": 0.0, "num_input_tokens_seen": 2110384, "step": 3715 }, { "epoch": 65.26548672566372, "grad_norm": 3.6127050407230854e-05, "learning_rate": 0.2936466397139244, "loss": 0.0, "num_input_tokens_seen": 2113184, "step": 3720 }, { "epoch": 65.35398230088495, "grad_norm": 2.2430753233493306e-05, "learning_rate": 0.2936296667520907, "loss": 0.0, "num_input_tokens_seen": 2116048, "step": 3725 }, { "epoch": 65.4424778761062, "grad_norm": 1.661307214817498e-05, "learning_rate": 0.2936126716407574, "loss": 0.0, "num_input_tokens_seen": 2119344, "step": 3730 }, { "epoch": 65.53097345132744, "grad_norm": 8.324688678840175e-06, "learning_rate": 0.29359565438254537, "loss": 0.0, "num_input_tokens_seen": 2122240, "step": 3735 }, { "epoch": 65.61946902654867, "grad_norm": 1.6947155017987825e-05, "learning_rate": 0.29357861498007887, "loss": 0.0, "num_input_tokens_seen": 2124912, "step": 3740 }, { "epoch": 65.70796460176992, "grad_norm": 1.5571436961181462e-05, "learning_rate": 0.29356155343598567, "loss": 0.0, "num_input_tokens_seen": 2128208, "step": 3745 }, { "epoch": 65.79646017699115, "grad_norm": 2.6291845642845146e-05, "learning_rate": 0.2935444697528968, "loss": 0.0, "num_input_tokens_seen": 2130848, "step": 3750 }, { "epoch": 65.88495575221239, "grad_norm": 2.0684970877482556e-05, "learning_rate": 0.2935273639334468, "loss": 0.0, "num_input_tokens_seen": 2133728, "step": 3755 }, { "epoch": 65.97345132743362, "grad_norm": 2.110229615936987e-05, "learning_rate": 0.29351023598027365, "loss": 0.0, "num_input_tokens_seen": 2136624, "step": 3760 }, { "epoch": 66.05309734513274, "grad_norm": 1.0571146049187519e-05, "learning_rate": 0.2934930858960186, "loss": 0.0, "num_input_tokens_seen": 2139000, "step": 3765 }, { "epoch": 66.14159292035399, "grad_norm": 1.583739503985271e-05, "learning_rate": 0.29347591368332643, "loss": 0.0, "num_input_tokens_seen": 2142296, "step": 3770 }, { "epoch": 66.23008849557522, "grad_norm": 7.381248451565625e-06, "learning_rate": 0.2934587193448454, "loss": 0.0, "num_input_tokens_seen": 2145432, "step": 3775 }, { "epoch": 66.31858407079646, "grad_norm": 4.2322859371779487e-05, "learning_rate": 0.29344150288322696, "loss": 0.0, "num_input_tokens_seen": 2147960, "step": 3780 }, { "epoch": 66.40707964601769, "grad_norm": 1.9186632925993763e-05, "learning_rate": 0.2934242643011263, "loss": 0.0, "num_input_tokens_seen": 2151112, "step": 3785 }, { "epoch": 66.49557522123894, "grad_norm": 1.4251646689444897e-06, "learning_rate": 0.2934070036012016, "loss": 0.0, "num_input_tokens_seen": 2153448, "step": 3790 }, { "epoch": 66.58407079646018, "grad_norm": 3.9978836866794154e-05, "learning_rate": 0.29338972078611475, "loss": 0.0, "num_input_tokens_seen": 2156280, "step": 3795 }, { "epoch": 66.67256637168141, "grad_norm": 3.4388358471915126e-05, "learning_rate": 0.2933724158585311, "loss": 0.0, "num_input_tokens_seen": 2159128, "step": 3800 }, { "epoch": 66.67256637168141, "eval_loss": 0.3677736520767212, "eval_runtime": 0.9122, "eval_samples_per_second": 27.405, "eval_steps_per_second": 14.251, "num_input_tokens_seen": 2159128, "step": 3800 }, { "epoch": 66.76106194690266, "grad_norm": 1.5423656805069186e-05, "learning_rate": 0.29335508882111916, "loss": 0.0, "num_input_tokens_seen": 2162344, "step": 3805 }, { "epoch": 66.84955752212389, "grad_norm": 3.076591156059294e-06, "learning_rate": 0.29333773967655097, "loss": 0.0, "num_input_tokens_seen": 2164920, "step": 3810 }, { "epoch": 66.93805309734513, "grad_norm": 4.15208050981164e-05, "learning_rate": 0.2933203684275021, "loss": 0.0, "num_input_tokens_seen": 2168024, "step": 3815 }, { "epoch": 67.01769911504425, "grad_norm": 1.0525621291890275e-05, "learning_rate": 0.2933029750766513, "loss": 0.0, "num_input_tokens_seen": 2170720, "step": 3820 }, { "epoch": 67.10619469026548, "grad_norm": 3.237776400055736e-05, "learning_rate": 0.2932855596266809, "loss": 0.0, "num_input_tokens_seen": 2173312, "step": 3825 }, { "epoch": 67.19469026548673, "grad_norm": 2.807303189911181e-06, "learning_rate": 0.2932681220802765, "loss": 0.0, "num_input_tokens_seen": 2175984, "step": 3830 }, { "epoch": 67.28318584070796, "grad_norm": 1.4826597180217505e-05, "learning_rate": 0.2932506624401274, "loss": 0.0, "num_input_tokens_seen": 2178624, "step": 3835 }, { "epoch": 67.3716814159292, "grad_norm": 2.9049484510323964e-05, "learning_rate": 0.29323318070892584, "loss": 0.0, "num_input_tokens_seen": 2181088, "step": 3840 }, { "epoch": 67.46017699115045, "grad_norm": 1.7954704162548296e-05, "learning_rate": 0.29321567688936784, "loss": 0.0, "num_input_tokens_seen": 2184736, "step": 3845 }, { "epoch": 67.54867256637168, "grad_norm": 3.859784919768572e-05, "learning_rate": 0.29319815098415275, "loss": 0.0, "num_input_tokens_seen": 2187296, "step": 3850 }, { "epoch": 67.63716814159292, "grad_norm": 8.108509064186364e-06, "learning_rate": 0.2931806029959832, "loss": 0.0, "num_input_tokens_seen": 2190432, "step": 3855 }, { "epoch": 67.72566371681415, "grad_norm": 2.2443511625169776e-05, "learning_rate": 0.29316303292756535, "loss": 0.0, "num_input_tokens_seen": 2193568, "step": 3860 }, { "epoch": 67.8141592920354, "grad_norm": 2.516521817597095e-05, "learning_rate": 0.29314544078160876, "loss": 0.0, "num_input_tokens_seen": 2196464, "step": 3865 }, { "epoch": 67.90265486725664, "grad_norm": 2.00249287445331e-05, "learning_rate": 0.2931278265608263, "loss": 0.0, "num_input_tokens_seen": 2199584, "step": 3870 }, { "epoch": 67.99115044247787, "grad_norm": 3.619547715061344e-05, "learning_rate": 0.29311019026793433, "loss": 0.0, "num_input_tokens_seen": 2202432, "step": 3875 }, { "epoch": 68.070796460177, "grad_norm": 2.2741040083928965e-05, "learning_rate": 0.29309253190565254, "loss": 0.0, "num_input_tokens_seen": 2205616, "step": 3880 }, { "epoch": 68.15929203539822, "grad_norm": 1.1188014468643814e-05, "learning_rate": 0.2930748514767042, "loss": 0.0, "num_input_tokens_seen": 2208608, "step": 3885 }, { "epoch": 68.24778761061947, "grad_norm": 1.85035023605451e-05, "learning_rate": 0.29305714898381574, "loss": 0.0, "num_input_tokens_seen": 2210912, "step": 3890 }, { "epoch": 68.33628318584071, "grad_norm": 2.842759931809269e-05, "learning_rate": 0.29303942442971714, "loss": 0.0, "num_input_tokens_seen": 2213952, "step": 3895 }, { "epoch": 68.42477876106194, "grad_norm": 2.0531195332296193e-05, "learning_rate": 0.2930216778171417, "loss": 0.0, "num_input_tokens_seen": 2217024, "step": 3900 }, { "epoch": 68.51327433628319, "grad_norm": 1.0467572792549618e-05, "learning_rate": 0.2930039091488263, "loss": 0.0, "num_input_tokens_seen": 2219680, "step": 3905 }, { "epoch": 68.60176991150442, "grad_norm": 6.6619077188079245e-06, "learning_rate": 0.29298611842751093, "loss": 0.0, "num_input_tokens_seen": 2222384, "step": 3910 }, { "epoch": 68.69026548672566, "grad_norm": 1.021869138639886e-05, "learning_rate": 0.29296830565593923, "loss": 0.0, "num_input_tokens_seen": 2224864, "step": 3915 }, { "epoch": 68.77876106194691, "grad_norm": 2.8807148737541866e-06, "learning_rate": 0.2929504708368582, "loss": 0.0, "num_input_tokens_seen": 2228192, "step": 3920 }, { "epoch": 68.86725663716814, "grad_norm": 1.1849306247313507e-05, "learning_rate": 0.29293261397301806, "loss": 0.0, "num_input_tokens_seen": 2230976, "step": 3925 }, { "epoch": 68.95575221238938, "grad_norm": 1.0004808245867025e-05, "learning_rate": 0.29291473506717275, "loss": 0.0, "num_input_tokens_seen": 2233968, "step": 3930 }, { "epoch": 69.03539823008849, "grad_norm": 3.3840062769741053e-06, "learning_rate": 0.29289683412207923, "loss": 0.0, "num_input_tokens_seen": 2236392, "step": 3935 }, { "epoch": 69.12389380530973, "grad_norm": 5.918141141592059e-06, "learning_rate": 0.29287891114049813, "loss": 0.0, "num_input_tokens_seen": 2239032, "step": 3940 }, { "epoch": 69.21238938053098, "grad_norm": 3.469240982667543e-05, "learning_rate": 0.29286096612519347, "loss": 0.0, "num_input_tokens_seen": 2241912, "step": 3945 }, { "epoch": 69.30088495575221, "grad_norm": 7.144365099520655e-06, "learning_rate": 0.2928429990789325, "loss": 0.0, "num_input_tokens_seen": 2245016, "step": 3950 }, { "epoch": 69.38938053097345, "grad_norm": 1.3272005162434652e-05, "learning_rate": 0.29282501000448596, "loss": 0.0, "num_input_tokens_seen": 2247720, "step": 3955 }, { "epoch": 69.47787610619469, "grad_norm": 6.599826519959606e-06, "learning_rate": 0.2928069989046281, "loss": 0.0, "num_input_tokens_seen": 2250360, "step": 3960 }, { "epoch": 69.56637168141593, "grad_norm": 9.144034265773371e-06, "learning_rate": 0.2927889657821363, "loss": 0.0, "num_input_tokens_seen": 2253272, "step": 3965 }, { "epoch": 69.65486725663717, "grad_norm": 2.5255953914893325e-06, "learning_rate": 0.2927709106397916, "loss": 0.0, "num_input_tokens_seen": 2256120, "step": 3970 }, { "epoch": 69.7433628318584, "grad_norm": 1.5825173250050284e-05, "learning_rate": 0.29275283348037834, "loss": 0.0, "num_input_tokens_seen": 2259400, "step": 3975 }, { "epoch": 69.83185840707965, "grad_norm": 2.662315637280699e-05, "learning_rate": 0.29273473430668423, "loss": 0.0, "num_input_tokens_seen": 2262344, "step": 3980 }, { "epoch": 69.92035398230088, "grad_norm": 2.2529902707901783e-05, "learning_rate": 0.2927166131215003, "loss": 0.0, "num_input_tokens_seen": 2265160, "step": 3985 }, { "epoch": 70.0, "grad_norm": 4.212867224850925e-06, "learning_rate": 0.2926984699276212, "loss": 0.0, "num_input_tokens_seen": 2267528, "step": 3990 }, { "epoch": 70.08849557522124, "grad_norm": 3.805399683187716e-05, "learning_rate": 0.29268030472784473, "loss": 0.0, "num_input_tokens_seen": 2270056, "step": 3995 }, { "epoch": 70.17699115044248, "grad_norm": 9.422032235306688e-06, "learning_rate": 0.2926621175249723, "loss": 0.0, "num_input_tokens_seen": 2272792, "step": 4000 }, { "epoch": 70.17699115044248, "eval_loss": 0.36759963631629944, "eval_runtime": 0.9198, "eval_samples_per_second": 27.179, "eval_steps_per_second": 14.133, "num_input_tokens_seen": 2272792, "step": 4000 }, { "epoch": 70.26548672566372, "grad_norm": 2.9090702810208313e-05, "learning_rate": 0.29264390832180853, "loss": 0.0, "num_input_tokens_seen": 2275432, "step": 4005 }, { "epoch": 70.35398230088495, "grad_norm": 2.2283124053501524e-05, "learning_rate": 0.29262567712116144, "loss": 0.0, "num_input_tokens_seen": 2278280, "step": 4010 }, { "epoch": 70.4424778761062, "grad_norm": 6.148783086246112e-06, "learning_rate": 0.29260742392584266, "loss": 0.0, "num_input_tokens_seen": 2281128, "step": 4015 }, { "epoch": 70.53097345132744, "grad_norm": 1.527516360511072e-05, "learning_rate": 0.292589148738667, "loss": 0.0, "num_input_tokens_seen": 2284376, "step": 4020 }, { "epoch": 70.61946902654867, "grad_norm": 1.203413830808131e-05, "learning_rate": 0.2925708515624527, "loss": 0.0, "num_input_tokens_seen": 2287208, "step": 4025 }, { "epoch": 70.70796460176992, "grad_norm": 1.9319243165227817e-06, "learning_rate": 0.29255253240002144, "loss": 0.0, "num_input_tokens_seen": 2290024, "step": 4030 }, { "epoch": 70.79646017699115, "grad_norm": 8.436039934167638e-06, "learning_rate": 0.2925341912541983, "loss": 0.0, "num_input_tokens_seen": 2292616, "step": 4035 }, { "epoch": 70.88495575221239, "grad_norm": 7.0400778895418625e-06, "learning_rate": 0.2925158281278116, "loss": 0.0, "num_input_tokens_seen": 2296104, "step": 4040 }, { "epoch": 70.97345132743362, "grad_norm": 1.4069966709939763e-05, "learning_rate": 0.29249744302369324, "loss": 0.0, "num_input_tokens_seen": 2298888, "step": 4045 }, { "epoch": 71.05309734513274, "grad_norm": 2.107543514284771e-05, "learning_rate": 0.29247903594467844, "loss": 0.0, "num_input_tokens_seen": 2301192, "step": 4050 }, { "epoch": 71.14159292035399, "grad_norm": 2.4853881768649444e-05, "learning_rate": 0.2924606068936058, "loss": 0.0, "num_input_tokens_seen": 2304312, "step": 4055 }, { "epoch": 71.23008849557522, "grad_norm": 1.627755591471214e-05, "learning_rate": 0.2924421558733173, "loss": 0.0, "num_input_tokens_seen": 2306824, "step": 4060 }, { "epoch": 71.31858407079646, "grad_norm": 2.586418304417748e-05, "learning_rate": 0.2924236828866583, "loss": 0.0, "num_input_tokens_seen": 2309800, "step": 4065 }, { "epoch": 71.40707964601769, "grad_norm": 2.6714513296610676e-05, "learning_rate": 0.29240518793647763, "loss": 0.0, "num_input_tokens_seen": 2312728, "step": 4070 }, { "epoch": 71.49557522123894, "grad_norm": 2.024080094997771e-05, "learning_rate": 0.29238667102562743, "loss": 0.0, "num_input_tokens_seen": 2315608, "step": 4075 }, { "epoch": 71.58407079646018, "grad_norm": 4.3234398617642e-05, "learning_rate": 0.29236813215696317, "loss": 0.0, "num_input_tokens_seen": 2318264, "step": 4080 }, { "epoch": 71.67256637168141, "grad_norm": 1.0371915095674922e-06, "learning_rate": 0.2923495713333439, "loss": 0.0, "num_input_tokens_seen": 2321032, "step": 4085 }, { "epoch": 71.76106194690266, "grad_norm": 2.0506815417320468e-05, "learning_rate": 0.29233098855763173, "loss": 0.0, "num_input_tokens_seen": 2324568, "step": 4090 }, { "epoch": 71.84955752212389, "grad_norm": 1.4703994565934408e-05, "learning_rate": 0.29231238383269254, "loss": 0.0, "num_input_tokens_seen": 2327144, "step": 4095 }, { "epoch": 71.93805309734513, "grad_norm": 8.721726771909744e-06, "learning_rate": 0.2922937571613954, "loss": 0.0, "num_input_tokens_seen": 2330104, "step": 4100 }, { "epoch": 72.01769911504425, "grad_norm": 1.4340456800709944e-05, "learning_rate": 0.29227510854661265, "loss": 0.0, "num_input_tokens_seen": 2332656, "step": 4105 }, { "epoch": 72.10619469026548, "grad_norm": 3.811505848716479e-06, "learning_rate": 0.29225643799122025, "loss": 0.0, "num_input_tokens_seen": 2335584, "step": 4110 }, { "epoch": 72.19469026548673, "grad_norm": 2.8085227313567884e-05, "learning_rate": 0.2922377454980974, "loss": 0.0, "num_input_tokens_seen": 2338048, "step": 4115 }, { "epoch": 72.28318584070796, "grad_norm": 1.8555034330347553e-05, "learning_rate": 0.29221903107012676, "loss": 0.0, "num_input_tokens_seen": 2340848, "step": 4120 }, { "epoch": 72.3716814159292, "grad_norm": 8.461062861897517e-06, "learning_rate": 0.29220029471019426, "loss": 0.0, "num_input_tokens_seen": 2344112, "step": 4125 }, { "epoch": 72.46017699115045, "grad_norm": 1.403624628437683e-05, "learning_rate": 0.2921815364211893, "loss": 0.0, "num_input_tokens_seen": 2346544, "step": 4130 }, { "epoch": 72.54867256637168, "grad_norm": 3.411239231354557e-05, "learning_rate": 0.29216275620600474, "loss": 0.0, "num_input_tokens_seen": 2349456, "step": 4135 }, { "epoch": 72.63716814159292, "grad_norm": 3.712247007570113e-06, "learning_rate": 0.29214395406753657, "loss": 0.0, "num_input_tokens_seen": 2352160, "step": 4140 }, { "epoch": 72.72566371681415, "grad_norm": 3.315218236821238e-06, "learning_rate": 0.2921251300086844, "loss": 0.0, "num_input_tokens_seen": 2355104, "step": 4145 }, { "epoch": 72.8141592920354, "grad_norm": 2.315730853297282e-05, "learning_rate": 0.2921062840323511, "loss": 0.0, "num_input_tokens_seen": 2358288, "step": 4150 }, { "epoch": 72.90265486725664, "grad_norm": 1.399136999680195e-05, "learning_rate": 0.29208741614144307, "loss": 0.0, "num_input_tokens_seen": 2361056, "step": 4155 }, { "epoch": 72.99115044247787, "grad_norm": 8.795711437414866e-06, "learning_rate": 0.2920685263388698, "loss": 0.0, "num_input_tokens_seen": 2364160, "step": 4160 }, { "epoch": 73.070796460177, "grad_norm": 9.402758223586716e-06, "learning_rate": 0.2920496146275445, "loss": 0.0, "num_input_tokens_seen": 2366384, "step": 4165 }, { "epoch": 73.15929203539822, "grad_norm": 1.2161111044406425e-05, "learning_rate": 0.29203068101038343, "loss": 0.0, "num_input_tokens_seen": 2369632, "step": 4170 }, { "epoch": 73.24778761061947, "grad_norm": 1.0335465958632994e-05, "learning_rate": 0.2920117254903065, "loss": 0.0, "num_input_tokens_seen": 2372880, "step": 4175 }, { "epoch": 73.33628318584071, "grad_norm": 1.2378063729556743e-05, "learning_rate": 0.29199274807023695, "loss": 0.0, "num_input_tokens_seen": 2375344, "step": 4180 }, { "epoch": 73.42477876106194, "grad_norm": 2.986155595863238e-05, "learning_rate": 0.29197374875310117, "loss": 0.0, "num_input_tokens_seen": 2378288, "step": 4185 }, { "epoch": 73.51327433628319, "grad_norm": 1.3800038232147926e-06, "learning_rate": 0.2919547275418292, "loss": 0.0, "num_input_tokens_seen": 2381600, "step": 4190 }, { "epoch": 73.60176991150442, "grad_norm": 2.637521356518846e-05, "learning_rate": 0.29193568443935436, "loss": 0.0, "num_input_tokens_seen": 2384096, "step": 4195 }, { "epoch": 73.69026548672566, "grad_norm": 2.1674657546100207e-05, "learning_rate": 0.2919166194486133, "loss": 0.0, "num_input_tokens_seen": 2387344, "step": 4200 }, { "epoch": 73.69026548672566, "eval_loss": 0.36723047494888306, "eval_runtime": 0.9354, "eval_samples_per_second": 26.726, "eval_steps_per_second": 13.898, "num_input_tokens_seen": 2387344, "step": 4200 }, { "epoch": 73.77876106194691, "grad_norm": 1.824320133891888e-05, "learning_rate": 0.2918975325725461, "loss": 0.0, "num_input_tokens_seen": 2390256, "step": 4205 }, { "epoch": 73.86725663716814, "grad_norm": 4.217048171994975e-06, "learning_rate": 0.29187842381409607, "loss": 0.0, "num_input_tokens_seen": 2393056, "step": 4210 }, { "epoch": 73.95575221238938, "grad_norm": 5.750944637838984e-06, "learning_rate": 0.29185929317621023, "loss": 0.0, "num_input_tokens_seen": 2395600, "step": 4215 }, { "epoch": 74.03539823008849, "grad_norm": 1.1150009413540829e-05, "learning_rate": 0.29184014066183867, "loss": 0.0, "num_input_tokens_seen": 2397832, "step": 4220 }, { "epoch": 74.12389380530973, "grad_norm": 3.069782223974471e-06, "learning_rate": 0.2918209662739349, "loss": 0.0, "num_input_tokens_seen": 2400872, "step": 4225 }, { "epoch": 74.21238938053098, "grad_norm": 3.798725811066106e-05, "learning_rate": 0.29180177001545593, "loss": 0.0, "num_input_tokens_seen": 2403544, "step": 4230 }, { "epoch": 74.30088495575221, "grad_norm": 2.520867383282166e-05, "learning_rate": 0.29178255188936203, "loss": 0.0, "num_input_tokens_seen": 2406744, "step": 4235 }, { "epoch": 74.38938053097345, "grad_norm": 1.0224712241324596e-05, "learning_rate": 0.2917633118986169, "loss": 0.0, "num_input_tokens_seen": 2409880, "step": 4240 }, { "epoch": 74.47787610619469, "grad_norm": 9.725671588967089e-06, "learning_rate": 0.2917440500461875, "loss": 0.0, "num_input_tokens_seen": 2412408, "step": 4245 }, { "epoch": 74.56637168141593, "grad_norm": 2.4388580186496256e-06, "learning_rate": 0.29172476633504435, "loss": 0.0, "num_input_tokens_seen": 2415304, "step": 4250 }, { "epoch": 74.65486725663717, "grad_norm": 3.5032303458137903e-06, "learning_rate": 0.2917054607681612, "loss": 0.0, "num_input_tokens_seen": 2418088, "step": 4255 }, { "epoch": 74.7433628318584, "grad_norm": 9.095188943319954e-06, "learning_rate": 0.29168613334851523, "loss": 0.0, "num_input_tokens_seen": 2420872, "step": 4260 }, { "epoch": 74.83185840707965, "grad_norm": 1.1440006346674636e-05, "learning_rate": 0.2916667840790869, "loss": 0.0, "num_input_tokens_seen": 2423592, "step": 4265 }, { "epoch": 74.92035398230088, "grad_norm": 2.935229076683754e-06, "learning_rate": 0.2916474129628603, "loss": 0.0, "num_input_tokens_seen": 2426584, "step": 4270 }, { "epoch": 75.0, "grad_norm": 2.804813084367197e-05, "learning_rate": 0.29162802000282245, "loss": 0.0, "num_input_tokens_seen": 2428728, "step": 4275 }, { "epoch": 75.08849557522124, "grad_norm": 2.6733125196187757e-05, "learning_rate": 0.2916086052019642, "loss": 0.0, "num_input_tokens_seen": 2431576, "step": 4280 }, { "epoch": 75.17699115044248, "grad_norm": 2.111234061885625e-05, "learning_rate": 0.2915891685632794, "loss": 0.0, "num_input_tokens_seen": 2434488, "step": 4285 }, { "epoch": 75.26548672566372, "grad_norm": 1.1898192497028504e-05, "learning_rate": 0.29156971008976545, "loss": 0.0, "num_input_tokens_seen": 2437208, "step": 4290 }, { "epoch": 75.35398230088495, "grad_norm": 1.543630969536025e-05, "learning_rate": 0.2915502297844232, "loss": 0.0, "num_input_tokens_seen": 2440120, "step": 4295 }, { "epoch": 75.4424778761062, "grad_norm": 3.0471141144516878e-05, "learning_rate": 0.2915307276502566, "loss": 0.0, "num_input_tokens_seen": 2442856, "step": 4300 }, { "epoch": 75.53097345132744, "grad_norm": 9.20421280170558e-06, "learning_rate": 0.29151120369027334, "loss": 0.0, "num_input_tokens_seen": 2445816, "step": 4305 }, { "epoch": 75.61946902654867, "grad_norm": 5.196699930820614e-06, "learning_rate": 0.29149165790748405, "loss": 0.0, "num_input_tokens_seen": 2448520, "step": 4310 }, { "epoch": 75.70796460176992, "grad_norm": 3.7334625631046947e-06, "learning_rate": 0.291472090304903, "loss": 0.0, "num_input_tokens_seen": 2451704, "step": 4315 }, { "epoch": 75.79646017699115, "grad_norm": 7.74161071603885e-06, "learning_rate": 0.2914525008855478, "loss": 0.0, "num_input_tokens_seen": 2454392, "step": 4320 }, { "epoch": 75.88495575221239, "grad_norm": 1.5376670489786193e-05, "learning_rate": 0.2914328896524394, "loss": 0.0, "num_input_tokens_seen": 2457304, "step": 4325 }, { "epoch": 75.97345132743362, "grad_norm": 6.465412297984585e-06, "learning_rate": 0.291413256608602, "loss": 0.0, "num_input_tokens_seen": 2460728, "step": 4330 }, { "epoch": 76.05309734513274, "grad_norm": 2.3227035853778943e-05, "learning_rate": 0.29139360175706336, "loss": 0.0, "num_input_tokens_seen": 2463304, "step": 4335 }, { "epoch": 76.14159292035399, "grad_norm": 8.571220860176254e-06, "learning_rate": 0.2913739251008544, "loss": 0.0, "num_input_tokens_seen": 2465720, "step": 4340 }, { "epoch": 76.23008849557522, "grad_norm": 2.317671533091925e-05, "learning_rate": 0.29135422664300964, "loss": 0.0, "num_input_tokens_seen": 2468824, "step": 4345 }, { "epoch": 76.31858407079646, "grad_norm": 1.6231848348979838e-05, "learning_rate": 0.29133450638656677, "loss": 0.0, "num_input_tokens_seen": 2472040, "step": 4350 }, { "epoch": 76.40707964601769, "grad_norm": 9.262331332138274e-06, "learning_rate": 0.2913147643345669, "loss": 0.0, "num_input_tokens_seen": 2474936, "step": 4355 }, { "epoch": 76.49557522123894, "grad_norm": 1.877340764622204e-05, "learning_rate": 0.29129500049005447, "loss": 0.0, "num_input_tokens_seen": 2477736, "step": 4360 }, { "epoch": 76.58407079646018, "grad_norm": 1.683931623119861e-05, "learning_rate": 0.2912752148560773, "loss": 0.0, "num_input_tokens_seen": 2480296, "step": 4365 }, { "epoch": 76.67256637168141, "grad_norm": 7.220799489005003e-06, "learning_rate": 0.2912554074356866, "loss": 0.0, "num_input_tokens_seen": 2483256, "step": 4370 }, { "epoch": 76.76106194690266, "grad_norm": 1.2846136087318882e-05, "learning_rate": 0.2912355782319371, "loss": 0.0, "num_input_tokens_seen": 2486360, "step": 4375 }, { "epoch": 76.84955752212389, "grad_norm": 1.734439138090238e-05, "learning_rate": 0.2912157272478864, "loss": 0.0, "num_input_tokens_seen": 2489352, "step": 4380 }, { "epoch": 76.93805309734513, "grad_norm": 4.010015800304245e-06, "learning_rate": 0.291195854486596, "loss": 0.0, "num_input_tokens_seen": 2492136, "step": 4385 }, { "epoch": 77.01769911504425, "grad_norm": 5.7586989896663e-06, "learning_rate": 0.2911759599511305, "loss": 0.0, "num_input_tokens_seen": 2494240, "step": 4390 }, { "epoch": 77.10619469026548, "grad_norm": 1.9729763153009117e-05, "learning_rate": 0.29115604364455777, "loss": 0.0, "num_input_tokens_seen": 2497360, "step": 4395 }, { "epoch": 77.19469026548673, "grad_norm": 1.7830365322879516e-05, "learning_rate": 0.2911361055699493, "loss": 0.0, "num_input_tokens_seen": 2500160, "step": 4400 }, { "epoch": 77.19469026548673, "eval_loss": 0.38368889689445496, "eval_runtime": 0.9318, "eval_samples_per_second": 26.83, "eval_steps_per_second": 13.952, "num_input_tokens_seen": 2500160, "step": 4400 }, { "epoch": 77.28318584070796, "grad_norm": 1.655864980421029e-05, "learning_rate": 0.2911161457303797, "loss": 0.0, "num_input_tokens_seen": 2503184, "step": 4405 }, { "epoch": 77.3716814159292, "grad_norm": 9.265312655770686e-06, "learning_rate": 0.291096164128927, "loss": 0.0, "num_input_tokens_seen": 2505808, "step": 4410 }, { "epoch": 77.46017699115045, "grad_norm": 4.439972599357134e-06, "learning_rate": 0.2910761607686727, "loss": 0.0, "num_input_tokens_seen": 2508128, "step": 4415 }, { "epoch": 77.54867256637168, "grad_norm": 1.818980649659352e-06, "learning_rate": 0.2910561356527016, "loss": 0.0, "num_input_tokens_seen": 2510896, "step": 4420 }, { "epoch": 77.63716814159292, "grad_norm": 2.7093818061985075e-05, "learning_rate": 0.2910360887841017, "loss": 0.0, "num_input_tokens_seen": 2513792, "step": 4425 }, { "epoch": 77.72566371681415, "grad_norm": 5.347759724827483e-06, "learning_rate": 0.2910160201659645, "loss": 0.0, "num_input_tokens_seen": 2516512, "step": 4430 }, { "epoch": 77.8141592920354, "grad_norm": 1.3840855899616145e-05, "learning_rate": 0.29099592980138494, "loss": 0.0, "num_input_tokens_seen": 2520448, "step": 4435 }, { "epoch": 77.90265486725664, "grad_norm": 1.4718340025865473e-05, "learning_rate": 0.29097581769346115, "loss": 0.0, "num_input_tokens_seen": 2523264, "step": 4440 }, { "epoch": 77.99115044247787, "grad_norm": 1.7104386643040925e-05, "learning_rate": 0.29095568384529463, "loss": 0.0, "num_input_tokens_seen": 2525888, "step": 4445 }, { "epoch": 78.070796460177, "grad_norm": 4.389239165902836e-06, "learning_rate": 0.2909355282599903, "loss": 0.0, "num_input_tokens_seen": 2528640, "step": 4450 }, { "epoch": 78.15929203539822, "grad_norm": 8.524692930222955e-06, "learning_rate": 0.29091535094065635, "loss": 0.0, "num_input_tokens_seen": 2531472, "step": 4455 }, { "epoch": 78.24778761061947, "grad_norm": 7.799997547408566e-06, "learning_rate": 0.2908951518904045, "loss": 0.0, "num_input_tokens_seen": 2533968, "step": 4460 }, { "epoch": 78.33628318584071, "grad_norm": 1.5775445717736147e-05, "learning_rate": 0.29087493111234963, "loss": 0.0, "num_input_tokens_seen": 2536464, "step": 4465 }, { "epoch": 78.42477876106194, "grad_norm": 1.965599221875891e-05, "learning_rate": 0.29085468860961, "loss": 0.0, "num_input_tokens_seen": 2539008, "step": 4470 }, { "epoch": 78.51327433628319, "grad_norm": 7.914882189652417e-06, "learning_rate": 0.2908344243853073, "loss": 0.0, "num_input_tokens_seen": 2542368, "step": 4475 }, { "epoch": 78.60176991150442, "grad_norm": 1.04861337604234e-05, "learning_rate": 0.2908141384425666, "loss": 0.0, "num_input_tokens_seen": 2545184, "step": 4480 }, { "epoch": 78.69026548672566, "grad_norm": 1.8185164663009346e-05, "learning_rate": 0.2907938307845161, "loss": 0.0, "num_input_tokens_seen": 2548224, "step": 4485 }, { "epoch": 78.77876106194691, "grad_norm": 3.674972049338976e-06, "learning_rate": 0.2907735014142876, "loss": 0.0, "num_input_tokens_seen": 2551072, "step": 4490 }, { "epoch": 78.86725663716814, "grad_norm": 1.638784488022793e-05, "learning_rate": 0.2907531503350161, "loss": 0.0, "num_input_tokens_seen": 2554016, "step": 4495 }, { "epoch": 78.95575221238938, "grad_norm": 5.37524920218857e-06, "learning_rate": 0.29073277754983995, "loss": 0.0, "num_input_tokens_seen": 2556960, "step": 4500 }, { "epoch": 79.03539823008849, "grad_norm": 7.64646210882347e-06, "learning_rate": 0.290712383061901, "loss": 0.0, "num_input_tokens_seen": 2559240, "step": 4505 }, { "epoch": 79.12389380530973, "grad_norm": 1.1027284926967695e-05, "learning_rate": 0.2906919668743443, "loss": 0.0, "num_input_tokens_seen": 2562056, "step": 4510 }, { "epoch": 79.21238938053098, "grad_norm": 1.8373077182332054e-05, "learning_rate": 0.29067152899031823, "loss": 0.0, "num_input_tokens_seen": 2564744, "step": 4515 }, { "epoch": 79.30088495575221, "grad_norm": 9.340361430076882e-06, "learning_rate": 0.2906510694129746, "loss": 0.0, "num_input_tokens_seen": 2568040, "step": 4520 }, { "epoch": 79.38938053097345, "grad_norm": 2.304910412931349e-05, "learning_rate": 0.2906305881454685, "loss": 0.0, "num_input_tokens_seen": 2570808, "step": 4525 }, { "epoch": 79.47787610619469, "grad_norm": 2.4579037926741876e-05, "learning_rate": 0.2906100851909585, "loss": 0.0, "num_input_tokens_seen": 2573992, "step": 4530 }, { "epoch": 79.56637168141593, "grad_norm": 2.1346629637264414e-06, "learning_rate": 0.29058956055260626, "loss": 0.0, "num_input_tokens_seen": 2576744, "step": 4535 }, { "epoch": 79.65486725663717, "grad_norm": 1.560763303132262e-05, "learning_rate": 0.2905690142335771, "loss": 0.0, "num_input_tokens_seen": 2579864, "step": 4540 }, { "epoch": 79.7433628318584, "grad_norm": 8.336866812896915e-06, "learning_rate": 0.29054844623703946, "loss": 0.0, "num_input_tokens_seen": 2582632, "step": 4545 }, { "epoch": 79.83185840707965, "grad_norm": 6.094447599025443e-06, "learning_rate": 0.2905278565661651, "loss": 0.0, "num_input_tokens_seen": 2585688, "step": 4550 }, { "epoch": 79.92035398230088, "grad_norm": 2.2186550268088467e-05, "learning_rate": 0.2905072452241293, "loss": 0.0, "num_input_tokens_seen": 2588504, "step": 4555 }, { "epoch": 80.0, "grad_norm": 2.4563826173107373e-06, "learning_rate": 0.2904866122141106, "loss": 0.0, "num_input_tokens_seen": 2590976, "step": 4560 }, { "epoch": 80.08849557522124, "grad_norm": 1.119999069487676e-05, "learning_rate": 0.2904659575392908, "loss": 0.0, "num_input_tokens_seen": 2593808, "step": 4565 }, { "epoch": 80.17699115044248, "grad_norm": 8.269725526588445e-07, "learning_rate": 0.2904452812028551, "loss": 0.0, "num_input_tokens_seen": 2596800, "step": 4570 }, { "epoch": 80.26548672566372, "grad_norm": 3.2291966363118263e-06, "learning_rate": 0.2904245832079922, "loss": 0.0, "num_input_tokens_seen": 2599616, "step": 4575 }, { "epoch": 80.35398230088495, "grad_norm": 3.3825422178779263e-06, "learning_rate": 0.29040386355789377, "loss": 0.0, "num_input_tokens_seen": 2602432, "step": 4580 }, { "epoch": 80.4424778761062, "grad_norm": 2.996683633682551e-06, "learning_rate": 0.29038312225575524, "loss": 0.0, "num_input_tokens_seen": 2605104, "step": 4585 }, { "epoch": 80.53097345132744, "grad_norm": 2.6377067115390673e-05, "learning_rate": 0.29036235930477505, "loss": 0.0, "num_input_tokens_seen": 2607984, "step": 4590 }, { "epoch": 80.61946902654867, "grad_norm": 1.1928204003197607e-05, "learning_rate": 0.29034157470815514, "loss": 0.0, "num_input_tokens_seen": 2610832, "step": 4595 }, { "epoch": 80.70796460176992, "grad_norm": 3.496194494800875e-06, "learning_rate": 0.2903207684691008, "loss": 0.0, "num_input_tokens_seen": 2614032, "step": 4600 }, { "epoch": 80.70796460176992, "eval_loss": 0.3870992958545685, "eval_runtime": 0.9399, "eval_samples_per_second": 26.598, "eval_steps_per_second": 13.831, "num_input_tokens_seen": 2614032, "step": 4600 }, { "epoch": 80.79646017699115, "grad_norm": 1.9724067897186615e-05, "learning_rate": 0.29029994059082054, "loss": 0.0, "num_input_tokens_seen": 2616800, "step": 4605 }, { "epoch": 80.88495575221239, "grad_norm": 1.8426884480504668e-06, "learning_rate": 0.2902790910765264, "loss": 0.0, "num_input_tokens_seen": 2619648, "step": 4610 }, { "epoch": 80.97345132743362, "grad_norm": 7.508423095714534e-06, "learning_rate": 0.29025821992943346, "loss": 0.0, "num_input_tokens_seen": 2622864, "step": 4615 }, { "epoch": 81.05309734513274, "grad_norm": 9.51443871599622e-06, "learning_rate": 0.29023732715276046, "loss": 0.0, "num_input_tokens_seen": 2625232, "step": 4620 }, { "epoch": 81.14159292035399, "grad_norm": 1.909748971229419e-05, "learning_rate": 0.2902164127497293, "loss": 0.0, "num_input_tokens_seen": 2628320, "step": 4625 }, { "epoch": 81.23008849557522, "grad_norm": 1.7563539586262777e-05, "learning_rate": 0.2901954767235652, "loss": 0.0, "num_input_tokens_seen": 2631056, "step": 4630 }, { "epoch": 81.31858407079646, "grad_norm": 6.830956863268511e-06, "learning_rate": 0.2901745190774968, "loss": 0.0, "num_input_tokens_seen": 2633792, "step": 4635 }, { "epoch": 81.40707964601769, "grad_norm": 1.0477417163201608e-05, "learning_rate": 0.290153539814756, "loss": 0.0, "num_input_tokens_seen": 2636288, "step": 4640 }, { "epoch": 81.49557522123894, "grad_norm": 9.02857027540449e-06, "learning_rate": 0.2901325389385781, "loss": 0.0, "num_input_tokens_seen": 2639536, "step": 4645 }, { "epoch": 81.58407079646018, "grad_norm": 7.994470252015162e-06, "learning_rate": 0.2901115164522016, "loss": 0.0, "num_input_tokens_seen": 2642256, "step": 4650 }, { "epoch": 81.67256637168141, "grad_norm": 2.8310262223385507e-06, "learning_rate": 0.29009047235886865, "loss": 0.0, "num_input_tokens_seen": 2645280, "step": 4655 }, { "epoch": 81.76106194690266, "grad_norm": 8.874430932337418e-06, "learning_rate": 0.2900694066618243, "loss": 0.0, "num_input_tokens_seen": 2648624, "step": 4660 }, { "epoch": 81.84955752212389, "grad_norm": 8.299872206407599e-06, "learning_rate": 0.2900483193643172, "loss": 0.0, "num_input_tokens_seen": 2651824, "step": 4665 }, { "epoch": 81.93805309734513, "grad_norm": 1.915339453262277e-05, "learning_rate": 0.29002721046959934, "loss": 0.0, "num_input_tokens_seen": 2654352, "step": 4670 }, { "epoch": 82.01769911504425, "grad_norm": 1.232035288012412e-06, "learning_rate": 0.29000607998092587, "loss": 0.0, "num_input_tokens_seen": 2656984, "step": 4675 }, { "epoch": 82.10619469026548, "grad_norm": 2.1881428438064177e-06, "learning_rate": 0.2899849279015555, "loss": 0.0, "num_input_tokens_seen": 2659736, "step": 4680 }, { "epoch": 82.19469026548673, "grad_norm": 6.808013040426886e-06, "learning_rate": 0.28996375423475007, "loss": 0.0, "num_input_tokens_seen": 2662392, "step": 4685 }, { "epoch": 82.28318584070796, "grad_norm": 5.052491815149551e-06, "learning_rate": 0.28994255898377486, "loss": 0.0, "num_input_tokens_seen": 2664808, "step": 4690 }, { "epoch": 82.3716814159292, "grad_norm": 2.081787897623144e-05, "learning_rate": 0.2899213421518984, "loss": 0.0, "num_input_tokens_seen": 2667560, "step": 4695 }, { "epoch": 82.46017699115045, "grad_norm": 1.5330432461269083e-06, "learning_rate": 0.2899001037423926, "loss": 0.0, "num_input_tokens_seen": 2670232, "step": 4700 }, { "epoch": 82.54867256637168, "grad_norm": 1.9790188162005506e-05, "learning_rate": 0.28987884375853273, "loss": 0.0, "num_input_tokens_seen": 2673512, "step": 4705 }, { "epoch": 82.63716814159292, "grad_norm": 1.2768457054335158e-05, "learning_rate": 0.2898575622035974, "loss": 0.0, "num_input_tokens_seen": 2676472, "step": 4710 }, { "epoch": 82.72566371681415, "grad_norm": 4.696543328464031e-06, "learning_rate": 0.2898362590808683, "loss": 0.0, "num_input_tokens_seen": 2679736, "step": 4715 }, { "epoch": 82.8141592920354, "grad_norm": 1.9302939108456485e-05, "learning_rate": 0.2898149343936308, "loss": 0.0, "num_input_tokens_seen": 2682456, "step": 4720 }, { "epoch": 82.90265486725664, "grad_norm": 1.3511328688764479e-05, "learning_rate": 0.2897935881451734, "loss": 0.0, "num_input_tokens_seen": 2685880, "step": 4725 }, { "epoch": 82.99115044247787, "grad_norm": 3.0456401873379946e-05, "learning_rate": 0.28977222033878797, "loss": 0.0, "num_input_tokens_seen": 2688776, "step": 4730 }, { "epoch": 83.070796460177, "grad_norm": 1.2985710782231763e-05, "learning_rate": 0.28975083097776966, "loss": 0.0, "num_input_tokens_seen": 2691032, "step": 4735 }, { "epoch": 83.15929203539822, "grad_norm": 1.567654180689715e-05, "learning_rate": 0.28972942006541696, "loss": 0.0, "num_input_tokens_seen": 2693656, "step": 4740 }, { "epoch": 83.24778761061947, "grad_norm": 9.73894839262357e-06, "learning_rate": 0.2897079876050318, "loss": 0.0, "num_input_tokens_seen": 2696248, "step": 4745 }, { "epoch": 83.33628318584071, "grad_norm": 2.284490165038733e-06, "learning_rate": 0.2896865335999192, "loss": 0.0, "num_input_tokens_seen": 2699672, "step": 4750 }, { "epoch": 83.42477876106194, "grad_norm": 5.151137429493247e-06, "learning_rate": 0.28966505805338777, "loss": 0.0, "num_input_tokens_seen": 2702344, "step": 4755 }, { "epoch": 83.51327433628319, "grad_norm": 8.255156899394933e-06, "learning_rate": 0.2896435609687492, "loss": 0.0, "num_input_tokens_seen": 2705304, "step": 4760 }, { "epoch": 83.60176991150442, "grad_norm": 8.191512279154267e-06, "learning_rate": 0.2896220423493187, "loss": 0.0, "num_input_tokens_seen": 2708328, "step": 4765 }, { "epoch": 83.69026548672566, "grad_norm": 8.54578502185177e-06, "learning_rate": 0.28960050219841466, "loss": 0.0, "num_input_tokens_seen": 2711352, "step": 4770 }, { "epoch": 83.77876106194691, "grad_norm": 5.412281552708009e-06, "learning_rate": 0.28957894051935884, "loss": 0.0, "num_input_tokens_seen": 2714312, "step": 4775 }, { "epoch": 83.86725663716814, "grad_norm": 1.6235857401625253e-05, "learning_rate": 0.2895573573154764, "loss": 0.0, "num_input_tokens_seen": 2717544, "step": 4780 }, { "epoch": 83.95575221238938, "grad_norm": 1.0383406333858147e-05, "learning_rate": 0.28953575259009556, "loss": 0.0, "num_input_tokens_seen": 2720344, "step": 4785 }, { "epoch": 84.03539823008849, "grad_norm": 1.0638526646289392e-06, "learning_rate": 0.2895141263465482, "loss": 0.0, "num_input_tokens_seen": 2722840, "step": 4790 }, { "epoch": 84.12389380530973, "grad_norm": 3.017914878000738e-06, "learning_rate": 0.28949247858816934, "loss": 0.0, "num_input_tokens_seen": 2725656, "step": 4795 }, { "epoch": 84.21238938053098, "grad_norm": 4.111861471756129e-06, "learning_rate": 0.2894708093182973, "loss": 0.0, "num_input_tokens_seen": 2728488, "step": 4800 }, { "epoch": 84.21238938053098, "eval_loss": 0.38885733485221863, "eval_runtime": 0.9304, "eval_samples_per_second": 26.87, "eval_steps_per_second": 13.973, "num_input_tokens_seen": 2728488, "step": 4800 }, { "epoch": 84.30088495575221, "grad_norm": 1.6306403267662972e-05, "learning_rate": 0.2894491185402737, "loss": 0.0, "num_input_tokens_seen": 2731112, "step": 4805 }, { "epoch": 84.38938053097345, "grad_norm": 1.1271943549218122e-05, "learning_rate": 0.2894274062574437, "loss": 0.0, "num_input_tokens_seen": 2734328, "step": 4810 }, { "epoch": 84.47787610619469, "grad_norm": 1.7814594457377098e-06, "learning_rate": 0.2894056724731554, "loss": 0.0, "num_input_tokens_seen": 2737272, "step": 4815 }, { "epoch": 84.56637168141593, "grad_norm": 4.007998086308362e-06, "learning_rate": 0.28938391719076056, "loss": 0.0, "num_input_tokens_seen": 2740040, "step": 4820 }, { "epoch": 84.65486725663717, "grad_norm": 5.874401267647045e-06, "learning_rate": 0.28936214041361413, "loss": 0.0, "num_input_tokens_seen": 2743128, "step": 4825 }, { "epoch": 84.7433628318584, "grad_norm": 1.2655121281568427e-05, "learning_rate": 0.2893403421450743, "loss": 0.0, "num_input_tokens_seen": 2745656, "step": 4830 }, { "epoch": 84.83185840707965, "grad_norm": 1.8253658708999865e-05, "learning_rate": 0.2893185223885026, "loss": 0.0, "num_input_tokens_seen": 2748120, "step": 4835 }, { "epoch": 84.92035398230088, "grad_norm": 1.1924939826712944e-06, "learning_rate": 0.289296681147264, "loss": 0.0, "num_input_tokens_seen": 2751528, "step": 4840 }, { "epoch": 85.0, "grad_norm": 3.4109798434656113e-06, "learning_rate": 0.28927481842472663, "loss": 0.0, "num_input_tokens_seen": 2754000, "step": 4845 }, { "epoch": 85.08849557522124, "grad_norm": 2.948901237687096e-05, "learning_rate": 0.28925293422426207, "loss": 0.0, "num_input_tokens_seen": 2756992, "step": 4850 }, { "epoch": 85.17699115044248, "grad_norm": 1.2588737945407047e-06, "learning_rate": 0.28923102854924504, "loss": 0.0, "num_input_tokens_seen": 2760000, "step": 4855 }, { "epoch": 85.26548672566372, "grad_norm": 8.239495400630403e-06, "learning_rate": 0.2892091014030537, "loss": 0.0, "num_input_tokens_seen": 2762704, "step": 4860 }, { "epoch": 85.35398230088495, "grad_norm": 1.9073662770097144e-05, "learning_rate": 0.2891871527890696, "loss": 0.0, "num_input_tokens_seen": 2765568, "step": 4865 }, { "epoch": 85.4424778761062, "grad_norm": 8.761867320572492e-07, "learning_rate": 0.2891651827106773, "loss": 0.0, "num_input_tokens_seen": 2768592, "step": 4870 }, { "epoch": 85.53097345132744, "grad_norm": 1.3471468491843552e-06, "learning_rate": 0.2891431911712651, "loss": 0.0, "num_input_tokens_seen": 2771504, "step": 4875 }, { "epoch": 85.61946902654867, "grad_norm": 8.556247848900966e-06, "learning_rate": 0.2891211781742241, "loss": 0.0, "num_input_tokens_seen": 2774592, "step": 4880 }, { "epoch": 85.70796460176992, "grad_norm": 7.468584044545423e-06, "learning_rate": 0.2890991437229492, "loss": 0.0, "num_input_tokens_seen": 2777552, "step": 4885 }, { "epoch": 85.79646017699115, "grad_norm": 1.0246650163026061e-05, "learning_rate": 0.2890770878208383, "loss": 0.0, "num_input_tokens_seen": 2780272, "step": 4890 }, { "epoch": 85.88495575221239, "grad_norm": 3.511073373374529e-06, "learning_rate": 0.28905501047129273, "loss": 0.0, "num_input_tokens_seen": 2783008, "step": 4895 }, { "epoch": 85.97345132743362, "grad_norm": 1.733628596412018e-05, "learning_rate": 0.289032911677717, "loss": 0.0, "num_input_tokens_seen": 2785936, "step": 4900 }, { "epoch": 86.05309734513274, "grad_norm": 7.542731509602163e-06, "learning_rate": 0.28901079144351915, "loss": 0.0, "num_input_tokens_seen": 2788192, "step": 4905 }, { "epoch": 86.14159292035399, "grad_norm": 2.0946015865774825e-05, "learning_rate": 0.2889886497721103, "loss": 0.0, "num_input_tokens_seen": 2791344, "step": 4910 }, { "epoch": 86.23008849557522, "grad_norm": 7.863848622946534e-06, "learning_rate": 0.28896648666690505, "loss": 0.0, "num_input_tokens_seen": 2793792, "step": 4915 }, { "epoch": 86.31858407079646, "grad_norm": 9.639821655582637e-06, "learning_rate": 0.2889443021313212, "loss": 0.0, "num_input_tokens_seen": 2796320, "step": 4920 }, { "epoch": 86.40707964601769, "grad_norm": 5.281523954181466e-06, "learning_rate": 0.28892209616877984, "loss": 0.0, "num_input_tokens_seen": 2799360, "step": 4925 }, { "epoch": 86.49557522123894, "grad_norm": 9.040928489412181e-06, "learning_rate": 0.28889986878270546, "loss": 0.0, "num_input_tokens_seen": 2801824, "step": 4930 }, { "epoch": 86.58407079646018, "grad_norm": 1.5492018974327948e-06, "learning_rate": 0.28887761997652583, "loss": 0.0, "num_input_tokens_seen": 2804704, "step": 4935 }, { "epoch": 86.67256637168141, "grad_norm": 1.0514958376006689e-05, "learning_rate": 0.2888553497536719, "loss": 0.0, "num_input_tokens_seen": 2807424, "step": 4940 }, { "epoch": 86.76106194690266, "grad_norm": 9.266422239306848e-06, "learning_rate": 0.2888330581175781, "loss": 0.0, "num_input_tokens_seen": 2810304, "step": 4945 }, { "epoch": 86.84955752212389, "grad_norm": 6.9604320742655545e-06, "learning_rate": 0.28881074507168203, "loss": 0.0, "num_input_tokens_seen": 2813328, "step": 4950 }, { "epoch": 86.93805309734513, "grad_norm": 9.27461042010691e-06, "learning_rate": 0.2887884106194247, "loss": 0.0, "num_input_tokens_seen": 2816432, "step": 4955 }, { "epoch": 87.01769911504425, "grad_norm": 5.5976183830352966e-06, "learning_rate": 0.28876605476425027, "loss": 0.0, "num_input_tokens_seen": 2819264, "step": 4960 }, { "epoch": 87.10619469026548, "grad_norm": 8.884831913746893e-06, "learning_rate": 0.2887436775096064, "loss": 0.0, "num_input_tokens_seen": 2821984, "step": 4965 }, { "epoch": 87.19469026548673, "grad_norm": 1.0755811672424898e-05, "learning_rate": 0.2887212788589439, "loss": 0.0, "num_input_tokens_seen": 2824784, "step": 4970 }, { "epoch": 87.28318584070796, "grad_norm": 1.2911949852423277e-05, "learning_rate": 0.2886988588157169, "loss": 0.0, "num_input_tokens_seen": 2827456, "step": 4975 }, { "epoch": 87.3716814159292, "grad_norm": 1.3602093531517312e-05, "learning_rate": 0.28867641738338284, "loss": 0.0, "num_input_tokens_seen": 2830592, "step": 4980 }, { "epoch": 87.46017699115045, "grad_norm": 1.392661488353042e-06, "learning_rate": 0.2886539545654026, "loss": 0.0, "num_input_tokens_seen": 2833328, "step": 4985 }, { "epoch": 87.54867256637168, "grad_norm": 2.1592147732008016e-06, "learning_rate": 0.28863147036524006, "loss": 0.0, "num_input_tokens_seen": 2836752, "step": 4990 }, { "epoch": 87.63716814159292, "grad_norm": 3.7613256154145347e-06, "learning_rate": 0.2886089647863626, "loss": 0.0, "num_input_tokens_seen": 2840000, "step": 4995 }, { "epoch": 87.72566371681415, "grad_norm": 3.5086122807115316e-06, "learning_rate": 0.288586437832241, "loss": 0.0, "num_input_tokens_seen": 2842656, "step": 5000 }, { "epoch": 87.72566371681415, "eval_loss": 0.3831348419189453, "eval_runtime": 0.9129, "eval_samples_per_second": 27.385, "eval_steps_per_second": 14.24, "num_input_tokens_seen": 2842656, "step": 5000 }, { "epoch": 87.8141592920354, "grad_norm": 9.461453373660333e-06, "learning_rate": 0.28856388950634904, "loss": 0.0, "num_input_tokens_seen": 2845328, "step": 5005 }, { "epoch": 87.90265486725664, "grad_norm": 9.121586117544211e-06, "learning_rate": 0.288541319812164, "loss": 0.0, "num_input_tokens_seen": 2848560, "step": 5010 }, { "epoch": 87.99115044247787, "grad_norm": 6.533849045808893e-06, "learning_rate": 0.2885187287531665, "loss": 0.0, "num_input_tokens_seen": 2851200, "step": 5015 }, { "epoch": 88.070796460177, "grad_norm": 1.781542414391879e-05, "learning_rate": 0.2884961163328402, "loss": 0.0, "num_input_tokens_seen": 2853392, "step": 5020 }, { "epoch": 88.15929203539822, "grad_norm": 9.960565876099281e-06, "learning_rate": 0.28847348255467237, "loss": 0.0, "num_input_tokens_seen": 2856016, "step": 5025 }, { "epoch": 88.24778761061947, "grad_norm": 1.1412565072532743e-05, "learning_rate": 0.28845082742215333, "loss": 0.0, "num_input_tokens_seen": 2859104, "step": 5030 }, { "epoch": 88.33628318584071, "grad_norm": 5.683822109858738e-06, "learning_rate": 0.2884281509387769, "loss": 0.0, "num_input_tokens_seen": 2862016, "step": 5035 }, { "epoch": 88.42477876106194, "grad_norm": 1.148941919382196e-06, "learning_rate": 0.2884054531080399, "loss": 0.0, "num_input_tokens_seen": 2864800, "step": 5040 }, { "epoch": 88.51327433628319, "grad_norm": 1.4054267012397759e-05, "learning_rate": 0.28838273393344277, "loss": 0.0, "num_input_tokens_seen": 2867296, "step": 5045 }, { "epoch": 88.60176991150442, "grad_norm": 1.3280301573104225e-05, "learning_rate": 0.288359993418489, "loss": 0.0, "num_input_tokens_seen": 2870608, "step": 5050 }, { "epoch": 88.69026548672566, "grad_norm": 1.16450300993165e-05, "learning_rate": 0.28833723156668556, "loss": 0.0, "num_input_tokens_seen": 2873472, "step": 5055 }, { "epoch": 88.77876106194691, "grad_norm": 3.0814367164566647e-06, "learning_rate": 0.2883144483815425, "loss": 0.0, "num_input_tokens_seen": 2876592, "step": 5060 }, { "epoch": 88.86725663716814, "grad_norm": 4.007888946944149e-06, "learning_rate": 0.28829164386657335, "loss": 0.0, "num_input_tokens_seen": 2879440, "step": 5065 }, { "epoch": 88.95575221238938, "grad_norm": 5.850240540894447e-06, "learning_rate": 0.28826881802529486, "loss": 0.0, "num_input_tokens_seen": 2882560, "step": 5070 }, { "epoch": 89.03539823008849, "grad_norm": 6.122223226157075e-07, "learning_rate": 0.28824597086122705, "loss": 0.0, "num_input_tokens_seen": 2884808, "step": 5075 }, { "epoch": 89.12389380530973, "grad_norm": 9.078589755517896e-06, "learning_rate": 0.28822310237789317, "loss": 0.0, "num_input_tokens_seen": 2887656, "step": 5080 }, { "epoch": 89.21238938053098, "grad_norm": 1.4294032553152647e-06, "learning_rate": 0.2882002125788199, "loss": 0.0, "num_input_tokens_seen": 2890552, "step": 5085 }, { "epoch": 89.30088495575221, "grad_norm": 2.5968711270252243e-06, "learning_rate": 0.2881773014675371, "loss": 0.0, "num_input_tokens_seen": 2893288, "step": 5090 }, { "epoch": 89.38938053097345, "grad_norm": 5.55835595150711e-06, "learning_rate": 0.288154369047578, "loss": 0.0, "num_input_tokens_seen": 2896008, "step": 5095 }, { "epoch": 89.47787610619469, "grad_norm": 4.663425443141023e-06, "learning_rate": 0.28813141532247905, "loss": 0.0, "num_input_tokens_seen": 2899096, "step": 5100 }, { "epoch": 89.56637168141593, "grad_norm": 4.8926585805020295e-06, "learning_rate": 0.28810844029578, "loss": 0.0, "num_input_tokens_seen": 2901544, "step": 5105 }, { "epoch": 89.65486725663717, "grad_norm": 2.1385108993854374e-05, "learning_rate": 0.2880854439710238, "loss": 0.0, "num_input_tokens_seen": 2904632, "step": 5110 }, { "epoch": 89.7433628318584, "grad_norm": 1.2534471352410037e-05, "learning_rate": 0.28806242635175694, "loss": 0.0, "num_input_tokens_seen": 2907992, "step": 5115 }, { "epoch": 89.83185840707965, "grad_norm": 4.232079390931176e-06, "learning_rate": 0.2880393874415289, "loss": 0.0, "num_input_tokens_seen": 2911192, "step": 5120 }, { "epoch": 89.92035398230088, "grad_norm": 1.2675844118348323e-05, "learning_rate": 0.2880163272438926, "loss": 0.0, "num_input_tokens_seen": 2913912, "step": 5125 }, { "epoch": 90.0, "grad_norm": 1.3723955817113165e-05, "learning_rate": 0.2879932457624042, "loss": 0.0, "num_input_tokens_seen": 2916440, "step": 5130 }, { "epoch": 90.08849557522124, "grad_norm": 2.251704609079752e-05, "learning_rate": 0.2879701430006232, "loss": 0.0, "num_input_tokens_seen": 2919000, "step": 5135 }, { "epoch": 90.17699115044248, "grad_norm": 5.5813093240431044e-06, "learning_rate": 0.28794701896211233, "loss": 0.0, "num_input_tokens_seen": 2922520, "step": 5140 }, { "epoch": 90.26548672566372, "grad_norm": 1.4057060070626903e-05, "learning_rate": 0.28792387365043753, "loss": 0.0, "num_input_tokens_seen": 2925144, "step": 5145 }, { "epoch": 90.35398230088495, "grad_norm": 1.7582716509423335e-06, "learning_rate": 0.28790070706916815, "loss": 0.0, "num_input_tokens_seen": 2928216, "step": 5150 }, { "epoch": 90.4424778761062, "grad_norm": 7.362992164416937e-06, "learning_rate": 0.2878775192218768, "loss": 0.0, "num_input_tokens_seen": 2931416, "step": 5155 }, { "epoch": 90.53097345132744, "grad_norm": 1.3449930520437192e-05, "learning_rate": 0.2878543101121393, "loss": 0.0, "num_input_tokens_seen": 2934360, "step": 5160 }, { "epoch": 90.61946902654867, "grad_norm": 8.30828048492549e-06, "learning_rate": 0.28783107974353483, "loss": 0.0, "num_input_tokens_seen": 2937096, "step": 5165 }, { "epoch": 90.70796460176992, "grad_norm": 5.301880264596548e-06, "learning_rate": 0.2878078281196457, "loss": 0.0, "num_input_tokens_seen": 2939720, "step": 5170 }, { "epoch": 90.79646017699115, "grad_norm": 9.05451724975137e-06, "learning_rate": 0.28778455524405777, "loss": 0.0, "num_input_tokens_seen": 2942648, "step": 5175 }, { "epoch": 90.88495575221239, "grad_norm": 1.5162173440330662e-05, "learning_rate": 0.2877612611203598, "loss": 0.0, "num_input_tokens_seen": 2945032, "step": 5180 }, { "epoch": 90.97345132743362, "grad_norm": 7.1386130002792925e-06, "learning_rate": 0.28773794575214423, "loss": 0.0, "num_input_tokens_seen": 2948184, "step": 5185 }, { "epoch": 91.05309734513274, "grad_norm": 1.4461415958066937e-05, "learning_rate": 0.28771460914300645, "loss": 0.0, "num_input_tokens_seen": 2950648, "step": 5190 }, { "epoch": 91.14159292035399, "grad_norm": 3.3484454888821347e-06, "learning_rate": 0.2876912512965454, "loss": 0.0, "num_input_tokens_seen": 2954088, "step": 5195 }, { "epoch": 91.23008849557522, "grad_norm": 5.230941496847663e-06, "learning_rate": 0.287667872216363, "loss": 0.0, "num_input_tokens_seen": 2956824, "step": 5200 }, { "epoch": 91.23008849557522, "eval_loss": 0.38440048694610596, "eval_runtime": 0.9397, "eval_samples_per_second": 26.603, "eval_steps_per_second": 13.834, "num_input_tokens_seen": 2956824, "step": 5200 }, { "epoch": 91.31858407079646, "grad_norm": 2.0295258309488418e-06, "learning_rate": 0.2876444719060647, "loss": 0.0, "num_input_tokens_seen": 2959496, "step": 5205 }, { "epoch": 91.40707964601769, "grad_norm": 3.1040040084917564e-06, "learning_rate": 0.287621050369259, "loss": 0.0, "num_input_tokens_seen": 2962744, "step": 5210 }, { "epoch": 91.49557522123894, "grad_norm": 3.946323431591736e-06, "learning_rate": 0.28759760760955794, "loss": 0.0, "num_input_tokens_seen": 2965576, "step": 5215 }, { "epoch": 91.58407079646018, "grad_norm": 1.8091521951646428e-06, "learning_rate": 0.2875741436305766, "loss": 0.0, "num_input_tokens_seen": 2968264, "step": 5220 }, { "epoch": 91.67256637168141, "grad_norm": 5.5147042985481676e-06, "learning_rate": 0.28755065843593347, "loss": 0.0, "num_input_tokens_seen": 2971000, "step": 5225 }, { "epoch": 91.76106194690266, "grad_norm": 6.247760211408604e-06, "learning_rate": 0.2875271520292502, "loss": 0.0, "num_input_tokens_seen": 2973608, "step": 5230 }, { "epoch": 91.84955752212389, "grad_norm": 1.1990829989372287e-05, "learning_rate": 0.28750362441415184, "loss": 0.0, "num_input_tokens_seen": 2976184, "step": 5235 }, { "epoch": 91.93805309734513, "grad_norm": 8.481175427732524e-06, "learning_rate": 0.28748007559426664, "loss": 0.0, "num_input_tokens_seen": 2979208, "step": 5240 }, { "epoch": 92.01769911504425, "grad_norm": 5.959813279332593e-06, "learning_rate": 0.2874565055732261, "loss": 0.0, "num_input_tokens_seen": 2981840, "step": 5245 }, { "epoch": 92.10619469026548, "grad_norm": 1.2250218787812628e-05, "learning_rate": 0.28743291435466495, "loss": 0.0, "num_input_tokens_seen": 2984448, "step": 5250 }, { "epoch": 92.19469026548673, "grad_norm": 3.890971584041836e-06, "learning_rate": 0.2874093019422214, "loss": 0.0, "num_input_tokens_seen": 2987360, "step": 5255 }, { "epoch": 92.28318584070796, "grad_norm": 4.438588348421035e-06, "learning_rate": 0.28738566833953666, "loss": 0.0, "num_input_tokens_seen": 2990048, "step": 5260 }, { "epoch": 92.3716814159292, "grad_norm": 3.5161633604730014e-06, "learning_rate": 0.28736201355025537, "loss": 0.0, "num_input_tokens_seen": 2992992, "step": 5265 }, { "epoch": 92.46017699115045, "grad_norm": 8.457532203465234e-07, "learning_rate": 0.28733833757802535, "loss": 0.0, "num_input_tokens_seen": 2995824, "step": 5270 }, { "epoch": 92.54867256637168, "grad_norm": 7.232480129459873e-06, "learning_rate": 0.28731464042649785, "loss": 0.0, "num_input_tokens_seen": 2998688, "step": 5275 }, { "epoch": 92.63716814159292, "grad_norm": 4.593528501573019e-06, "learning_rate": 0.2872909220993271, "loss": 0.0, "num_input_tokens_seen": 3001248, "step": 5280 }, { "epoch": 92.72566371681415, "grad_norm": 6.342128472169861e-06, "learning_rate": 0.287267182600171, "loss": 0.0, "num_input_tokens_seen": 3003856, "step": 5285 }, { "epoch": 92.8141592920354, "grad_norm": 9.76822502707364e-06, "learning_rate": 0.2872434219326902, "loss": 0.0, "num_input_tokens_seen": 3007216, "step": 5290 }, { "epoch": 92.90265486725664, "grad_norm": 7.333170742640505e-06, "learning_rate": 0.28721964010054907, "loss": 0.0, "num_input_tokens_seen": 3010432, "step": 5295 }, { "epoch": 92.99115044247787, "grad_norm": 7.978036592248827e-06, "learning_rate": 0.28719583710741503, "loss": 0.0, "num_input_tokens_seen": 3013056, "step": 5300 }, { "epoch": 93.070796460177, "grad_norm": 7.157663731049979e-06, "learning_rate": 0.28717201295695877, "loss": 0.0, "num_input_tokens_seen": 3015736, "step": 5305 }, { "epoch": 93.15929203539822, "grad_norm": 1.272394547413569e-05, "learning_rate": 0.28714816765285434, "loss": 0.0, "num_input_tokens_seen": 3018552, "step": 5310 }, { "epoch": 93.24778761061947, "grad_norm": 2.4609657884866465e-06, "learning_rate": 0.28712430119877896, "loss": 0.0, "num_input_tokens_seen": 3021416, "step": 5315 }, { "epoch": 93.33628318584071, "grad_norm": 1.5214490304060746e-05, "learning_rate": 0.28710041359841304, "loss": 0.0, "num_input_tokens_seen": 3024168, "step": 5320 }, { "epoch": 93.42477876106194, "grad_norm": 5.6995741033460945e-06, "learning_rate": 0.28707650485544056, "loss": 0.0, "num_input_tokens_seen": 3026856, "step": 5325 }, { "epoch": 93.51327433628319, "grad_norm": 1.3058319382253103e-05, "learning_rate": 0.28705257497354836, "loss": 0.0, "num_input_tokens_seen": 3029864, "step": 5330 }, { "epoch": 93.60176991150442, "grad_norm": 2.4836615466483636e-06, "learning_rate": 0.28702862395642675, "loss": 0.0, "num_input_tokens_seen": 3032888, "step": 5335 }, { "epoch": 93.69026548672566, "grad_norm": 9.12006180442404e-06, "learning_rate": 0.28700465180776935, "loss": 0.0, "num_input_tokens_seen": 3035608, "step": 5340 }, { "epoch": 93.77876106194691, "grad_norm": 6.13910697211395e-06, "learning_rate": 0.2869806585312729, "loss": 0.0, "num_input_tokens_seen": 3038536, "step": 5345 }, { "epoch": 93.86725663716814, "grad_norm": 9.552423762215767e-06, "learning_rate": 0.28695664413063754, "loss": 0.0, "num_input_tokens_seen": 3040920, "step": 5350 }, { "epoch": 93.95575221238938, "grad_norm": 4.05720811613719e-06, "learning_rate": 0.28693260860956654, "loss": 0.0, "num_input_tokens_seen": 3043688, "step": 5355 }, { "epoch": 94.03539823008849, "grad_norm": 2.4857777134457137e-06, "learning_rate": 0.2869085519717665, "loss": 0.0, "num_input_tokens_seen": 3046240, "step": 5360 }, { "epoch": 94.12389380530973, "grad_norm": 8.040336183512409e-07, "learning_rate": 0.28688447422094726, "loss": 0.0, "num_input_tokens_seen": 3049072, "step": 5365 }, { "epoch": 94.21238938053098, "grad_norm": 1.3127288184477948e-05, "learning_rate": 0.2868603753608219, "loss": 0.0, "num_input_tokens_seen": 3052000, "step": 5370 }, { "epoch": 94.30088495575221, "grad_norm": 8.754888767725788e-06, "learning_rate": 0.28683625539510665, "loss": 0.0, "num_input_tokens_seen": 3054752, "step": 5375 }, { "epoch": 94.38938053097345, "grad_norm": 8.120430720737204e-06, "learning_rate": 0.28681211432752135, "loss": 0.0, "num_input_tokens_seen": 3057792, "step": 5380 }, { "epoch": 94.47787610619469, "grad_norm": 1.510795937065268e-05, "learning_rate": 0.2867879521617887, "loss": 0.0, "num_input_tokens_seen": 3060912, "step": 5385 }, { "epoch": 94.56637168141593, "grad_norm": 6.285953986662207e-06, "learning_rate": 0.28676376890163485, "loss": 0.0, "num_input_tokens_seen": 3064096, "step": 5390 }, { "epoch": 94.65486725663717, "grad_norm": 6.538160050695296e-06, "learning_rate": 0.2867395645507891, "loss": 0.0, "num_input_tokens_seen": 3066896, "step": 5395 }, { "epoch": 94.7433628318584, "grad_norm": 1.5279762010322884e-05, "learning_rate": 0.2867153391129842, "loss": 0.0, "num_input_tokens_seen": 3069840, "step": 5400 }, { "epoch": 94.7433628318584, "eval_loss": 0.3785821497440338, "eval_runtime": 0.9457, "eval_samples_per_second": 26.436, "eval_steps_per_second": 13.747, "num_input_tokens_seen": 3069840, "step": 5400 }, { "epoch": 94.83185840707965, "grad_norm": 6.3126603890850674e-06, "learning_rate": 0.28669109259195585, "loss": 0.0, "num_input_tokens_seen": 3072432, "step": 5405 }, { "epoch": 94.92035398230088, "grad_norm": 1.2755456737068016e-05, "learning_rate": 0.2866668249914433, "loss": 0.0, "num_input_tokens_seen": 3075152, "step": 5410 }, { "epoch": 95.0, "grad_norm": 2.0567915726132924e-06, "learning_rate": 0.2866425363151889, "loss": 0.0, "num_input_tokens_seen": 3077616, "step": 5415 }, { "epoch": 95.08849557522124, "grad_norm": 3.7781182982143946e-06, "learning_rate": 0.2866182265669382, "loss": 0.0, "num_input_tokens_seen": 3080304, "step": 5420 }, { "epoch": 95.17699115044248, "grad_norm": 7.118732355593238e-06, "learning_rate": 0.28659389575044014, "loss": 0.0, "num_input_tokens_seen": 3083168, "step": 5425 }, { "epoch": 95.26548672566372, "grad_norm": 3.226055468985578e-06, "learning_rate": 0.28656954386944683, "loss": 0.0, "num_input_tokens_seen": 3086608, "step": 5430 }, { "epoch": 95.35398230088495, "grad_norm": 8.298664397443645e-06, "learning_rate": 0.28654517092771353, "loss": 0.0, "num_input_tokens_seen": 3089840, "step": 5435 }, { "epoch": 95.4424778761062, "grad_norm": 8.834174877847545e-06, "learning_rate": 0.286520776928999, "loss": 0.0, "num_input_tokens_seen": 3092464, "step": 5440 }, { "epoch": 95.53097345132744, "grad_norm": 9.578356184647419e-06, "learning_rate": 0.286496361877065, "loss": 0.0, "num_input_tokens_seen": 3095376, "step": 5445 }, { "epoch": 95.61946902654867, "grad_norm": 8.355544196092524e-06, "learning_rate": 0.28647192577567676, "loss": 0.0, "num_input_tokens_seen": 3098352, "step": 5450 }, { "epoch": 95.70796460176992, "grad_norm": 3.535879386618035e-06, "learning_rate": 0.28644746862860254, "loss": 0.0, "num_input_tokens_seen": 3100992, "step": 5455 }, { "epoch": 95.79646017699115, "grad_norm": 8.77792956543999e-07, "learning_rate": 0.2864229904396139, "loss": 0.0, "num_input_tokens_seen": 3104016, "step": 5460 }, { "epoch": 95.88495575221239, "grad_norm": 6.0454935919551644e-06, "learning_rate": 0.28639849121248573, "loss": 0.0, "num_input_tokens_seen": 3107088, "step": 5465 }, { "epoch": 95.97345132743362, "grad_norm": 4.778089987667045e-06, "learning_rate": 0.28637397095099615, "loss": 0.0, "num_input_tokens_seen": 3109696, "step": 5470 }, { "epoch": 96.05309734513274, "grad_norm": 8.965533197624609e-06, "learning_rate": 0.28634942965892646, "loss": 0.0, "num_input_tokens_seen": 3111880, "step": 5475 }, { "epoch": 96.14159292035399, "grad_norm": 3.2200096029555425e-06, "learning_rate": 0.28632486734006124, "loss": 0.0, "num_input_tokens_seen": 3114696, "step": 5480 }, { "epoch": 96.23008849557522, "grad_norm": 8.183047611964867e-06, "learning_rate": 0.28630028399818835, "loss": 0.0, "num_input_tokens_seen": 3118040, "step": 5485 }, { "epoch": 96.31858407079646, "grad_norm": 5.1480137699400075e-06, "learning_rate": 0.2862756796370987, "loss": 0.0, "num_input_tokens_seen": 3120856, "step": 5490 }, { "epoch": 96.40707964601769, "grad_norm": 1.558330222906079e-05, "learning_rate": 0.2862510542605868, "loss": 0.0, "num_input_tokens_seen": 3123560, "step": 5495 }, { "epoch": 96.49557522123894, "grad_norm": 4.589311402014573e-07, "learning_rate": 0.2862264078724501, "loss": 0.0, "num_input_tokens_seen": 3126200, "step": 5500 }, { "epoch": 96.58407079646018, "grad_norm": 3.7958411667204928e-06, "learning_rate": 0.28620174047648933, "loss": 0.0, "num_input_tokens_seen": 3129336, "step": 5505 }, { "epoch": 96.67256637168141, "grad_norm": 3.7388774671853753e-06, "learning_rate": 0.2861770520765086, "loss": 0.0, "num_input_tokens_seen": 3132296, "step": 5510 }, { "epoch": 96.76106194690266, "grad_norm": 2.616256779219839e-06, "learning_rate": 0.2861523426763151, "loss": 0.0, "num_input_tokens_seen": 3135160, "step": 5515 }, { "epoch": 96.84955752212389, "grad_norm": 8.177626114047598e-06, "learning_rate": 0.2861276122797194, "loss": 0.0, "num_input_tokens_seen": 3137912, "step": 5520 }, { "epoch": 96.93805309734513, "grad_norm": 2.7976798264717218e-06, "learning_rate": 0.28610286089053516, "loss": 0.0, "num_input_tokens_seen": 3140872, "step": 5525 }, { "epoch": 97.01769911504425, "grad_norm": 8.705530376573734e-07, "learning_rate": 0.28607808851257943, "loss": 0.0, "num_input_tokens_seen": 3143568, "step": 5530 }, { "epoch": 97.10619469026548, "grad_norm": 7.264301530085504e-06, "learning_rate": 0.28605329514967237, "loss": 0.0, "num_input_tokens_seen": 3146368, "step": 5535 }, { "epoch": 97.19469026548673, "grad_norm": 7.986803211679216e-06, "learning_rate": 0.2860284808056374, "loss": 0.0, "num_input_tokens_seen": 3149040, "step": 5540 }, { "epoch": 97.28318584070796, "grad_norm": 3.0409169085032772e-06, "learning_rate": 0.28600364548430135, "loss": 0.0, "num_input_tokens_seen": 3152224, "step": 5545 }, { "epoch": 97.3716814159292, "grad_norm": 4.981218808097765e-06, "learning_rate": 0.28597878918949393, "loss": 0.0, "num_input_tokens_seen": 3155216, "step": 5550 }, { "epoch": 97.46017699115045, "grad_norm": 5.273623173707165e-06, "learning_rate": 0.2859539119250485, "loss": 0.0, "num_input_tokens_seen": 3157984, "step": 5555 }, { "epoch": 97.54867256637168, "grad_norm": 2.834217184499721e-06, "learning_rate": 0.2859290136948013, "loss": 0.0, "num_input_tokens_seen": 3160928, "step": 5560 }, { "epoch": 97.63716814159292, "grad_norm": 2.9708717192988843e-06, "learning_rate": 0.28590409450259197, "loss": 0.0, "num_input_tokens_seen": 3163920, "step": 5565 }, { "epoch": 97.72566371681415, "grad_norm": 3.281513272668235e-06, "learning_rate": 0.28587915435226346, "loss": 0.0, "num_input_tokens_seen": 3166416, "step": 5570 }, { "epoch": 97.8141592920354, "grad_norm": 1.58724797074683e-05, "learning_rate": 0.2858541932476617, "loss": 0.0, "num_input_tokens_seen": 3169456, "step": 5575 }, { "epoch": 97.90265486725664, "grad_norm": 5.964952833892312e-06, "learning_rate": 0.2858292111926361, "loss": 0.0, "num_input_tokens_seen": 3172208, "step": 5580 }, { "epoch": 97.99115044247787, "grad_norm": 1.2183214494143613e-05, "learning_rate": 0.28580420819103924, "loss": 0.0, "num_input_tokens_seen": 3175584, "step": 5585 }, { "epoch": 98.070796460177, "grad_norm": 4.998957138013793e-06, "learning_rate": 0.2857791842467269, "loss": 0.0, "num_input_tokens_seen": 3178096, "step": 5590 }, { "epoch": 98.15929203539822, "grad_norm": 3.0503570087603293e-06, "learning_rate": 0.2857541393635579, "loss": 0.0, "num_input_tokens_seen": 3181152, "step": 5595 }, { "epoch": 98.24778761061947, "grad_norm": 7.21739161235746e-06, "learning_rate": 0.2857290735453948, "loss": 0.0, "num_input_tokens_seen": 3183600, "step": 5600 }, { "epoch": 98.24778761061947, "eval_loss": 0.39556920528411865, "eval_runtime": 0.9408, "eval_samples_per_second": 26.572, "eval_steps_per_second": 13.817, "num_input_tokens_seen": 3183600, "step": 5600 }, { "epoch": 98.33628318584071, "grad_norm": 4.905630248686066e-06, "learning_rate": 0.28570398679610276, "loss": 0.0, "num_input_tokens_seen": 3186528, "step": 5605 }, { "epoch": 98.42477876106194, "grad_norm": 3.255623141740216e-06, "learning_rate": 0.2856788791195506, "loss": 0.0, "num_input_tokens_seen": 3189840, "step": 5610 }, { "epoch": 98.51327433628319, "grad_norm": 3.229627964174142e-06, "learning_rate": 0.28565375051961023, "loss": 0.0, "num_input_tokens_seen": 3192512, "step": 5615 }, { "epoch": 98.60176991150442, "grad_norm": 1.2250823147041956e-06, "learning_rate": 0.28562860100015686, "loss": 0.0, "num_input_tokens_seen": 3195648, "step": 5620 }, { "epoch": 98.69026548672566, "grad_norm": 1.0644415851857048e-05, "learning_rate": 0.2856034305650687, "loss": 0.0, "num_input_tokens_seen": 3198160, "step": 5625 }, { "epoch": 98.77876106194691, "grad_norm": 7.3345972850802355e-06, "learning_rate": 0.28557823921822756, "loss": 0.0, "num_input_tokens_seen": 3201184, "step": 5630 }, { "epoch": 98.86725663716814, "grad_norm": 1.0097201084136032e-05, "learning_rate": 0.2855530269635181, "loss": 0.0, "num_input_tokens_seen": 3204016, "step": 5635 }, { "epoch": 98.95575221238938, "grad_norm": 1.746657108014915e-05, "learning_rate": 0.2855277938048284, "loss": 0.0, "num_input_tokens_seen": 3206624, "step": 5640 }, { "epoch": 99.03539823008849, "grad_norm": 2.4694882085896097e-06, "learning_rate": 0.2855025397460498, "loss": 0.0, "num_input_tokens_seen": 3208856, "step": 5645 }, { "epoch": 99.12389380530973, "grad_norm": 1.423750745743746e-05, "learning_rate": 0.28547726479107666, "loss": 0.0, "num_input_tokens_seen": 3211992, "step": 5650 }, { "epoch": 99.21238938053098, "grad_norm": 2.8929041491210228e-06, "learning_rate": 0.2854519689438068, "loss": 0.0, "num_input_tokens_seen": 3215064, "step": 5655 }, { "epoch": 99.30088495575221, "grad_norm": 2.9804689347656677e-06, "learning_rate": 0.2854266522081412, "loss": 0.0, "num_input_tokens_seen": 3217736, "step": 5660 }, { "epoch": 99.38938053097345, "grad_norm": 5.857941232534358e-06, "learning_rate": 0.28540131458798385, "loss": 0.0, "num_input_tokens_seen": 3220600, "step": 5665 }, { "epoch": 99.47787610619469, "grad_norm": 5.448228876048233e-06, "learning_rate": 0.28537595608724226, "loss": 0.0, "num_input_tokens_seen": 3223592, "step": 5670 }, { "epoch": 99.56637168141593, "grad_norm": 4.407000233186409e-06, "learning_rate": 0.28535057670982705, "loss": 0.0, "num_input_tokens_seen": 3226248, "step": 5675 }, { "epoch": 99.65486725663717, "grad_norm": 3.2127420581673505e-06, "learning_rate": 0.285325176459652, "loss": 0.0, "num_input_tokens_seen": 3229432, "step": 5680 }, { "epoch": 99.7433628318584, "grad_norm": 9.831165698415134e-06, "learning_rate": 0.28529975534063406, "loss": 0.0, "num_input_tokens_seen": 3231896, "step": 5685 }, { "epoch": 99.83185840707965, "grad_norm": 1.8643887642610935e-06, "learning_rate": 0.2852743133566936, "loss": 0.0, "num_input_tokens_seen": 3235032, "step": 5690 }, { "epoch": 99.92035398230088, "grad_norm": 4.8655547288944945e-06, "learning_rate": 0.2852488505117541, "loss": 0.0, "num_input_tokens_seen": 3237752, "step": 5695 }, { "epoch": 100.0, "grad_norm": 5.355445864552166e-07, "learning_rate": 0.28522336680974214, "loss": 0.0, "num_input_tokens_seen": 3239888, "step": 5700 }, { "epoch": 100.08849557522124, "grad_norm": 2.8886913696624106e-06, "learning_rate": 0.2851978622545877, "loss": 0.0, "num_input_tokens_seen": 3242672, "step": 5705 }, { "epoch": 100.17699115044248, "grad_norm": 4.4802509364672005e-06, "learning_rate": 0.285172336850224, "loss": 0.0, "num_input_tokens_seen": 3245408, "step": 5710 }, { "epoch": 100.26548672566372, "grad_norm": 3.3934311431949027e-06, "learning_rate": 0.2851467906005871, "loss": 0.0, "num_input_tokens_seen": 3248592, "step": 5715 }, { "epoch": 100.35398230088495, "grad_norm": 8.108844667731319e-06, "learning_rate": 0.28512122350961683, "loss": 0.0, "num_input_tokens_seen": 3251072, "step": 5720 }, { "epoch": 100.4424778761062, "grad_norm": 1.1402268000892946e-06, "learning_rate": 0.2850956355812559, "loss": 0.0, "num_input_tokens_seen": 3253872, "step": 5725 }, { "epoch": 100.53097345132744, "grad_norm": 2.7269518341199728e-06, "learning_rate": 0.28507002681945015, "loss": 0.0, "num_input_tokens_seen": 3256752, "step": 5730 }, { "epoch": 100.61946902654867, "grad_norm": 1.6866455325725838e-06, "learning_rate": 0.28504439722814895, "loss": 0.0, "num_input_tokens_seen": 3259584, "step": 5735 }, { "epoch": 100.70796460176992, "grad_norm": 4.127869033254683e-06, "learning_rate": 0.28501874681130457, "loss": 0.0, "num_input_tokens_seen": 3263120, "step": 5740 }, { "epoch": 100.79646017699115, "grad_norm": 1.3445212061924394e-05, "learning_rate": 0.2849930755728727, "loss": 0.0, "num_input_tokens_seen": 3266032, "step": 5745 }, { "epoch": 100.88495575221239, "grad_norm": 1.0505062846277724e-06, "learning_rate": 0.28496738351681217, "loss": 0.0, "num_input_tokens_seen": 3268736, "step": 5750 }, { "epoch": 100.97345132743362, "grad_norm": 1.0929511518043e-05, "learning_rate": 0.284941670647085, "loss": 0.0, "num_input_tokens_seen": 3272016, "step": 5755 }, { "epoch": 101.05309734513274, "grad_norm": 1.240762344423274e-06, "learning_rate": 0.2849159369676563, "loss": 0.0, "num_input_tokens_seen": 3274392, "step": 5760 }, { "epoch": 101.14159292035399, "grad_norm": 3.9250580812222324e-06, "learning_rate": 0.2848901824824948, "loss": 0.0, "num_input_tokens_seen": 3276952, "step": 5765 }, { "epoch": 101.23008849557522, "grad_norm": 3.7109812183189206e-06, "learning_rate": 0.284864407195572, "loss": 0.0, "num_input_tokens_seen": 3280104, "step": 5770 }, { "epoch": 101.31858407079646, "grad_norm": 2.959833636850817e-06, "learning_rate": 0.28483861111086284, "loss": 0.0, "num_input_tokens_seen": 3282840, "step": 5775 }, { "epoch": 101.40707964601769, "grad_norm": 2.8342963105387753e-06, "learning_rate": 0.2848127942323453, "loss": 0.0, "num_input_tokens_seen": 3285672, "step": 5780 }, { "epoch": 101.49557522123894, "grad_norm": 3.219944801458041e-06, "learning_rate": 0.2847869565640007, "loss": 0.0, "num_input_tokens_seen": 3288872, "step": 5785 }, { "epoch": 101.58407079646018, "grad_norm": 2.8587567157956073e-06, "learning_rate": 0.2847610981098136, "loss": 0.0, "num_input_tokens_seen": 3292264, "step": 5790 }, { "epoch": 101.67256637168141, "grad_norm": 4.762504886457464e-06, "learning_rate": 0.2847352188737716, "loss": 0.0, "num_input_tokens_seen": 3294808, "step": 5795 }, { "epoch": 101.76106194690266, "grad_norm": 1.465115747123491e-05, "learning_rate": 0.2847093188598658, "loss": 0.0, "num_input_tokens_seen": 3297896, "step": 5800 }, { "epoch": 101.76106194690266, "eval_loss": 0.38708847761154175, "eval_runtime": 0.9179, "eval_samples_per_second": 27.235, "eval_steps_per_second": 14.162, "num_input_tokens_seen": 3297896, "step": 5800 }, { "epoch": 101.84955752212389, "grad_norm": 2.9225434445834253e-06, "learning_rate": 0.28468339807209003, "loss": 0.0, "num_input_tokens_seen": 3300648, "step": 5805 }, { "epoch": 101.93805309734513, "grad_norm": 7.71751638239948e-06, "learning_rate": 0.2846574565144418, "loss": 0.0, "num_input_tokens_seen": 3303400, "step": 5810 }, { "epoch": 102.01769911504425, "grad_norm": 1.3598828445537947e-06, "learning_rate": 0.28463149419092154, "loss": 0.0, "num_input_tokens_seen": 3305792, "step": 5815 }, { "epoch": 102.10619469026548, "grad_norm": 1.2941313798364718e-06, "learning_rate": 0.284605511105533, "loss": 0.0, "num_input_tokens_seen": 3308880, "step": 5820 }, { "epoch": 102.19469026548673, "grad_norm": 1.2958608976987307e-06, "learning_rate": 0.28457950726228315, "loss": 0.0, "num_input_tokens_seen": 3312368, "step": 5825 }, { "epoch": 102.28318584070796, "grad_norm": 7.3799733399937395e-06, "learning_rate": 0.28455348266518193, "loss": 0.0, "num_input_tokens_seen": 3315072, "step": 5830 }, { "epoch": 102.3716814159292, "grad_norm": 6.492371539934538e-06, "learning_rate": 0.28452743731824287, "loss": 0.0, "num_input_tokens_seen": 3317728, "step": 5835 }, { "epoch": 102.46017699115045, "grad_norm": 1.2733941730402876e-05, "learning_rate": 0.28450137122548236, "loss": 0.0, "num_input_tokens_seen": 3320592, "step": 5840 }, { "epoch": 102.54867256637168, "grad_norm": 1.0832934549398487e-06, "learning_rate": 0.2844752843909201, "loss": 0.0, "num_input_tokens_seen": 3322976, "step": 5845 }, { "epoch": 102.63716814159292, "grad_norm": 9.709224286780227e-06, "learning_rate": 0.28444917681857923, "loss": 0.0, "num_input_tokens_seen": 3325872, "step": 5850 }, { "epoch": 102.72566371681415, "grad_norm": 7.281668331415858e-06, "learning_rate": 0.28442304851248557, "loss": 0.0, "num_input_tokens_seen": 3328432, "step": 5855 }, { "epoch": 102.8141592920354, "grad_norm": 2.1977873529976932e-06, "learning_rate": 0.2843968994766686, "loss": 0.0, "num_input_tokens_seen": 3331600, "step": 5860 }, { "epoch": 102.90265486725664, "grad_norm": 8.34025013318751e-06, "learning_rate": 0.28437072971516075, "loss": 0.0, "num_input_tokens_seen": 3335040, "step": 5865 }, { "epoch": 102.99115044247787, "grad_norm": 1.7243605725525413e-06, "learning_rate": 0.2843445392319979, "loss": 0.0, "num_input_tokens_seen": 3337920, "step": 5870 }, { "epoch": 103.070796460177, "grad_norm": 4.696688336025545e-07, "learning_rate": 0.28431832803121865, "loss": 0.0, "num_input_tokens_seen": 3340552, "step": 5875 }, { "epoch": 103.15929203539822, "grad_norm": 7.644524885108694e-06, "learning_rate": 0.28429209611686534, "loss": 0.0, "num_input_tokens_seen": 3342904, "step": 5880 }, { "epoch": 103.24778761061947, "grad_norm": 2.1358916910685366e-06, "learning_rate": 0.28426584349298323, "loss": 0.0, "num_input_tokens_seen": 3345480, "step": 5885 }, { "epoch": 103.33628318584071, "grad_norm": 5.622121534543112e-06, "learning_rate": 0.2842395701636207, "loss": 0.0, "num_input_tokens_seen": 3348456, "step": 5890 }, { "epoch": 103.42477876106194, "grad_norm": 1.729793780214095e-06, "learning_rate": 0.28421327613282954, "loss": 0.0, "num_input_tokens_seen": 3351000, "step": 5895 }, { "epoch": 103.51327433628319, "grad_norm": 3.069641707043047e-06, "learning_rate": 0.28418696140466454, "loss": 0.0, "num_input_tokens_seen": 3353624, "step": 5900 }, { "epoch": 103.60176991150442, "grad_norm": 5.894824425922707e-06, "learning_rate": 0.2841606259831838, "loss": 0.0, "num_input_tokens_seen": 3356504, "step": 5905 }, { "epoch": 103.69026548672566, "grad_norm": 2.033227247011382e-06, "learning_rate": 0.2841342698724486, "loss": 0.0, "num_input_tokens_seen": 3359080, "step": 5910 }, { "epoch": 103.77876106194691, "grad_norm": 1.504096871940419e-05, "learning_rate": 0.28410789307652334, "loss": 0.0, "num_input_tokens_seen": 3362216, "step": 5915 }, { "epoch": 103.86725663716814, "grad_norm": 5.029681233281735e-06, "learning_rate": 0.2840814955994756, "loss": 0.0, "num_input_tokens_seen": 3365960, "step": 5920 }, { "epoch": 103.95575221238938, "grad_norm": 8.646763490105513e-06, "learning_rate": 0.2840550774453763, "loss": 0.0, "num_input_tokens_seen": 3369192, "step": 5925 }, { "epoch": 104.03539823008849, "grad_norm": 5.243864052317804e-06, "learning_rate": 0.28402863861829947, "loss": 0.0, "num_input_tokens_seen": 3371848, "step": 5930 }, { "epoch": 104.12389380530973, "grad_norm": 3.3659880500636064e-06, "learning_rate": 0.2840021791223222, "loss": 0.0, "num_input_tokens_seen": 3374792, "step": 5935 }, { "epoch": 104.21238938053098, "grad_norm": 1.0396927791589405e-05, "learning_rate": 0.2839756989615249, "loss": 0.0, "num_input_tokens_seen": 3377816, "step": 5940 }, { "epoch": 104.30088495575221, "grad_norm": 9.120252002503548e-07, "learning_rate": 0.28394919813999125, "loss": 0.0, "num_input_tokens_seen": 3380808, "step": 5945 }, { "epoch": 104.38938053097345, "grad_norm": 2.27427517529577e-06, "learning_rate": 0.28392267666180787, "loss": 0.0, "num_input_tokens_seen": 3384008, "step": 5950 }, { "epoch": 104.47787610619469, "grad_norm": 4.938204256177414e-06, "learning_rate": 0.2838961345310648, "loss": 0.0, "num_input_tokens_seen": 3386472, "step": 5955 }, { "epoch": 104.56637168141593, "grad_norm": 4.593832727550762e-06, "learning_rate": 0.2838695717518552, "loss": 0.0, "num_input_tokens_seen": 3389272, "step": 5960 }, { "epoch": 104.65486725663717, "grad_norm": 9.174766091746278e-06, "learning_rate": 0.28384298832827526, "loss": 0.0, "num_input_tokens_seen": 3392392, "step": 5965 }, { "epoch": 104.7433628318584, "grad_norm": 4.127657575736521e-06, "learning_rate": 0.28381638426442457, "loss": 0.0, "num_input_tokens_seen": 3395304, "step": 5970 }, { "epoch": 104.83185840707965, "grad_norm": 7.833178642613348e-06, "learning_rate": 0.2837897595644057, "loss": 0.0, "num_input_tokens_seen": 3397896, "step": 5975 }, { "epoch": 104.92035398230088, "grad_norm": 5.309138941811398e-06, "learning_rate": 0.28376311423232475, "loss": 0.0, "num_input_tokens_seen": 3400552, "step": 5980 }, { "epoch": 105.0, "grad_norm": 7.199303126981249e-06, "learning_rate": 0.2837364482722905, "loss": 0.0, "num_input_tokens_seen": 3402936, "step": 5985 }, { "epoch": 105.08849557522124, "grad_norm": 3.823163751803804e-06, "learning_rate": 0.28370976168841533, "loss": 0.0, "num_input_tokens_seen": 3405640, "step": 5990 }, { "epoch": 105.17699115044248, "grad_norm": 8.13579754321836e-06, "learning_rate": 0.2836830544848146, "loss": 0.0, "num_input_tokens_seen": 3408232, "step": 5995 }, { "epoch": 105.26548672566372, "grad_norm": 7.552147508249618e-06, "learning_rate": 0.2836563266656069, "loss": 0.0, "num_input_tokens_seen": 3411544, "step": 6000 }, { "epoch": 105.26548672566372, "eval_loss": 0.39194348454475403, "eval_runtime": 0.9414, "eval_samples_per_second": 26.557, "eval_steps_per_second": 13.81, "num_input_tokens_seen": 3411544, "step": 6000 }, { "epoch": 105.35398230088495, "grad_norm": 1.6060544112406205e-06, "learning_rate": 0.283629578234914, "loss": 0.0, "num_input_tokens_seen": 3414664, "step": 6005 }, { "epoch": 105.4424778761062, "grad_norm": 1.640885102460743e-06, "learning_rate": 0.2836028091968608, "loss": 0.0, "num_input_tokens_seen": 3417528, "step": 6010 }, { "epoch": 105.53097345132744, "grad_norm": 3.6557187286234694e-06, "learning_rate": 0.28357601955557554, "loss": 0.0, "num_input_tokens_seen": 3420408, "step": 6015 }, { "epoch": 105.61946902654867, "grad_norm": 7.786599780956749e-06, "learning_rate": 0.2835492093151894, "loss": 0.0, "num_input_tokens_seen": 3423272, "step": 6020 }, { "epoch": 105.70796460176992, "grad_norm": 1.7630585489314399e-06, "learning_rate": 0.2835223784798369, "loss": 0.0, "num_input_tokens_seen": 3426264, "step": 6025 }, { "epoch": 105.79646017699115, "grad_norm": 2.4380735794693464e-06, "learning_rate": 0.2834955270536557, "loss": 0.0, "num_input_tokens_seen": 3429304, "step": 6030 }, { "epoch": 105.88495575221239, "grad_norm": 6.758303356946271e-07, "learning_rate": 0.2834686550407866, "loss": 0.0, "num_input_tokens_seen": 3432120, "step": 6035 }, { "epoch": 105.97345132743362, "grad_norm": 7.810292117937934e-06, "learning_rate": 0.28344176244537367, "loss": 0.0, "num_input_tokens_seen": 3434552, "step": 6040 }, { "epoch": 106.05309734513274, "grad_norm": 5.205596607993357e-06, "learning_rate": 0.28341484927156396, "loss": 0.0, "num_input_tokens_seen": 3436568, "step": 6045 }, { "epoch": 106.14159292035399, "grad_norm": 2.5173019366775407e-06, "learning_rate": 0.28338791552350795, "loss": 0.0, "num_input_tokens_seen": 3440088, "step": 6050 }, { "epoch": 106.23008849557522, "grad_norm": 2.6564669042272726e-06, "learning_rate": 0.28336096120535914, "loss": 0.0, "num_input_tokens_seen": 3443400, "step": 6055 }, { "epoch": 106.31858407079646, "grad_norm": 4.999465090804733e-06, "learning_rate": 0.2833339863212741, "loss": 0.0, "num_input_tokens_seen": 3446120, "step": 6060 }, { "epoch": 106.40707964601769, "grad_norm": 5.723108188249171e-06, "learning_rate": 0.28330699087541283, "loss": 0.0, "num_input_tokens_seen": 3448520, "step": 6065 }, { "epoch": 106.49557522123894, "grad_norm": 4.3669965634762775e-06, "learning_rate": 0.2832799748719384, "loss": 0.0, "num_input_tokens_seen": 3451192, "step": 6070 }, { "epoch": 106.58407079646018, "grad_norm": 4.829083081858698e-06, "learning_rate": 0.28325293831501686, "loss": 0.0, "num_input_tokens_seen": 3454376, "step": 6075 }, { "epoch": 106.67256637168141, "grad_norm": 4.783206804859219e-06, "learning_rate": 0.2832258812088177, "loss": 0.0, "num_input_tokens_seen": 3456872, "step": 6080 }, { "epoch": 106.76106194690266, "grad_norm": 1.961733687494416e-06, "learning_rate": 0.2831988035575134, "loss": 0.0, "num_input_tokens_seen": 3459512, "step": 6085 }, { "epoch": 106.84955752212389, "grad_norm": 3.420240773266414e-06, "learning_rate": 0.28317170536527975, "loss": 0.0, "num_input_tokens_seen": 3462472, "step": 6090 }, { "epoch": 106.93805309734513, "grad_norm": 1.540603307148558e-06, "learning_rate": 0.2831445866362956, "loss": 0.0, "num_input_tokens_seen": 3465096, "step": 6095 }, { "epoch": 107.01769911504425, "grad_norm": 4.0676791286387015e-06, "learning_rate": 0.2831174473747429, "loss": 0.0, "num_input_tokens_seen": 3467568, "step": 6100 }, { "epoch": 107.10619469026548, "grad_norm": 4.596388862410095e-06, "learning_rate": 0.2830902875848071, "loss": 0.0, "num_input_tokens_seen": 3470832, "step": 6105 }, { "epoch": 107.19469026548673, "grad_norm": 3.92007041227771e-06, "learning_rate": 0.28306310727067635, "loss": 0.0, "num_input_tokens_seen": 3474032, "step": 6110 }, { "epoch": 107.28318584070796, "grad_norm": 2.386195092185517e-06, "learning_rate": 0.2830359064365423, "loss": 0.0, "num_input_tokens_seen": 3476944, "step": 6115 }, { "epoch": 107.3716814159292, "grad_norm": 5.933271950198105e-06, "learning_rate": 0.28300868508659965, "loss": 0.0, "num_input_tokens_seen": 3479840, "step": 6120 }, { "epoch": 107.46017699115045, "grad_norm": 1.1817024869742454e-06, "learning_rate": 0.28298144322504626, "loss": 0.0, "num_input_tokens_seen": 3482960, "step": 6125 }, { "epoch": 107.54867256637168, "grad_norm": 7.710846148256678e-06, "learning_rate": 0.2829541808560832, "loss": 0.0, "num_input_tokens_seen": 3485280, "step": 6130 }, { "epoch": 107.63716814159292, "grad_norm": 4.327190708863782e-06, "learning_rate": 0.2829268979839146, "loss": 0.0, "num_input_tokens_seen": 3488304, "step": 6135 }, { "epoch": 107.72566371681415, "grad_norm": 1.8247881143906852e-06, "learning_rate": 0.2828995946127479, "loss": 0.0, "num_input_tokens_seen": 3491232, "step": 6140 }, { "epoch": 107.8141592920354, "grad_norm": 7.242450010380708e-06, "learning_rate": 0.2828722707467936, "loss": 0.0, "num_input_tokens_seen": 3494368, "step": 6145 }, { "epoch": 107.90265486725664, "grad_norm": 4.23063465859741e-06, "learning_rate": 0.2828449263902653, "loss": 0.0, "num_input_tokens_seen": 3496912, "step": 6150 }, { "epoch": 107.99115044247787, "grad_norm": 1.4614139445257024e-06, "learning_rate": 0.28281756154738, "loss": 0.0, "num_input_tokens_seen": 3499632, "step": 6155 }, { "epoch": 108.070796460177, "grad_norm": 1.1395134151825914e-06, "learning_rate": 0.28279017622235764, "loss": 0.0, "num_input_tokens_seen": 3502320, "step": 6160 }, { "epoch": 108.15929203539822, "grad_norm": 3.575568371161353e-06, "learning_rate": 0.28276277041942127, "loss": 0.0, "num_input_tokens_seen": 3505104, "step": 6165 }, { "epoch": 108.24778761061947, "grad_norm": 2.577141458459664e-06, "learning_rate": 0.2827353441427974, "loss": 0.0, "num_input_tokens_seen": 3507744, "step": 6170 }, { "epoch": 108.33628318584071, "grad_norm": 4.559305125440005e-06, "learning_rate": 0.2827078973967153, "loss": 0.0, "num_input_tokens_seen": 3510848, "step": 6175 }, { "epoch": 108.42477876106194, "grad_norm": 1.0862028148039826e-06, "learning_rate": 0.2826804301854078, "loss": 0.0, "num_input_tokens_seen": 3513952, "step": 6180 }, { "epoch": 108.51327433628319, "grad_norm": 1.0676974852685817e-05, "learning_rate": 0.2826529425131105, "loss": 0.0, "num_input_tokens_seen": 3516992, "step": 6185 }, { "epoch": 108.60176991150442, "grad_norm": 1.7182989040520624e-06, "learning_rate": 0.2826254343840625, "loss": 0.0, "num_input_tokens_seen": 3519952, "step": 6190 }, { "epoch": 108.69026548672566, "grad_norm": 4.042374257551273e-06, "learning_rate": 0.2825979058025059, "loss": 0.0, "num_input_tokens_seen": 3522688, "step": 6195 }, { "epoch": 108.77876106194691, "grad_norm": 9.53942617343273e-06, "learning_rate": 0.2825703567726858, "loss": 0.0, "num_input_tokens_seen": 3525472, "step": 6200 }, { "epoch": 108.77876106194691, "eval_loss": 0.4034156799316406, "eval_runtime": 0.9299, "eval_samples_per_second": 26.884, "eval_steps_per_second": 13.98, "num_input_tokens_seen": 3525472, "step": 6200 }, { "epoch": 108.86725663716814, "grad_norm": 6.004881470289547e-06, "learning_rate": 0.2825427872988508, "loss": 0.0, "num_input_tokens_seen": 3528720, "step": 6205 }, { "epoch": 108.95575221238938, "grad_norm": 6.326859875116497e-06, "learning_rate": 0.28251519738525227, "loss": 0.0, "num_input_tokens_seen": 3531264, "step": 6210 }, { "epoch": 109.03539823008849, "grad_norm": 5.74574096390279e-06, "learning_rate": 0.28248758703614507, "loss": 0.0, "num_input_tokens_seen": 3533456, "step": 6215 }, { "epoch": 109.12389380530973, "grad_norm": 5.970582606096286e-06, "learning_rate": 0.28245995625578696, "loss": 0.0, "num_input_tokens_seen": 3536624, "step": 6220 }, { "epoch": 109.21238938053098, "grad_norm": 1.1766745046770666e-05, "learning_rate": 0.282432305048439, "loss": 0.0, "num_input_tokens_seen": 3539520, "step": 6225 }, { "epoch": 109.30088495575221, "grad_norm": 5.028723535360768e-07, "learning_rate": 0.28240463341836536, "loss": 0.0, "num_input_tokens_seen": 3542160, "step": 6230 }, { "epoch": 109.38938053097345, "grad_norm": 4.864241873292485e-06, "learning_rate": 0.2823769413698334, "loss": 0.0, "num_input_tokens_seen": 3544944, "step": 6235 }, { "epoch": 109.47787610619469, "grad_norm": 1.9876190435752505e-06, "learning_rate": 0.2823492289071135, "loss": 0.0, "num_input_tokens_seen": 3547424, "step": 6240 }, { "epoch": 109.56637168141593, "grad_norm": 2.344625045225257e-06, "learning_rate": 0.2823214960344793, "loss": 0.0, "num_input_tokens_seen": 3550416, "step": 6245 }, { "epoch": 109.65486725663717, "grad_norm": 3.9153246689238586e-06, "learning_rate": 0.28229374275620756, "loss": 0.0, "num_input_tokens_seen": 3553728, "step": 6250 }, { "epoch": 109.7433628318584, "grad_norm": 1.2720953463940532e-06, "learning_rate": 0.28226596907657814, "loss": 0.0, "num_input_tokens_seen": 3556992, "step": 6255 }, { "epoch": 109.83185840707965, "grad_norm": 1.0839229389603133e-06, "learning_rate": 0.28223817499987414, "loss": 0.0, "num_input_tokens_seen": 3559776, "step": 6260 }, { "epoch": 109.92035398230088, "grad_norm": 2.831737901942688e-06, "learning_rate": 0.2822103605303818, "loss": 0.0, "num_input_tokens_seen": 3562640, "step": 6265 }, { "epoch": 110.0, "grad_norm": 6.756088737347454e-07, "learning_rate": 0.2821825256723903, "loss": 0.0, "num_input_tokens_seen": 3565040, "step": 6270 }, { "epoch": 110.08849557522124, "grad_norm": 2.571578079368919e-06, "learning_rate": 0.2821546704301923, "loss": 0.0, "num_input_tokens_seen": 3567632, "step": 6275 }, { "epoch": 110.17699115044248, "grad_norm": 2.2433482627093326e-06, "learning_rate": 0.2821267948080834, "loss": 0.0, "num_input_tokens_seen": 3570816, "step": 6280 }, { "epoch": 110.26548672566372, "grad_norm": 1.6697358660167083e-06, "learning_rate": 0.28209889881036226, "loss": 0.0, "num_input_tokens_seen": 3573392, "step": 6285 }, { "epoch": 110.35398230088495, "grad_norm": 7.419398571073543e-06, "learning_rate": 0.28207098244133094, "loss": 0.0, "num_input_tokens_seen": 3576192, "step": 6290 }, { "epoch": 110.4424778761062, "grad_norm": 4.1724465518200304e-06, "learning_rate": 0.2820430457052943, "loss": 0.0, "num_input_tokens_seen": 3578928, "step": 6295 }, { "epoch": 110.53097345132744, "grad_norm": 4.223645646561636e-06, "learning_rate": 0.28201508860656077, "loss": 0.0, "num_input_tokens_seen": 3581680, "step": 6300 }, { "epoch": 110.61946902654867, "grad_norm": 3.505168706396944e-06, "learning_rate": 0.2819871111494415, "loss": 0.0, "num_input_tokens_seen": 3584880, "step": 6305 }, { "epoch": 110.70796460176992, "grad_norm": 6.081432275095722e-06, "learning_rate": 0.28195911333825113, "loss": 0.0, "num_input_tokens_seen": 3587760, "step": 6310 }, { "epoch": 110.79646017699115, "grad_norm": 2.263700253024581e-06, "learning_rate": 0.28193109517730713, "loss": 0.0, "num_input_tokens_seen": 3590752, "step": 6315 }, { "epoch": 110.88495575221239, "grad_norm": 1.3871551800548332e-06, "learning_rate": 0.2819030566709303, "loss": 0.0, "num_input_tokens_seen": 3593648, "step": 6320 }, { "epoch": 110.97345132743362, "grad_norm": 2.5103088319156086e-06, "learning_rate": 0.2818749978234445, "loss": 0.0, "num_input_tokens_seen": 3596384, "step": 6325 }, { "epoch": 111.05309734513274, "grad_norm": 6.441619916586205e-06, "learning_rate": 0.2818469186391768, "loss": 0.0, "num_input_tokens_seen": 3598640, "step": 6330 }, { "epoch": 111.14159292035399, "grad_norm": 3.4211325328215025e-06, "learning_rate": 0.28181881912245743, "loss": 0.0, "num_input_tokens_seen": 3601328, "step": 6335 }, { "epoch": 111.23008849557522, "grad_norm": 8.913207238947507e-07, "learning_rate": 0.2817906992776195, "loss": 0.0, "num_input_tokens_seen": 3604192, "step": 6340 }, { "epoch": 111.31858407079646, "grad_norm": 1.8401816532787052e-06, "learning_rate": 0.28176255910899967, "loss": 0.0, "num_input_tokens_seen": 3606800, "step": 6345 }, { "epoch": 111.40707964601769, "grad_norm": 2.5008516786328983e-06, "learning_rate": 0.2817343986209373, "loss": 0.0, "num_input_tokens_seen": 3609728, "step": 6350 }, { "epoch": 111.49557522123894, "grad_norm": 5.574369879468577e-06, "learning_rate": 0.2817062178177753, "loss": 0.0, "num_input_tokens_seen": 3612480, "step": 6355 }, { "epoch": 111.58407079646018, "grad_norm": 2.0235231659171404e-06, "learning_rate": 0.2816780167038593, "loss": 0.0, "num_input_tokens_seen": 3615760, "step": 6360 }, { "epoch": 111.67256637168141, "grad_norm": 2.8737561024172464e-06, "learning_rate": 0.28164979528353834, "loss": 0.0, "num_input_tokens_seen": 3618656, "step": 6365 }, { "epoch": 111.76106194690266, "grad_norm": 3.7827453525096644e-06, "learning_rate": 0.28162155356116453, "loss": 0.0, "num_input_tokens_seen": 3621264, "step": 6370 }, { "epoch": 111.84955752212389, "grad_norm": 1.1472453707028762e-06, "learning_rate": 0.28159329154109314, "loss": 0.0, "num_input_tokens_seen": 3624000, "step": 6375 }, { "epoch": 111.93805309734513, "grad_norm": 5.60845455765957e-06, "learning_rate": 0.28156500922768246, "loss": 0.0, "num_input_tokens_seen": 3627072, "step": 6380 }, { "epoch": 112.01769911504425, "grad_norm": 3.014007916135597e-06, "learning_rate": 0.28153670662529406, "loss": 0.0, "num_input_tokens_seen": 3629896, "step": 6385 }, { "epoch": 112.10619469026548, "grad_norm": 6.613041705350042e-07, "learning_rate": 0.28150838373829246, "loss": 0.0, "num_input_tokens_seen": 3632520, "step": 6390 }, { "epoch": 112.19469026548673, "grad_norm": 9.81797984422883e-06, "learning_rate": 0.2814800405710455, "loss": 0.0, "num_input_tokens_seen": 3635432, "step": 6395 }, { "epoch": 112.28318584070796, "grad_norm": 2.9945929327368503e-06, "learning_rate": 0.2814516771279239, "loss": 0.0, "num_input_tokens_seen": 3638584, "step": 6400 }, { "epoch": 112.28318584070796, "eval_loss": 0.41279280185699463, "eval_runtime": 0.9376, "eval_samples_per_second": 26.662, "eval_steps_per_second": 13.864, "num_input_tokens_seen": 3638584, "step": 6400 }, { "epoch": 112.3716814159292, "grad_norm": 4.5655096982955e-06, "learning_rate": 0.28142329341330186, "loss": 0.0, "num_input_tokens_seen": 3641288, "step": 6405 }, { "epoch": 112.46017699115045, "grad_norm": 2.089519739456591e-06, "learning_rate": 0.2813948894315564, "loss": 0.0, "num_input_tokens_seen": 3643800, "step": 6410 }, { "epoch": 112.54867256637168, "grad_norm": 4.490387709665811e-06, "learning_rate": 0.2813664651870677, "loss": 0.0, "num_input_tokens_seen": 3646792, "step": 6415 }, { "epoch": 112.63716814159292, "grad_norm": 8.071289130384685e-07, "learning_rate": 0.28133802068421926, "loss": 0.0, "num_input_tokens_seen": 3649960, "step": 6420 }, { "epoch": 112.72566371681415, "grad_norm": 1.0283761184837203e-06, "learning_rate": 0.28130955592739754, "loss": 0.0, "num_input_tokens_seen": 3653080, "step": 6425 }, { "epoch": 112.8141592920354, "grad_norm": 6.037210482645605e-07, "learning_rate": 0.2812810709209922, "loss": 0.0, "num_input_tokens_seen": 3655432, "step": 6430 }, { "epoch": 112.90265486725664, "grad_norm": 2.893356167987804e-06, "learning_rate": 0.2812525656693959, "loss": 0.0, "num_input_tokens_seen": 3658296, "step": 6435 }, { "epoch": 112.99115044247787, "grad_norm": 1.4307754554465646e-06, "learning_rate": 0.28122404017700453, "loss": 0.0, "num_input_tokens_seen": 3661048, "step": 6440 }, { "epoch": 113.070796460177, "grad_norm": 3.616989260990522e-06, "learning_rate": 0.2811954944482171, "loss": 0.0, "num_input_tokens_seen": 3663752, "step": 6445 }, { "epoch": 113.15929203539822, "grad_norm": 2.33480545830389e-06, "learning_rate": 0.2811669284874358, "loss": 0.0, "num_input_tokens_seen": 3666504, "step": 6450 }, { "epoch": 113.24778761061947, "grad_norm": 1.8145689182347269e-06, "learning_rate": 0.2811383422990657, "loss": 0.0, "num_input_tokens_seen": 3669464, "step": 6455 }, { "epoch": 113.33628318584071, "grad_norm": 5.135151695867535e-06, "learning_rate": 0.2811097358875152, "loss": 0.0, "num_input_tokens_seen": 3672584, "step": 6460 }, { "epoch": 113.42477876106194, "grad_norm": 2.878039822462597e-06, "learning_rate": 0.2810811092571959, "loss": 0.0, "num_input_tokens_seen": 3675400, "step": 6465 }, { "epoch": 113.51327433628319, "grad_norm": 7.295124078154913e-07, "learning_rate": 0.28105246241252224, "loss": 0.0, "num_input_tokens_seen": 3678488, "step": 6470 }, { "epoch": 113.60176991150442, "grad_norm": 8.516425964444352e-07, "learning_rate": 0.28102379535791194, "loss": 0.0, "num_input_tokens_seen": 3681240, "step": 6475 }, { "epoch": 113.69026548672566, "grad_norm": 1.1380113846826134e-06, "learning_rate": 0.2809951080977859, "loss": 0.0, "num_input_tokens_seen": 3684456, "step": 6480 }, { "epoch": 113.77876106194691, "grad_norm": 2.962567350550671e-06, "learning_rate": 0.28096640063656797, "loss": 0.0, "num_input_tokens_seen": 3687240, "step": 6485 }, { "epoch": 113.86725663716814, "grad_norm": 3.2509383345313836e-06, "learning_rate": 0.2809376729786852, "loss": 0.0, "num_input_tokens_seen": 3689736, "step": 6490 }, { "epoch": 113.95575221238938, "grad_norm": 3.876380560541293e-06, "learning_rate": 0.28090892512856785, "loss": 0.0, "num_input_tokens_seen": 3692584, "step": 6495 }, { "epoch": 114.03539823008849, "grad_norm": 5.931256964686327e-06, "learning_rate": 0.2808801570906491, "loss": 0.0, "num_input_tokens_seen": 3694592, "step": 6500 }, { "epoch": 114.12389380530973, "grad_norm": 1.6424693285443936e-06, "learning_rate": 0.2808513688693654, "loss": 0.0, "num_input_tokens_seen": 3697552, "step": 6505 }, { "epoch": 114.21238938053098, "grad_norm": 4.631248884834349e-06, "learning_rate": 0.28082256046915627, "loss": 0.0, "num_input_tokens_seen": 3700240, "step": 6510 }, { "epoch": 114.30088495575221, "grad_norm": 4.553704911813838e-06, "learning_rate": 0.28079373189446427, "loss": 0.0, "num_input_tokens_seen": 3703280, "step": 6515 }, { "epoch": 114.38938053097345, "grad_norm": 3.377109578650561e-06, "learning_rate": 0.28076488314973513, "loss": 0.0, "num_input_tokens_seen": 3706368, "step": 6520 }, { "epoch": 114.47787610619469, "grad_norm": 3.315898993605515e-06, "learning_rate": 0.28073601423941774, "loss": 0.0, "num_input_tokens_seen": 3708960, "step": 6525 }, { "epoch": 114.56637168141593, "grad_norm": 7.3036717367358506e-06, "learning_rate": 0.28070712516796403, "loss": 0.0, "num_input_tokens_seen": 3712064, "step": 6530 }, { "epoch": 114.65486725663717, "grad_norm": 4.499315195971576e-07, "learning_rate": 0.28067821593982906, "loss": 0.0, "num_input_tokens_seen": 3715072, "step": 6535 }, { "epoch": 114.7433628318584, "grad_norm": 5.083532869321061e-06, "learning_rate": 0.28064928655947097, "loss": 0.0, "num_input_tokens_seen": 3718304, "step": 6540 }, { "epoch": 114.83185840707965, "grad_norm": 3.659087951746187e-06, "learning_rate": 0.28062033703135103, "loss": 0.0, "num_input_tokens_seen": 3720928, "step": 6545 }, { "epoch": 114.92035398230088, "grad_norm": 3.8036662317608716e-06, "learning_rate": 0.2805913673599337, "loss": 0.0, "num_input_tokens_seen": 3723792, "step": 6550 }, { "epoch": 115.0, "grad_norm": 3.122393650301092e-07, "learning_rate": 0.2805623775496864, "loss": 0.0, "num_input_tokens_seen": 3726240, "step": 6555 }, { "epoch": 115.08849557522124, "grad_norm": 8.868903023540042e-06, "learning_rate": 0.2805333676050797, "loss": 0.0, "num_input_tokens_seen": 3729184, "step": 6560 }, { "epoch": 115.17699115044248, "grad_norm": 2.1956632281217026e-06, "learning_rate": 0.2805043375305873, "loss": 0.0, "num_input_tokens_seen": 3732400, "step": 6565 }, { "epoch": 115.26548672566372, "grad_norm": 3.1212086923915194e-06, "learning_rate": 0.2804752873306861, "loss": 0.0, "num_input_tokens_seen": 3735344, "step": 6570 }, { "epoch": 115.35398230088495, "grad_norm": 2.4761682198004564e-06, "learning_rate": 0.2804462170098559, "loss": 0.0, "num_input_tokens_seen": 3738208, "step": 6575 }, { "epoch": 115.4424778761062, "grad_norm": 3.0259809591370868e-06, "learning_rate": 0.2804171265725797, "loss": 0.0, "num_input_tokens_seen": 3741184, "step": 6580 }, { "epoch": 115.53097345132744, "grad_norm": 3.7790910027979407e-06, "learning_rate": 0.28038801602334373, "loss": 0.0, "num_input_tokens_seen": 3743648, "step": 6585 }, { "epoch": 115.61946902654867, "grad_norm": 1.2967542488695472e-06, "learning_rate": 0.28035888536663717, "loss": 0.0, "num_input_tokens_seen": 3746544, "step": 6590 }, { "epoch": 115.70796460176992, "grad_norm": 5.396545020630583e-06, "learning_rate": 0.2803297346069522, "loss": 0.0, "num_input_tokens_seen": 3749008, "step": 6595 }, { "epoch": 115.79646017699115, "grad_norm": 6.649771421507467e-06, "learning_rate": 0.28030056374878437, "loss": 0.0, "num_input_tokens_seen": 3752608, "step": 6600 }, { "epoch": 115.79646017699115, "eval_loss": 0.4014095067977905, "eval_runtime": 0.9189, "eval_samples_per_second": 27.205, "eval_steps_per_second": 14.147, "num_input_tokens_seen": 3752608, "step": 6600 }, { "epoch": 115.88495575221239, "grad_norm": 1.8778681578623946e-06, "learning_rate": 0.2802713727966321, "loss": 0.0, "num_input_tokens_seen": 3755152, "step": 6605 }, { "epoch": 115.97345132743362, "grad_norm": 1.8126271470464417e-06, "learning_rate": 0.28024216175499717, "loss": 0.0, "num_input_tokens_seen": 3758048, "step": 6610 }, { "epoch": 116.05309734513274, "grad_norm": 2.566056764408131e-06, "learning_rate": 0.2802129306283841, "loss": 0.0, "num_input_tokens_seen": 3760768, "step": 6615 }, { "epoch": 116.14159292035399, "grad_norm": 5.246491127763875e-06, "learning_rate": 0.28018367942130074, "loss": 0.0, "num_input_tokens_seen": 3763296, "step": 6620 }, { "epoch": 116.23008849557522, "grad_norm": 1.5780573221491068e-06, "learning_rate": 0.28015440813825804, "loss": 0.0, "num_input_tokens_seen": 3766656, "step": 6625 }, { "epoch": 116.31858407079646, "grad_norm": 3.5800467230728827e-06, "learning_rate": 0.28012511678377006, "loss": 0.0, "num_input_tokens_seen": 3769568, "step": 6630 }, { "epoch": 116.40707964601769, "grad_norm": 4.50969628218445e-06, "learning_rate": 0.28009580536235373, "loss": 0.0, "num_input_tokens_seen": 3772480, "step": 6635 }, { "epoch": 116.49557522123894, "grad_norm": 1.1545778306754073e-06, "learning_rate": 0.28006647387852934, "loss": 0.0, "num_input_tokens_seen": 3775360, "step": 6640 }, { "epoch": 116.58407079646018, "grad_norm": 7.052641421978478e-07, "learning_rate": 0.28003712233682015, "loss": 0.0, "num_input_tokens_seen": 3778352, "step": 6645 }, { "epoch": 116.67256637168141, "grad_norm": 3.890377684001578e-06, "learning_rate": 0.2800077507417526, "loss": 0.0, "num_input_tokens_seen": 3781216, "step": 6650 }, { "epoch": 116.76106194690266, "grad_norm": 3.996365194325335e-06, "learning_rate": 0.2799783590978561, "loss": 0.0, "num_input_tokens_seen": 3783664, "step": 6655 }, { "epoch": 116.84955752212389, "grad_norm": 6.993755505391164e-06, "learning_rate": 0.2799489474096632, "loss": 0.0, "num_input_tokens_seen": 3786576, "step": 6660 }, { "epoch": 116.93805309734513, "grad_norm": 5.268841505312594e-06, "learning_rate": 0.27991951568170953, "loss": 0.0, "num_input_tokens_seen": 3789504, "step": 6665 }, { "epoch": 117.01769911504425, "grad_norm": 6.807047157053603e-06, "learning_rate": 0.2798900639185339, "loss": 0.0, "num_input_tokens_seen": 3791752, "step": 6670 }, { "epoch": 117.10619469026548, "grad_norm": 7.140005891415058e-06, "learning_rate": 0.2798605921246781, "loss": 0.0, "num_input_tokens_seen": 3794520, "step": 6675 }, { "epoch": 117.19469026548673, "grad_norm": 3.160890400977223e-06, "learning_rate": 0.2798311003046871, "loss": 0.0, "num_input_tokens_seen": 3797096, "step": 6680 }, { "epoch": 117.28318584070796, "grad_norm": 1.093963533094211e-06, "learning_rate": 0.2798015884631089, "loss": 0.0, "num_input_tokens_seen": 3799576, "step": 6685 }, { "epoch": 117.3716814159292, "grad_norm": 1.8338051859245752e-06, "learning_rate": 0.27977205660449445, "loss": 0.0, "num_input_tokens_seen": 3802632, "step": 6690 }, { "epoch": 117.46017699115045, "grad_norm": 2.9383779747149674e-06, "learning_rate": 0.2797425047333981, "loss": 0.0, "num_input_tokens_seen": 3805864, "step": 6695 }, { "epoch": 117.54867256637168, "grad_norm": 4.917459136777325e-06, "learning_rate": 0.27971293285437715, "loss": 0.0, "num_input_tokens_seen": 3809096, "step": 6700 }, { "epoch": 117.63716814159292, "grad_norm": 3.287883600933128e-06, "learning_rate": 0.2796833409719918, "loss": 0.0, "num_input_tokens_seen": 3811912, "step": 6705 }, { "epoch": 117.72566371681415, "grad_norm": 2.6435270683577983e-06, "learning_rate": 0.27965372909080566, "loss": 0.0, "num_input_tokens_seen": 3814424, "step": 6710 }, { "epoch": 117.8141592920354, "grad_norm": 5.0904209274449386e-06, "learning_rate": 0.27962409721538506, "loss": 0.0, "num_input_tokens_seen": 3817496, "step": 6715 }, { "epoch": 117.90265486725664, "grad_norm": 1.1088524161095847e-06, "learning_rate": 0.27959444535029976, "loss": 0.0, "num_input_tokens_seen": 3820184, "step": 6720 }, { "epoch": 117.99115044247787, "grad_norm": 6.124179890321102e-06, "learning_rate": 0.27956477350012243, "loss": 0.0, "num_input_tokens_seen": 3823400, "step": 6725 }, { "epoch": 118.070796460177, "grad_norm": 2.953082685053232e-06, "learning_rate": 0.27953508166942875, "loss": 0.0, "num_input_tokens_seen": 3825952, "step": 6730 }, { "epoch": 118.15929203539822, "grad_norm": 3.6296173675509635e-06, "learning_rate": 0.27950536986279767, "loss": 0.0, "num_input_tokens_seen": 3828912, "step": 6735 }, { "epoch": 118.24778761061947, "grad_norm": 1.3488142940332182e-06, "learning_rate": 0.2794756380848111, "loss": 0.0, "num_input_tokens_seen": 3831824, "step": 6740 }, { "epoch": 118.33628318584071, "grad_norm": 1.1574784366530366e-06, "learning_rate": 0.279445886340054, "loss": 0.0, "num_input_tokens_seen": 3834688, "step": 6745 }, { "epoch": 118.42477876106194, "grad_norm": 3.3695471302053193e-06, "learning_rate": 0.27941611463311455, "loss": 0.0, "num_input_tokens_seen": 3837680, "step": 6750 }, { "epoch": 118.51327433628319, "grad_norm": 3.107046950390213e-06, "learning_rate": 0.2793863229685839, "loss": 0.0, "num_input_tokens_seen": 3840176, "step": 6755 }, { "epoch": 118.60176991150442, "grad_norm": 1.6883093394426396e-06, "learning_rate": 0.27935651135105627, "loss": 0.0, "num_input_tokens_seen": 3843536, "step": 6760 }, { "epoch": 118.69026548672566, "grad_norm": 3.5395726172282593e-06, "learning_rate": 0.279326679785129, "loss": 0.0, "num_input_tokens_seen": 3846512, "step": 6765 }, { "epoch": 118.77876106194691, "grad_norm": 3.279932798250229e-06, "learning_rate": 0.2792968282754024, "loss": 0.0, "num_input_tokens_seen": 3849184, "step": 6770 }, { "epoch": 118.86725663716814, "grad_norm": 1.5161696182985906e-06, "learning_rate": 0.2792669568264801, "loss": 0.0, "num_input_tokens_seen": 3852064, "step": 6775 }, { "epoch": 118.95575221238938, "grad_norm": 7.331223514484009e-06, "learning_rate": 0.27923706544296856, "loss": 0.0, "num_input_tokens_seen": 3854688, "step": 6780 }, { "epoch": 119.03539823008849, "grad_norm": 4.011179044027813e-06, "learning_rate": 0.2792071541294775, "loss": 0.0, "num_input_tokens_seen": 3856896, "step": 6785 }, { "epoch": 119.12389380530973, "grad_norm": 3.297665898571722e-06, "learning_rate": 0.27917722289061947, "loss": 0.0, "num_input_tokens_seen": 3859152, "step": 6790 }, { "epoch": 119.21238938053098, "grad_norm": 3.4152069474657765e-06, "learning_rate": 0.27914727173101034, "loss": 0.0, "num_input_tokens_seen": 3862256, "step": 6795 }, { "epoch": 119.30088495575221, "grad_norm": 2.4761127406236483e-06, "learning_rate": 0.279117300655269, "loss": 0.0, "num_input_tokens_seen": 3865376, "step": 6800 }, { "epoch": 119.30088495575221, "eval_loss": 0.413632333278656, "eval_runtime": 0.9441, "eval_samples_per_second": 26.481, "eval_steps_per_second": 13.77, "num_input_tokens_seen": 3865376, "step": 6800 }, { "epoch": 119.38938053097345, "grad_norm": 1.7857523744169157e-06, "learning_rate": 0.2790873096680173, "loss": 0.0, "num_input_tokens_seen": 3868048, "step": 6805 }, { "epoch": 119.47787610619469, "grad_norm": 1.6539314628971624e-06, "learning_rate": 0.2790572987738802, "loss": 0.0, "num_input_tokens_seen": 3870816, "step": 6810 }, { "epoch": 119.56637168141593, "grad_norm": 4.1911630432878155e-06, "learning_rate": 0.27902726797748584, "loss": 0.0, "num_input_tokens_seen": 3874048, "step": 6815 }, { "epoch": 119.65486725663717, "grad_norm": 2.0297484297771007e-06, "learning_rate": 0.2789972172834652, "loss": 0.0, "num_input_tokens_seen": 3876832, "step": 6820 }, { "epoch": 119.7433628318584, "grad_norm": 2.7727110136765987e-06, "learning_rate": 0.2789671466964527, "loss": 0.0, "num_input_tokens_seen": 3879296, "step": 6825 }, { "epoch": 119.83185840707965, "grad_norm": 4.3897871364606544e-06, "learning_rate": 0.2789370562210854, "loss": 0.0, "num_input_tokens_seen": 3882592, "step": 6830 }, { "epoch": 119.92035398230088, "grad_norm": 3.456360900599975e-06, "learning_rate": 0.27890694586200376, "loss": 0.0, "num_input_tokens_seen": 3885584, "step": 6835 }, { "epoch": 120.0, "grad_norm": 4.630872808775166e-06, "learning_rate": 0.2788768156238511, "loss": 0.0, "num_input_tokens_seen": 3888056, "step": 6840 }, { "epoch": 120.08849557522124, "grad_norm": 4.375242042442551e-06, "learning_rate": 0.27884666551127385, "loss": 0.0, "num_input_tokens_seen": 3890744, "step": 6845 }, { "epoch": 120.17699115044248, "grad_norm": 3.999995897174813e-06, "learning_rate": 0.2788164955289217, "loss": 0.0, "num_input_tokens_seen": 3893640, "step": 6850 }, { "epoch": 120.26548672566372, "grad_norm": 2.1822904727741843e-06, "learning_rate": 0.27878630568144697, "loss": 0.0, "num_input_tokens_seen": 3896264, "step": 6855 }, { "epoch": 120.35398230088495, "grad_norm": 3.2314619602402672e-06, "learning_rate": 0.2787560959735056, "loss": 0.0, "num_input_tokens_seen": 3899192, "step": 6860 }, { "epoch": 120.4424778761062, "grad_norm": 2.4225992092397064e-06, "learning_rate": 0.27872586640975616, "loss": 0.0, "num_input_tokens_seen": 3901880, "step": 6865 }, { "epoch": 120.53097345132744, "grad_norm": 1.96384416994988e-06, "learning_rate": 0.27869561699486045, "loss": 0.0, "num_input_tokens_seen": 3904824, "step": 6870 }, { "epoch": 120.61946902654867, "grad_norm": 2.0964187115168897e-06, "learning_rate": 0.2786653477334833, "loss": 0.0, "num_input_tokens_seen": 3907720, "step": 6875 }, { "epoch": 120.70796460176992, "grad_norm": 2.5468459625699325e-06, "learning_rate": 0.2786350586302926, "loss": 0.0, "num_input_tokens_seen": 3911064, "step": 6880 }, { "epoch": 120.79646017699115, "grad_norm": 3.301451670267852e-06, "learning_rate": 0.27860474968995935, "loss": 0.0, "num_input_tokens_seen": 3913784, "step": 6885 }, { "epoch": 120.88495575221239, "grad_norm": 5.2054256229894236e-06, "learning_rate": 0.27857442091715756, "loss": 0.0, "num_input_tokens_seen": 3916504, "step": 6890 }, { "epoch": 120.97345132743362, "grad_norm": 2.01965758606093e-06, "learning_rate": 0.27854407231656425, "loss": 0.0, "num_input_tokens_seen": 3919400, "step": 6895 }, { "epoch": 121.05309734513274, "grad_norm": 2.4063390355877345e-06, "learning_rate": 0.2785137038928596, "loss": 0.0, "num_input_tokens_seen": 3921912, "step": 6900 }, { "epoch": 121.14159292035399, "grad_norm": 3.7687573239963967e-06, "learning_rate": 0.27848331565072687, "loss": 0.0, "num_input_tokens_seen": 3925128, "step": 6905 }, { "epoch": 121.23008849557522, "grad_norm": 3.864171958412044e-06, "learning_rate": 0.27845290759485225, "loss": 0.0, "num_input_tokens_seen": 3928264, "step": 6910 }, { "epoch": 121.31858407079646, "grad_norm": 1.2314530977164395e-06, "learning_rate": 0.278422479729925, "loss": 0.0, "num_input_tokens_seen": 3930824, "step": 6915 }, { "epoch": 121.40707964601769, "grad_norm": 8.771188504397287e-07, "learning_rate": 0.2783920320606375, "loss": 0.0, "num_input_tokens_seen": 3933864, "step": 6920 }, { "epoch": 121.49557522123894, "grad_norm": 2.6947134301735787e-06, "learning_rate": 0.2783615645916852, "loss": 0.0, "num_input_tokens_seen": 3936504, "step": 6925 }, { "epoch": 121.58407079646018, "grad_norm": 6.234613465494476e-07, "learning_rate": 0.2783310773277666, "loss": 0.0, "num_input_tokens_seen": 3939752, "step": 6930 }, { "epoch": 121.67256637168141, "grad_norm": 2.1099717741890345e-06, "learning_rate": 0.2783005702735831, "loss": 0.0, "num_input_tokens_seen": 3942872, "step": 6935 }, { "epoch": 121.76106194690266, "grad_norm": 3.747256414499134e-06, "learning_rate": 0.2782700434338394, "loss": 0.0, "num_input_tokens_seen": 3945368, "step": 6940 }, { "epoch": 121.84955752212389, "grad_norm": 8.903879802346637e-07, "learning_rate": 0.278239496813243, "loss": 0.0, "num_input_tokens_seen": 3948120, "step": 6945 }, { "epoch": 121.93805309734513, "grad_norm": 2.3803333988325903e-06, "learning_rate": 0.27820893041650463, "loss": 0.0, "num_input_tokens_seen": 3951208, "step": 6950 }, { "epoch": 122.01769911504425, "grad_norm": 2.3865293314884184e-06, "learning_rate": 0.27817834424833804, "loss": 0.0, "num_input_tokens_seen": 3953560, "step": 6955 }, { "epoch": 122.10619469026548, "grad_norm": 3.477048721833853e-06, "learning_rate": 0.27814773831345996, "loss": 0.0, "num_input_tokens_seen": 3955960, "step": 6960 }, { "epoch": 122.19469026548673, "grad_norm": 5.5165551202662755e-06, "learning_rate": 0.2781171126165902, "loss": 0.0, "num_input_tokens_seen": 3958984, "step": 6965 }, { "epoch": 122.28318584070796, "grad_norm": 2.6355198770033894e-06, "learning_rate": 0.2780864671624517, "loss": 0.0, "num_input_tokens_seen": 3962200, "step": 6970 }, { "epoch": 122.3716814159292, "grad_norm": 3.882851615344407e-06, "learning_rate": 0.27805580195577034, "loss": 0.0, "num_input_tokens_seen": 3964696, "step": 6975 }, { "epoch": 122.46017699115045, "grad_norm": 3.325897750983131e-06, "learning_rate": 0.2780251170012751, "loss": 0.0, "num_input_tokens_seen": 3967752, "step": 6980 }, { "epoch": 122.54867256637168, "grad_norm": 1.4203116052158293e-06, "learning_rate": 0.27799441230369787, "loss": 0.0, "num_input_tokens_seen": 3970712, "step": 6985 }, { "epoch": 122.63716814159292, "grad_norm": 4.30909676651936e-06, "learning_rate": 0.27796368786777387, "loss": 0.0, "num_input_tokens_seen": 3974008, "step": 6990 }, { "epoch": 122.72566371681415, "grad_norm": 4.551810889097396e-06, "learning_rate": 0.277932943698241, "loss": 0.0, "num_input_tokens_seen": 3976472, "step": 6995 }, { "epoch": 122.8141592920354, "grad_norm": 3.155375225105672e-06, "learning_rate": 0.2779021797998406, "loss": 0.0, "num_input_tokens_seen": 3979464, "step": 7000 }, { "epoch": 122.8141592920354, "eval_loss": 0.407375693321228, "eval_runtime": 0.9338, "eval_samples_per_second": 26.773, "eval_steps_per_second": 13.922, "num_input_tokens_seen": 3979464, "step": 7000 }, { "epoch": 122.90265486725664, "grad_norm": 4.651253220799845e-06, "learning_rate": 0.2778713961773167, "loss": 0.0, "num_input_tokens_seen": 3982408, "step": 7005 }, { "epoch": 122.99115044247787, "grad_norm": 1.4820664091530489e-06, "learning_rate": 0.2778405928354166, "loss": 0.0, "num_input_tokens_seen": 3985096, "step": 7010 }, { "epoch": 123.070796460177, "grad_norm": 1.1768074728024658e-06, "learning_rate": 0.27780976977889055, "loss": 0.0, "num_input_tokens_seen": 3988384, "step": 7015 }, { "epoch": 123.15929203539822, "grad_norm": 1.434250634702039e-06, "learning_rate": 0.27777892701249185, "loss": 0.0, "num_input_tokens_seen": 3991360, "step": 7020 }, { "epoch": 123.24778761061947, "grad_norm": 1.737721731842612e-06, "learning_rate": 0.2777480645409768, "loss": 0.0, "num_input_tokens_seen": 3994176, "step": 7025 }, { "epoch": 123.33628318584071, "grad_norm": 3.4436475289112423e-06, "learning_rate": 0.27771718236910486, "loss": 0.0, "num_input_tokens_seen": 3996816, "step": 7030 }, { "epoch": 123.42477876106194, "grad_norm": 1.8211046608485049e-06, "learning_rate": 0.27768628050163835, "loss": 0.0, "num_input_tokens_seen": 3999552, "step": 7035 }, { "epoch": 123.51327433628319, "grad_norm": 2.9162717964936746e-06, "learning_rate": 0.2776553589433428, "loss": 0.0, "num_input_tokens_seen": 4002288, "step": 7040 }, { "epoch": 123.60176991150442, "grad_norm": 3.795616066781804e-06, "learning_rate": 0.27762441769898666, "loss": 0.0, "num_input_tokens_seen": 4005264, "step": 7045 }, { "epoch": 123.69026548672566, "grad_norm": 2.852388888641144e-06, "learning_rate": 0.2775934567733415, "loss": 0.0, "num_input_tokens_seen": 4008352, "step": 7050 }, { "epoch": 123.77876106194691, "grad_norm": 3.563500968084554e-06, "learning_rate": 0.2775624761711819, "loss": 0.0, "num_input_tokens_seen": 4010816, "step": 7055 }, { "epoch": 123.86725663716814, "grad_norm": 5.068246764494688e-07, "learning_rate": 0.2775314758972854, "loss": 0.0, "num_input_tokens_seen": 4013584, "step": 7060 }, { "epoch": 123.95575221238938, "grad_norm": 7.284073490154697e-06, "learning_rate": 0.2775004559564327, "loss": 0.0, "num_input_tokens_seen": 4016400, "step": 7065 }, { "epoch": 124.03539823008849, "grad_norm": 2.6385296223452315e-06, "learning_rate": 0.2774694163534073, "loss": 0.0, "num_input_tokens_seen": 4019080, "step": 7070 }, { "epoch": 124.12389380530973, "grad_norm": 3.490981498543988e-06, "learning_rate": 0.27743835709299614, "loss": 0.0, "num_input_tokens_seen": 4022488, "step": 7075 }, { "epoch": 124.21238938053098, "grad_norm": 1.6377737210859777e-06, "learning_rate": 0.2774072781799888, "loss": 0.0, "num_input_tokens_seen": 4025128, "step": 7080 }, { "epoch": 124.30088495575221, "grad_norm": 2.8329607175692217e-06, "learning_rate": 0.27737617961917804, "loss": 0.0, "num_input_tokens_seen": 4027976, "step": 7085 }, { "epoch": 124.38938053097345, "grad_norm": 1.1439022955528344e-06, "learning_rate": 0.27734506141535964, "loss": 0.0, "num_input_tokens_seen": 4031496, "step": 7090 }, { "epoch": 124.47787610619469, "grad_norm": 4.5753054109809455e-06, "learning_rate": 0.2773139235733325, "loss": 0.0, "num_input_tokens_seen": 4034536, "step": 7095 }, { "epoch": 124.56637168141593, "grad_norm": 3.6338060453999788e-06, "learning_rate": 0.2772827660978984, "loss": 0.0, "num_input_tokens_seen": 4037096, "step": 7100 }, { "epoch": 124.65486725663717, "grad_norm": 4.07169500249438e-06, "learning_rate": 0.27725158899386226, "loss": 0.0, "num_input_tokens_seen": 4039448, "step": 7105 }, { "epoch": 124.7433628318584, "grad_norm": 3.5508842302078847e-06, "learning_rate": 0.27722039226603196, "loss": 0.0, "num_input_tokens_seen": 4042760, "step": 7110 }, { "epoch": 124.83185840707965, "grad_norm": 2.80201106761524e-06, "learning_rate": 0.2771891759192184, "loss": 0.0, "num_input_tokens_seen": 4045176, "step": 7115 }, { "epoch": 124.92035398230088, "grad_norm": 2.346901055716444e-06, "learning_rate": 0.2771579399582355, "loss": 0.0, "num_input_tokens_seen": 4047864, "step": 7120 }, { "epoch": 125.0, "grad_norm": 8.2645084376054e-07, "learning_rate": 0.2771266843879004, "loss": 0.0, "num_input_tokens_seen": 4050248, "step": 7125 }, { "epoch": 125.08849557522124, "grad_norm": 3.392387270650943e-06, "learning_rate": 0.2770954092130329, "loss": 0.0, "num_input_tokens_seen": 4053352, "step": 7130 }, { "epoch": 125.17699115044248, "grad_norm": 2.370608399360208e-06, "learning_rate": 0.27706411443845613, "loss": 0.0, "num_input_tokens_seen": 4056088, "step": 7135 }, { "epoch": 125.26548672566372, "grad_norm": 2.3745396902086213e-06, "learning_rate": 0.27703280006899617, "loss": 0.0, "num_input_tokens_seen": 4058968, "step": 7140 }, { "epoch": 125.35398230088495, "grad_norm": 1.6972855974017875e-06, "learning_rate": 0.277001466109482, "loss": 0.0, "num_input_tokens_seen": 4061976, "step": 7145 }, { "epoch": 125.4424778761062, "grad_norm": 1.5718990198365645e-06, "learning_rate": 0.2769701125647458, "loss": 0.0, "num_input_tokens_seen": 4064696, "step": 7150 }, { "epoch": 125.53097345132744, "grad_norm": 5.397087988967542e-06, "learning_rate": 0.27693873943962266, "loss": 0.0, "num_input_tokens_seen": 4067592, "step": 7155 }, { "epoch": 125.61946902654867, "grad_norm": 2.3461698219762184e-06, "learning_rate": 0.2769073467389506, "loss": 0.0, "num_input_tokens_seen": 4070696, "step": 7160 }, { "epoch": 125.70796460176992, "grad_norm": 4.251372047292534e-06, "learning_rate": 0.2768759344675709, "loss": 0.0, "num_input_tokens_seen": 4073112, "step": 7165 }, { "epoch": 125.79646017699115, "grad_norm": 2.785760671031312e-06, "learning_rate": 0.27684450263032767, "loss": 0.0, "num_input_tokens_seen": 4076072, "step": 7170 }, { "epoch": 125.88495575221239, "grad_norm": 9.322918117504742e-07, "learning_rate": 0.2768130512320682, "loss": 0.0, "num_input_tokens_seen": 4078920, "step": 7175 }, { "epoch": 125.97345132743362, "grad_norm": 7.3579521995270625e-06, "learning_rate": 0.27678158027764244, "loss": 0.0, "num_input_tokens_seen": 4082120, "step": 7180 }, { "epoch": 126.05309734513274, "grad_norm": 1.2371692719170824e-06, "learning_rate": 0.27675008977190385, "loss": 0.0, "num_input_tokens_seen": 4084544, "step": 7185 }, { "epoch": 126.14159292035399, "grad_norm": 3.168425564581412e-06, "learning_rate": 0.2767185797197086, "loss": 0.0, "num_input_tokens_seen": 4087728, "step": 7190 }, { "epoch": 126.23008849557522, "grad_norm": 1.663713987909432e-06, "learning_rate": 0.2766870501259159, "loss": 0.0, "num_input_tokens_seen": 4090704, "step": 7195 }, { "epoch": 126.31858407079646, "grad_norm": 6.459242740675109e-07, "learning_rate": 0.276655500995388, "loss": 0.0, "num_input_tokens_seen": 4093296, "step": 7200 }, { "epoch": 126.31858407079646, "eval_loss": 0.40883001685142517, "eval_runtime": 0.9342, "eval_samples_per_second": 26.762, "eval_steps_per_second": 13.916, "num_input_tokens_seen": 4093296, "step": 7200 }, { "epoch": 126.40707964601769, "grad_norm": 1.406119281455176e-06, "learning_rate": 0.27662393233299015, "loss": 0.0, "num_input_tokens_seen": 4096144, "step": 7205 }, { "epoch": 126.49557522123894, "grad_norm": 2.4896346531022573e-06, "learning_rate": 0.27659234414359074, "loss": 0.0, "num_input_tokens_seen": 4098608, "step": 7210 }, { "epoch": 126.58407079646018, "grad_norm": 3.908672624675091e-06, "learning_rate": 0.27656073643206097, "loss": 0.0, "num_input_tokens_seen": 4101792, "step": 7215 }, { "epoch": 126.67256637168141, "grad_norm": 1.4107804418017622e-06, "learning_rate": 0.27652910920327517, "loss": 0.0, "num_input_tokens_seen": 4105184, "step": 7220 }, { "epoch": 126.76106194690266, "grad_norm": 3.983769147453131e-06, "learning_rate": 0.2764974624621107, "loss": 0.0, "num_input_tokens_seen": 4107952, "step": 7225 }, { "epoch": 126.84955752212389, "grad_norm": 3.553057467797771e-06, "learning_rate": 0.2764657962134479, "loss": 0.0, "num_input_tokens_seen": 4110480, "step": 7230 }, { "epoch": 126.93805309734513, "grad_norm": 5.219312129156606e-07, "learning_rate": 0.27643411046217, "loss": 0.0, "num_input_tokens_seen": 4113280, "step": 7235 }, { "epoch": 127.01769911504425, "grad_norm": 1.3866506378690246e-06, "learning_rate": 0.27640240521316334, "loss": 0.0, "num_input_tokens_seen": 4115912, "step": 7240 }, { "epoch": 127.10619469026548, "grad_norm": 2.599622121124412e-06, "learning_rate": 0.2763706804713174, "loss": 0.0, "num_input_tokens_seen": 4118840, "step": 7245 }, { "epoch": 127.19469026548673, "grad_norm": 4.0783652366371825e-06, "learning_rate": 0.2763389362415245, "loss": 0.0, "num_input_tokens_seen": 4121624, "step": 7250 }, { "epoch": 127.28318584070796, "grad_norm": 2.447305178066017e-06, "learning_rate": 0.27630717252867987, "loss": 0.0, "num_input_tokens_seen": 4124152, "step": 7255 }, { "epoch": 127.3716814159292, "grad_norm": 5.003168439543515e-07, "learning_rate": 0.276275389337682, "loss": 0.0, "num_input_tokens_seen": 4127016, "step": 7260 }, { "epoch": 127.46017699115045, "grad_norm": 2.0464924546104157e-06, "learning_rate": 0.2762435866734322, "loss": 0.0, "num_input_tokens_seen": 4129592, "step": 7265 }, { "epoch": 127.54867256637168, "grad_norm": 3.9745498270349344e-07, "learning_rate": 0.27621176454083485, "loss": 0.0, "num_input_tokens_seen": 4132920, "step": 7270 }, { "epoch": 127.63716814159292, "grad_norm": 2.276132136103115e-06, "learning_rate": 0.2761799229447973, "loss": 0.0, "num_input_tokens_seen": 4136024, "step": 7275 }, { "epoch": 127.72566371681415, "grad_norm": 7.875633855292108e-06, "learning_rate": 0.27614806189023006, "loss": 0.0, "num_input_tokens_seen": 4138840, "step": 7280 }, { "epoch": 127.8141592920354, "grad_norm": 2.725835201999871e-06, "learning_rate": 0.27611618138204636, "loss": 0.0, "num_input_tokens_seen": 4142152, "step": 7285 }, { "epoch": 127.90265486725664, "grad_norm": 3.948205176129704e-06, "learning_rate": 0.2760842814251626, "loss": 0.0, "num_input_tokens_seen": 4144856, "step": 7290 }, { "epoch": 127.99115044247787, "grad_norm": 2.159420091629727e-06, "learning_rate": 0.2760523620244982, "loss": 0.0, "num_input_tokens_seen": 4148088, "step": 7295 }, { "epoch": 128.07079646017698, "grad_norm": 4.992456979380222e-06, "learning_rate": 0.27602042318497544, "loss": 0.0, "num_input_tokens_seen": 4150680, "step": 7300 }, { "epoch": 128.15929203539824, "grad_norm": 4.14657097280724e-06, "learning_rate": 0.2759884649115198, "loss": 0.0, "num_input_tokens_seen": 4153544, "step": 7305 }, { "epoch": 128.24778761061947, "grad_norm": 1.643747850721411e-06, "learning_rate": 0.2759564872090596, "loss": 0.0, "num_input_tokens_seen": 4156136, "step": 7310 }, { "epoch": 128.3362831858407, "grad_norm": 2.422985289740609e-06, "learning_rate": 0.2759244900825262, "loss": 0.0, "num_input_tokens_seen": 4158792, "step": 7315 }, { "epoch": 128.42477876106196, "grad_norm": 4.1400830923521426e-06, "learning_rate": 0.2758924735368539, "loss": 0.0, "num_input_tokens_seen": 4161672, "step": 7320 }, { "epoch": 128.5132743362832, "grad_norm": 3.872611614497146e-06, "learning_rate": 0.27586043757698014, "loss": 0.0, "num_input_tokens_seen": 4164632, "step": 7325 }, { "epoch": 128.60176991150442, "grad_norm": 3.5173750347894384e-06, "learning_rate": 0.27582838220784534, "loss": 0.0, "num_input_tokens_seen": 4167768, "step": 7330 }, { "epoch": 128.69026548672565, "grad_norm": 1.8281762095284648e-06, "learning_rate": 0.27579630743439265, "loss": 0.0, "num_input_tokens_seen": 4170616, "step": 7335 }, { "epoch": 128.7787610619469, "grad_norm": 1.611850393601344e-06, "learning_rate": 0.2757642132615686, "loss": 0.0, "num_input_tokens_seen": 4173320, "step": 7340 }, { "epoch": 128.86725663716814, "grad_norm": 2.340627361263614e-06, "learning_rate": 0.2757320996943223, "loss": 0.0, "num_input_tokens_seen": 4176216, "step": 7345 }, { "epoch": 128.95575221238937, "grad_norm": 5.252745268080616e-06, "learning_rate": 0.2756999667376062, "loss": 0.0, "num_input_tokens_seen": 4178760, "step": 7350 }, { "epoch": 129.0353982300885, "grad_norm": 2.025994035648182e-06, "learning_rate": 0.2756678143963756, "loss": 0.0, "num_input_tokens_seen": 4181184, "step": 7355 }, { "epoch": 129.12389380530973, "grad_norm": 3.8053046864661155e-06, "learning_rate": 0.2756356426755888, "loss": 0.0, "num_input_tokens_seen": 4184208, "step": 7360 }, { "epoch": 129.21238938053096, "grad_norm": 2.5977421955758473e-06, "learning_rate": 0.27560345158020705, "loss": 0.0, "num_input_tokens_seen": 4187680, "step": 7365 }, { "epoch": 129.30088495575222, "grad_norm": 1.7599833199710702e-06, "learning_rate": 0.27557124111519465, "loss": 0.0, "num_input_tokens_seen": 4190400, "step": 7370 }, { "epoch": 129.38938053097345, "grad_norm": 1.4635571687904303e-06, "learning_rate": 0.27553901128551883, "loss": 0.0, "num_input_tokens_seen": 4192720, "step": 7375 }, { "epoch": 129.47787610619469, "grad_norm": 2.6848951506508456e-07, "learning_rate": 0.2755067620961498, "loss": 0.0, "num_input_tokens_seen": 4195584, "step": 7380 }, { "epoch": 129.56637168141592, "grad_norm": 3.4480333397368668e-06, "learning_rate": 0.27547449355206094, "loss": 0.0, "num_input_tokens_seen": 4198048, "step": 7385 }, { "epoch": 129.65486725663717, "grad_norm": 1.635221678952803e-06, "learning_rate": 0.2754422056582283, "loss": 0.0, "num_input_tokens_seen": 4201184, "step": 7390 }, { "epoch": 129.7433628318584, "grad_norm": 3.1470490284846164e-06, "learning_rate": 0.27540989841963115, "loss": 0.0, "num_input_tokens_seen": 4203936, "step": 7395 }, { "epoch": 129.83185840707964, "grad_norm": 1.1596499689403572e-06, "learning_rate": 0.27537757184125167, "loss": 0.0, "num_input_tokens_seen": 4207120, "step": 7400 }, { "epoch": 129.83185840707964, "eval_loss": 0.3879455327987671, "eval_runtime": 0.926, "eval_samples_per_second": 26.998, "eval_steps_per_second": 14.039, "num_input_tokens_seen": 4207120, "step": 7400 }, { "epoch": 129.9203539823009, "grad_norm": 2.054187461908441e-06, "learning_rate": 0.275345225928075, "loss": 0.0, "num_input_tokens_seen": 4210320, "step": 7405 }, { "epoch": 130.0, "grad_norm": 4.068767339049373e-06, "learning_rate": 0.2753128606850893, "loss": 0.0, "num_input_tokens_seen": 4212296, "step": 7410 }, { "epoch": 130.08849557522123, "grad_norm": 2.931662265837076e-06, "learning_rate": 0.2752804761172858, "loss": 0.0, "num_input_tokens_seen": 4215336, "step": 7415 }, { "epoch": 130.1769911504425, "grad_norm": 6.467627713391266e-07, "learning_rate": 0.27524807222965836, "loss": 0.0, "num_input_tokens_seen": 4218280, "step": 7420 }, { "epoch": 130.26548672566372, "grad_norm": 1.985734570553177e-06, "learning_rate": 0.27521564902720436, "loss": 0.0, "num_input_tokens_seen": 4221432, "step": 7425 }, { "epoch": 130.35398230088495, "grad_norm": 1.315662757406244e-06, "learning_rate": 0.2751832065149236, "loss": 0.0, "num_input_tokens_seen": 4224120, "step": 7430 }, { "epoch": 130.44247787610618, "grad_norm": 1.2041650734317955e-06, "learning_rate": 0.2751507446978193, "loss": 0.0, "num_input_tokens_seen": 4226808, "step": 7435 }, { "epoch": 130.53097345132744, "grad_norm": 2.2794738470111042e-06, "learning_rate": 0.2751182635808974, "loss": 0.0, "num_input_tokens_seen": 4229880, "step": 7440 }, { "epoch": 130.61946902654867, "grad_norm": 3.62906666850904e-06, "learning_rate": 0.27508576316916694, "loss": 0.0, "num_input_tokens_seen": 4232296, "step": 7445 }, { "epoch": 130.7079646017699, "grad_norm": 1.011663243843941e-06, "learning_rate": 0.2750532434676399, "loss": 0.0, "num_input_tokens_seen": 4235080, "step": 7450 }, { "epoch": 130.79646017699116, "grad_norm": 1.333742261522275e-06, "learning_rate": 0.27502070448133115, "loss": 0.0, "num_input_tokens_seen": 4238360, "step": 7455 }, { "epoch": 130.8849557522124, "grad_norm": 2.7180410597793525e-06, "learning_rate": 0.2749881462152587, "loss": 0.0, "num_input_tokens_seen": 4241000, "step": 7460 }, { "epoch": 130.97345132743362, "grad_norm": 1.5921714293654077e-06, "learning_rate": 0.2749555686744434, "loss": 0.0, "num_input_tokens_seen": 4244472, "step": 7465 }, { "epoch": 131.05309734513276, "grad_norm": 2.906670943048084e-06, "learning_rate": 0.2749229718639091, "loss": 0.0, "num_input_tokens_seen": 4246536, "step": 7470 }, { "epoch": 131.141592920354, "grad_norm": 1.947333657881245e-06, "learning_rate": 0.27489035578868265, "loss": 0.0, "num_input_tokens_seen": 4249544, "step": 7475 }, { "epoch": 131.23008849557522, "grad_norm": 2.3165996481111506e-06, "learning_rate": 0.2748577204537939, "loss": 0.0, "num_input_tokens_seen": 4252808, "step": 7480 }, { "epoch": 131.31858407079645, "grad_norm": 2.2897620510775596e-06, "learning_rate": 0.2748250658642756, "loss": 0.0, "num_input_tokens_seen": 4255528, "step": 7485 }, { "epoch": 131.4070796460177, "grad_norm": 2.6887184958468424e-06, "learning_rate": 0.2747923920251634, "loss": 0.0, "num_input_tokens_seen": 4258472, "step": 7490 }, { "epoch": 131.49557522123894, "grad_norm": 3.372822448000079e-06, "learning_rate": 0.27475969894149627, "loss": 0.0, "num_input_tokens_seen": 4261464, "step": 7495 }, { "epoch": 131.58407079646017, "grad_norm": 3.821893187705427e-06, "learning_rate": 0.2747269866183156, "loss": 0.0, "num_input_tokens_seen": 4263784, "step": 7500 }, { "epoch": 131.67256637168143, "grad_norm": 1.6657512560414034e-06, "learning_rate": 0.27469425506066625, "loss": 0.0, "num_input_tokens_seen": 4267032, "step": 7505 }, { "epoch": 131.76106194690266, "grad_norm": 2.578732164693065e-06, "learning_rate": 0.27466150427359576, "loss": 0.0, "num_input_tokens_seen": 4269560, "step": 7510 }, { "epoch": 131.8495575221239, "grad_norm": 7.081524131535843e-07, "learning_rate": 0.2746287342621547, "loss": 0.0, "num_input_tokens_seen": 4272664, "step": 7515 }, { "epoch": 131.93805309734512, "grad_norm": 3.395412477402715e-06, "learning_rate": 0.2745959450313966, "loss": 0.0, "num_input_tokens_seen": 4275608, "step": 7520 }, { "epoch": 132.01769911504425, "grad_norm": 1.1865499800478574e-06, "learning_rate": 0.27456313658637804, "loss": 0.0, "num_input_tokens_seen": 4277864, "step": 7525 }, { "epoch": 132.10619469026548, "grad_norm": 1.5715024801465916e-06, "learning_rate": 0.27453030893215846, "loss": 0.0, "num_input_tokens_seen": 4280696, "step": 7530 }, { "epoch": 132.1946902654867, "grad_norm": 1.074575266102329e-06, "learning_rate": 0.2744974620738003, "loss": 0.0, "num_input_tokens_seen": 4283400, "step": 7535 }, { "epoch": 132.28318584070797, "grad_norm": 2.633507619975717e-06, "learning_rate": 0.27446459601636897, "loss": 0.0, "num_input_tokens_seen": 4286648, "step": 7540 }, { "epoch": 132.3716814159292, "grad_norm": 1.1460482483016676e-06, "learning_rate": 0.2744317107649328, "loss": 0.0, "num_input_tokens_seen": 4289512, "step": 7545 }, { "epoch": 132.46017699115043, "grad_norm": 2.3290574517886853e-06, "learning_rate": 0.2743988063245631, "loss": 0.0, "num_input_tokens_seen": 4292472, "step": 7550 }, { "epoch": 132.5486725663717, "grad_norm": 1.6709360579625354e-06, "learning_rate": 0.2743658827003342, "loss": 0.0, "num_input_tokens_seen": 4295480, "step": 7555 }, { "epoch": 132.63716814159292, "grad_norm": 1.612057872080186e-06, "learning_rate": 0.27433293989732327, "loss": 0.0, "num_input_tokens_seen": 4298536, "step": 7560 }, { "epoch": 132.72566371681415, "grad_norm": 9.594857601769036e-07, "learning_rate": 0.27429997792061056, "loss": 0.0, "num_input_tokens_seen": 4301528, "step": 7565 }, { "epoch": 132.81415929203538, "grad_norm": 3.479040344700479e-07, "learning_rate": 0.27426699677527927, "loss": 0.0, "num_input_tokens_seen": 4304312, "step": 7570 }, { "epoch": 132.90265486725664, "grad_norm": 2.661088728928007e-06, "learning_rate": 0.2742339964664154, "loss": 0.0, "num_input_tokens_seen": 4306552, "step": 7575 }, { "epoch": 132.99115044247787, "grad_norm": 3.380514215223229e-07, "learning_rate": 0.274200976999108, "loss": 0.0, "num_input_tokens_seen": 4309288, "step": 7580 }, { "epoch": 133.07079646017698, "grad_norm": 1.1710498029060545e-06, "learning_rate": 0.27416793837844916, "loss": 0.0, "num_input_tokens_seen": 4312024, "step": 7585 }, { "epoch": 133.15929203539824, "grad_norm": 1.810665139601042e-06, "learning_rate": 0.27413488060953384, "loss": 0.0, "num_input_tokens_seen": 4314760, "step": 7590 }, { "epoch": 133.24778761061947, "grad_norm": 1.6951661336861434e-06, "learning_rate": 0.27410180369745996, "loss": 0.0, "num_input_tokens_seen": 4317400, "step": 7595 }, { "epoch": 133.3362831858407, "grad_norm": 1.9393255570321344e-06, "learning_rate": 0.27406870764732844, "loss": 0.0, "num_input_tokens_seen": 4320568, "step": 7600 }, { "epoch": 133.3362831858407, "eval_loss": 0.4211263358592987, "eval_runtime": 0.9103, "eval_samples_per_second": 27.464, "eval_steps_per_second": 14.281, "num_input_tokens_seen": 4320568, "step": 7600 }, { "epoch": 133.42477876106196, "grad_norm": 2.211630317106028e-06, "learning_rate": 0.27403559246424297, "loss": 0.0, "num_input_tokens_seen": 4322968, "step": 7605 }, { "epoch": 133.5132743362832, "grad_norm": 3.3390167573088547e-06, "learning_rate": 0.2740024581533105, "loss": 0.0, "num_input_tokens_seen": 4325864, "step": 7610 }, { "epoch": 133.60176991150442, "grad_norm": 2.4082764866761863e-06, "learning_rate": 0.2739693047196406, "loss": 0.0, "num_input_tokens_seen": 4328504, "step": 7615 }, { "epoch": 133.69026548672565, "grad_norm": 5.271755867397587e-07, "learning_rate": 0.27393613216834606, "loss": 0.0, "num_input_tokens_seen": 4331016, "step": 7620 }, { "epoch": 133.7787610619469, "grad_norm": 2.8120541628595674e-06, "learning_rate": 0.2739029405045424, "loss": 0.0, "num_input_tokens_seen": 4333976, "step": 7625 }, { "epoch": 133.86725663716814, "grad_norm": 1.2799810065189376e-06, "learning_rate": 0.2738697297333483, "loss": 0.0, "num_input_tokens_seen": 4336936, "step": 7630 }, { "epoch": 133.95575221238937, "grad_norm": 6.666624585704994e-07, "learning_rate": 0.2738364998598852, "loss": 0.0, "num_input_tokens_seen": 4340040, "step": 7635 }, { "epoch": 134.0353982300885, "grad_norm": 2.05279934561986e-06, "learning_rate": 0.27380325088927765, "loss": 0.0, "num_input_tokens_seen": 4342576, "step": 7640 }, { "epoch": 134.12389380530973, "grad_norm": 2.1348430436773924e-06, "learning_rate": 0.27376998282665294, "loss": 0.0, "num_input_tokens_seen": 4345248, "step": 7645 }, { "epoch": 134.21238938053096, "grad_norm": 2.7338310246705078e-06, "learning_rate": 0.27373669567714154, "loss": 0.0, "num_input_tokens_seen": 4349056, "step": 7650 }, { "epoch": 134.30088495575222, "grad_norm": 4.942755822412437e-07, "learning_rate": 0.27370338944587663, "loss": 0.0, "num_input_tokens_seen": 4351904, "step": 7655 }, { "epoch": 134.38938053097345, "grad_norm": 4.829840918318951e-07, "learning_rate": 0.27367006413799455, "loss": 0.0, "num_input_tokens_seen": 4354880, "step": 7660 }, { "epoch": 134.47787610619469, "grad_norm": 1.1831547226393013e-06, "learning_rate": 0.2736367197586345, "loss": 0.0, "num_input_tokens_seen": 4357456, "step": 7665 }, { "epoch": 134.56637168141592, "grad_norm": 2.951824171759654e-06, "learning_rate": 0.2736033563129385, "loss": 0.0, "num_input_tokens_seen": 4360032, "step": 7670 }, { "epoch": 134.65486725663717, "grad_norm": 1.8372145405010087e-06, "learning_rate": 0.27356997380605164, "loss": 0.0, "num_input_tokens_seen": 4362768, "step": 7675 }, { "epoch": 134.7433628318584, "grad_norm": 1.0507243359825225e-06, "learning_rate": 0.27353657224312194, "loss": 0.0, "num_input_tokens_seen": 4365536, "step": 7680 }, { "epoch": 134.83185840707964, "grad_norm": 1.4001676618136116e-06, "learning_rate": 0.2735031516293004, "loss": 0.0, "num_input_tokens_seen": 4368656, "step": 7685 }, { "epoch": 134.9203539823009, "grad_norm": 8.213565365622344e-07, "learning_rate": 0.2734697119697408, "loss": 0.0, "num_input_tokens_seen": 4371408, "step": 7690 }, { "epoch": 135.0, "grad_norm": 1.2125043213018216e-05, "learning_rate": 0.27343625326959997, "loss": 0.0, "num_input_tokens_seen": 4373936, "step": 7695 }, { "epoch": 135.08849557522123, "grad_norm": 1.4485783594864188e-07, "learning_rate": 0.27340277553403775, "loss": 0.0, "num_input_tokens_seen": 4376576, "step": 7700 }, { "epoch": 135.1769911504425, "grad_norm": 5.033311026636511e-06, "learning_rate": 0.2733692787682167, "loss": 0.0, "num_input_tokens_seen": 4379200, "step": 7705 }, { "epoch": 135.26548672566372, "grad_norm": 5.990499175823061e-06, "learning_rate": 0.27333576297730255, "loss": 0.0, "num_input_tokens_seen": 4381760, "step": 7710 }, { "epoch": 135.35398230088495, "grad_norm": 2.0432455585250864e-06, "learning_rate": 0.2733022281664638, "loss": 0.0, "num_input_tokens_seen": 4384368, "step": 7715 }, { "epoch": 135.44247787610618, "grad_norm": 1.1822924079751829e-06, "learning_rate": 0.273268674340872, "loss": 0.0, "num_input_tokens_seen": 4387568, "step": 7720 }, { "epoch": 135.53097345132744, "grad_norm": 1.3702095884582377e-06, "learning_rate": 0.27323510150570146, "loss": 0.0, "num_input_tokens_seen": 4390400, "step": 7725 }, { "epoch": 135.61946902654867, "grad_norm": 1.8252983409183798e-06, "learning_rate": 0.27320150966612966, "loss": 0.0, "num_input_tokens_seen": 4393760, "step": 7730 }, { "epoch": 135.7079646017699, "grad_norm": 4.600169631885365e-06, "learning_rate": 0.2731678988273368, "loss": 0.0, "num_input_tokens_seen": 4397072, "step": 7735 }, { "epoch": 135.79646017699116, "grad_norm": 9.859957117441809e-07, "learning_rate": 0.27313426899450605, "loss": 0.0, "num_input_tokens_seen": 4400416, "step": 7740 }, { "epoch": 135.8849557522124, "grad_norm": 3.031257165275747e-06, "learning_rate": 0.27310062017282366, "loss": 0.0, "num_input_tokens_seen": 4403344, "step": 7745 }, { "epoch": 135.97345132743362, "grad_norm": 2.0704792405012995e-06, "learning_rate": 0.2730669523674787, "loss": 0.0, "num_input_tokens_seen": 4406080, "step": 7750 }, { "epoch": 136.05309734513276, "grad_norm": 1.4861944919175585e-06, "learning_rate": 0.2730332655836631, "loss": 0.0, "num_input_tokens_seen": 4408264, "step": 7755 }, { "epoch": 136.141592920354, "grad_norm": 2.050362581940135e-06, "learning_rate": 0.2729995598265718, "loss": 0.0, "num_input_tokens_seen": 4410808, "step": 7760 }, { "epoch": 136.23008849557522, "grad_norm": 3.0440328373515513e-06, "learning_rate": 0.2729658351014027, "loss": 0.0, "num_input_tokens_seen": 4413320, "step": 7765 }, { "epoch": 136.31858407079645, "grad_norm": 2.187155587307643e-06, "learning_rate": 0.27293209141335656, "loss": 0.0, "num_input_tokens_seen": 4415896, "step": 7770 }, { "epoch": 136.4070796460177, "grad_norm": 2.1134374037501402e-06, "learning_rate": 0.27289832876763703, "loss": 0.0, "num_input_tokens_seen": 4418248, "step": 7775 }, { "epoch": 136.49557522123894, "grad_norm": 2.068020194201381e-06, "learning_rate": 0.27286454716945074, "loss": 0.0, "num_input_tokens_seen": 4421496, "step": 7780 }, { "epoch": 136.58407079646017, "grad_norm": 2.855913635357865e-06, "learning_rate": 0.27283074662400725, "loss": 0.0, "num_input_tokens_seen": 4424920, "step": 7785 }, { "epoch": 136.67256637168143, "grad_norm": 1.506061266809411e-07, "learning_rate": 0.2727969271365191, "loss": 0.0, "num_input_tokens_seen": 4428104, "step": 7790 }, { "epoch": 136.76106194690266, "grad_norm": 5.784546033282822e-07, "learning_rate": 0.2727630887122016, "loss": 0.0, "num_input_tokens_seen": 4431048, "step": 7795 }, { "epoch": 136.8495575221239, "grad_norm": 1.1050015018554404e-06, "learning_rate": 0.27272923135627314, "loss": 0.0, "num_input_tokens_seen": 4434056, "step": 7800 }, { "epoch": 136.8495575221239, "eval_loss": 0.4221038818359375, "eval_runtime": 0.9251, "eval_samples_per_second": 27.025, "eval_steps_per_second": 14.053, "num_input_tokens_seen": 4434056, "step": 7800 }, { "epoch": 136.93805309734512, "grad_norm": 3.824180566880386e-06, "learning_rate": 0.2726953550739548, "loss": 0.0, "num_input_tokens_seen": 4437096, "step": 7805 }, { "epoch": 137.01769911504425, "grad_norm": 3.354754426254658e-06, "learning_rate": 0.27266145987047086, "loss": 0.0, "num_input_tokens_seen": 4439648, "step": 7810 }, { "epoch": 137.10619469026548, "grad_norm": 2.237787157355342e-06, "learning_rate": 0.27262754575104836, "loss": 0.0, "num_input_tokens_seen": 4442304, "step": 7815 }, { "epoch": 137.1946902654867, "grad_norm": 3.6302756143413717e-06, "learning_rate": 0.27259361272091726, "loss": 0.0, "num_input_tokens_seen": 4445168, "step": 7820 }, { "epoch": 137.28318584070797, "grad_norm": 2.558928144935635e-06, "learning_rate": 0.27255966078531046, "loss": 0.0, "num_input_tokens_seen": 4447856, "step": 7825 }, { "epoch": 137.3716814159292, "grad_norm": 1.3781807410850888e-06, "learning_rate": 0.2725256899494638, "loss": 0.0, "num_input_tokens_seen": 4450352, "step": 7830 }, { "epoch": 137.46017699115043, "grad_norm": 1.2830137166019995e-06, "learning_rate": 0.272491700218616, "loss": 0.0, "num_input_tokens_seen": 4452832, "step": 7835 }, { "epoch": 137.5486725663717, "grad_norm": 8.643453384138411e-07, "learning_rate": 0.27245769159800876, "loss": 0.0, "num_input_tokens_seen": 4456032, "step": 7840 }, { "epoch": 137.63716814159292, "grad_norm": 2.3787374630046543e-06, "learning_rate": 0.2724236640928865, "loss": 0.0, "num_input_tokens_seen": 4459216, "step": 7845 }, { "epoch": 137.72566371681415, "grad_norm": 8.536243853995984e-07, "learning_rate": 0.27238961770849673, "loss": 0.0, "num_input_tokens_seen": 4462432, "step": 7850 }, { "epoch": 137.81415929203538, "grad_norm": 1.7645246543906978e-06, "learning_rate": 0.27235555245008997, "loss": 0.0, "num_input_tokens_seen": 4465264, "step": 7855 }, { "epoch": 137.90265486725664, "grad_norm": 3.5011140653296025e-07, "learning_rate": 0.2723214683229193, "loss": 0.0, "num_input_tokens_seen": 4467952, "step": 7860 }, { "epoch": 137.99115044247787, "grad_norm": 2.963653741971939e-06, "learning_rate": 0.27228736533224107, "loss": 0.0, "num_input_tokens_seen": 4470720, "step": 7865 }, { "epoch": 138.07079646017698, "grad_norm": 1.947083774211933e-06, "learning_rate": 0.27225324348331437, "loss": 0.0, "num_input_tokens_seen": 4473336, "step": 7870 }, { "epoch": 138.15929203539824, "grad_norm": 2.6873017304751556e-06, "learning_rate": 0.27221910278140116, "loss": 0.0, "num_input_tokens_seen": 4475688, "step": 7875 }, { "epoch": 138.24778761061947, "grad_norm": 8.336010068887845e-07, "learning_rate": 0.2721849432317664, "loss": 0.0, "num_input_tokens_seen": 4478488, "step": 7880 }, { "epoch": 138.3362831858407, "grad_norm": 1.235025820278679e-06, "learning_rate": 0.2721507648396779, "loss": 0.0, "num_input_tokens_seen": 4481448, "step": 7885 }, { "epoch": 138.42477876106196, "grad_norm": 8.724937856641191e-07, "learning_rate": 0.27211656761040653, "loss": 0.0, "num_input_tokens_seen": 4484328, "step": 7890 }, { "epoch": 138.5132743362832, "grad_norm": 7.095731575645914e-07, "learning_rate": 0.2720823515492257, "loss": 0.0, "num_input_tokens_seen": 4487240, "step": 7895 }, { "epoch": 138.60176991150442, "grad_norm": 3.375738288013963e-06, "learning_rate": 0.27204811666141215, "loss": 0.0, "num_input_tokens_seen": 4490040, "step": 7900 }, { "epoch": 138.69026548672565, "grad_norm": 1.004642740554118e-06, "learning_rate": 0.2720138629522452, "loss": 0.0, "num_input_tokens_seen": 4492888, "step": 7905 }, { "epoch": 138.7787610619469, "grad_norm": 2.5842957711574854e-06, "learning_rate": 0.2719795904270073, "loss": 0.0, "num_input_tokens_seen": 4496184, "step": 7910 }, { "epoch": 138.86725663716814, "grad_norm": 2.1217927042016527e-06, "learning_rate": 0.2719452990909837, "loss": 0.0, "num_input_tokens_seen": 4499032, "step": 7915 }, { "epoch": 138.95575221238937, "grad_norm": 1.5800801520526875e-06, "learning_rate": 0.2719109889494625, "loss": 0.0, "num_input_tokens_seen": 4501736, "step": 7920 }, { "epoch": 139.0353982300885, "grad_norm": 1.1334875580359949e-06, "learning_rate": 0.27187666000773475, "loss": 0.0, "num_input_tokens_seen": 4504168, "step": 7925 }, { "epoch": 139.12389380530973, "grad_norm": 3.297619059594581e-06, "learning_rate": 0.2718423122710944, "loss": 0.0, "num_input_tokens_seen": 4507304, "step": 7930 }, { "epoch": 139.21238938053096, "grad_norm": 4.119043524042354e-07, "learning_rate": 0.2718079457448384, "loss": 0.0, "num_input_tokens_seen": 4509976, "step": 7935 }, { "epoch": 139.30088495575222, "grad_norm": 4.211226951156277e-06, "learning_rate": 0.27177356043426637, "loss": 0.0, "num_input_tokens_seen": 4512648, "step": 7940 }, { "epoch": 139.38938053097345, "grad_norm": 2.8310605557635427e-06, "learning_rate": 0.27173915634468104, "loss": 0.0, "num_input_tokens_seen": 4515400, "step": 7945 }, { "epoch": 139.47787610619469, "grad_norm": 1.5799756738488213e-06, "learning_rate": 0.27170473348138796, "loss": 0.0, "num_input_tokens_seen": 4518744, "step": 7950 }, { "epoch": 139.56637168141592, "grad_norm": 2.1102755454194266e-06, "learning_rate": 0.27167029184969554, "loss": 0.0, "num_input_tokens_seen": 4521736, "step": 7955 }, { "epoch": 139.65486725663717, "grad_norm": 3.2922637274168665e-06, "learning_rate": 0.27163583145491504, "loss": 0.0, "num_input_tokens_seen": 4524632, "step": 7960 }, { "epoch": 139.7433628318584, "grad_norm": 1.495198262091435e-06, "learning_rate": 0.2716013523023608, "loss": 0.0, "num_input_tokens_seen": 4527400, "step": 7965 }, { "epoch": 139.83185840707964, "grad_norm": 2.209550530096749e-06, "learning_rate": 0.27156685439734995, "loss": 0.0, "num_input_tokens_seen": 4530216, "step": 7970 }, { "epoch": 139.9203539823009, "grad_norm": 3.5334937820152845e-06, "learning_rate": 0.2715323377452024, "loss": 0.0, "num_input_tokens_seen": 4533128, "step": 7975 }, { "epoch": 140.0, "grad_norm": 1.4905256762176577e-07, "learning_rate": 0.2714978023512411, "loss": 0.0, "num_input_tokens_seen": 4535568, "step": 7980 }, { "epoch": 140.08849557522123, "grad_norm": 1.4943009318812983e-06, "learning_rate": 0.2714632482207918, "loss": 0.0, "num_input_tokens_seen": 4538912, "step": 7985 }, { "epoch": 140.1769911504425, "grad_norm": 1.144013936027477e-06, "learning_rate": 0.2714286753591833, "loss": 0.0, "num_input_tokens_seen": 4541728, "step": 7990 }, { "epoch": 140.26548672566372, "grad_norm": 7.170142453105655e-07, "learning_rate": 0.27139408377174706, "loss": 0.0, "num_input_tokens_seen": 4545056, "step": 7995 }, { "epoch": 140.35398230088495, "grad_norm": 3.2837024264154024e-06, "learning_rate": 0.27135947346381756, "loss": 0.0, "num_input_tokens_seen": 4547840, "step": 8000 }, { "epoch": 140.35398230088495, "eval_loss": 0.425972580909729, "eval_runtime": 0.9271, "eval_samples_per_second": 26.966, "eval_steps_per_second": 14.023, "num_input_tokens_seen": 4547840, "step": 8000 }, { "epoch": 140.44247787610618, "grad_norm": 1.8611339100971236e-06, "learning_rate": 0.2713248444407322, "loss": 0.0, "num_input_tokens_seen": 4550544, "step": 8005 }, { "epoch": 140.53097345132744, "grad_norm": 2.380748810537625e-06, "learning_rate": 0.27129019670783106, "loss": 0.0, "num_input_tokens_seen": 4553536, "step": 8010 }, { "epoch": 140.61946902654867, "grad_norm": 8.501598927068699e-07, "learning_rate": 0.27125553027045746, "loss": 0.0, "num_input_tokens_seen": 4556096, "step": 8015 }, { "epoch": 140.7079646017699, "grad_norm": 9.299871521761816e-07, "learning_rate": 0.2712208451339572, "loss": 0.0, "num_input_tokens_seen": 4558784, "step": 8020 }, { "epoch": 140.79646017699116, "grad_norm": 1.6729885601307615e-06, "learning_rate": 0.27118614130367935, "loss": 0.0, "num_input_tokens_seen": 4561232, "step": 8025 }, { "epoch": 140.8849557522124, "grad_norm": 9.672380656411406e-07, "learning_rate": 0.2711514187849756, "loss": 0.0, "num_input_tokens_seen": 4564544, "step": 8030 }, { "epoch": 140.97345132743362, "grad_norm": 2.4918872441048734e-06, "learning_rate": 0.27111667758320057, "loss": 0.0, "num_input_tokens_seen": 4567280, "step": 8035 }, { "epoch": 141.05309734513276, "grad_norm": 2.13628072742722e-06, "learning_rate": 0.27108191770371176, "loss": 0.0, "num_input_tokens_seen": 4569480, "step": 8040 }, { "epoch": 141.141592920354, "grad_norm": 2.6584909846860683e-06, "learning_rate": 0.2710471391518697, "loss": 0.0, "num_input_tokens_seen": 4572360, "step": 8045 }, { "epoch": 141.23008849557522, "grad_norm": 3.3419446481275372e-06, "learning_rate": 0.2710123419330375, "loss": 0.0, "num_input_tokens_seen": 4575320, "step": 8050 }, { "epoch": 141.31858407079645, "grad_norm": 3.1182862585410476e-06, "learning_rate": 0.2709775260525816, "loss": 0.0, "num_input_tokens_seen": 4578104, "step": 8055 }, { "epoch": 141.4070796460177, "grad_norm": 8.383607337236754e-07, "learning_rate": 0.27094269151587075, "loss": 0.0, "num_input_tokens_seen": 4581096, "step": 8060 }, { "epoch": 141.49557522123894, "grad_norm": 7.536648354289355e-07, "learning_rate": 0.27090783832827703, "loss": 0.0, "num_input_tokens_seen": 4584184, "step": 8065 }, { "epoch": 141.58407079646017, "grad_norm": 2.3588422664033715e-06, "learning_rate": 0.2708729664951753, "loss": 0.0, "num_input_tokens_seen": 4587320, "step": 8070 }, { "epoch": 141.67256637168143, "grad_norm": 5.175935484658112e-07, "learning_rate": 0.27083807602194304, "loss": 0.0, "num_input_tokens_seen": 4589944, "step": 8075 }, { "epoch": 141.76106194690266, "grad_norm": 1.3177993878343841e-06, "learning_rate": 0.270803166913961, "loss": 0.0, "num_input_tokens_seen": 4592568, "step": 8080 }, { "epoch": 141.8495575221239, "grad_norm": 5.873963573321817e-07, "learning_rate": 0.27076823917661247, "loss": 0.0, "num_input_tokens_seen": 4595848, "step": 8085 }, { "epoch": 141.93805309734512, "grad_norm": 2.1653681869793218e-06, "learning_rate": 0.2707332928152838, "loss": 0.0, "num_input_tokens_seen": 4598648, "step": 8090 }, { "epoch": 142.01769911504425, "grad_norm": 5.08426694523223e-07, "learning_rate": 0.2706983278353641, "loss": 0.0, "num_input_tokens_seen": 4601320, "step": 8095 }, { "epoch": 142.10619469026548, "grad_norm": 3.683581553559634e-06, "learning_rate": 0.27066334424224553, "loss": 0.0, "num_input_tokens_seen": 4604488, "step": 8100 }, { "epoch": 142.1946902654867, "grad_norm": 5.387262831391126e-07, "learning_rate": 0.27062834204132297, "loss": 0.0, "num_input_tokens_seen": 4607032, "step": 8105 }, { "epoch": 142.28318584070797, "grad_norm": 1.8587423937788117e-06, "learning_rate": 0.27059332123799407, "loss": 0.0, "num_input_tokens_seen": 4609544, "step": 8110 }, { "epoch": 142.3716814159292, "grad_norm": 8.367191526303941e-07, "learning_rate": 0.27055828183765956, "loss": 0.0, "num_input_tokens_seen": 4612680, "step": 8115 }, { "epoch": 142.46017699115043, "grad_norm": 1.8290587604496977e-06, "learning_rate": 0.270523223845723, "loss": 0.0, "num_input_tokens_seen": 4615560, "step": 8120 }, { "epoch": 142.5486725663717, "grad_norm": 1.0620278771966696e-06, "learning_rate": 0.2704881472675907, "loss": 0.0, "num_input_tokens_seen": 4618184, "step": 8125 }, { "epoch": 142.63716814159292, "grad_norm": 3.884665147779742e-06, "learning_rate": 0.270453052108672, "loss": 0.0, "num_input_tokens_seen": 4620632, "step": 8130 }, { "epoch": 142.72566371681415, "grad_norm": 1.165385356216575e-06, "learning_rate": 0.2704179383743789, "loss": 0.0, "num_input_tokens_seen": 4623784, "step": 8135 }, { "epoch": 142.81415929203538, "grad_norm": 7.183800221355341e-07, "learning_rate": 0.27038280607012644, "loss": 0.0, "num_input_tokens_seen": 4627128, "step": 8140 }, { "epoch": 142.90265486725664, "grad_norm": 1.1918377822439652e-06, "learning_rate": 0.27034765520133247, "loss": 0.0, "num_input_tokens_seen": 4630616, "step": 8145 }, { "epoch": 142.99115044247787, "grad_norm": 1.988286612686352e-06, "learning_rate": 0.2703124857734177, "loss": 0.0, "num_input_tokens_seen": 4633144, "step": 8150 }, { "epoch": 143.07079646017698, "grad_norm": 1.168030053122493e-06, "learning_rate": 0.27027729779180565, "loss": 0.0, "num_input_tokens_seen": 4635648, "step": 8155 }, { "epoch": 143.15929203539824, "grad_norm": 1.2617624634003732e-06, "learning_rate": 0.27024209126192283, "loss": 0.0, "num_input_tokens_seen": 4638848, "step": 8160 }, { "epoch": 143.24778761061947, "grad_norm": 3.2698553695809096e-06, "learning_rate": 0.2702068661891984, "loss": 0.0, "num_input_tokens_seen": 4641744, "step": 8165 }, { "epoch": 143.3362831858407, "grad_norm": 2.126940472635397e-07, "learning_rate": 0.2701716225790647, "loss": 0.0, "num_input_tokens_seen": 4644528, "step": 8170 }, { "epoch": 143.42477876106196, "grad_norm": 1.668901632001507e-06, "learning_rate": 0.27013636043695655, "loss": 0.0, "num_input_tokens_seen": 4647616, "step": 8175 }, { "epoch": 143.5132743362832, "grad_norm": 1.188125679618679e-06, "learning_rate": 0.27010107976831194, "loss": 0.0, "num_input_tokens_seen": 4650352, "step": 8180 }, { "epoch": 143.60176991150442, "grad_norm": 3.431245431784191e-06, "learning_rate": 0.2700657805785715, "loss": 0.0, "num_input_tokens_seen": 4652992, "step": 8185 }, { "epoch": 143.69026548672565, "grad_norm": 1.2613695616892073e-06, "learning_rate": 0.2700304628731789, "loss": 0.0, "num_input_tokens_seen": 4655984, "step": 8190 }, { "epoch": 143.7787610619469, "grad_norm": 2.2148542484501377e-06, "learning_rate": 0.26999512665758046, "loss": 0.0, "num_input_tokens_seen": 4659088, "step": 8195 }, { "epoch": 143.86725663716814, "grad_norm": 1.7765278244041838e-06, "learning_rate": 0.2699597719372256, "loss": 0.0, "num_input_tokens_seen": 4662192, "step": 8200 }, { "epoch": 143.86725663716814, "eval_loss": 0.43572017550468445, "eval_runtime": 0.94, "eval_samples_per_second": 26.595, "eval_steps_per_second": 13.829, "num_input_tokens_seen": 4662192, "step": 8200 }, { "epoch": 143.95575221238937, "grad_norm": 2.297618266311474e-06, "learning_rate": 0.26992439871756635, "loss": 0.0, "num_input_tokens_seen": 4664672, "step": 8205 }, { "epoch": 144.0353982300885, "grad_norm": 9.57463726081187e-07, "learning_rate": 0.2698890070040578, "loss": 0.0, "num_input_tokens_seen": 4666768, "step": 8210 }, { "epoch": 144.12389380530973, "grad_norm": 1.433054080735019e-06, "learning_rate": 0.2698535968021577, "loss": 0.0, "num_input_tokens_seen": 4669056, "step": 8215 }, { "epoch": 144.21238938053096, "grad_norm": 4.112759597774129e-06, "learning_rate": 0.26981816811732684, "loss": 0.0, "num_input_tokens_seen": 4672064, "step": 8220 }, { "epoch": 144.30088495575222, "grad_norm": 1.440706341782061e-06, "learning_rate": 0.26978272095502875, "loss": 0.0, "num_input_tokens_seen": 4675344, "step": 8225 }, { "epoch": 144.38938053097345, "grad_norm": 1.855520963545132e-06, "learning_rate": 0.26974725532072974, "loss": 0.0, "num_input_tokens_seen": 4678432, "step": 8230 }, { "epoch": 144.47787610619469, "grad_norm": 1.926011236719205e-06, "learning_rate": 0.26971177121989914, "loss": 0.0, "num_input_tokens_seen": 4681344, "step": 8235 }, { "epoch": 144.56637168141592, "grad_norm": 1.1347461850164109e-06, "learning_rate": 0.2696762686580091, "loss": 0.0, "num_input_tokens_seen": 4684192, "step": 8240 }, { "epoch": 144.65486725663717, "grad_norm": 7.20717650892766e-07, "learning_rate": 0.26964074764053436, "loss": 0.0, "num_input_tokens_seen": 4686688, "step": 8245 }, { "epoch": 144.7433628318584, "grad_norm": 2.19362800635281e-06, "learning_rate": 0.2696052081729529, "loss": 0.0, "num_input_tokens_seen": 4689776, "step": 8250 }, { "epoch": 144.83185840707964, "grad_norm": 1.6048933275669697e-06, "learning_rate": 0.2695696502607453, "loss": 0.0, "num_input_tokens_seen": 4692384, "step": 8255 }, { "epoch": 144.9203539823009, "grad_norm": 1.9305489331600256e-06, "learning_rate": 0.26953407390939504, "loss": 0.0, "num_input_tokens_seen": 4695744, "step": 8260 }, { "epoch": 145.0, "grad_norm": 5.580675974670157e-07, "learning_rate": 0.26949847912438835, "loss": 0.0, "num_input_tokens_seen": 4698160, "step": 8265 }, { "epoch": 145.08849557522123, "grad_norm": 2.5248259589716326e-06, "learning_rate": 0.26946286591121454, "loss": 0.0, "num_input_tokens_seen": 4701280, "step": 8270 }, { "epoch": 145.1769911504425, "grad_norm": 6.652600745837844e-07, "learning_rate": 0.2694272342753655, "loss": 0.0, "num_input_tokens_seen": 4703744, "step": 8275 }, { "epoch": 145.26548672566372, "grad_norm": 2.722687781897548e-07, "learning_rate": 0.26939158422233617, "loss": 0.0, "num_input_tokens_seen": 4706736, "step": 8280 }, { "epoch": 145.35398230088495, "grad_norm": 1.662066210883495e-06, "learning_rate": 0.26935591575762413, "loss": 0.0, "num_input_tokens_seen": 4709312, "step": 8285 }, { "epoch": 145.44247787610618, "grad_norm": 7.525932232965715e-07, "learning_rate": 0.26932022888672996, "loss": 0.0, "num_input_tokens_seen": 4712288, "step": 8290 }, { "epoch": 145.53097345132744, "grad_norm": 9.225707913174119e-07, "learning_rate": 0.26928452361515703, "loss": 0.0, "num_input_tokens_seen": 4715216, "step": 8295 }, { "epoch": 145.61946902654867, "grad_norm": 1.267751031264197e-06, "learning_rate": 0.26924879994841155, "loss": 0.0, "num_input_tokens_seen": 4717840, "step": 8300 }, { "epoch": 145.7079646017699, "grad_norm": 7.996579824975925e-07, "learning_rate": 0.2692130578920025, "loss": 0.0, "num_input_tokens_seen": 4720560, "step": 8305 }, { "epoch": 145.79646017699116, "grad_norm": 2.0890181531285634e-06, "learning_rate": 0.26917729745144187, "loss": 0.0, "num_input_tokens_seen": 4723824, "step": 8310 }, { "epoch": 145.8849557522124, "grad_norm": 3.611046395235462e-06, "learning_rate": 0.2691415186322443, "loss": 0.0, "num_input_tokens_seen": 4726704, "step": 8315 }, { "epoch": 145.97345132743362, "grad_norm": 9.63198203862703e-07, "learning_rate": 0.2691057214399273, "loss": 0.0, "num_input_tokens_seen": 4729616, "step": 8320 }, { "epoch": 146.05309734513276, "grad_norm": 1.2794440635843785e-06, "learning_rate": 0.2690699058800113, "loss": 0.0, "num_input_tokens_seen": 4731848, "step": 8325 }, { "epoch": 146.141592920354, "grad_norm": 2.506681937575195e-07, "learning_rate": 0.2690340719580194, "loss": 0.0, "num_input_tokens_seen": 4734840, "step": 8330 }, { "epoch": 146.23008849557522, "grad_norm": 1.680533159742481e-06, "learning_rate": 0.2689982196794778, "loss": 0.0, "num_input_tokens_seen": 4737624, "step": 8335 }, { "epoch": 146.31858407079645, "grad_norm": 4.36949733284564e-07, "learning_rate": 0.2689623490499153, "loss": 0.0, "num_input_tokens_seen": 4740712, "step": 8340 }, { "epoch": 146.4070796460177, "grad_norm": 1.5520868146268185e-06, "learning_rate": 0.2689264600748636, "loss": 0.0, "num_input_tokens_seen": 4743656, "step": 8345 }, { "epoch": 146.49557522123894, "grad_norm": 1.3675189620698802e-06, "learning_rate": 0.26889055275985724, "loss": 0.0, "num_input_tokens_seen": 4746168, "step": 8350 }, { "epoch": 146.58407079646017, "grad_norm": 1.6949893506534863e-06, "learning_rate": 0.2688546271104335, "loss": 0.0, "num_input_tokens_seen": 4748712, "step": 8355 }, { "epoch": 146.67256637168143, "grad_norm": 1.5522243757004617e-06, "learning_rate": 0.26881868313213275, "loss": 0.0, "num_input_tokens_seen": 4751848, "step": 8360 }, { "epoch": 146.76106194690266, "grad_norm": 9.136535368270415e-07, "learning_rate": 0.2687827208304978, "loss": 0.0, "num_input_tokens_seen": 4754888, "step": 8365 }, { "epoch": 146.8495575221239, "grad_norm": 7.618065751557879e-07, "learning_rate": 0.26874674021107464, "loss": 0.0, "num_input_tokens_seen": 4757928, "step": 8370 }, { "epoch": 146.93805309734512, "grad_norm": 8.28911765893281e-07, "learning_rate": 0.2687107412794118, "loss": 0.0, "num_input_tokens_seen": 4760760, "step": 8375 }, { "epoch": 147.01769911504425, "grad_norm": 2.386605501669692e-06, "learning_rate": 0.26867472404106096, "loss": 0.0, "num_input_tokens_seen": 4763120, "step": 8380 }, { "epoch": 147.10619469026548, "grad_norm": 1.2971486285096034e-06, "learning_rate": 0.26863868850157624, "loss": 0.0, "num_input_tokens_seen": 4765792, "step": 8385 }, { "epoch": 147.1946902654867, "grad_norm": 1.7380691588186892e-06, "learning_rate": 0.26860263466651485, "loss": 0.0, "num_input_tokens_seen": 4768768, "step": 8390 }, { "epoch": 147.28318584070797, "grad_norm": 3.438392923271749e-06, "learning_rate": 0.26856656254143674, "loss": 0.0, "num_input_tokens_seen": 4771664, "step": 8395 }, { "epoch": 147.3716814159292, "grad_norm": 1.3516093986254418e-06, "learning_rate": 0.2685304721319047, "loss": 0.0, "num_input_tokens_seen": 4774160, "step": 8400 }, { "epoch": 147.3716814159292, "eval_loss": 0.4341481328010559, "eval_runtime": 0.9103, "eval_samples_per_second": 27.464, "eval_steps_per_second": 14.281, "num_input_tokens_seen": 4774160, "step": 8400 }, { "epoch": 147.46017699115043, "grad_norm": 8.994955464913801e-07, "learning_rate": 0.2684943634434843, "loss": 0.0, "num_input_tokens_seen": 4776896, "step": 8405 }, { "epoch": 147.5486725663717, "grad_norm": 1.2780436691173236e-06, "learning_rate": 0.268458236481744, "loss": 0.0, "num_input_tokens_seen": 4779664, "step": 8410 }, { "epoch": 147.63716814159292, "grad_norm": 1.9733815861400217e-06, "learning_rate": 0.2684220912522549, "loss": 0.0, "num_input_tokens_seen": 4782400, "step": 8415 }, { "epoch": 147.72566371681415, "grad_norm": 2.12465647564386e-06, "learning_rate": 0.2683859277605913, "loss": 0.0, "num_input_tokens_seen": 4785120, "step": 8420 }, { "epoch": 147.81415929203538, "grad_norm": 7.23618370557233e-07, "learning_rate": 0.2683497460123298, "loss": 0.0, "num_input_tokens_seen": 4787952, "step": 8425 }, { "epoch": 147.90265486725664, "grad_norm": 2.522502882129629e-06, "learning_rate": 0.26831354601305013, "loss": 0.0, "num_input_tokens_seen": 4790736, "step": 8430 }, { "epoch": 147.99115044247787, "grad_norm": 6.726561423420208e-07, "learning_rate": 0.26827732776833496, "loss": 0.0, "num_input_tokens_seen": 4794000, "step": 8435 }, { "epoch": 148.07079646017698, "grad_norm": 2.220075202785665e-06, "learning_rate": 0.26824109128376944, "loss": 0.0, "num_input_tokens_seen": 4796384, "step": 8440 }, { "epoch": 148.15929203539824, "grad_norm": 1.4330845488075283e-06, "learning_rate": 0.2682048365649417, "loss": 0.0, "num_input_tokens_seen": 4799536, "step": 8445 }, { "epoch": 148.24778761061947, "grad_norm": 1.7297571730523487e-06, "learning_rate": 0.2681685636174428, "loss": 0.0, "num_input_tokens_seen": 4801936, "step": 8450 }, { "epoch": 148.3362831858407, "grad_norm": 9.141668328993546e-07, "learning_rate": 0.2681322724468663, "loss": 0.0, "num_input_tokens_seen": 4804416, "step": 8455 }, { "epoch": 148.42477876106196, "grad_norm": 5.987529334561259e-07, "learning_rate": 0.2680959630588089, "loss": 0.0, "num_input_tokens_seen": 4807184, "step": 8460 }, { "epoch": 148.5132743362832, "grad_norm": 2.6630442562236567e-07, "learning_rate": 0.26805963545886985, "loss": 0.0, "num_input_tokens_seen": 4809936, "step": 8465 }, { "epoch": 148.60176991150442, "grad_norm": 1.9067272205575136e-06, "learning_rate": 0.26802328965265143, "loss": 0.0, "num_input_tokens_seen": 4812720, "step": 8470 }, { "epoch": 148.69026548672565, "grad_norm": 2.3890640932222595e-06, "learning_rate": 0.26798692564575854, "loss": 0.0, "num_input_tokens_seen": 4815968, "step": 8475 }, { "epoch": 148.7787610619469, "grad_norm": 1.1290983366052387e-06, "learning_rate": 0.26795054344379904, "loss": 0.0, "num_input_tokens_seen": 4818896, "step": 8480 }, { "epoch": 148.86725663716814, "grad_norm": 1.9242877442593453e-06, "learning_rate": 0.2679141430523835, "loss": 0.0, "num_input_tokens_seen": 4821952, "step": 8485 }, { "epoch": 148.95575221238937, "grad_norm": 1.814750589801406e-06, "learning_rate": 0.2678777244771252, "loss": 0.0, "num_input_tokens_seen": 4824848, "step": 8490 }, { "epoch": 149.0353982300885, "grad_norm": 9.21267599096609e-07, "learning_rate": 0.2678412877236405, "loss": 0.0, "num_input_tokens_seen": 4827704, "step": 8495 }, { "epoch": 149.12389380530973, "grad_norm": 1.1250490388192702e-06, "learning_rate": 0.2678048327975484, "loss": 0.0, "num_input_tokens_seen": 4830552, "step": 8500 }, { "epoch": 149.21238938053096, "grad_norm": 2.1502512481674785e-07, "learning_rate": 0.2677683597044706, "loss": 0.0, "num_input_tokens_seen": 4833368, "step": 8505 }, { "epoch": 149.30088495575222, "grad_norm": 6.522372473227733e-07, "learning_rate": 0.2677318684500318, "loss": 0.0, "num_input_tokens_seen": 4836584, "step": 8510 }, { "epoch": 149.38938053097345, "grad_norm": 1.949638999576564e-06, "learning_rate": 0.2676953590398593, "loss": 0.0, "num_input_tokens_seen": 4839592, "step": 8515 }, { "epoch": 149.47787610619469, "grad_norm": 7.812561193532019e-07, "learning_rate": 0.2676588314795834, "loss": 0.0, "num_input_tokens_seen": 4842328, "step": 8520 }, { "epoch": 149.56637168141592, "grad_norm": 1.173032273982244e-06, "learning_rate": 0.26762228577483715, "loss": 0.0, "num_input_tokens_seen": 4844728, "step": 8525 }, { "epoch": 149.65486725663717, "grad_norm": 7.457414312739274e-07, "learning_rate": 0.2675857219312563, "loss": 0.0, "num_input_tokens_seen": 4847288, "step": 8530 }, { "epoch": 149.7433628318584, "grad_norm": 9.66074026109709e-07, "learning_rate": 0.2675491399544794, "loss": 0.0, "num_input_tokens_seen": 4850424, "step": 8535 }, { "epoch": 149.83185840707964, "grad_norm": 1.872904931587982e-06, "learning_rate": 0.2675125398501479, "loss": 0.0, "num_input_tokens_seen": 4853496, "step": 8540 }, { "epoch": 149.9203539823009, "grad_norm": 3.0786708293817355e-07, "learning_rate": 0.26747592162390604, "loss": 0.0, "num_input_tokens_seen": 4856296, "step": 8545 }, { "epoch": 150.0, "grad_norm": 1.2924523389301612e-06, "learning_rate": 0.26743928528140076, "loss": 0.0, "num_input_tokens_seen": 4859016, "step": 8550 }, { "epoch": 150.08849557522123, "grad_norm": 9.03906936855492e-07, "learning_rate": 0.26740263082828186, "loss": 0.0, "num_input_tokens_seen": 4861608, "step": 8555 }, { "epoch": 150.1769911504425, "grad_norm": 2.6549898848315934e-06, "learning_rate": 0.2673659582702019, "loss": 0.0, "num_input_tokens_seen": 4864600, "step": 8560 }, { "epoch": 150.26548672566372, "grad_norm": 1.963453314601793e-06, "learning_rate": 0.2673292676128163, "loss": 0.0, "num_input_tokens_seen": 4867384, "step": 8565 }, { "epoch": 150.35398230088495, "grad_norm": 1.5582936612190679e-06, "learning_rate": 0.2672925588617831, "loss": 0.0, "num_input_tokens_seen": 4870200, "step": 8570 }, { "epoch": 150.44247787610618, "grad_norm": 7.766580552015512e-07, "learning_rate": 0.2672558320227634, "loss": 0.0, "num_input_tokens_seen": 4873048, "step": 8575 }, { "epoch": 150.53097345132744, "grad_norm": 1.7041674027495901e-06, "learning_rate": 0.2672190871014209, "loss": 0.0, "num_input_tokens_seen": 4875720, "step": 8580 }, { "epoch": 150.61946902654867, "grad_norm": 1.4939794255042216e-06, "learning_rate": 0.267182324103422, "loss": 0.0, "num_input_tokens_seen": 4878888, "step": 8585 }, { "epoch": 150.7079646017699, "grad_norm": 1.1828825563497958e-06, "learning_rate": 0.2671455430344362, "loss": 0.0, "num_input_tokens_seen": 4881928, "step": 8590 }, { "epoch": 150.79646017699116, "grad_norm": 1.229747113029589e-06, "learning_rate": 0.2671087439001355, "loss": 0.0, "num_input_tokens_seen": 4884856, "step": 8595 }, { "epoch": 150.8849557522124, "grad_norm": 9.956369240171625e-07, "learning_rate": 0.2670719267061948, "loss": 0.0, "num_input_tokens_seen": 4887640, "step": 8600 }, { "epoch": 150.8849557522124, "eval_loss": 0.4432040750980377, "eval_runtime": 0.9406, "eval_samples_per_second": 26.578, "eval_steps_per_second": 13.821, "num_input_tokens_seen": 4887640, "step": 8600 }, { "epoch": 150.97345132743362, "grad_norm": 1.0886783456953708e-06, "learning_rate": 0.2670350914582918, "loss": 0.0, "num_input_tokens_seen": 4890808, "step": 8605 }, { "epoch": 151.05309734513276, "grad_norm": 1.8027479882221087e-06, "learning_rate": 0.26699823816210694, "loss": 0.0, "num_input_tokens_seen": 4893112, "step": 8610 }, { "epoch": 151.141592920354, "grad_norm": 7.300220090655785e-07, "learning_rate": 0.26696136682332344, "loss": 0.0, "num_input_tokens_seen": 4895576, "step": 8615 }, { "epoch": 151.23008849557522, "grad_norm": 1.020078798319446e-06, "learning_rate": 0.2669244774476274, "loss": 0.0, "num_input_tokens_seen": 4898424, "step": 8620 }, { "epoch": 151.31858407079645, "grad_norm": 4.604262642260437e-07, "learning_rate": 0.2668875700407075, "loss": 0.0, "num_input_tokens_seen": 4901528, "step": 8625 }, { "epoch": 151.4070796460177, "grad_norm": 1.647059434617404e-06, "learning_rate": 0.26685064460825547, "loss": 0.0, "num_input_tokens_seen": 4904328, "step": 8630 }, { "epoch": 151.49557522123894, "grad_norm": 1.9853325738949934e-06, "learning_rate": 0.26681370115596553, "loss": 0.0, "num_input_tokens_seen": 4907400, "step": 8635 }, { "epoch": 151.58407079646017, "grad_norm": 1.21973152999999e-06, "learning_rate": 0.26677673968953497, "loss": 0.0, "num_input_tokens_seen": 4910600, "step": 8640 }, { "epoch": 151.67256637168143, "grad_norm": 2.035937768596341e-06, "learning_rate": 0.2667397602146636, "loss": 0.0, "num_input_tokens_seen": 4914008, "step": 8645 }, { "epoch": 151.76106194690266, "grad_norm": 9.016679314299836e-07, "learning_rate": 0.2667027627370542, "loss": 0.0, "num_input_tokens_seen": 4916760, "step": 8650 }, { "epoch": 151.8495575221239, "grad_norm": 9.068806434697763e-07, "learning_rate": 0.26666574726241216, "loss": 0.0, "num_input_tokens_seen": 4919976, "step": 8655 }, { "epoch": 151.93805309734512, "grad_norm": 1.8864385538108763e-06, "learning_rate": 0.2666287137964458, "loss": 0.0, "num_input_tokens_seen": 4922728, "step": 8660 }, { "epoch": 152.01769911504425, "grad_norm": 1.6715118817955954e-06, "learning_rate": 0.26659166234486614, "loss": 0.0, "num_input_tokens_seen": 4925128, "step": 8665 }, { "epoch": 152.10619469026548, "grad_norm": 2.8944025416421937e-06, "learning_rate": 0.2665545929133869, "loss": 0.0, "num_input_tokens_seen": 4927704, "step": 8670 }, { "epoch": 152.1946902654867, "grad_norm": 8.428706905760919e-07, "learning_rate": 0.2665175055077248, "loss": 0.0, "num_input_tokens_seen": 4930392, "step": 8675 }, { "epoch": 152.28318584070797, "grad_norm": 9.350391110274359e-07, "learning_rate": 0.2664804001335991, "loss": 0.0, "num_input_tokens_seen": 4933624, "step": 8680 }, { "epoch": 152.3716814159292, "grad_norm": 2.3547474370388954e-07, "learning_rate": 0.26644327679673185, "loss": 0.0, "num_input_tokens_seen": 4936360, "step": 8685 }, { "epoch": 152.46017699115043, "grad_norm": 7.122666261238919e-07, "learning_rate": 0.26640613550284803, "loss": 0.0, "num_input_tokens_seen": 4939368, "step": 8690 }, { "epoch": 152.5486725663717, "grad_norm": 3.163095811942185e-07, "learning_rate": 0.26636897625767525, "loss": 0.0, "num_input_tokens_seen": 4942776, "step": 8695 }, { "epoch": 152.63716814159292, "grad_norm": 7.810217539372388e-07, "learning_rate": 0.266331799066944, "loss": 0.0, "num_input_tokens_seen": 4945656, "step": 8700 }, { "epoch": 152.72566371681415, "grad_norm": 1.4472894918071688e-06, "learning_rate": 0.2662946039363874, "loss": 0.0, "num_input_tokens_seen": 4948312, "step": 8705 }, { "epoch": 152.81415929203538, "grad_norm": 1.1650843134702882e-06, "learning_rate": 0.2662573908717414, "loss": 0.0, "num_input_tokens_seen": 4951720, "step": 8710 }, { "epoch": 152.90265486725664, "grad_norm": 6.411382855731063e-07, "learning_rate": 0.2662201598787447, "loss": 0.0, "num_input_tokens_seen": 4954392, "step": 8715 }, { "epoch": 152.99115044247787, "grad_norm": 2.009155195992207e-06, "learning_rate": 0.2661829109631389, "loss": 0.0, "num_input_tokens_seen": 4957192, "step": 8720 }, { "epoch": 153.07079646017698, "grad_norm": 1.2237003375048516e-06, "learning_rate": 0.26614564413066816, "loss": 0.0, "num_input_tokens_seen": 4959344, "step": 8725 }, { "epoch": 153.15929203539824, "grad_norm": 1.6196739807128324e-06, "learning_rate": 0.2661083593870795, "loss": 0.0, "num_input_tokens_seen": 4963120, "step": 8730 }, { "epoch": 153.24778761061947, "grad_norm": 8.343562285517692e-07, "learning_rate": 0.26607105673812276, "loss": 0.0, "num_input_tokens_seen": 4965952, "step": 8735 }, { "epoch": 153.3362831858407, "grad_norm": 2.3750326363369823e-06, "learning_rate": 0.2660337361895504, "loss": 0.0, "num_input_tokens_seen": 4968464, "step": 8740 }, { "epoch": 153.42477876106196, "grad_norm": 1.583334324095631e-06, "learning_rate": 0.26599639774711775, "loss": 0.0, "num_input_tokens_seen": 4971920, "step": 8745 }, { "epoch": 153.5132743362832, "grad_norm": 6.503027520921023e-07, "learning_rate": 0.2659590414165829, "loss": 0.0, "num_input_tokens_seen": 4974480, "step": 8750 }, { "epoch": 153.60176991150442, "grad_norm": 1.3154509588275687e-06, "learning_rate": 0.2659216672037066, "loss": 0.0, "num_input_tokens_seen": 4977408, "step": 8755 }, { "epoch": 153.69026548672565, "grad_norm": 2.032919837802183e-06, "learning_rate": 0.26588427511425244, "loss": 0.0, "num_input_tokens_seen": 4980384, "step": 8760 }, { "epoch": 153.7787610619469, "grad_norm": 3.977224878326524e-07, "learning_rate": 0.26584686515398676, "loss": 0.0, "num_input_tokens_seen": 4983376, "step": 8765 }, { "epoch": 153.86725663716814, "grad_norm": 1.9783938114414923e-06, "learning_rate": 0.2658094373286787, "loss": 0.0, "num_input_tokens_seen": 4986032, "step": 8770 }, { "epoch": 153.95575221238937, "grad_norm": 1.1970090554314083e-06, "learning_rate": 0.2657719916441, "loss": 0.0, "num_input_tokens_seen": 4988576, "step": 8775 }, { "epoch": 154.0353982300885, "grad_norm": 8.171315357685671e-07, "learning_rate": 0.2657345281060253, "loss": 0.0, "num_input_tokens_seen": 4990864, "step": 8780 }, { "epoch": 154.12389380530973, "grad_norm": 1.4886775261402363e-06, "learning_rate": 0.26569704672023203, "loss": 0.0, "num_input_tokens_seen": 4993824, "step": 8785 }, { "epoch": 154.21238938053096, "grad_norm": 1.4394377103599254e-06, "learning_rate": 0.26565954749250015, "loss": 0.0, "num_input_tokens_seen": 4997072, "step": 8790 }, { "epoch": 154.30088495575222, "grad_norm": 2.2829183308203937e-06, "learning_rate": 0.2656220304286126, "loss": 0.0, "num_input_tokens_seen": 4999696, "step": 8795 }, { "epoch": 154.38938053097345, "grad_norm": 1.7466638837504433e-06, "learning_rate": 0.265584495534355, "loss": 0.0, "num_input_tokens_seen": 5002864, "step": 8800 }, { "epoch": 154.38938053097345, "eval_loss": 0.44304025173187256, "eval_runtime": 0.9395, "eval_samples_per_second": 26.61, "eval_steps_per_second": 13.837, "num_input_tokens_seen": 5002864, "step": 8800 }, { "epoch": 154.47787610619469, "grad_norm": 6.001200745231472e-07, "learning_rate": 0.2655469428155156, "loss": 0.0, "num_input_tokens_seen": 5005568, "step": 8805 }, { "epoch": 154.56637168141592, "grad_norm": 8.604246204413357e-07, "learning_rate": 0.2655093722778856, "loss": 0.0, "num_input_tokens_seen": 5008672, "step": 8810 }, { "epoch": 154.65486725663717, "grad_norm": 1.41474146175824e-07, "learning_rate": 0.2654717839272588, "loss": 0.0, "num_input_tokens_seen": 5011440, "step": 8815 }, { "epoch": 154.7433628318584, "grad_norm": 1.5609085721735028e-06, "learning_rate": 0.2654341777694318, "loss": 0.0, "num_input_tokens_seen": 5014576, "step": 8820 }, { "epoch": 154.83185840707964, "grad_norm": 1.1463945384093677e-06, "learning_rate": 0.265396553810204, "loss": 0.0, "num_input_tokens_seen": 5017440, "step": 8825 }, { "epoch": 154.9203539823009, "grad_norm": 1.7370635987390415e-06, "learning_rate": 0.26535891205537737, "loss": 0.0, "num_input_tokens_seen": 5019984, "step": 8830 }, { "epoch": 155.0, "grad_norm": 5.155172402737662e-06, "learning_rate": 0.26532125251075683, "loss": 0.0, "num_input_tokens_seen": 5022264, "step": 8835 }, { "epoch": 155.08849557522123, "grad_norm": 1.9065513470195583e-06, "learning_rate": 0.26528357518214996, "loss": 0.0, "num_input_tokens_seen": 5025208, "step": 8840 }, { "epoch": 155.1769911504425, "grad_norm": 3.7089532725076424e-06, "learning_rate": 0.26524588007536704, "loss": 0.0, "num_input_tokens_seen": 5027896, "step": 8845 }, { "epoch": 155.26548672566372, "grad_norm": 8.65292406615481e-07, "learning_rate": 0.26520816719622115, "loss": 0.0, "num_input_tokens_seen": 5031048, "step": 8850 }, { "epoch": 155.35398230088495, "grad_norm": 2.86147752603938e-07, "learning_rate": 0.2651704365505281, "loss": 0.0, "num_input_tokens_seen": 5033688, "step": 8855 }, { "epoch": 155.44247787610618, "grad_norm": 6.300498966993473e-07, "learning_rate": 0.26513268814410634, "loss": 0.0, "num_input_tokens_seen": 5036840, "step": 8860 }, { "epoch": 155.53097345132744, "grad_norm": 6.79839899930812e-07, "learning_rate": 0.2650949219827773, "loss": 0.0, "num_input_tokens_seen": 5039656, "step": 8865 }, { "epoch": 155.61946902654867, "grad_norm": 1.2234685300427373e-06, "learning_rate": 0.26505713807236486, "loss": 0.0, "num_input_tokens_seen": 5042568, "step": 8870 }, { "epoch": 155.7079646017699, "grad_norm": 1.1138057516291155e-06, "learning_rate": 0.26501933641869585, "loss": 0.0, "num_input_tokens_seen": 5045416, "step": 8875 }, { "epoch": 155.79646017699116, "grad_norm": 1.7852321434475016e-06, "learning_rate": 0.26498151702759976, "loss": 0.0, "num_input_tokens_seen": 5048088, "step": 8880 }, { "epoch": 155.8849557522124, "grad_norm": 4.615691295839497e-07, "learning_rate": 0.2649436799049088, "loss": 0.0, "num_input_tokens_seen": 5050776, "step": 8885 }, { "epoch": 155.97345132743362, "grad_norm": 1.99706028070068e-06, "learning_rate": 0.2649058250564579, "loss": 0.0, "num_input_tokens_seen": 5053928, "step": 8890 }, { "epoch": 156.05309734513276, "grad_norm": 9.188113381242147e-07, "learning_rate": 0.26486795248808476, "loss": 0.0, "num_input_tokens_seen": 5056392, "step": 8895 }, { "epoch": 156.141592920354, "grad_norm": 9.84950247584493e-07, "learning_rate": 0.2648300622056298, "loss": 0.0, "num_input_tokens_seen": 5059352, "step": 8900 }, { "epoch": 156.23008849557522, "grad_norm": 1.0143780855287332e-06, "learning_rate": 0.2647921542149363, "loss": 0.0, "num_input_tokens_seen": 5062344, "step": 8905 }, { "epoch": 156.31858407079645, "grad_norm": 1.947491227838327e-06, "learning_rate": 0.26475422852185, "loss": 0.0, "num_input_tokens_seen": 5065304, "step": 8910 }, { "epoch": 156.4070796460177, "grad_norm": 1.5824990668988903e-06, "learning_rate": 0.2647162851322196, "loss": 0.0, "num_input_tokens_seen": 5067720, "step": 8915 }, { "epoch": 156.49557522123894, "grad_norm": 1.1737329259631224e-06, "learning_rate": 0.2646783240518964, "loss": 0.0, "num_input_tokens_seen": 5071192, "step": 8920 }, { "epoch": 156.58407079646017, "grad_norm": 6.932904739187506e-07, "learning_rate": 0.26464034528673447, "loss": 0.0, "num_input_tokens_seen": 5074184, "step": 8925 }, { "epoch": 156.67256637168143, "grad_norm": 1.7572021988598863e-06, "learning_rate": 0.26460234884259065, "loss": 0.0, "num_input_tokens_seen": 5076616, "step": 8930 }, { "epoch": 156.76106194690266, "grad_norm": 1.3810505379296956e-06, "learning_rate": 0.2645643347253245, "loss": 0.0, "num_input_tokens_seen": 5078904, "step": 8935 }, { "epoch": 156.8495575221239, "grad_norm": 2.4241919618361862e-06, "learning_rate": 0.2645263029407982, "loss": 0.0, "num_input_tokens_seen": 5081912, "step": 8940 }, { "epoch": 156.93805309734512, "grad_norm": 1.4474469480774133e-06, "learning_rate": 0.2644882534948767, "loss": 0.0, "num_input_tokens_seen": 5085192, "step": 8945 }, { "epoch": 157.01769911504425, "grad_norm": 3.1579475034959614e-07, "learning_rate": 0.2644501863934278, "loss": 0.0, "num_input_tokens_seen": 5087352, "step": 8950 }, { "epoch": 157.10619469026548, "grad_norm": 1.1474867278593592e-06, "learning_rate": 0.26441210164232193, "loss": 0.0, "num_input_tokens_seen": 5090616, "step": 8955 }, { "epoch": 157.1946902654867, "grad_norm": 1.6119929568958469e-06, "learning_rate": 0.26437399924743216, "loss": 0.0, "num_input_tokens_seen": 5093528, "step": 8960 }, { "epoch": 157.28318584070797, "grad_norm": 6.755896606591705e-07, "learning_rate": 0.26433587921463436, "loss": 0.0, "num_input_tokens_seen": 5096264, "step": 8965 }, { "epoch": 157.3716814159292, "grad_norm": 1.8368114069744479e-06, "learning_rate": 0.2642977415498072, "loss": 0.0, "num_input_tokens_seen": 5099288, "step": 8970 }, { "epoch": 157.46017699115043, "grad_norm": 9.063800234798691e-07, "learning_rate": 0.26425958625883195, "loss": 0.0, "num_input_tokens_seen": 5102136, "step": 8975 }, { "epoch": 157.5486725663717, "grad_norm": 7.810195370439033e-07, "learning_rate": 0.2642214133475926, "loss": 0.0, "num_input_tokens_seen": 5104952, "step": 8980 }, { "epoch": 157.63716814159292, "grad_norm": 1.3804611853629467e-06, "learning_rate": 0.26418322282197587, "loss": 0.0, "num_input_tokens_seen": 5107672, "step": 8985 }, { "epoch": 157.72566371681415, "grad_norm": 2.571811364759924e-06, "learning_rate": 0.2641450146878714, "loss": 0.0, "num_input_tokens_seen": 5110504, "step": 8990 }, { "epoch": 157.81415929203538, "grad_norm": 1.11361441668123e-06, "learning_rate": 0.26410678895117107, "loss": 0.0, "num_input_tokens_seen": 5113192, "step": 8995 }, { "epoch": 157.90265486725664, "grad_norm": 6.888587904541055e-07, "learning_rate": 0.26406854561777, "loss": 0.0, "num_input_tokens_seen": 5116216, "step": 9000 }, { "epoch": 157.90265486725664, "eval_loss": 0.4386080205440521, "eval_runtime": 0.9434, "eval_samples_per_second": 26.501, "eval_steps_per_second": 13.78, "num_input_tokens_seen": 5116216, "step": 9000 }, { "epoch": 157.99115044247787, "grad_norm": 1.0334141506973538e-06, "learning_rate": 0.26403028469356576, "loss": 0.0, "num_input_tokens_seen": 5119064, "step": 9005 }, { "epoch": 158.07079646017698, "grad_norm": 1.0628543805069057e-06, "learning_rate": 0.2639920061844585, "loss": 0.0, "num_input_tokens_seen": 5121440, "step": 9010 }, { "epoch": 158.15929203539824, "grad_norm": 5.750115406044642e-07, "learning_rate": 0.2639537100963515, "loss": 0.0, "num_input_tokens_seen": 5124080, "step": 9015 }, { "epoch": 158.24778761061947, "grad_norm": 1.7414230342183146e-06, "learning_rate": 0.26391539643515033, "loss": 0.0, "num_input_tokens_seen": 5126608, "step": 9020 }, { "epoch": 158.3362831858407, "grad_norm": 1.7845078446043772e-06, "learning_rate": 0.26387706520676346, "loss": 0.0, "num_input_tokens_seen": 5129872, "step": 9025 }, { "epoch": 158.42477876106196, "grad_norm": 1.4294797665570513e-06, "learning_rate": 0.26383871641710205, "loss": 0.0, "num_input_tokens_seen": 5132784, "step": 9030 }, { "epoch": 158.5132743362832, "grad_norm": 7.858392336856923e-07, "learning_rate": 0.26380035007208, "loss": 0.0, "num_input_tokens_seen": 5135456, "step": 9035 }, { "epoch": 158.60176991150442, "grad_norm": 1.175102397610317e-06, "learning_rate": 0.26376196617761394, "loss": 0.0, "num_input_tokens_seen": 5138720, "step": 9040 }, { "epoch": 158.69026548672565, "grad_norm": 9.693820857137325e-07, "learning_rate": 0.263723564739623, "loss": 0.0, "num_input_tokens_seen": 5141424, "step": 9045 }, { "epoch": 158.7787610619469, "grad_norm": 1.7227825992449652e-06, "learning_rate": 0.2636851457640293, "loss": 0.0, "num_input_tokens_seen": 5144544, "step": 9050 }, { "epoch": 158.86725663716814, "grad_norm": 9.72829866441316e-07, "learning_rate": 0.26364670925675737, "loss": 0.0, "num_input_tokens_seen": 5147264, "step": 9055 }, { "epoch": 158.95575221238937, "grad_norm": 1.9631072518677684e-06, "learning_rate": 0.2636082552237347, "loss": 0.0, "num_input_tokens_seen": 5150592, "step": 9060 }, { "epoch": 159.0353982300885, "grad_norm": 3.9169088950075093e-07, "learning_rate": 0.26356978367089146, "loss": 0.0, "num_input_tokens_seen": 5153320, "step": 9065 }, { "epoch": 159.12389380530973, "grad_norm": 1.3532903722079936e-06, "learning_rate": 0.26353129460416036, "loss": 0.0, "num_input_tokens_seen": 5156296, "step": 9070 }, { "epoch": 159.21238938053096, "grad_norm": 8.96110918802151e-07, "learning_rate": 0.2634927880294769, "loss": 0.0, "num_input_tokens_seen": 5159224, "step": 9075 }, { "epoch": 159.30088495575222, "grad_norm": 5.45693922049395e-07, "learning_rate": 0.26345426395277927, "loss": 0.0, "num_input_tokens_seen": 5162072, "step": 9080 }, { "epoch": 159.38938053097345, "grad_norm": 2.0904196844639955e-06, "learning_rate": 0.2634157223800084, "loss": 0.0, "num_input_tokens_seen": 5164808, "step": 9085 }, { "epoch": 159.47787610619469, "grad_norm": 7.680549174438056e-07, "learning_rate": 0.26337716331710787, "loss": 0.0, "num_input_tokens_seen": 5167720, "step": 9090 }, { "epoch": 159.56637168141592, "grad_norm": 7.592185511384741e-07, "learning_rate": 0.2633385867700239, "loss": 0.0, "num_input_tokens_seen": 5170504, "step": 9095 }, { "epoch": 159.65486725663717, "grad_norm": 2.313399363629287e-06, "learning_rate": 0.2632999927447056, "loss": 0.0, "num_input_tokens_seen": 5173800, "step": 9100 }, { "epoch": 159.7433628318584, "grad_norm": 1.8627988538355567e-06, "learning_rate": 0.2632613812471046, "loss": 0.0, "num_input_tokens_seen": 5176872, "step": 9105 }, { "epoch": 159.83185840707964, "grad_norm": 3.197208684468933e-07, "learning_rate": 0.2632227522831753, "loss": 0.0, "num_input_tokens_seen": 5179384, "step": 9110 }, { "epoch": 159.9203539823009, "grad_norm": 1.413410927852965e-06, "learning_rate": 0.26318410585887475, "loss": 0.0, "num_input_tokens_seen": 5181848, "step": 9115 }, { "epoch": 160.0, "grad_norm": 1.0365952221036423e-05, "learning_rate": 0.2631454419801627, "loss": 0.0, "num_input_tokens_seen": 5184128, "step": 9120 }, { "epoch": 160.08849557522123, "grad_norm": 1.0116607427335111e-06, "learning_rate": 0.2631067606530016, "loss": 0.0, "num_input_tokens_seen": 5187152, "step": 9125 }, { "epoch": 160.1769911504425, "grad_norm": 4.734545768769749e-07, "learning_rate": 0.2630680618833567, "loss": 0.0, "num_input_tokens_seen": 5189888, "step": 9130 }, { "epoch": 160.26548672566372, "grad_norm": 5.837300136590784e-07, "learning_rate": 0.26302934567719566, "loss": 0.0, "num_input_tokens_seen": 5192880, "step": 9135 }, { "epoch": 160.35398230088495, "grad_norm": 2.1910798864155367e-07, "learning_rate": 0.2629906120404892, "loss": 0.0, "num_input_tokens_seen": 5195648, "step": 9140 }, { "epoch": 160.44247787610618, "grad_norm": 1.3909684639656916e-06, "learning_rate": 0.26295186097921036, "loss": 0.0, "num_input_tokens_seen": 5198352, "step": 9145 }, { "epoch": 160.53097345132744, "grad_norm": 6.163019179439289e-07, "learning_rate": 0.2629130924993351, "loss": 0.0, "num_input_tokens_seen": 5200992, "step": 9150 }, { "epoch": 160.61946902654867, "grad_norm": 8.044802939366491e-07, "learning_rate": 0.2628743066068421, "loss": 0.0, "num_input_tokens_seen": 5204016, "step": 9155 }, { "epoch": 160.7079646017699, "grad_norm": 7.983333034644602e-07, "learning_rate": 0.26283550330771244, "loss": 0.0, "num_input_tokens_seen": 5207072, "step": 9160 }, { "epoch": 160.79646017699116, "grad_norm": 2.2133244215183367e-07, "learning_rate": 0.2627966826079303, "loss": 0.0, "num_input_tokens_seen": 5210256, "step": 9165 }, { "epoch": 160.8849557522124, "grad_norm": 7.287785024345794e-07, "learning_rate": 0.26275784451348216, "loss": 0.0, "num_input_tokens_seen": 5213088, "step": 9170 }, { "epoch": 160.97345132743362, "grad_norm": 3.061009863358777e-07, "learning_rate": 0.2627189890303574, "loss": 0.0, "num_input_tokens_seen": 5215904, "step": 9175 }, { "epoch": 161.05309734513276, "grad_norm": 7.594845783387427e-07, "learning_rate": 0.262680116164548, "loss": 0.0, "num_input_tokens_seen": 5218216, "step": 9180 }, { "epoch": 161.141592920354, "grad_norm": 1.5278326372936135e-06, "learning_rate": 0.2626412259220487, "loss": 0.0, "num_input_tokens_seen": 5220856, "step": 9185 }, { "epoch": 161.23008849557522, "grad_norm": 2.279214641021099e-06, "learning_rate": 0.2626023183088568, "loss": 0.0, "num_input_tokens_seen": 5223368, "step": 9190 }, { "epoch": 161.31858407079645, "grad_norm": 2.4150747321982635e-06, "learning_rate": 0.26256339333097234, "loss": 0.0, "num_input_tokens_seen": 5226040, "step": 9195 }, { "epoch": 161.4070796460177, "grad_norm": 5.783414280813304e-07, "learning_rate": 0.2625244509943981, "loss": 0.0, "num_input_tokens_seen": 5229496, "step": 9200 }, { "epoch": 161.4070796460177, "eval_loss": 0.44973084330558777, "eval_runtime": 0.9304, "eval_samples_per_second": 26.87, "eval_steps_per_second": 13.972, "num_input_tokens_seen": 5229496, "step": 9200 }, { "epoch": 161.49557522123894, "grad_norm": 1.740652692205913e-06, "learning_rate": 0.2624854913051395, "loss": 0.0, "num_input_tokens_seen": 5232456, "step": 9205 }, { "epoch": 161.58407079646017, "grad_norm": 1.299213977290492e-06, "learning_rate": 0.26244651426920446, "loss": 0.0, "num_input_tokens_seen": 5235352, "step": 9210 }, { "epoch": 161.67256637168143, "grad_norm": 1.174952899418713e-06, "learning_rate": 0.26240751989260386, "loss": 0.0, "num_input_tokens_seen": 5238584, "step": 9215 }, { "epoch": 161.76106194690266, "grad_norm": 1.5093143019839772e-06, "learning_rate": 0.2623685081813511, "loss": 0.0, "num_input_tokens_seen": 5241432, "step": 9220 }, { "epoch": 161.8495575221239, "grad_norm": 1.150908246927429e-06, "learning_rate": 0.2623294791414623, "loss": 0.0, "num_input_tokens_seen": 5243896, "step": 9225 }, { "epoch": 161.93805309734512, "grad_norm": 1.1311979051242815e-06, "learning_rate": 0.26229043277895614, "loss": 0.0, "num_input_tokens_seen": 5246424, "step": 9230 }, { "epoch": 162.01769911504425, "grad_norm": 1.8442494820192223e-06, "learning_rate": 0.2622513690998542, "loss": 0.0, "num_input_tokens_seen": 5248776, "step": 9235 }, { "epoch": 162.10619469026548, "grad_norm": 1.0949501074719592e-06, "learning_rate": 0.26221228811018044, "loss": 0.0, "num_input_tokens_seen": 5251608, "step": 9240 }, { "epoch": 162.1946902654867, "grad_norm": 3.0772244485888223e-07, "learning_rate": 0.2621731898159617, "loss": 0.0, "num_input_tokens_seen": 5254232, "step": 9245 }, { "epoch": 162.28318584070797, "grad_norm": 1.7691534139885334e-06, "learning_rate": 0.26213407422322743, "loss": 0.0, "num_input_tokens_seen": 5257112, "step": 9250 }, { "epoch": 162.3716814159292, "grad_norm": 8.651633152112481e-07, "learning_rate": 0.2620949413380098, "loss": 0.0, "num_input_tokens_seen": 5259864, "step": 9255 }, { "epoch": 162.46017699115043, "grad_norm": 5.826173605782969e-07, "learning_rate": 0.26205579116634353, "loss": 0.0, "num_input_tokens_seen": 5263208, "step": 9260 }, { "epoch": 162.5486725663717, "grad_norm": 1.442405960005999e-06, "learning_rate": 0.26201662371426604, "loss": 0.0, "num_input_tokens_seen": 5266040, "step": 9265 }, { "epoch": 162.63716814159292, "grad_norm": 1.3707209518543095e-06, "learning_rate": 0.2619774389878175, "loss": 0.0, "num_input_tokens_seen": 5268712, "step": 9270 }, { "epoch": 162.72566371681415, "grad_norm": 2.5841563910944387e-07, "learning_rate": 0.2619382369930407, "loss": 0.0, "num_input_tokens_seen": 5271544, "step": 9275 }, { "epoch": 162.81415929203538, "grad_norm": 9.527062161396316e-07, "learning_rate": 0.261899017735981, "loss": 0.0, "num_input_tokens_seen": 5274616, "step": 9280 }, { "epoch": 162.90265486725664, "grad_norm": 7.39605695798673e-07, "learning_rate": 0.2618597812226866, "loss": 0.0, "num_input_tokens_seen": 5277656, "step": 9285 }, { "epoch": 162.99115044247787, "grad_norm": 3.2651999504196283e-07, "learning_rate": 0.2618205274592082, "loss": 0.0, "num_input_tokens_seen": 5280744, "step": 9290 }, { "epoch": 163.07079646017698, "grad_norm": 1.5065702427818906e-06, "learning_rate": 0.2617812564515992, "loss": 0.0, "num_input_tokens_seen": 5283384, "step": 9295 }, { "epoch": 163.15929203539824, "grad_norm": 2.5946926598408027e-06, "learning_rate": 0.2617419682059158, "loss": 0.0, "num_input_tokens_seen": 5286088, "step": 9300 }, { "epoch": 163.24778761061947, "grad_norm": 1.169985807791818e-06, "learning_rate": 0.26170266272821663, "loss": 0.0, "num_input_tokens_seen": 5289016, "step": 9305 }, { "epoch": 163.3362831858407, "grad_norm": 5.555161806114484e-07, "learning_rate": 0.26166334002456315, "loss": 0.0, "num_input_tokens_seen": 5291928, "step": 9310 }, { "epoch": 163.42477876106196, "grad_norm": 1.4536717571900226e-06, "learning_rate": 0.2616240001010194, "loss": 0.0, "num_input_tokens_seen": 5294776, "step": 9315 }, { "epoch": 163.5132743362832, "grad_norm": 1.8220508763988619e-06, "learning_rate": 0.26158464296365197, "loss": 0.0, "num_input_tokens_seen": 5297224, "step": 9320 }, { "epoch": 163.60176991150442, "grad_norm": 1.6166519571925164e-06, "learning_rate": 0.2615452686185304, "loss": 0.0, "num_input_tokens_seen": 5300056, "step": 9325 }, { "epoch": 163.69026548672565, "grad_norm": 4.776167088493821e-07, "learning_rate": 0.26150587707172673, "loss": 0.0, "num_input_tokens_seen": 5303032, "step": 9330 }, { "epoch": 163.7787610619469, "grad_norm": 2.1290040876920102e-06, "learning_rate": 0.2614664683293154, "loss": 0.0, "num_input_tokens_seen": 5306136, "step": 9335 }, { "epoch": 163.86725663716814, "grad_norm": 1.2265254554222338e-06, "learning_rate": 0.26142704239737397, "loss": 0.0, "num_input_tokens_seen": 5309416, "step": 9340 }, { "epoch": 163.95575221238937, "grad_norm": 1.3173472552807652e-06, "learning_rate": 0.26138759928198235, "loss": 0.0, "num_input_tokens_seen": 5312040, "step": 9345 }, { "epoch": 164.0353982300885, "grad_norm": 1.1018003078788752e-06, "learning_rate": 0.26134813898922304, "loss": 0.0, "num_input_tokens_seen": 5314136, "step": 9350 }, { "epoch": 164.12389380530973, "grad_norm": 3.201470519798022e-07, "learning_rate": 0.26130866152518145, "loss": 0.0, "num_input_tokens_seen": 5317080, "step": 9355 }, { "epoch": 164.21238938053096, "grad_norm": 1.6069276398411603e-06, "learning_rate": 0.2612691668959455, "loss": 0.0, "num_input_tokens_seen": 5320120, "step": 9360 }, { "epoch": 164.30088495575222, "grad_norm": 1.5328344034060137e-06, "learning_rate": 0.2612296551076057, "loss": 0.0, "num_input_tokens_seen": 5323048, "step": 9365 }, { "epoch": 164.38938053097345, "grad_norm": 8.522584380443732e-07, "learning_rate": 0.26119012616625525, "loss": 0.0, "num_input_tokens_seen": 5325864, "step": 9370 }, { "epoch": 164.47787610619469, "grad_norm": 5.240049176791217e-07, "learning_rate": 0.26115058007799, "loss": 0.0, "num_input_tokens_seen": 5328824, "step": 9375 }, { "epoch": 164.56637168141592, "grad_norm": 1.8962005015055183e-06, "learning_rate": 0.26111101684890864, "loss": 0.0, "num_input_tokens_seen": 5331480, "step": 9380 }, { "epoch": 164.65486725663717, "grad_norm": 2.594476427475456e-06, "learning_rate": 0.26107143648511205, "loss": 0.0, "num_input_tokens_seen": 5334504, "step": 9385 }, { "epoch": 164.7433628318584, "grad_norm": 7.52058042507997e-07, "learning_rate": 0.2610318389927042, "loss": 0.0, "num_input_tokens_seen": 5337752, "step": 9390 }, { "epoch": 164.83185840707964, "grad_norm": 9.514160410617478e-07, "learning_rate": 0.26099222437779146, "loss": 0.0, "num_input_tokens_seen": 5340600, "step": 9395 }, { "epoch": 164.9203539823009, "grad_norm": 1.0113332109540352e-06, "learning_rate": 0.26095259264648285, "loss": 0.0, "num_input_tokens_seen": 5343528, "step": 9400 }, { "epoch": 164.9203539823009, "eval_loss": 0.4416623115539551, "eval_runtime": 0.9501, "eval_samples_per_second": 26.314, "eval_steps_per_second": 13.683, "num_input_tokens_seen": 5343528, "step": 9400 }, { "epoch": 165.0, "grad_norm": 1.1688463246173342e-06, "learning_rate": 0.2609129438048902, "loss": 0.0, "num_input_tokens_seen": 5345792, "step": 9405 }, { "epoch": 165.08849557522123, "grad_norm": 2.050467173830839e-07, "learning_rate": 0.2608732778591278, "loss": 0.0, "num_input_tokens_seen": 5349024, "step": 9410 }, { "epoch": 165.1769911504425, "grad_norm": 6.003189128023223e-07, "learning_rate": 0.2608335948153126, "loss": 0.0, "num_input_tokens_seen": 5351648, "step": 9415 }, { "epoch": 165.26548672566372, "grad_norm": 6.018974545440869e-07, "learning_rate": 0.26079389467956426, "loss": 0.0, "num_input_tokens_seen": 5354752, "step": 9420 }, { "epoch": 165.35398230088495, "grad_norm": 2.4366556772292824e-06, "learning_rate": 0.26075417745800505, "loss": 0.0, "num_input_tokens_seen": 5357888, "step": 9425 }, { "epoch": 165.44247787610618, "grad_norm": 1.507936531197629e-06, "learning_rate": 0.26071444315675985, "loss": 0.0, "num_input_tokens_seen": 5360512, "step": 9430 }, { "epoch": 165.53097345132744, "grad_norm": 7.135993200790836e-07, "learning_rate": 0.2606746917819562, "loss": 0.0, "num_input_tokens_seen": 5363216, "step": 9435 }, { "epoch": 165.61946902654867, "grad_norm": 1.9940539459639695e-06, "learning_rate": 0.2606349233397242, "loss": 0.0, "num_input_tokens_seen": 5366000, "step": 9440 }, { "epoch": 165.7079646017699, "grad_norm": 5.69863175314822e-07, "learning_rate": 0.26059513783619676, "loss": 0.0, "num_input_tokens_seen": 5368528, "step": 9445 }, { "epoch": 165.79646017699116, "grad_norm": 1.7007643009492313e-06, "learning_rate": 0.26055533527750924, "loss": 0.0, "num_input_tokens_seen": 5371136, "step": 9450 }, { "epoch": 165.8849557522124, "grad_norm": 1.6074332052085083e-06, "learning_rate": 0.26051551566979964, "loss": 0.0, "num_input_tokens_seen": 5374288, "step": 9455 }, { "epoch": 165.97345132743362, "grad_norm": 5.490290959642152e-07, "learning_rate": 0.26047567901920876, "loss": 0.0, "num_input_tokens_seen": 5377472, "step": 9460 }, { "epoch": 166.05309734513276, "grad_norm": 5.187025635677855e-07, "learning_rate": 0.2604358253318798, "loss": 0.0, "num_input_tokens_seen": 5379560, "step": 9465 }, { "epoch": 166.141592920354, "grad_norm": 6.283880225055327e-07, "learning_rate": 0.26039595461395876, "loss": 0.0, "num_input_tokens_seen": 5382488, "step": 9470 }, { "epoch": 166.23008849557522, "grad_norm": 7.323864110730938e-07, "learning_rate": 0.26035606687159424, "loss": 0.0, "num_input_tokens_seen": 5386232, "step": 9475 }, { "epoch": 166.31858407079645, "grad_norm": 1.0003367378885741e-06, "learning_rate": 0.26031616211093733, "loss": 0.0, "num_input_tokens_seen": 5389112, "step": 9480 }, { "epoch": 166.4070796460177, "grad_norm": 1.2375026017252821e-06, "learning_rate": 0.26027624033814195, "loss": 0.0, "num_input_tokens_seen": 5391752, "step": 9485 }, { "epoch": 166.49557522123894, "grad_norm": 1.7135698726633564e-06, "learning_rate": 0.2602363015593645, "loss": 0.0, "num_input_tokens_seen": 5394056, "step": 9490 }, { "epoch": 166.58407079646017, "grad_norm": 3.3928154152818024e-07, "learning_rate": 0.26019634578076395, "loss": 0.0, "num_input_tokens_seen": 5396792, "step": 9495 }, { "epoch": 166.67256637168143, "grad_norm": 1.1863111240018043e-06, "learning_rate": 0.26015637300850214, "loss": 0.0, "num_input_tokens_seen": 5399960, "step": 9500 }, { "epoch": 166.76106194690266, "grad_norm": 4.0022237612902245e-07, "learning_rate": 0.26011638324874325, "loss": 0.0, "num_input_tokens_seen": 5403048, "step": 9505 }, { "epoch": 166.8495575221239, "grad_norm": 1.009422476272448e-06, "learning_rate": 0.2600763765076543, "loss": 0.0, "num_input_tokens_seen": 5406024, "step": 9510 }, { "epoch": 166.93805309734512, "grad_norm": 1.1389446399334702e-06, "learning_rate": 0.2600363527914048, "loss": 0.0, "num_input_tokens_seen": 5408744, "step": 9515 }, { "epoch": 167.01769911504425, "grad_norm": 1.4645391956946696e-06, "learning_rate": 0.25999631210616686, "loss": 0.0, "num_input_tokens_seen": 5410896, "step": 9520 }, { "epoch": 167.10619469026548, "grad_norm": 1.3896102473154315e-06, "learning_rate": 0.25995625445811527, "loss": 0.0, "num_input_tokens_seen": 5413488, "step": 9525 }, { "epoch": 167.1946902654867, "grad_norm": 1.2741423915940686e-06, "learning_rate": 0.2599161798534275, "loss": 0.0, "num_input_tokens_seen": 5416224, "step": 9530 }, { "epoch": 167.28318584070797, "grad_norm": 1.3902574664825806e-06, "learning_rate": 0.25987608829828346, "loss": 0.0, "num_input_tokens_seen": 5418960, "step": 9535 }, { "epoch": 167.3716814159292, "grad_norm": 8.274520268969354e-07, "learning_rate": 0.25983597979886586, "loss": 0.0, "num_input_tokens_seen": 5421488, "step": 9540 }, { "epoch": 167.46017699115043, "grad_norm": 1.2534086408777512e-06, "learning_rate": 0.2597958543613599, "loss": 0.0, "num_input_tokens_seen": 5424528, "step": 9545 }, { "epoch": 167.5486725663717, "grad_norm": 7.321469297494332e-07, "learning_rate": 0.25975571199195335, "loss": 0.0, "num_input_tokens_seen": 5427264, "step": 9550 }, { "epoch": 167.63716814159292, "grad_norm": 5.270725296213641e-07, "learning_rate": 0.25971555269683677, "loss": 0.0, "num_input_tokens_seen": 5430304, "step": 9555 }, { "epoch": 167.72566371681415, "grad_norm": 7.7315627322605e-07, "learning_rate": 0.25967537648220324, "loss": 0.0, "num_input_tokens_seen": 5433264, "step": 9560 }, { "epoch": 167.81415929203538, "grad_norm": 7.044670269351627e-07, "learning_rate": 0.2596351833542483, "loss": 0.0, "num_input_tokens_seen": 5436464, "step": 9565 }, { "epoch": 167.90265486725664, "grad_norm": 5.958944484518724e-07, "learning_rate": 0.25959497331917036, "loss": 0.0, "num_input_tokens_seen": 5439456, "step": 9570 }, { "epoch": 167.99115044247787, "grad_norm": 9.419439379598771e-07, "learning_rate": 0.2595547463831703, "loss": 0.0, "num_input_tokens_seen": 5442544, "step": 9575 }, { "epoch": 168.07079646017698, "grad_norm": 6.483644483523676e-07, "learning_rate": 0.25951450255245156, "loss": 0.0, "num_input_tokens_seen": 5444544, "step": 9580 }, { "epoch": 168.15929203539824, "grad_norm": 5.369107611841173e-07, "learning_rate": 0.2594742418332203, "loss": 0.0, "num_input_tokens_seen": 5447296, "step": 9585 }, { "epoch": 168.24778761061947, "grad_norm": 6.7873276066166e-07, "learning_rate": 0.2594339642316852, "loss": 0.0, "num_input_tokens_seen": 5449888, "step": 9590 }, { "epoch": 168.3362831858407, "grad_norm": 1.827297296586039e-06, "learning_rate": 0.2593936697540576, "loss": 0.0, "num_input_tokens_seen": 5452416, "step": 9595 }, { "epoch": 168.42477876106196, "grad_norm": 1.3565155541073182e-06, "learning_rate": 0.2593533584065514, "loss": 0.0, "num_input_tokens_seen": 5455520, "step": 9600 }, { "epoch": 168.42477876106196, "eval_loss": 0.44280382990837097, "eval_runtime": 0.9379, "eval_samples_per_second": 26.655, "eval_steps_per_second": 13.861, "num_input_tokens_seen": 5455520, "step": 9600 }, { "epoch": 168.5132743362832, "grad_norm": 8.965440656538703e-07, "learning_rate": 0.2593130301953831, "loss": 0.0, "num_input_tokens_seen": 5457984, "step": 9605 }, { "epoch": 168.60176991150442, "grad_norm": 1.221045749844052e-06, "learning_rate": 0.2592726851267718, "loss": 0.0, "num_input_tokens_seen": 5460992, "step": 9610 }, { "epoch": 168.69026548672565, "grad_norm": 3.6759220733983966e-07, "learning_rate": 0.2592323232069393, "loss": 0.0, "num_input_tokens_seen": 5464464, "step": 9615 }, { "epoch": 168.7787610619469, "grad_norm": 1.3608339486381738e-06, "learning_rate": 0.25919194444210986, "loss": 0.0, "num_input_tokens_seen": 5467280, "step": 9620 }, { "epoch": 168.86725663716814, "grad_norm": 5.421613877842901e-07, "learning_rate": 0.2591515488385103, "loss": 0.0, "num_input_tokens_seen": 5470064, "step": 9625 }, { "epoch": 168.95575221238937, "grad_norm": 9.259190392185701e-07, "learning_rate": 0.2591111364023704, "loss": 0.0, "num_input_tokens_seen": 5473632, "step": 9630 }, { "epoch": 169.0353982300885, "grad_norm": 5.97123460011062e-07, "learning_rate": 0.259070707139922, "loss": 0.0, "num_input_tokens_seen": 5476040, "step": 9635 }, { "epoch": 169.12389380530973, "grad_norm": 2.086968464709571e-07, "learning_rate": 0.25903026105739985, "loss": 0.0, "num_input_tokens_seen": 5479496, "step": 9640 }, { "epoch": 169.21238938053096, "grad_norm": 4.2667556954256725e-07, "learning_rate": 0.2589897981610413, "loss": 0.0, "num_input_tokens_seen": 5482344, "step": 9645 }, { "epoch": 169.30088495575222, "grad_norm": 6.6142064270025e-07, "learning_rate": 0.2589493184570863, "loss": 0.0, "num_input_tokens_seen": 5485160, "step": 9650 }, { "epoch": 169.38938053097345, "grad_norm": 8.537323878954339e-07, "learning_rate": 0.25890882195177717, "loss": 0.0, "num_input_tokens_seen": 5488504, "step": 9655 }, { "epoch": 169.47787610619469, "grad_norm": 4.222642076001648e-07, "learning_rate": 0.25886830865135907, "loss": 0.0, "num_input_tokens_seen": 5491368, "step": 9660 }, { "epoch": 169.56637168141592, "grad_norm": 4.42291366198333e-07, "learning_rate": 0.25882777856207967, "loss": 0.0, "num_input_tokens_seen": 5494088, "step": 9665 }, { "epoch": 169.65486725663717, "grad_norm": 6.075916303416307e-07, "learning_rate": 0.2587872316901892, "loss": 0.0, "num_input_tokens_seen": 5496600, "step": 9670 }, { "epoch": 169.7433628318584, "grad_norm": 8.180441000149585e-07, "learning_rate": 0.25874666804194046, "loss": 0.0, "num_input_tokens_seen": 5499688, "step": 9675 }, { "epoch": 169.83185840707964, "grad_norm": 1.3343995988179813e-06, "learning_rate": 0.258706087623589, "loss": 0.0, "num_input_tokens_seen": 5502408, "step": 9680 }, { "epoch": 169.9203539823009, "grad_norm": 2.598568471512408e-07, "learning_rate": 0.25866549044139264, "loss": 0.0, "num_input_tokens_seen": 5504648, "step": 9685 }, { "epoch": 170.0, "grad_norm": 1.9071860890562675e-07, "learning_rate": 0.25862487650161214, "loss": 0.0, "num_input_tokens_seen": 5507520, "step": 9690 }, { "epoch": 170.08849557522123, "grad_norm": 5.159791385267454e-07, "learning_rate": 0.2585842458105106, "loss": 0.0, "num_input_tokens_seen": 5510368, "step": 9695 }, { "epoch": 170.1769911504425, "grad_norm": 1.3275857781991363e-06, "learning_rate": 0.2585435983743538, "loss": 0.0, "num_input_tokens_seen": 5513312, "step": 9700 }, { "epoch": 170.26548672566372, "grad_norm": 8.732565106583934e-07, "learning_rate": 0.2585029341994101, "loss": 0.0, "num_input_tokens_seen": 5516144, "step": 9705 }, { "epoch": 170.35398230088495, "grad_norm": 5.096934501125361e-07, "learning_rate": 0.2584622532919504, "loss": 0.0, "num_input_tokens_seen": 5519584, "step": 9710 }, { "epoch": 170.44247787610618, "grad_norm": 3.5953905808128184e-07, "learning_rate": 0.2584215556582482, "loss": 0.0, "num_input_tokens_seen": 5522768, "step": 9715 }, { "epoch": 170.53097345132744, "grad_norm": 1.2492635050875833e-06, "learning_rate": 0.25838084130457967, "loss": 0.0, "num_input_tokens_seen": 5525888, "step": 9720 }, { "epoch": 170.61946902654867, "grad_norm": 9.357937074128131e-07, "learning_rate": 0.2583401102372234, "loss": 0.0, "num_input_tokens_seen": 5528976, "step": 9725 }, { "epoch": 170.7079646017699, "grad_norm": 6.92861135576095e-07, "learning_rate": 0.2582993624624606, "loss": 0.0, "num_input_tokens_seen": 5531600, "step": 9730 }, { "epoch": 170.79646017699116, "grad_norm": 1.4500195675282157e-06, "learning_rate": 0.25825859798657513, "loss": 0.0, "num_input_tokens_seen": 5534096, "step": 9735 }, { "epoch": 170.8849557522124, "grad_norm": 8.545809464521881e-07, "learning_rate": 0.25821781681585343, "loss": 0.0, "num_input_tokens_seen": 5537040, "step": 9740 }, { "epoch": 170.97345132743362, "grad_norm": 1.7535298866278026e-06, "learning_rate": 0.2581770189565844, "loss": 0.0, "num_input_tokens_seen": 5539408, "step": 9745 }, { "epoch": 171.05309734513276, "grad_norm": 1.0923268973783706e-06, "learning_rate": 0.25813620441505963, "loss": 0.0, "num_input_tokens_seen": 5541880, "step": 9750 }, { "epoch": 171.141592920354, "grad_norm": 5.707390755560482e-07, "learning_rate": 0.2580953731975732, "loss": 0.0, "num_input_tokens_seen": 5544680, "step": 9755 }, { "epoch": 171.23008849557522, "grad_norm": 4.5402796899907116e-07, "learning_rate": 0.2580545253104218, "loss": 0.0, "num_input_tokens_seen": 5547368, "step": 9760 }, { "epoch": 171.31858407079645, "grad_norm": 1.0678242006179062e-06, "learning_rate": 0.2580136607599047, "loss": 0.0, "num_input_tokens_seen": 5550840, "step": 9765 }, { "epoch": 171.4070796460177, "grad_norm": 6.227437552297488e-07, "learning_rate": 0.2579727795523238, "loss": 0.0, "num_input_tokens_seen": 5553928, "step": 9770 }, { "epoch": 171.49557522123894, "grad_norm": 5.642093015012506e-07, "learning_rate": 0.25793188169398334, "loss": 0.0, "num_input_tokens_seen": 5557528, "step": 9775 }, { "epoch": 171.58407079646017, "grad_norm": 8.428304454355384e-07, "learning_rate": 0.25789096719119037, "loss": 0.0, "num_input_tokens_seen": 5560232, "step": 9780 }, { "epoch": 171.67256637168143, "grad_norm": 1.4495470850306447e-06, "learning_rate": 0.2578500360502544, "loss": 0.0, "num_input_tokens_seen": 5563320, "step": 9785 }, { "epoch": 171.76106194690266, "grad_norm": 3.746903018964076e-07, "learning_rate": 0.2578090882774876, "loss": 0.0, "num_input_tokens_seen": 5566072, "step": 9790 }, { "epoch": 171.8495575221239, "grad_norm": 2.463800115037884e-07, "learning_rate": 0.25776812387920456, "loss": 0.0, "num_input_tokens_seen": 5568776, "step": 9795 }, { "epoch": 171.93805309734512, "grad_norm": 5.784648351436772e-07, "learning_rate": 0.2577271428617225, "loss": 0.0, "num_input_tokens_seen": 5571144, "step": 9800 }, { "epoch": 171.93805309734512, "eval_loss": 0.46453505754470825, "eval_runtime": 0.9478, "eval_samples_per_second": 26.378, "eval_steps_per_second": 13.716, "num_input_tokens_seen": 5571144, "step": 9800 }, { "epoch": 172.01769911504425, "grad_norm": 3.9363231962852296e-07, "learning_rate": 0.25768614523136124, "loss": 0.0, "num_input_tokens_seen": 5573536, "step": 9805 }, { "epoch": 172.10619469026548, "grad_norm": 4.1034496689462685e-07, "learning_rate": 0.25764513099444314, "loss": 0.0, "num_input_tokens_seen": 5577200, "step": 9810 }, { "epoch": 172.1946902654867, "grad_norm": 4.3027793594774266e-07, "learning_rate": 0.25760410015729307, "loss": 0.0, "num_input_tokens_seen": 5579760, "step": 9815 }, { "epoch": 172.28318584070797, "grad_norm": 1.1344837957949494e-06, "learning_rate": 0.2575630527262385, "loss": 0.0, "num_input_tokens_seen": 5582272, "step": 9820 }, { "epoch": 172.3716814159292, "grad_norm": 1.294854087063868e-06, "learning_rate": 0.25752198870760945, "loss": 0.0, "num_input_tokens_seen": 5584960, "step": 9825 }, { "epoch": 172.46017699115043, "grad_norm": 6.653154400737549e-07, "learning_rate": 0.2574809081077386, "loss": 0.0, "num_input_tokens_seen": 5587952, "step": 9830 }, { "epoch": 172.5486725663717, "grad_norm": 3.500682623780449e-07, "learning_rate": 0.257439810932961, "loss": 0.0, "num_input_tokens_seen": 5590816, "step": 9835 }, { "epoch": 172.63716814159292, "grad_norm": 4.631496040019556e-07, "learning_rate": 0.2573986971896144, "loss": 0.0, "num_input_tokens_seen": 5593600, "step": 9840 }, { "epoch": 172.72566371681415, "grad_norm": 7.678991096327081e-07, "learning_rate": 0.257357566884039, "loss": 0.0, "num_input_tokens_seen": 5596416, "step": 9845 }, { "epoch": 172.81415929203538, "grad_norm": 5.57580676741054e-07, "learning_rate": 0.25731642002257765, "loss": 0.0, "num_input_tokens_seen": 5599328, "step": 9850 }, { "epoch": 172.90265486725664, "grad_norm": 5.268072413855407e-07, "learning_rate": 0.25727525661157574, "loss": 0.0, "num_input_tokens_seen": 5602256, "step": 9855 }, { "epoch": 172.99115044247787, "grad_norm": 9.959489943867084e-07, "learning_rate": 0.2572340766573811, "loss": 0.0, "num_input_tokens_seen": 5605344, "step": 9860 }, { "epoch": 173.07079646017698, "grad_norm": 5.619958756142296e-07, "learning_rate": 0.25719288016634434, "loss": 0.0, "num_input_tokens_seen": 5608000, "step": 9865 }, { "epoch": 173.15929203539824, "grad_norm": 6.375897214638826e-07, "learning_rate": 0.25715166714481835, "loss": 0.0, "num_input_tokens_seen": 5610912, "step": 9870 }, { "epoch": 173.24778761061947, "grad_norm": 8.576348022870661e-07, "learning_rate": 0.2571104375991587, "loss": 0.0, "num_input_tokens_seen": 5613472, "step": 9875 }, { "epoch": 173.3362831858407, "grad_norm": 3.8613978858847986e-07, "learning_rate": 0.2570691915357236, "loss": 0.0, "num_input_tokens_seen": 5616848, "step": 9880 }, { "epoch": 173.42477876106196, "grad_norm": 1.133868977376551e-06, "learning_rate": 0.2570279289608736, "loss": 0.0, "num_input_tokens_seen": 5619792, "step": 9885 }, { "epoch": 173.5132743362832, "grad_norm": 1.475868316447304e-06, "learning_rate": 0.256986649880972, "loss": 0.0, "num_input_tokens_seen": 5622464, "step": 9890 }, { "epoch": 173.60176991150442, "grad_norm": 6.175059752422385e-07, "learning_rate": 0.25694535430238447, "loss": 0.0, "num_input_tokens_seen": 5625072, "step": 9895 }, { "epoch": 173.69026548672565, "grad_norm": 6.138965318314149e-07, "learning_rate": 0.25690404223147933, "loss": 0.0, "num_input_tokens_seen": 5627920, "step": 9900 }, { "epoch": 173.7787610619469, "grad_norm": 6.891769999128883e-07, "learning_rate": 0.2568627136746275, "loss": 0.0, "num_input_tokens_seen": 5631152, "step": 9905 }, { "epoch": 173.86725663716814, "grad_norm": 7.149875500545022e-07, "learning_rate": 0.25682136863820226, "loss": 0.0, "num_input_tokens_seen": 5633968, "step": 9910 }, { "epoch": 173.95575221238937, "grad_norm": 7.502736707465374e-07, "learning_rate": 0.25678000712857957, "loss": 0.0, "num_input_tokens_seen": 5636880, "step": 9915 }, { "epoch": 174.0353982300885, "grad_norm": 6.72746693908266e-07, "learning_rate": 0.2567386291521379, "loss": 0.0, "num_input_tokens_seen": 5638976, "step": 9920 }, { "epoch": 174.12389380530973, "grad_norm": 3.2097449320644955e-07, "learning_rate": 0.2566972347152583, "loss": 0.0, "num_input_tokens_seen": 5641904, "step": 9925 }, { "epoch": 174.21238938053096, "grad_norm": 4.426210864494351e-07, "learning_rate": 0.2566558238243242, "loss": 0.0, "num_input_tokens_seen": 5644704, "step": 9930 }, { "epoch": 174.30088495575222, "grad_norm": 1.5719176360562415e-07, "learning_rate": 0.25661439648572176, "loss": 0.0, "num_input_tokens_seen": 5647760, "step": 9935 }, { "epoch": 174.38938053097345, "grad_norm": 6.890215900057228e-07, "learning_rate": 0.25657295270583963, "loss": 0.0, "num_input_tokens_seen": 5650896, "step": 9940 }, { "epoch": 174.47787610619469, "grad_norm": 8.40659254208731e-07, "learning_rate": 0.25653149249106894, "loss": 0.0, "num_input_tokens_seen": 5653824, "step": 9945 }, { "epoch": 174.56637168141592, "grad_norm": 1.111713004320336e-06, "learning_rate": 0.25649001584780323, "loss": 0.0, "num_input_tokens_seen": 5656496, "step": 9950 }, { "epoch": 174.65486725663717, "grad_norm": 1.3682382586921449e-06, "learning_rate": 0.2564485227824389, "loss": 0.0, "num_input_tokens_seen": 5659728, "step": 9955 }, { "epoch": 174.7433628318584, "grad_norm": 6.153516665108327e-07, "learning_rate": 0.25640701330137466, "loss": 0.0, "num_input_tokens_seen": 5662688, "step": 9960 }, { "epoch": 174.83185840707964, "grad_norm": 4.057859825934429e-07, "learning_rate": 0.2563654874110117, "loss": 0.0, "num_input_tokens_seen": 5665344, "step": 9965 }, { "epoch": 174.9203539823009, "grad_norm": 6.175017688292428e-07, "learning_rate": 0.256323945117754, "loss": 0.0, "num_input_tokens_seen": 5668368, "step": 9970 }, { "epoch": 175.0, "grad_norm": 1.9634464933915297e-06, "learning_rate": 0.2562823864280078, "loss": 0.0, "num_input_tokens_seen": 5670672, "step": 9975 }, { "epoch": 175.08849557522123, "grad_norm": 2.096967932629923e-07, "learning_rate": 0.25624081134818194, "loss": 0.0, "num_input_tokens_seen": 5673520, "step": 9980 }, { "epoch": 175.1769911504425, "grad_norm": 8.006753660083632e-07, "learning_rate": 0.2561992198846879, "loss": 0.0, "num_input_tokens_seen": 5676432, "step": 9985 }, { "epoch": 175.26548672566372, "grad_norm": 5.378751097850909e-07, "learning_rate": 0.25615761204393955, "loss": 0.0, "num_input_tokens_seen": 5679040, "step": 9990 }, { "epoch": 175.35398230088495, "grad_norm": 1.0169601409870666e-06, "learning_rate": 0.2561159878323534, "loss": 0.0, "num_input_tokens_seen": 5682224, "step": 9995 }, { "epoch": 175.44247787610618, "grad_norm": 6.552631930389907e-07, "learning_rate": 0.2560743472563483, "loss": 0.0, "num_input_tokens_seen": 5684752, "step": 10000 }, { "epoch": 175.44247787610618, "eval_loss": 0.46158474683761597, "eval_runtime": 0.9334, "eval_samples_per_second": 26.783, "eval_steps_per_second": 13.927, "num_input_tokens_seen": 5684752, "step": 10000 }, { "epoch": 175.53097345132744, "grad_norm": 1.7069710338546429e-06, "learning_rate": 0.25603269032234593, "loss": 0.0, "num_input_tokens_seen": 5687408, "step": 10005 }, { "epoch": 175.61946902654867, "grad_norm": 6.926238143023511e-07, "learning_rate": 0.2559910170367702, "loss": 0.0, "num_input_tokens_seen": 5690576, "step": 10010 }, { "epoch": 175.7079646017699, "grad_norm": 1.1514555353642209e-07, "learning_rate": 0.2559493274060477, "loss": 0.0, "num_input_tokens_seen": 5693824, "step": 10015 }, { "epoch": 175.79646017699116, "grad_norm": 4.4904780338583805e-07, "learning_rate": 0.2559076214366074, "loss": 0.0, "num_input_tokens_seen": 5696208, "step": 10020 }, { "epoch": 175.8849557522124, "grad_norm": 1.7226157922323182e-07, "learning_rate": 0.25586589913488106, "loss": 0.0, "num_input_tokens_seen": 5699024, "step": 10025 }, { "epoch": 175.97345132743362, "grad_norm": 9.8963880645897e-07, "learning_rate": 0.2558241605073026, "loss": 0.0, "num_input_tokens_seen": 5702400, "step": 10030 }, { "epoch": 176.05309734513276, "grad_norm": 3.4393423220535624e-07, "learning_rate": 0.25578240556030873, "loss": 0.0, "num_input_tokens_seen": 5705040, "step": 10035 }, { "epoch": 176.141592920354, "grad_norm": 7.927820320219325e-07, "learning_rate": 0.2557406343003386, "loss": 0.0, "num_input_tokens_seen": 5707824, "step": 10040 }, { "epoch": 176.23008849557522, "grad_norm": 7.080615205268259e-07, "learning_rate": 0.25569884673383375, "loss": 0.0, "num_input_tokens_seen": 5710928, "step": 10045 }, { "epoch": 176.31858407079645, "grad_norm": 8.161716209542647e-07, "learning_rate": 0.25565704286723856, "loss": 0.0, "num_input_tokens_seen": 5713984, "step": 10050 }, { "epoch": 176.4070796460177, "grad_norm": 8.949215271059074e-07, "learning_rate": 0.25561522270699955, "loss": 0.0, "num_input_tokens_seen": 5716688, "step": 10055 }, { "epoch": 176.49557522123894, "grad_norm": 1.1462577731435886e-06, "learning_rate": 0.25557338625956594, "loss": 0.0, "num_input_tokens_seen": 5719504, "step": 10060 }, { "epoch": 176.58407079646017, "grad_norm": 7.322515784835559e-07, "learning_rate": 0.25553153353138947, "loss": 0.0, "num_input_tokens_seen": 5723040, "step": 10065 }, { "epoch": 176.67256637168143, "grad_norm": 1.1235404144827044e-06, "learning_rate": 0.2554896645289243, "loss": 0.0, "num_input_tokens_seen": 5726336, "step": 10070 }, { "epoch": 176.76106194690266, "grad_norm": 5.732352974519017e-07, "learning_rate": 0.2554477792586272, "loss": 0.0, "num_input_tokens_seen": 5728752, "step": 10075 }, { "epoch": 176.8495575221239, "grad_norm": 1.2697099691649782e-06, "learning_rate": 0.25540587772695744, "loss": 0.0, "num_input_tokens_seen": 5731760, "step": 10080 }, { "epoch": 176.93805309734512, "grad_norm": 1.6324030411851709e-06, "learning_rate": 0.2553639599403767, "loss": 0.0, "num_input_tokens_seen": 5734512, "step": 10085 }, { "epoch": 177.01769911504425, "grad_norm": 4.800507440450019e-07, "learning_rate": 0.2553220259053493, "loss": 0.0, "num_input_tokens_seen": 5736544, "step": 10090 }, { "epoch": 177.10619469026548, "grad_norm": 1.3244165302239708e-06, "learning_rate": 0.2552800756283419, "loss": 0.0, "num_input_tokens_seen": 5739264, "step": 10095 }, { "epoch": 177.1946902654867, "grad_norm": 1.3003855201532133e-06, "learning_rate": 0.25523810911582373, "loss": 0.0, "num_input_tokens_seen": 5741616, "step": 10100 }, { "epoch": 177.28318584070797, "grad_norm": 1.2432545304363884e-07, "learning_rate": 0.25519612637426675, "loss": 0.0, "num_input_tokens_seen": 5744560, "step": 10105 }, { "epoch": 177.3716814159292, "grad_norm": 2.2067681015869312e-07, "learning_rate": 0.25515412741014504, "loss": 0.0, "num_input_tokens_seen": 5747488, "step": 10110 }, { "epoch": 177.46017699115043, "grad_norm": 5.107368110657262e-07, "learning_rate": 0.2551121122299355, "loss": 0.0, "num_input_tokens_seen": 5750416, "step": 10115 }, { "epoch": 177.5486725663717, "grad_norm": 9.266151437259396e-07, "learning_rate": 0.2550700808401173, "loss": 0.0, "num_input_tokens_seen": 5753440, "step": 10120 }, { "epoch": 177.63716814159292, "grad_norm": 6.896650575072272e-07, "learning_rate": 0.2550280332471722, "loss": 0.0, "num_input_tokens_seen": 5756464, "step": 10125 }, { "epoch": 177.72566371681415, "grad_norm": 7.001908102211019e-07, "learning_rate": 0.2549859694575845, "loss": 0.0, "num_input_tokens_seen": 5759648, "step": 10130 }, { "epoch": 177.81415929203538, "grad_norm": 7.504602308472386e-07, "learning_rate": 0.254943889477841, "loss": 0.0, "num_input_tokens_seen": 5762496, "step": 10135 }, { "epoch": 177.90265486725664, "grad_norm": 1.500200141890673e-07, "learning_rate": 0.25490179331443097, "loss": 0.0, "num_input_tokens_seen": 5765104, "step": 10140 }, { "epoch": 177.99115044247787, "grad_norm": 1.0296648724761326e-06, "learning_rate": 0.25485968097384615, "loss": 0.0, "num_input_tokens_seen": 5768032, "step": 10145 }, { "epoch": 178.07079646017698, "grad_norm": 5.137089260642824e-07, "learning_rate": 0.25481755246258075, "loss": 0.0, "num_input_tokens_seen": 5770576, "step": 10150 }, { "epoch": 178.15929203539824, "grad_norm": 1.202865405502962e-06, "learning_rate": 0.2547754077871315, "loss": 0.0, "num_input_tokens_seen": 5773232, "step": 10155 }, { "epoch": 178.24778761061947, "grad_norm": 1.043835737846166e-07, "learning_rate": 0.25473324695399774, "loss": 0.0, "num_input_tokens_seen": 5776272, "step": 10160 }, { "epoch": 178.3362831858407, "grad_norm": 4.298837836813618e-07, "learning_rate": 0.25469106996968105, "loss": 0.0, "num_input_tokens_seen": 5779056, "step": 10165 }, { "epoch": 178.42477876106196, "grad_norm": 1.0468470463820267e-06, "learning_rate": 0.2546488768406858, "loss": 0.0, "num_input_tokens_seen": 5782176, "step": 10170 }, { "epoch": 178.5132743362832, "grad_norm": 4.6743105031055165e-07, "learning_rate": 0.25460666757351863, "loss": 0.0, "num_input_tokens_seen": 5784656, "step": 10175 }, { "epoch": 178.60176991150442, "grad_norm": 6.569048878191097e-07, "learning_rate": 0.25456444217468877, "loss": 0.0, "num_input_tokens_seen": 5787488, "step": 10180 }, { "epoch": 178.69026548672565, "grad_norm": 5.913069571761298e-07, "learning_rate": 0.25452220065070785, "loss": 0.0, "num_input_tokens_seen": 5790464, "step": 10185 }, { "epoch": 178.7787610619469, "grad_norm": 3.1503839181823423e-07, "learning_rate": 0.2544799430080901, "loss": 0.0, "num_input_tokens_seen": 5793568, "step": 10190 }, { "epoch": 178.86725663716814, "grad_norm": 7.537661304013454e-07, "learning_rate": 0.2544376692533522, "loss": 0.0, "num_input_tokens_seen": 5796240, "step": 10195 }, { "epoch": 178.95575221238937, "grad_norm": 1.484410461216612e-07, "learning_rate": 0.2543953793930132, "loss": 0.0, "num_input_tokens_seen": 5799088, "step": 10200 }, { "epoch": 178.95575221238937, "eval_loss": 0.4657347798347473, "eval_runtime": 0.943, "eval_samples_per_second": 26.511, "eval_steps_per_second": 13.786, "num_input_tokens_seen": 5799088, "step": 10200 }, { "epoch": 179.0353982300885, "grad_norm": 6.438323225665954e-07, "learning_rate": 0.2543530734335948, "loss": 0.0, "num_input_tokens_seen": 5801504, "step": 10205 }, { "epoch": 179.12389380530973, "grad_norm": 3.868573799081787e-07, "learning_rate": 0.2543107513816211, "loss": 0.0, "num_input_tokens_seen": 5804720, "step": 10210 }, { "epoch": 179.21238938053096, "grad_norm": 1.1935974271182204e-06, "learning_rate": 0.25426841324361865, "loss": 0.0, "num_input_tokens_seen": 5807760, "step": 10215 }, { "epoch": 179.30088495575222, "grad_norm": 2.2406334210245404e-07, "learning_rate": 0.2542260590261166, "loss": 0.0, "num_input_tokens_seen": 5810400, "step": 10220 }, { "epoch": 179.38938053097345, "grad_norm": 4.5803599846294674e-07, "learning_rate": 0.2541836887356465, "loss": 0.0, "num_input_tokens_seen": 5813040, "step": 10225 }, { "epoch": 179.47787610619469, "grad_norm": 8.532252877557767e-07, "learning_rate": 0.2541413023787423, "loss": 0.0, "num_input_tokens_seen": 5815760, "step": 10230 }, { "epoch": 179.56637168141592, "grad_norm": 3.699542787671817e-07, "learning_rate": 0.2540988999619405, "loss": 0.0, "num_input_tokens_seen": 5818896, "step": 10235 }, { "epoch": 179.65486725663717, "grad_norm": 5.080584060124238e-07, "learning_rate": 0.25405648149178023, "loss": 0.0, "num_input_tokens_seen": 5821712, "step": 10240 }, { "epoch": 179.7433628318584, "grad_norm": 1.140921654041449e-06, "learning_rate": 0.2540140469748028, "loss": 0.0, "num_input_tokens_seen": 5824880, "step": 10245 }, { "epoch": 179.83185840707964, "grad_norm": 9.539871825836599e-07, "learning_rate": 0.25397159641755224, "loss": 0.0, "num_input_tokens_seen": 5827120, "step": 10250 }, { "epoch": 179.9203539823009, "grad_norm": 5.078371145827987e-07, "learning_rate": 0.2539291298265749, "loss": 0.0, "num_input_tokens_seen": 5829744, "step": 10255 }, { "epoch": 180.0, "grad_norm": 3.6598241877072724e-06, "learning_rate": 0.2538866472084197, "loss": 0.0, "num_input_tokens_seen": 5832608, "step": 10260 }, { "epoch": 180.08849557522123, "grad_norm": 3.426382875204581e-07, "learning_rate": 0.25384414856963794, "loss": 0.0, "num_input_tokens_seen": 5835344, "step": 10265 }, { "epoch": 180.1769911504425, "grad_norm": 2.6591936830300256e-07, "learning_rate": 0.25380163391678356, "loss": 0.0, "num_input_tokens_seen": 5838000, "step": 10270 }, { "epoch": 180.26548672566372, "grad_norm": 8.167453984242456e-07, "learning_rate": 0.2537591032564127, "loss": 0.0, "num_input_tokens_seen": 5841456, "step": 10275 }, { "epoch": 180.35398230088495, "grad_norm": 7.224388127724524e-07, "learning_rate": 0.25371655659508424, "loss": 0.0, "num_input_tokens_seen": 5844736, "step": 10280 }, { "epoch": 180.44247787610618, "grad_norm": 3.1182091220216535e-07, "learning_rate": 0.25367399393935935, "loss": 0.0, "num_input_tokens_seen": 5847664, "step": 10285 }, { "epoch": 180.53097345132744, "grad_norm": 5.951781076873885e-07, "learning_rate": 0.25363141529580174, "loss": 0.0, "num_input_tokens_seen": 5850640, "step": 10290 }, { "epoch": 180.61946902654867, "grad_norm": 6.274538577599742e-07, "learning_rate": 0.2535888206709776, "loss": 0.0, "num_input_tokens_seen": 5853648, "step": 10295 }, { "epoch": 180.7079646017699, "grad_norm": 3.1341474482360354e-07, "learning_rate": 0.2535462100714555, "loss": 0.0, "num_input_tokens_seen": 5856320, "step": 10300 }, { "epoch": 180.79646017699116, "grad_norm": 1.0557417908785283e-06, "learning_rate": 0.2535035835038066, "loss": 0.0, "num_input_tokens_seen": 5859488, "step": 10305 }, { "epoch": 180.8849557522124, "grad_norm": 5.690814077752293e-07, "learning_rate": 0.2534609409746044, "loss": 0.0, "num_input_tokens_seen": 5862000, "step": 10310 }, { "epoch": 180.97345132743362, "grad_norm": 1.494498746978934e-07, "learning_rate": 0.253418282490425, "loss": 0.0, "num_input_tokens_seen": 5864784, "step": 10315 }, { "epoch": 181.05309734513276, "grad_norm": 8.032748723962868e-07, "learning_rate": 0.2533756080578467, "loss": 0.0, "num_input_tokens_seen": 5867224, "step": 10320 }, { "epoch": 181.141592920354, "grad_norm": 3.888093544901494e-07, "learning_rate": 0.25333291768345056, "loss": 0.0, "num_input_tokens_seen": 5870120, "step": 10325 }, { "epoch": 181.23008849557522, "grad_norm": 3.0545811569027137e-07, "learning_rate": 0.25329021137381996, "loss": 0.0, "num_input_tokens_seen": 5873176, "step": 10330 }, { "epoch": 181.31858407079645, "grad_norm": 5.146350190443627e-07, "learning_rate": 0.25324748913554074, "loss": 0.0, "num_input_tokens_seen": 5875640, "step": 10335 }, { "epoch": 181.4070796460177, "grad_norm": 3.896085729593324e-07, "learning_rate": 0.2532047509752013, "loss": 0.0, "num_input_tokens_seen": 5878280, "step": 10340 }, { "epoch": 181.49557522123894, "grad_norm": 5.762699402112048e-07, "learning_rate": 0.25316199689939217, "loss": 0.0, "num_input_tokens_seen": 5881016, "step": 10345 }, { "epoch": 181.58407079646017, "grad_norm": 3.710241571752704e-07, "learning_rate": 0.2531192269147068, "loss": 0.0, "num_input_tokens_seen": 5884168, "step": 10350 }, { "epoch": 181.67256637168143, "grad_norm": 2.6285010790161323e-07, "learning_rate": 0.2530764410277407, "loss": 0.0, "num_input_tokens_seen": 5886712, "step": 10355 }, { "epoch": 181.76106194690266, "grad_norm": 3.969768727074552e-07, "learning_rate": 0.25303363924509203, "loss": 0.0, "num_input_tokens_seen": 5889544, "step": 10360 }, { "epoch": 181.8495575221239, "grad_norm": 7.360694098679232e-07, "learning_rate": 0.25299082157336145, "loss": 0.0, "num_input_tokens_seen": 5892392, "step": 10365 }, { "epoch": 181.93805309734512, "grad_norm": 1.5729705182820908e-07, "learning_rate": 0.2529479880191519, "loss": 0.0, "num_input_tokens_seen": 5895608, "step": 10370 }, { "epoch": 182.01769911504425, "grad_norm": 3.913989132797724e-07, "learning_rate": 0.2529051385890689, "loss": 0.0, "num_input_tokens_seen": 5898128, "step": 10375 }, { "epoch": 182.10619469026548, "grad_norm": 3.455609487446054e-07, "learning_rate": 0.2528622732897203, "loss": 0.0, "num_input_tokens_seen": 5900848, "step": 10380 }, { "epoch": 182.1946902654867, "grad_norm": 4.55203974070173e-07, "learning_rate": 0.25281939212771654, "loss": 0.0, "num_input_tokens_seen": 5903488, "step": 10385 }, { "epoch": 182.28318584070797, "grad_norm": 2.2679968481043034e-07, "learning_rate": 0.2527764951096704, "loss": 0.0, "num_input_tokens_seen": 5906112, "step": 10390 }, { "epoch": 182.3716814159292, "grad_norm": 1.0591805903459317e-06, "learning_rate": 0.2527335822421971, "loss": 0.0, "num_input_tokens_seen": 5909056, "step": 10395 }, { "epoch": 182.46017699115043, "grad_norm": 5.844327688464546e-07, "learning_rate": 0.25269065353191444, "loss": 0.0, "num_input_tokens_seen": 5911888, "step": 10400 }, { "epoch": 182.46017699115043, "eval_loss": 0.4707694351673126, "eval_runtime": 0.942, "eval_samples_per_second": 26.54, "eval_steps_per_second": 13.801, "num_input_tokens_seen": 5911888, "step": 10400 }, { "epoch": 182.5486725663717, "grad_norm": 4.958702106705459e-07, "learning_rate": 0.2526477089854425, "loss": 0.0, "num_input_tokens_seen": 5914992, "step": 10405 }, { "epoch": 182.63716814159292, "grad_norm": 1.3877202036383096e-06, "learning_rate": 0.25260474860940385, "loss": 0.0, "num_input_tokens_seen": 5918080, "step": 10410 }, { "epoch": 182.72566371681415, "grad_norm": 9.782097549759783e-07, "learning_rate": 0.2525617724104236, "loss": 0.0, "num_input_tokens_seen": 5920704, "step": 10415 }, { "epoch": 182.81415929203538, "grad_norm": 4.6713273604837013e-07, "learning_rate": 0.25251878039512915, "loss": 0.0, "num_input_tokens_seen": 5923680, "step": 10420 }, { "epoch": 182.90265486725664, "grad_norm": 1.989144209346705e-07, "learning_rate": 0.25247577257015047, "loss": 0.0, "num_input_tokens_seen": 5926752, "step": 10425 }, { "epoch": 182.99115044247787, "grad_norm": 6.125210916252399e-07, "learning_rate": 0.2524327489421198, "loss": 0.0, "num_input_tokens_seen": 5929520, "step": 10430 }, { "epoch": 183.07079646017698, "grad_norm": 7.330514222303464e-07, "learning_rate": 0.25238970951767203, "loss": 0.0, "num_input_tokens_seen": 5932064, "step": 10435 }, { "epoch": 183.15929203539824, "grad_norm": 6.831447763033793e-07, "learning_rate": 0.25234665430344433, "loss": 0.0, "num_input_tokens_seen": 5935312, "step": 10440 }, { "epoch": 183.24778761061947, "grad_norm": 6.221484909474384e-07, "learning_rate": 0.2523035833060764, "loss": 0.0, "num_input_tokens_seen": 5938096, "step": 10445 }, { "epoch": 183.3362831858407, "grad_norm": 4.300223110931256e-07, "learning_rate": 0.2522604965322103, "loss": 0.0, "num_input_tokens_seen": 5941376, "step": 10450 }, { "epoch": 183.42477876106196, "grad_norm": 4.1622288904363813e-07, "learning_rate": 0.25221739398849047, "loss": 0.0, "num_input_tokens_seen": 5944368, "step": 10455 }, { "epoch": 183.5132743362832, "grad_norm": 5.485777592184604e-07, "learning_rate": 0.252174275681564, "loss": 0.0, "num_input_tokens_seen": 5946816, "step": 10460 }, { "epoch": 183.60176991150442, "grad_norm": 8.706378480383137e-07, "learning_rate": 0.2521311416180802, "loss": 0.0, "num_input_tokens_seen": 5949456, "step": 10465 }, { "epoch": 183.69026548672565, "grad_norm": 7.577494898214354e-07, "learning_rate": 0.25208799180469094, "loss": 0.0, "num_input_tokens_seen": 5952544, "step": 10470 }, { "epoch": 183.7787610619469, "grad_norm": 9.773308420335525e-07, "learning_rate": 0.2520448262480504, "loss": 0.0, "num_input_tokens_seen": 5955488, "step": 10475 }, { "epoch": 183.86725663716814, "grad_norm": 4.0792343725115643e-07, "learning_rate": 0.25200164495481525, "loss": 0.0, "num_input_tokens_seen": 5958112, "step": 10480 }, { "epoch": 183.95575221238937, "grad_norm": 8.518703680238104e-07, "learning_rate": 0.25195844793164474, "loss": 0.0, "num_input_tokens_seen": 5960688, "step": 10485 }, { "epoch": 184.0353982300885, "grad_norm": 3.7727272683696356e-07, "learning_rate": 0.2519152351852001, "loss": 0.0, "num_input_tokens_seen": 5963120, "step": 10490 }, { "epoch": 184.12389380530973, "grad_norm": 7.621957820447278e-07, "learning_rate": 0.25187200672214555, "loss": 0.0, "num_input_tokens_seen": 5965872, "step": 10495 }, { "epoch": 184.21238938053096, "grad_norm": 3.7039231415292306e-07, "learning_rate": 0.2518287625491473, "loss": 0.0, "num_input_tokens_seen": 5968512, "step": 10500 }, { "epoch": 184.30088495575222, "grad_norm": 6.50580147976143e-07, "learning_rate": 0.25178550267287425, "loss": 0.0, "num_input_tokens_seen": 5972032, "step": 10505 }, { "epoch": 184.38938053097345, "grad_norm": 3.7202289604465477e-07, "learning_rate": 0.2517422270999976, "loss": 0.0, "num_input_tokens_seen": 5975248, "step": 10510 }, { "epoch": 184.47787610619469, "grad_norm": 7.721676524852228e-07, "learning_rate": 0.2516989358371909, "loss": 0.0, "num_input_tokens_seen": 5977968, "step": 10515 }, { "epoch": 184.56637168141592, "grad_norm": 1.0812516393343685e-06, "learning_rate": 0.25165562889113025, "loss": 0.0, "num_input_tokens_seen": 5980656, "step": 10520 }, { "epoch": 184.65486725663717, "grad_norm": 3.520802636103326e-07, "learning_rate": 0.2516123062684942, "loss": 0.0, "num_input_tokens_seen": 5983440, "step": 10525 }, { "epoch": 184.7433628318584, "grad_norm": 6.168631330183416e-07, "learning_rate": 0.25156896797596356, "loss": 0.0, "num_input_tokens_seen": 5986064, "step": 10530 }, { "epoch": 184.83185840707964, "grad_norm": 8.278086056634493e-07, "learning_rate": 0.2515256140202216, "loss": 0.0, "num_input_tokens_seen": 5988640, "step": 10535 }, { "epoch": 184.9203539823009, "grad_norm": 9.347334071208024e-07, "learning_rate": 0.25148224440795425, "loss": 0.0, "num_input_tokens_seen": 5991168, "step": 10540 }, { "epoch": 185.0, "grad_norm": 4.119514869671548e-06, "learning_rate": 0.2514388591458494, "loss": 0.0, "num_input_tokens_seen": 5993944, "step": 10545 }, { "epoch": 185.08849557522123, "grad_norm": 9.773356168807368e-07, "learning_rate": 0.2513954582405977, "loss": 0.0, "num_input_tokens_seen": 5996888, "step": 10550 }, { "epoch": 185.1769911504425, "grad_norm": 8.215463935812295e-07, "learning_rate": 0.2513520416988922, "loss": 0.0, "num_input_tokens_seen": 6000120, "step": 10555 }, { "epoch": 185.26548672566372, "grad_norm": 5.190559022594243e-07, "learning_rate": 0.2513086095274281, "loss": 0.0, "num_input_tokens_seen": 6003112, "step": 10560 }, { "epoch": 185.35398230088495, "grad_norm": 2.912716183800512e-07, "learning_rate": 0.25126516173290336, "loss": 0.0, "num_input_tokens_seen": 6005704, "step": 10565 }, { "epoch": 185.44247787610618, "grad_norm": 4.3505318103598256e-07, "learning_rate": 0.2512216983220181, "loss": 0.0, "num_input_tokens_seen": 6008600, "step": 10570 }, { "epoch": 185.53097345132744, "grad_norm": 3.2258657256534207e-07, "learning_rate": 0.25117821930147494, "loss": 0.0, "num_input_tokens_seen": 6011480, "step": 10575 }, { "epoch": 185.61946902654867, "grad_norm": 6.446313705055218e-07, "learning_rate": 0.2511347246779788, "loss": 0.0, "num_input_tokens_seen": 6014408, "step": 10580 }, { "epoch": 185.7079646017699, "grad_norm": 5.882522486899688e-07, "learning_rate": 0.25109121445823723, "loss": 0.0, "num_input_tokens_seen": 6017176, "step": 10585 }, { "epoch": 185.79646017699116, "grad_norm": 4.255907697370276e-07, "learning_rate": 0.25104768864896004, "loss": 0.0, "num_input_tokens_seen": 6019752, "step": 10590 }, { "epoch": 185.8849557522124, "grad_norm": 2.937666749858181e-07, "learning_rate": 0.2510041472568594, "loss": 0.0, "num_input_tokens_seen": 6022696, "step": 10595 }, { "epoch": 185.97345132743362, "grad_norm": 3.41511196211286e-07, "learning_rate": 0.25096059028864987, "loss": 0.0, "num_input_tokens_seen": 6025544, "step": 10600 }, { "epoch": 185.97345132743362, "eval_loss": 0.47358882427215576, "eval_runtime": 0.9403, "eval_samples_per_second": 26.588, "eval_steps_per_second": 13.826, "num_input_tokens_seen": 6025544, "step": 10600 }, { "epoch": 186.05309734513276, "grad_norm": 7.141936180232733e-07, "learning_rate": 0.25091701775104863, "loss": 0.0, "num_input_tokens_seen": 6027568, "step": 10605 }, { "epoch": 186.141592920354, "grad_norm": 9.398655151926505e-07, "learning_rate": 0.250873429650775, "loss": 0.0, "num_input_tokens_seen": 6030480, "step": 10610 }, { "epoch": 186.23008849557522, "grad_norm": 5.027687279834936e-07, "learning_rate": 0.25082982599455095, "loss": 0.0, "num_input_tokens_seen": 6033408, "step": 10615 }, { "epoch": 186.31858407079645, "grad_norm": 3.0971995101936045e-07, "learning_rate": 0.2507862067891006, "loss": 0.0, "num_input_tokens_seen": 6036192, "step": 10620 }, { "epoch": 186.4070796460177, "grad_norm": 4.382305860417546e-07, "learning_rate": 0.25074257204115064, "loss": 0.0, "num_input_tokens_seen": 6038880, "step": 10625 }, { "epoch": 186.49557522123894, "grad_norm": 4.243970295192412e-07, "learning_rate": 0.25069892175742997, "loss": 0.0, "num_input_tokens_seen": 6041856, "step": 10630 }, { "epoch": 186.58407079646017, "grad_norm": 6.796063303227129e-07, "learning_rate": 0.25065525594467014, "loss": 0.0, "num_input_tokens_seen": 6044752, "step": 10635 }, { "epoch": 186.67256637168143, "grad_norm": 8.313458579323196e-07, "learning_rate": 0.2506115746096049, "loss": 0.0, "num_input_tokens_seen": 6047632, "step": 10640 }, { "epoch": 186.76106194690266, "grad_norm": 1.4733642217379384e-07, "learning_rate": 0.25056787775897055, "loss": 0.0, "num_input_tokens_seen": 6050464, "step": 10645 }, { "epoch": 186.8495575221239, "grad_norm": 5.794728963337548e-07, "learning_rate": 0.2505241653995056, "loss": 0.0, "num_input_tokens_seen": 6053968, "step": 10650 }, { "epoch": 186.93805309734512, "grad_norm": 2.1404898120636062e-07, "learning_rate": 0.25048043753795113, "loss": 0.0, "num_input_tokens_seen": 6056832, "step": 10655 }, { "epoch": 187.01769911504425, "grad_norm": 6.169322546156764e-07, "learning_rate": 0.2504366941810504, "loss": 0.0, "num_input_tokens_seen": 6059416, "step": 10660 }, { "epoch": 187.10619469026548, "grad_norm": 2.6862571189667506e-07, "learning_rate": 0.2503929353355493, "loss": 0.0, "num_input_tokens_seen": 6061800, "step": 10665 }, { "epoch": 187.1946902654867, "grad_norm": 2.2368506336079008e-07, "learning_rate": 0.250349161008196, "loss": 0.0, "num_input_tokens_seen": 6065224, "step": 10670 }, { "epoch": 187.28318584070797, "grad_norm": 3.9494253201155516e-07, "learning_rate": 0.2503053712057409, "loss": 0.0, "num_input_tokens_seen": 6068200, "step": 10675 }, { "epoch": 187.3716814159292, "grad_norm": 7.574935523280146e-08, "learning_rate": 0.25026156593493715, "loss": 0.0, "num_input_tokens_seen": 6071112, "step": 10680 }, { "epoch": 187.46017699115043, "grad_norm": 4.361864682778105e-07, "learning_rate": 0.2502177452025399, "loss": 0.0, "num_input_tokens_seen": 6074088, "step": 10685 }, { "epoch": 187.5486725663717, "grad_norm": 1.7279249675539177e-07, "learning_rate": 0.25017390901530695, "loss": 0.0, "num_input_tokens_seen": 6076520, "step": 10690 }, { "epoch": 187.63716814159292, "grad_norm": 1.795526429759775e-07, "learning_rate": 0.2501300573799984, "loss": 0.0, "num_input_tokens_seen": 6079496, "step": 10695 }, { "epoch": 187.72566371681415, "grad_norm": 3.1657737054047175e-07, "learning_rate": 0.2500861903033766, "loss": 0.0, "num_input_tokens_seen": 6082168, "step": 10700 }, { "epoch": 187.81415929203538, "grad_norm": 1.2217277571835439e-06, "learning_rate": 0.25004230779220654, "loss": 0.0, "num_input_tokens_seen": 6085144, "step": 10705 }, { "epoch": 187.90265486725664, "grad_norm": 3.095866816238413e-07, "learning_rate": 0.24999840985325542, "loss": 0.0, "num_input_tokens_seen": 6088328, "step": 10710 }, { "epoch": 187.99115044247787, "grad_norm": 8.483309557050234e-07, "learning_rate": 0.24995449649329285, "loss": 0.0, "num_input_tokens_seen": 6090984, "step": 10715 }, { "epoch": 188.07079646017698, "grad_norm": 4.945620162288833e-07, "learning_rate": 0.2499105677190908, "loss": 0.0, "num_input_tokens_seen": 6093456, "step": 10720 }, { "epoch": 188.15929203539824, "grad_norm": 4.1252650362366694e-07, "learning_rate": 0.24986662353742364, "loss": 0.0, "num_input_tokens_seen": 6096960, "step": 10725 }, { "epoch": 188.24778761061947, "grad_norm": 1.4448303886638314e-07, "learning_rate": 0.24982266395506814, "loss": 0.0, "num_input_tokens_seen": 6099568, "step": 10730 }, { "epoch": 188.3362831858407, "grad_norm": 7.48989066323702e-07, "learning_rate": 0.2497786889788034, "loss": 0.0, "num_input_tokens_seen": 6102288, "step": 10735 }, { "epoch": 188.42477876106196, "grad_norm": 2.1444556352889776e-07, "learning_rate": 0.24973469861541095, "loss": 0.0, "num_input_tokens_seen": 6105888, "step": 10740 }, { "epoch": 188.5132743362832, "grad_norm": 3.073825212140946e-07, "learning_rate": 0.24969069287167456, "loss": 0.0, "num_input_tokens_seen": 6108912, "step": 10745 }, { "epoch": 188.60176991150442, "grad_norm": 4.427854491950711e-07, "learning_rate": 0.2496466717543806, "loss": 0.0, "num_input_tokens_seen": 6111760, "step": 10750 }, { "epoch": 188.69026548672565, "grad_norm": 4.917750402455567e-07, "learning_rate": 0.24960263527031762, "loss": 0.0, "num_input_tokens_seen": 6114768, "step": 10755 }, { "epoch": 188.7787610619469, "grad_norm": 6.384632911249355e-07, "learning_rate": 0.24955858342627657, "loss": 0.0, "num_input_tokens_seen": 6116992, "step": 10760 }, { "epoch": 188.86725663716814, "grad_norm": 5.255380983726354e-07, "learning_rate": 0.24951451622905083, "loss": 0.0, "num_input_tokens_seen": 6119472, "step": 10765 }, { "epoch": 188.95575221238937, "grad_norm": 1.154272922576638e-06, "learning_rate": 0.24947043368543612, "loss": 0.0, "num_input_tokens_seen": 6122544, "step": 10770 }, { "epoch": 189.0353982300885, "grad_norm": 4.429795126270619e-07, "learning_rate": 0.2494263358022305, "loss": 0.0, "num_input_tokens_seen": 6125072, "step": 10775 }, { "epoch": 189.12389380530973, "grad_norm": 2.4528475250917836e-07, "learning_rate": 0.24938222258623444, "loss": 0.0, "num_input_tokens_seen": 6128112, "step": 10780 }, { "epoch": 189.21238938053096, "grad_norm": 1.6990765061564161e-07, "learning_rate": 0.24933809404425075, "loss": 0.0, "num_input_tokens_seen": 6130800, "step": 10785 }, { "epoch": 189.30088495575222, "grad_norm": 3.0469183798231825e-07, "learning_rate": 0.24929395018308453, "loss": 0.0, "num_input_tokens_seen": 6133456, "step": 10790 }, { "epoch": 189.38938053097345, "grad_norm": 3.0293321628960257e-07, "learning_rate": 0.24924979100954348, "loss": 0.0, "num_input_tokens_seen": 6136384, "step": 10795 }, { "epoch": 189.47787610619469, "grad_norm": 2.8457790790525905e-07, "learning_rate": 0.24920561653043735, "loss": 0.0, "num_input_tokens_seen": 6139264, "step": 10800 }, { "epoch": 189.47787610619469, "eval_loss": 0.47310909628868103, "eval_runtime": 0.9433, "eval_samples_per_second": 26.504, "eval_steps_per_second": 13.782, "num_input_tokens_seen": 6139264, "step": 10800 }, { "epoch": 189.56637168141592, "grad_norm": 3.9557707509629836e-07, "learning_rate": 0.24916142675257846, "loss": 0.0, "num_input_tokens_seen": 6142000, "step": 10805 }, { "epoch": 189.65486725663717, "grad_norm": 3.168356101923564e-07, "learning_rate": 0.24911722168278144, "loss": 0.0, "num_input_tokens_seen": 6144832, "step": 10810 }, { "epoch": 189.7433628318584, "grad_norm": 3.8287581105578283e-07, "learning_rate": 0.24907300132786328, "loss": 0.0, "num_input_tokens_seen": 6147456, "step": 10815 }, { "epoch": 189.83185840707964, "grad_norm": 1.1402554491724004e-06, "learning_rate": 0.24902876569464322, "loss": 0.0, "num_input_tokens_seen": 6150800, "step": 10820 }, { "epoch": 189.9203539823009, "grad_norm": 6.70919177991891e-07, "learning_rate": 0.24898451478994305, "loss": 0.0, "num_input_tokens_seen": 6154256, "step": 10825 }, { "epoch": 190.0, "grad_norm": 7.349685802182648e-07, "learning_rate": 0.2489402486205868, "loss": 0.0, "num_input_tokens_seen": 6156512, "step": 10830 }, { "epoch": 190.08849557522123, "grad_norm": 4.452994630810281e-07, "learning_rate": 0.24889596719340085, "loss": 0.0, "num_input_tokens_seen": 6159904, "step": 10835 }, { "epoch": 190.1769911504425, "grad_norm": 8.583847943555156e-08, "learning_rate": 0.24885167051521392, "loss": 0.0, "num_input_tokens_seen": 6162944, "step": 10840 }, { "epoch": 190.26548672566372, "grad_norm": 1.063407239598746e-06, "learning_rate": 0.24880735859285716, "loss": 0.0, "num_input_tokens_seen": 6165680, "step": 10845 }, { "epoch": 190.35398230088495, "grad_norm": 6.24448659891641e-07, "learning_rate": 0.24876303143316406, "loss": 0.0, "num_input_tokens_seen": 6168448, "step": 10850 }, { "epoch": 190.44247787610618, "grad_norm": 4.4572138335752243e-07, "learning_rate": 0.24871868904297031, "loss": 0.0, "num_input_tokens_seen": 6171136, "step": 10855 }, { "epoch": 190.53097345132744, "grad_norm": 1.3379678875935497e-07, "learning_rate": 0.24867433142911416, "loss": 0.0, "num_input_tokens_seen": 6173744, "step": 10860 }, { "epoch": 190.61946902654867, "grad_norm": 3.1760416163706395e-07, "learning_rate": 0.24862995859843612, "loss": 0.0, "num_input_tokens_seen": 6176416, "step": 10865 }, { "epoch": 190.7079646017699, "grad_norm": 5.91529044413619e-07, "learning_rate": 0.24858557055777897, "loss": 0.0, "num_input_tokens_seen": 6179328, "step": 10870 }, { "epoch": 190.79646017699116, "grad_norm": 5.314742566042696e-07, "learning_rate": 0.24854116731398793, "loss": 0.0, "num_input_tokens_seen": 6182416, "step": 10875 }, { "epoch": 190.8849557522124, "grad_norm": 5.482298206516134e-07, "learning_rate": 0.24849674887391052, "loss": 0.0, "num_input_tokens_seen": 6185072, "step": 10880 }, { "epoch": 190.97345132743362, "grad_norm": 4.6992704483272973e-07, "learning_rate": 0.2484523152443967, "loss": 0.0, "num_input_tokens_seen": 6187904, "step": 10885 }, { "epoch": 191.05309734513276, "grad_norm": 8.088842378128902e-07, "learning_rate": 0.24840786643229862, "loss": 0.0, "num_input_tokens_seen": 6190096, "step": 10890 }, { "epoch": 191.141592920354, "grad_norm": 1.7810582164656807e-07, "learning_rate": 0.2483634024444709, "loss": 0.0, "num_input_tokens_seen": 6193072, "step": 10895 }, { "epoch": 191.23008849557522, "grad_norm": 2.264718119704412e-07, "learning_rate": 0.24831892328777033, "loss": 0.0, "num_input_tokens_seen": 6195648, "step": 10900 }, { "epoch": 191.31858407079645, "grad_norm": 3.506106622808147e-07, "learning_rate": 0.2482744289690563, "loss": 0.0, "num_input_tokens_seen": 6198896, "step": 10905 }, { "epoch": 191.4070796460177, "grad_norm": 3.3023960099853866e-07, "learning_rate": 0.2482299194951903, "loss": 0.0, "num_input_tokens_seen": 6201488, "step": 10910 }, { "epoch": 191.49557522123894, "grad_norm": 1.1667943908832967e-06, "learning_rate": 0.2481853948730363, "loss": 0.0, "num_input_tokens_seen": 6204720, "step": 10915 }, { "epoch": 191.58407079646017, "grad_norm": 2.918106076776894e-07, "learning_rate": 0.24814085510946052, "loss": 0.0, "num_input_tokens_seen": 6207168, "step": 10920 }, { "epoch": 191.67256637168143, "grad_norm": 4.5092676259628206e-07, "learning_rate": 0.24809630021133158, "loss": 0.0, "num_input_tokens_seen": 6210000, "step": 10925 }, { "epoch": 191.76106194690266, "grad_norm": 2.7836446747642185e-07, "learning_rate": 0.24805173018552037, "loss": 0.0, "num_input_tokens_seen": 6212704, "step": 10930 }, { "epoch": 191.8495575221239, "grad_norm": 3.0935845529711514e-07, "learning_rate": 0.2480071450389002, "loss": 0.0, "num_input_tokens_seen": 6215680, "step": 10935 }, { "epoch": 191.93805309734512, "grad_norm": 6.661985594291764e-07, "learning_rate": 0.24796254477834662, "loss": 0.0, "num_input_tokens_seen": 6218800, "step": 10940 }, { "epoch": 192.01769911504425, "grad_norm": 6.033015438333678e-07, "learning_rate": 0.24791792941073754, "loss": 0.0, "num_input_tokens_seen": 6221184, "step": 10945 }, { "epoch": 192.10619469026548, "grad_norm": 4.358778085133963e-07, "learning_rate": 0.2478732989429533, "loss": 0.0, "num_input_tokens_seen": 6223936, "step": 10950 }, { "epoch": 192.1946902654867, "grad_norm": 4.037546830204519e-07, "learning_rate": 0.24782865338187632, "loss": 0.0, "num_input_tokens_seen": 6227248, "step": 10955 }, { "epoch": 192.28318584070797, "grad_norm": 2.5660742153377214e-07, "learning_rate": 0.2477839927343916, "loss": 0.0, "num_input_tokens_seen": 6229952, "step": 10960 }, { "epoch": 192.3716814159292, "grad_norm": 3.144311904179631e-07, "learning_rate": 0.2477393170073864, "loss": 0.0, "num_input_tokens_seen": 6232816, "step": 10965 }, { "epoch": 192.46017699115043, "grad_norm": 4.99984196267178e-07, "learning_rate": 0.2476946262077503, "loss": 0.0, "num_input_tokens_seen": 6235552, "step": 10970 }, { "epoch": 192.5486725663717, "grad_norm": 2.242415746422921e-07, "learning_rate": 0.24764992034237507, "loss": 0.0, "num_input_tokens_seen": 6238512, "step": 10975 }, { "epoch": 192.63716814159292, "grad_norm": 3.632996481428563e-07, "learning_rate": 0.24760519941815498, "loss": 0.0, "num_input_tokens_seen": 6241136, "step": 10980 }, { "epoch": 192.72566371681415, "grad_norm": 1.8644517751909007e-07, "learning_rate": 0.2475604634419866, "loss": 0.0, "num_input_tokens_seen": 6244112, "step": 10985 }, { "epoch": 192.81415929203538, "grad_norm": 4.404055005124974e-07, "learning_rate": 0.24751571242076872, "loss": 0.0, "num_input_tokens_seen": 6247248, "step": 10990 }, { "epoch": 192.90265486725664, "grad_norm": 1.6823021553591389e-07, "learning_rate": 0.2474709463614025, "loss": 0.0, "num_input_tokens_seen": 6250080, "step": 10995 }, { "epoch": 192.99115044247787, "grad_norm": 5.242434326646617e-07, "learning_rate": 0.24742616527079145, "loss": 0.0, "num_input_tokens_seen": 6252832, "step": 11000 }, { "epoch": 192.99115044247787, "eval_loss": 0.47574570775032043, "eval_runtime": 0.9371, "eval_samples_per_second": 26.679, "eval_steps_per_second": 13.873, "num_input_tokens_seen": 6252832, "step": 11000 }, { "epoch": 193.07079646017698, "grad_norm": 1.8304254467693681e-07, "learning_rate": 0.24738136915584139, "loss": 0.0, "num_input_tokens_seen": 6254912, "step": 11005 }, { "epoch": 193.15929203539824, "grad_norm": 5.626996539831453e-07, "learning_rate": 0.24733655802346047, "loss": 0.0, "num_input_tokens_seen": 6257696, "step": 11010 }, { "epoch": 193.24778761061947, "grad_norm": 5.216946874497808e-07, "learning_rate": 0.24729173188055906, "loss": 0.0, "num_input_tokens_seen": 6260304, "step": 11015 }, { "epoch": 193.3362831858407, "grad_norm": 7.933174401841825e-07, "learning_rate": 0.24724689073404996, "loss": 0.0, "num_input_tokens_seen": 6263312, "step": 11020 }, { "epoch": 193.42477876106196, "grad_norm": 2.512471439786168e-07, "learning_rate": 0.24720203459084822, "loss": 0.0, "num_input_tokens_seen": 6266928, "step": 11025 }, { "epoch": 193.5132743362832, "grad_norm": 3.270527884069452e-07, "learning_rate": 0.24715716345787123, "loss": 0.0, "num_input_tokens_seen": 6269808, "step": 11030 }, { "epoch": 193.60176991150442, "grad_norm": 1.2024506190755346e-07, "learning_rate": 0.2471122773420387, "loss": 0.0, "num_input_tokens_seen": 6272512, "step": 11035 }, { "epoch": 193.69026548672565, "grad_norm": 3.6714033058160567e-07, "learning_rate": 0.24706737625027259, "loss": 0.0, "num_input_tokens_seen": 6275904, "step": 11040 }, { "epoch": 193.7787610619469, "grad_norm": 2.5506221845716937e-07, "learning_rate": 0.24702246018949725, "loss": 0.0, "num_input_tokens_seen": 6279312, "step": 11045 }, { "epoch": 193.86725663716814, "grad_norm": 5.016304953642248e-07, "learning_rate": 0.2469775291666393, "loss": 0.0, "num_input_tokens_seen": 6281760, "step": 11050 }, { "epoch": 193.95575221238937, "grad_norm": 5.275255148262659e-07, "learning_rate": 0.24693258318862765, "loss": 0.0, "num_input_tokens_seen": 6284656, "step": 11055 }, { "epoch": 194.0353982300885, "grad_norm": 2.295623602321939e-07, "learning_rate": 0.2468876222623935, "loss": 0.0, "num_input_tokens_seen": 6286984, "step": 11060 }, { "epoch": 194.12389380530973, "grad_norm": 5.454732558973774e-07, "learning_rate": 0.2468426463948705, "loss": 0.0, "num_input_tokens_seen": 6289928, "step": 11065 }, { "epoch": 194.21238938053096, "grad_norm": 4.812580982616055e-07, "learning_rate": 0.24679765559299438, "loss": 0.0, "num_input_tokens_seen": 6292840, "step": 11070 }, { "epoch": 194.30088495575222, "grad_norm": 3.1782917631062446e-07, "learning_rate": 0.24675264986370332, "loss": 0.0, "num_input_tokens_seen": 6295624, "step": 11075 }, { "epoch": 194.38938053097345, "grad_norm": 3.3239402341678215e-07, "learning_rate": 0.2467076292139378, "loss": 0.0, "num_input_tokens_seen": 6298104, "step": 11080 }, { "epoch": 194.47787610619469, "grad_norm": 4.2444514747330686e-07, "learning_rate": 0.24666259365064055, "loss": 0.0, "num_input_tokens_seen": 6300776, "step": 11085 }, { "epoch": 194.56637168141592, "grad_norm": 5.164253593648027e-07, "learning_rate": 0.24661754318075663, "loss": 0.0, "num_input_tokens_seen": 6303560, "step": 11090 }, { "epoch": 194.65486725663717, "grad_norm": 1.2986433262085484e-07, "learning_rate": 0.2465724778112334, "loss": 0.0, "num_input_tokens_seen": 6306280, "step": 11095 }, { "epoch": 194.7433628318584, "grad_norm": 3.0520570248881995e-07, "learning_rate": 0.24652739754902042, "loss": 0.0, "num_input_tokens_seen": 6309400, "step": 11100 }, { "epoch": 194.83185840707964, "grad_norm": 3.675654909329751e-07, "learning_rate": 0.24648230240106975, "loss": 0.0, "num_input_tokens_seen": 6312760, "step": 11105 }, { "epoch": 194.9203539823009, "grad_norm": 1.2132759366068058e-06, "learning_rate": 0.2464371923743356, "loss": 0.0, "num_input_tokens_seen": 6315624, "step": 11110 }, { "epoch": 195.0, "grad_norm": 8.689604555911501e-07, "learning_rate": 0.24639206747577444, "loss": 0.0, "num_input_tokens_seen": 6318352, "step": 11115 }, { "epoch": 195.08849557522123, "grad_norm": 1.115298076115323e-07, "learning_rate": 0.24634692771234515, "loss": 0.0, "num_input_tokens_seen": 6321360, "step": 11120 }, { "epoch": 195.1769911504425, "grad_norm": 1.6852425233082613e-07, "learning_rate": 0.2463017730910088, "loss": 0.0, "num_input_tokens_seen": 6324256, "step": 11125 }, { "epoch": 195.26548672566372, "grad_norm": 3.0951372309573344e-07, "learning_rate": 0.2462566036187289, "loss": 0.0, "num_input_tokens_seen": 6326896, "step": 11130 }, { "epoch": 195.35398230088495, "grad_norm": 3.50245045410702e-07, "learning_rate": 0.24621141930247106, "loss": 0.0, "num_input_tokens_seen": 6329600, "step": 11135 }, { "epoch": 195.44247787610618, "grad_norm": 1.501759356870025e-07, "learning_rate": 0.2461662201492033, "loss": 0.0, "num_input_tokens_seen": 6332768, "step": 11140 }, { "epoch": 195.53097345132744, "grad_norm": 4.48300284006109e-07, "learning_rate": 0.24612100616589586, "loss": 0.0, "num_input_tokens_seen": 6335536, "step": 11145 }, { "epoch": 195.61946902654867, "grad_norm": 1.1534446286987077e-07, "learning_rate": 0.24607577735952135, "loss": 0.0, "num_input_tokens_seen": 6338064, "step": 11150 }, { "epoch": 195.7079646017699, "grad_norm": 6.801561198699346e-07, "learning_rate": 0.24603053373705464, "loss": 0.0, "num_input_tokens_seen": 6341056, "step": 11155 }, { "epoch": 195.79646017699116, "grad_norm": 3.6943600889571826e-07, "learning_rate": 0.2459852753054728, "loss": 0.0, "num_input_tokens_seen": 6343856, "step": 11160 }, { "epoch": 195.8849557522124, "grad_norm": 2.777157419586729e-07, "learning_rate": 0.24594000207175526, "loss": 0.0, "num_input_tokens_seen": 6346928, "step": 11165 }, { "epoch": 195.97345132743362, "grad_norm": 7.308519798243651e-07, "learning_rate": 0.2458947140428838, "loss": 0.0, "num_input_tokens_seen": 6349888, "step": 11170 }, { "epoch": 196.05309734513276, "grad_norm": 4.2593634930199187e-07, "learning_rate": 0.24584941122584233, "loss": 0.0, "num_input_tokens_seen": 6352344, "step": 11175 }, { "epoch": 196.141592920354, "grad_norm": 2.902134212945384e-07, "learning_rate": 0.24580409362761713, "loss": 0.0, "num_input_tokens_seen": 6355608, "step": 11180 }, { "epoch": 196.23008849557522, "grad_norm": 6.046739713383431e-07, "learning_rate": 0.2457587612551967, "loss": 0.0, "num_input_tokens_seen": 6358552, "step": 11185 }, { "epoch": 196.31858407079645, "grad_norm": 6.458273560383532e-07, "learning_rate": 0.24571341411557193, "loss": 0.0, "num_input_tokens_seen": 6360840, "step": 11190 }, { "epoch": 196.4070796460177, "grad_norm": 1.0938633465684688e-07, "learning_rate": 0.2456680522157359, "loss": 0.0, "num_input_tokens_seen": 6363720, "step": 11195 }, { "epoch": 196.49557522123894, "grad_norm": 4.265176585249719e-07, "learning_rate": 0.245622675562684, "loss": 0.0, "num_input_tokens_seen": 6366440, "step": 11200 }, { "epoch": 196.49557522123894, "eval_loss": 0.48474833369255066, "eval_runtime": 0.9172, "eval_samples_per_second": 27.256, "eval_steps_per_second": 14.173, "num_input_tokens_seen": 6366440, "step": 11200 }, { "epoch": 196.58407079646017, "grad_norm": 1.1758405662476434e-06, "learning_rate": 0.24557728416341384, "loss": 0.0, "num_input_tokens_seen": 6369384, "step": 11205 }, { "epoch": 196.67256637168143, "grad_norm": 6.129566827439703e-07, "learning_rate": 0.24553187802492538, "loss": 0.0, "num_input_tokens_seen": 6372072, "step": 11210 }, { "epoch": 196.76106194690266, "grad_norm": 3.5082680938103294e-07, "learning_rate": 0.24548645715422074, "loss": 0.0, "num_input_tokens_seen": 6374968, "step": 11215 }, { "epoch": 196.8495575221239, "grad_norm": 4.1631631120253587e-07, "learning_rate": 0.2454410215583045, "loss": 0.0, "num_input_tokens_seen": 6377592, "step": 11220 }, { "epoch": 196.93805309734512, "grad_norm": 5.586352926911786e-07, "learning_rate": 0.24539557124418332, "loss": 0.0, "num_input_tokens_seen": 6380888, "step": 11225 }, { "epoch": 197.01769911504425, "grad_norm": 2.568944807990192e-07, "learning_rate": 0.24535010621886624, "loss": 0.0, "num_input_tokens_seen": 6383104, "step": 11230 }, { "epoch": 197.10619469026548, "grad_norm": 3.545693516571191e-07, "learning_rate": 0.2453046264893646, "loss": 0.0, "num_input_tokens_seen": 6386400, "step": 11235 }, { "epoch": 197.1946902654867, "grad_norm": 5.338868618309789e-07, "learning_rate": 0.24525913206269184, "loss": 0.0, "num_input_tokens_seen": 6389392, "step": 11240 }, { "epoch": 197.28318584070797, "grad_norm": 1.4497980771466246e-07, "learning_rate": 0.2452136229458638, "loss": 0.0, "num_input_tokens_seen": 6391968, "step": 11245 }, { "epoch": 197.3716814159292, "grad_norm": 1.6952149906046543e-07, "learning_rate": 0.24516809914589857, "loss": 0.0, "num_input_tokens_seen": 6394720, "step": 11250 }, { "epoch": 197.46017699115043, "grad_norm": 2.4893796535252477e-07, "learning_rate": 0.2451225606698165, "loss": 0.0, "num_input_tokens_seen": 6397808, "step": 11255 }, { "epoch": 197.5486725663717, "grad_norm": 8.085440583727177e-08, "learning_rate": 0.2450770075246402, "loss": 0.0, "num_input_tokens_seen": 6400448, "step": 11260 }, { "epoch": 197.63716814159292, "grad_norm": 1.0553313245509344e-07, "learning_rate": 0.24503143971739455, "loss": 0.0, "num_input_tokens_seen": 6403136, "step": 11265 }, { "epoch": 197.72566371681415, "grad_norm": 3.5066133818872913e-07, "learning_rate": 0.24498585725510663, "loss": 0.0, "num_input_tokens_seen": 6406288, "step": 11270 }, { "epoch": 197.81415929203538, "grad_norm": 6.866970920782478e-07, "learning_rate": 0.24494026014480583, "loss": 0.0, "num_input_tokens_seen": 6408896, "step": 11275 }, { "epoch": 197.90265486725664, "grad_norm": 7.089213909239334e-07, "learning_rate": 0.24489464839352387, "loss": 0.0, "num_input_tokens_seen": 6411728, "step": 11280 }, { "epoch": 197.99115044247787, "grad_norm": 3.9694168663118035e-07, "learning_rate": 0.2448490220082946, "loss": 0.0, "num_input_tokens_seen": 6414592, "step": 11285 }, { "epoch": 198.07079646017698, "grad_norm": 5.000907208341232e-07, "learning_rate": 0.24480338099615415, "loss": 0.0, "num_input_tokens_seen": 6416920, "step": 11290 }, { "epoch": 198.15929203539824, "grad_norm": 3.50579909991211e-07, "learning_rate": 0.244757725364141, "loss": 0.0, "num_input_tokens_seen": 6419336, "step": 11295 }, { "epoch": 198.24778761061947, "grad_norm": 3.625450801791885e-07, "learning_rate": 0.24471205511929583, "loss": 0.0, "num_input_tokens_seen": 6422376, "step": 11300 }, { "epoch": 198.3362831858407, "grad_norm": 2.760376105470641e-07, "learning_rate": 0.24466637026866145, "loss": 0.0, "num_input_tokens_seen": 6425320, "step": 11305 }, { "epoch": 198.42477876106196, "grad_norm": 3.347263941577694e-07, "learning_rate": 0.2446206708192832, "loss": 0.0, "num_input_tokens_seen": 6428056, "step": 11310 }, { "epoch": 198.5132743362832, "grad_norm": 1.5345820258971798e-07, "learning_rate": 0.2445749567782084, "loss": 0.0, "num_input_tokens_seen": 6431480, "step": 11315 }, { "epoch": 198.60176991150442, "grad_norm": 1.6261822111118818e-07, "learning_rate": 0.2445292281524868, "loss": 0.0, "num_input_tokens_seen": 6433976, "step": 11320 }, { "epoch": 198.69026548672565, "grad_norm": 4.3261323412480124e-07, "learning_rate": 0.24448348494917022, "loss": 0.0, "num_input_tokens_seen": 6437080, "step": 11325 }, { "epoch": 198.7787610619469, "grad_norm": 6.27820270437951e-07, "learning_rate": 0.24443772717531295, "loss": 0.0, "num_input_tokens_seen": 6439896, "step": 11330 }, { "epoch": 198.86725663716814, "grad_norm": 6.888078587508062e-07, "learning_rate": 0.24439195483797138, "loss": 0.0, "num_input_tokens_seen": 6442888, "step": 11335 }, { "epoch": 198.95575221238937, "grad_norm": 2.819663222908275e-07, "learning_rate": 0.24434616794420416, "loss": 0.0, "num_input_tokens_seen": 6445448, "step": 11340 }, { "epoch": 199.0353982300885, "grad_norm": 4.622407061560807e-07, "learning_rate": 0.24430036650107223, "loss": 0.0, "num_input_tokens_seen": 6447712, "step": 11345 }, { "epoch": 199.12389380530973, "grad_norm": 9.098673814378344e-08, "learning_rate": 0.2442545505156387, "loss": 0.0, "num_input_tokens_seen": 6450336, "step": 11350 }, { "epoch": 199.21238938053096, "grad_norm": 3.381737769814208e-07, "learning_rate": 0.24420871999496904, "loss": 0.0, "num_input_tokens_seen": 6453584, "step": 11355 }, { "epoch": 199.30088495575222, "grad_norm": 3.6445760542846983e-07, "learning_rate": 0.24416287494613084, "loss": 0.0, "num_input_tokens_seen": 6456752, "step": 11360 }, { "epoch": 199.38938053097345, "grad_norm": 8.263517088380468e-07, "learning_rate": 0.24411701537619399, "loss": 0.0, "num_input_tokens_seen": 6459536, "step": 11365 }, { "epoch": 199.47787610619469, "grad_norm": 2.625263846312009e-07, "learning_rate": 0.24407114129223062, "loss": 0.0, "num_input_tokens_seen": 6462160, "step": 11370 }, { "epoch": 199.56637168141592, "grad_norm": 4.878698405264004e-07, "learning_rate": 0.2440252527013151, "loss": 0.0, "num_input_tokens_seen": 6465072, "step": 11375 }, { "epoch": 199.65486725663717, "grad_norm": 4.358525700354221e-07, "learning_rate": 0.24397934961052403, "loss": 0.0, "num_input_tokens_seen": 6467920, "step": 11380 }, { "epoch": 199.7433628318584, "grad_norm": 7.192636530817254e-07, "learning_rate": 0.24393343202693618, "loss": 0.0, "num_input_tokens_seen": 6470416, "step": 11385 }, { "epoch": 199.83185840707964, "grad_norm": 1.615396740817232e-07, "learning_rate": 0.2438874999576327, "loss": 0.0, "num_input_tokens_seen": 6473280, "step": 11390 }, { "epoch": 199.9203539823009, "grad_norm": 3.636731094047718e-07, "learning_rate": 0.24384155340969688, "loss": 0.0, "num_input_tokens_seen": 6476512, "step": 11395 }, { "epoch": 200.0, "grad_norm": 3.462122322162031e-07, "learning_rate": 0.24379559239021423, "loss": 0.0, "num_input_tokens_seen": 6478776, "step": 11400 }, { "epoch": 200.0, "eval_loss": 0.48118656873703003, "eval_runtime": 0.9116, "eval_samples_per_second": 27.425, "eval_steps_per_second": 14.261, "num_input_tokens_seen": 6478776, "step": 11400 }, { "epoch": 200.08849557522123, "grad_norm": 4.416397132445127e-07, "learning_rate": 0.2437496169062725, "loss": 0.0, "num_input_tokens_seen": 6481576, "step": 11405 }, { "epoch": 200.1769911504425, "grad_norm": 5.445846227303264e-07, "learning_rate": 0.24370362696496176, "loss": 0.0, "num_input_tokens_seen": 6484520, "step": 11410 }, { "epoch": 200.26548672566372, "grad_norm": 2.674276231573458e-07, "learning_rate": 0.24365762257337417, "loss": 0.0, "num_input_tokens_seen": 6487592, "step": 11415 }, { "epoch": 200.35398230088495, "grad_norm": 1.491927292818218e-07, "learning_rate": 0.2436116037386042, "loss": 0.0, "num_input_tokens_seen": 6490152, "step": 11420 }, { "epoch": 200.44247787610618, "grad_norm": 1.8416470481952274e-07, "learning_rate": 0.24356557046774852, "loss": 0.0, "num_input_tokens_seen": 6493016, "step": 11425 }, { "epoch": 200.53097345132744, "grad_norm": 4.906454478259548e-07, "learning_rate": 0.24351952276790606, "loss": 0.0, "num_input_tokens_seen": 6496104, "step": 11430 }, { "epoch": 200.61946902654867, "grad_norm": 2.184129783699973e-07, "learning_rate": 0.24347346064617797, "loss": 0.0, "num_input_tokens_seen": 6498632, "step": 11435 }, { "epoch": 200.7079646017699, "grad_norm": 1.2045755681810988e-07, "learning_rate": 0.24342738410966758, "loss": 0.0, "num_input_tokens_seen": 6501512, "step": 11440 }, { "epoch": 200.79646017699116, "grad_norm": 2.598614514681685e-07, "learning_rate": 0.24338129316548046, "loss": 0.0, "num_input_tokens_seen": 6504200, "step": 11445 }, { "epoch": 200.8849557522124, "grad_norm": 5.688012265636644e-07, "learning_rate": 0.24333518782072444, "loss": 0.0, "num_input_tokens_seen": 6507496, "step": 11450 }, { "epoch": 200.97345132743362, "grad_norm": 4.67436024109702e-07, "learning_rate": 0.24328906808250952, "loss": 0.0, "num_input_tokens_seen": 6510568, "step": 11455 }, { "epoch": 201.05309734513276, "grad_norm": 3.0688272545376094e-07, "learning_rate": 0.243242933957948, "loss": 0.0, "num_input_tokens_seen": 6512936, "step": 11460 }, { "epoch": 201.141592920354, "grad_norm": 2.7619387310551247e-07, "learning_rate": 0.24319678545415427, "loss": 0.0, "num_input_tokens_seen": 6515416, "step": 11465 }, { "epoch": 201.23008849557522, "grad_norm": 2.7301692284709134e-07, "learning_rate": 0.24315062257824507, "loss": 0.0, "num_input_tokens_seen": 6518744, "step": 11470 }, { "epoch": 201.31858407079645, "grad_norm": 2.3796430070888164e-07, "learning_rate": 0.24310444533733921, "loss": 0.0, "num_input_tokens_seen": 6521080, "step": 11475 }, { "epoch": 201.4070796460177, "grad_norm": 1.3575788671005284e-07, "learning_rate": 0.2430582537385579, "loss": 0.0, "num_input_tokens_seen": 6523720, "step": 11480 }, { "epoch": 201.49557522123894, "grad_norm": 4.96651011872018e-07, "learning_rate": 0.2430120477890244, "loss": 0.0, "num_input_tokens_seen": 6526760, "step": 11485 }, { "epoch": 201.58407079646017, "grad_norm": 1.0637005942726319e-07, "learning_rate": 0.24296582749586426, "loss": 0.0, "num_input_tokens_seen": 6529992, "step": 11490 }, { "epoch": 201.67256637168143, "grad_norm": 8.248828748946835e-07, "learning_rate": 0.24291959286620526, "loss": 0.0, "num_input_tokens_seen": 6532584, "step": 11495 }, { "epoch": 201.76106194690266, "grad_norm": 5.344727469491772e-07, "learning_rate": 0.24287334390717738, "loss": 0.0, "num_input_tokens_seen": 6535768, "step": 11500 }, { "epoch": 201.8495575221239, "grad_norm": 1.5034217426546093e-07, "learning_rate": 0.24282708062591268, "loss": 0.0, "num_input_tokens_seen": 6538728, "step": 11505 }, { "epoch": 201.93805309734512, "grad_norm": 2.912214114303424e-08, "learning_rate": 0.24278080302954563, "loss": 0.0, "num_input_tokens_seen": 6541320, "step": 11510 }, { "epoch": 202.01769911504425, "grad_norm": 2.110160863821875e-07, "learning_rate": 0.24273451112521283, "loss": 0.0, "num_input_tokens_seen": 6543928, "step": 11515 }, { "epoch": 202.10619469026548, "grad_norm": 3.68499087244345e-07, "learning_rate": 0.242688204920053, "loss": 0.0, "num_input_tokens_seen": 6546680, "step": 11520 }, { "epoch": 202.1946902654867, "grad_norm": 2.1557346485678863e-07, "learning_rate": 0.24264188442120715, "loss": 0.0, "num_input_tokens_seen": 6549400, "step": 11525 }, { "epoch": 202.28318584070797, "grad_norm": 1.2224849399444793e-07, "learning_rate": 0.24259554963581853, "loss": 0.0, "num_input_tokens_seen": 6552088, "step": 11530 }, { "epoch": 202.3716814159292, "grad_norm": 3.830428454421053e-07, "learning_rate": 0.24254920057103257, "loss": 0.0, "num_input_tokens_seen": 6555048, "step": 11535 }, { "epoch": 202.46017699115043, "grad_norm": 6.217058512447693e-07, "learning_rate": 0.24250283723399685, "loss": 0.0, "num_input_tokens_seen": 6558280, "step": 11540 }, { "epoch": 202.5486725663717, "grad_norm": 4.879277639702195e-07, "learning_rate": 0.24245645963186108, "loss": 0.0, "num_input_tokens_seen": 6561240, "step": 11545 }, { "epoch": 202.63716814159292, "grad_norm": 5.153269171387365e-07, "learning_rate": 0.2424100677717774, "loss": 0.0, "num_input_tokens_seen": 6564040, "step": 11550 }, { "epoch": 202.72566371681415, "grad_norm": 5.82323707476462e-07, "learning_rate": 0.24236366166090004, "loss": 0.0, "num_input_tokens_seen": 6566536, "step": 11555 }, { "epoch": 202.81415929203538, "grad_norm": 1.086180887455157e-07, "learning_rate": 0.24231724130638527, "loss": 0.0, "num_input_tokens_seen": 6569368, "step": 11560 }, { "epoch": 202.90265486725664, "grad_norm": 4.842882503908186e-07, "learning_rate": 0.2422708067153917, "loss": 0.0, "num_input_tokens_seen": 6572024, "step": 11565 }, { "epoch": 202.99115044247787, "grad_norm": 3.9859395428720745e-07, "learning_rate": 0.24222435789508026, "loss": 0.0, "num_input_tokens_seen": 6574904, "step": 11570 }, { "epoch": 203.07079646017698, "grad_norm": 2.642480581016571e-07, "learning_rate": 0.24217789485261387, "loss": 0.0, "num_input_tokens_seen": 6577192, "step": 11575 }, { "epoch": 203.15929203539824, "grad_norm": 1.8290812420218572e-07, "learning_rate": 0.2421314175951577, "loss": 0.0, "num_input_tokens_seen": 6580680, "step": 11580 }, { "epoch": 203.24778761061947, "grad_norm": 8.232578352362907e-08, "learning_rate": 0.2420849261298791, "loss": 0.0, "num_input_tokens_seen": 6583640, "step": 11585 }, { "epoch": 203.3362831858407, "grad_norm": 3.479897259239806e-07, "learning_rate": 0.24203842046394775, "loss": 0.0, "num_input_tokens_seen": 6586648, "step": 11590 }, { "epoch": 203.42477876106196, "grad_norm": 2.8679508545792487e-07, "learning_rate": 0.24199190060453535, "loss": 0.0, "num_input_tokens_seen": 6589480, "step": 11595 }, { "epoch": 203.5132743362832, "grad_norm": 4.1507590253786475e-07, "learning_rate": 0.2419453665588158, "loss": 0.0, "num_input_tokens_seen": 6592280, "step": 11600 }, { "epoch": 203.5132743362832, "eval_loss": 0.487193763256073, "eval_runtime": 0.933, "eval_samples_per_second": 26.795, "eval_steps_per_second": 13.933, "num_input_tokens_seen": 6592280, "step": 11600 }, { "epoch": 203.60176991150442, "grad_norm": 3.4720304142865643e-07, "learning_rate": 0.24189881833396523, "loss": 0.0, "num_input_tokens_seen": 6594760, "step": 11605 }, { "epoch": 203.69026548672565, "grad_norm": 4.1194010691469884e-07, "learning_rate": 0.24185225593716203, "loss": 0.0, "num_input_tokens_seen": 6598040, "step": 11610 }, { "epoch": 203.7787610619469, "grad_norm": 9.797552280588206e-08, "learning_rate": 0.2418056793755867, "loss": 0.0, "num_input_tokens_seen": 6600792, "step": 11615 }, { "epoch": 203.86725663716814, "grad_norm": 2.943382355624635e-07, "learning_rate": 0.24175908865642187, "loss": 0.0, "num_input_tokens_seen": 6604136, "step": 11620 }, { "epoch": 203.95575221238937, "grad_norm": 3.446189396072441e-07, "learning_rate": 0.24171248378685248, "loss": 0.0, "num_input_tokens_seen": 6606392, "step": 11625 }, { "epoch": 204.0353982300885, "grad_norm": 6.046129215064866e-07, "learning_rate": 0.24166586477406554, "loss": 0.0, "num_input_tokens_seen": 6608488, "step": 11630 }, { "epoch": 204.12389380530973, "grad_norm": 1.3305665902407782e-07, "learning_rate": 0.24161923162525034, "loss": 0.0, "num_input_tokens_seen": 6611128, "step": 11635 }, { "epoch": 204.21238938053096, "grad_norm": 1.0226756330666831e-07, "learning_rate": 0.2415725843475982, "loss": 0.0, "num_input_tokens_seen": 6613832, "step": 11640 }, { "epoch": 204.30088495575222, "grad_norm": 6.238636842681444e-07, "learning_rate": 0.24152592294830286, "loss": 0.0, "num_input_tokens_seen": 6617144, "step": 11645 }, { "epoch": 204.38938053097345, "grad_norm": 5.202887791710964e-07, "learning_rate": 0.24147924743455995, "loss": 0.0, "num_input_tokens_seen": 6619848, "step": 11650 }, { "epoch": 204.47787610619469, "grad_norm": 2.871303195206565e-07, "learning_rate": 0.24143255781356754, "loss": 0.0, "num_input_tokens_seen": 6622664, "step": 11655 }, { "epoch": 204.56637168141592, "grad_norm": 4.174555101599253e-07, "learning_rate": 0.24138585409252566, "loss": 0.0, "num_input_tokens_seen": 6625672, "step": 11660 }, { "epoch": 204.65486725663717, "grad_norm": 1.121352752875282e-07, "learning_rate": 0.24133913627863662, "loss": 0.0, "num_input_tokens_seen": 6628312, "step": 11665 }, { "epoch": 204.7433628318584, "grad_norm": 3.0508829240716295e-07, "learning_rate": 0.241292404379105, "loss": 0.0, "num_input_tokens_seen": 6631224, "step": 11670 }, { "epoch": 204.83185840707964, "grad_norm": 7.1947698643271e-07, "learning_rate": 0.24124565840113735, "loss": 0.0, "num_input_tokens_seen": 6634568, "step": 11675 }, { "epoch": 204.9203539823009, "grad_norm": 1.4557302563389385e-07, "learning_rate": 0.2411988983519425, "loss": 0.0, "num_input_tokens_seen": 6637528, "step": 11680 }, { "epoch": 205.0, "grad_norm": 4.814737053493445e-07, "learning_rate": 0.24115212423873145, "loss": 0.0, "num_input_tokens_seen": 6639624, "step": 11685 }, { "epoch": 205.08849557522123, "grad_norm": 2.769475315744785e-07, "learning_rate": 0.24110533606871737, "loss": 0.0, "num_input_tokens_seen": 6642280, "step": 11690 }, { "epoch": 205.1769911504425, "grad_norm": 4.091311041065637e-07, "learning_rate": 0.24105853384911552, "loss": 0.0, "num_input_tokens_seen": 6645784, "step": 11695 }, { "epoch": 205.26548672566372, "grad_norm": 3.298055162304081e-07, "learning_rate": 0.24101171758714346, "loss": 0.0, "num_input_tokens_seen": 6648008, "step": 11700 }, { "epoch": 205.35398230088495, "grad_norm": 2.610799185731594e-07, "learning_rate": 0.24096488729002086, "loss": 0.0, "num_input_tokens_seen": 6650904, "step": 11705 }, { "epoch": 205.44247787610618, "grad_norm": 5.6814862148257816e-08, "learning_rate": 0.24091804296496946, "loss": 0.0, "num_input_tokens_seen": 6653848, "step": 11710 }, { "epoch": 205.53097345132744, "grad_norm": 3.9590460687577433e-07, "learning_rate": 0.2408711846192133, "loss": 0.0, "num_input_tokens_seen": 6656632, "step": 11715 }, { "epoch": 205.61946902654867, "grad_norm": 3.9307300880864204e-07, "learning_rate": 0.24082431225997855, "loss": 0.0, "num_input_tokens_seen": 6659528, "step": 11720 }, { "epoch": 205.7079646017699, "grad_norm": 1.100673472365088e-07, "learning_rate": 0.24077742589449344, "loss": 0.0, "num_input_tokens_seen": 6663000, "step": 11725 }, { "epoch": 205.79646017699116, "grad_norm": 2.5154989202746947e-07, "learning_rate": 0.24073052552998844, "loss": 0.0, "num_input_tokens_seen": 6665848, "step": 11730 }, { "epoch": 205.8849557522124, "grad_norm": 4.735847767278756e-07, "learning_rate": 0.2406836111736963, "loss": 0.0, "num_input_tokens_seen": 6668472, "step": 11735 }, { "epoch": 205.97345132743362, "grad_norm": 2.0581003923325625e-07, "learning_rate": 0.2406366828328517, "loss": 0.0, "num_input_tokens_seen": 6671320, "step": 11740 }, { "epoch": 206.05309734513276, "grad_norm": 1.9081984703461785e-07, "learning_rate": 0.2405897405146915, "loss": 0.0, "num_input_tokens_seen": 6673536, "step": 11745 }, { "epoch": 206.141592920354, "grad_norm": 4.60091484910663e-07, "learning_rate": 0.240542784226455, "loss": 0.0, "num_input_tokens_seen": 6676256, "step": 11750 }, { "epoch": 206.23008849557522, "grad_norm": 4.968562734575244e-07, "learning_rate": 0.24049581397538328, "loss": 0.0, "num_input_tokens_seen": 6679232, "step": 11755 }, { "epoch": 206.31858407079645, "grad_norm": 2.0141001755291654e-07, "learning_rate": 0.24044882976871984, "loss": 0.0, "num_input_tokens_seen": 6682224, "step": 11760 }, { "epoch": 206.4070796460177, "grad_norm": 3.458565700498184e-08, "learning_rate": 0.2404018316137102, "loss": 0.0, "num_input_tokens_seen": 6684880, "step": 11765 }, { "epoch": 206.49557522123894, "grad_norm": 1.6193952490084484e-07, "learning_rate": 0.24035481951760204, "loss": 0.0, "num_input_tokens_seen": 6688224, "step": 11770 }, { "epoch": 206.58407079646017, "grad_norm": 1.4019437344359176e-07, "learning_rate": 0.2403077934876452, "loss": 0.0, "num_input_tokens_seen": 6691040, "step": 11775 }, { "epoch": 206.67256637168143, "grad_norm": 5.110609890834894e-08, "learning_rate": 0.2402607535310918, "loss": 0.0, "num_input_tokens_seen": 6694016, "step": 11780 }, { "epoch": 206.76106194690266, "grad_norm": 1.9071717360930052e-07, "learning_rate": 0.2402136996551959, "loss": 0.0, "num_input_tokens_seen": 6696736, "step": 11785 }, { "epoch": 206.8495575221239, "grad_norm": 2.2265949439770338e-07, "learning_rate": 0.24016663186721376, "loss": 0.0, "num_input_tokens_seen": 6700016, "step": 11790 }, { "epoch": 206.93805309734512, "grad_norm": 5.683350536855869e-07, "learning_rate": 0.24011955017440395, "loss": 0.0, "num_input_tokens_seen": 6702592, "step": 11795 }, { "epoch": 207.01769911504425, "grad_norm": 3.197432079105056e-07, "learning_rate": 0.24007245458402696, "loss": 0.0, "num_input_tokens_seen": 6704968, "step": 11800 }, { "epoch": 207.01769911504425, "eval_loss": 0.4892892837524414, "eval_runtime": 0.9367, "eval_samples_per_second": 26.69, "eval_steps_per_second": 13.879, "num_input_tokens_seen": 6704968, "step": 11800 }, { "epoch": 207.10619469026548, "grad_norm": 1.49448098341054e-07, "learning_rate": 0.2400253451033456, "loss": 0.0, "num_input_tokens_seen": 6708008, "step": 11805 }, { "epoch": 207.1946902654867, "grad_norm": 2.9182066896282777e-07, "learning_rate": 0.23997822173962463, "loss": 0.0, "num_input_tokens_seen": 6711368, "step": 11810 }, { "epoch": 207.28318584070797, "grad_norm": 2.892212762617419e-07, "learning_rate": 0.23993108450013118, "loss": 0.0, "num_input_tokens_seen": 6714280, "step": 11815 }, { "epoch": 207.3716814159292, "grad_norm": 4.6587470592385216e-07, "learning_rate": 0.2398839333921343, "loss": 0.0, "num_input_tokens_seen": 6716840, "step": 11820 }, { "epoch": 207.46017699115043, "grad_norm": 6.0321710293465e-08, "learning_rate": 0.23983676842290536, "loss": 0.0, "num_input_tokens_seen": 6719288, "step": 11825 }, { "epoch": 207.5486725663717, "grad_norm": 1.510777565272292e-07, "learning_rate": 0.2397895895997178, "loss": 0.0, "num_input_tokens_seen": 6722488, "step": 11830 }, { "epoch": 207.63716814159292, "grad_norm": 3.27690344192888e-07, "learning_rate": 0.23974239692984714, "loss": 0.0, "num_input_tokens_seen": 6725464, "step": 11835 }, { "epoch": 207.72566371681415, "grad_norm": 2.2574933211672032e-07, "learning_rate": 0.2396951904205711, "loss": 0.0, "num_input_tokens_seen": 6728376, "step": 11840 }, { "epoch": 207.81415929203538, "grad_norm": 1.3871260762243764e-07, "learning_rate": 0.23964797007916952, "loss": 0.0, "num_input_tokens_seen": 6730856, "step": 11845 }, { "epoch": 207.90265486725664, "grad_norm": 1.014870392168632e-07, "learning_rate": 0.23960073591292436, "loss": 0.0, "num_input_tokens_seen": 6733608, "step": 11850 }, { "epoch": 207.99115044247787, "grad_norm": 2.5451330998293997e-07, "learning_rate": 0.2395534879291197, "loss": 0.0, "num_input_tokens_seen": 6736904, "step": 11855 }, { "epoch": 208.07079646017698, "grad_norm": 4.804853688256117e-07, "learning_rate": 0.23950622613504186, "loss": 0.0, "num_input_tokens_seen": 6739352, "step": 11860 }, { "epoch": 208.15929203539824, "grad_norm": 1.5932776875615673e-07, "learning_rate": 0.2394589505379791, "loss": 0.0, "num_input_tokens_seen": 6742152, "step": 11865 }, { "epoch": 208.24778761061947, "grad_norm": 7.910145427558746e-07, "learning_rate": 0.23941166114522197, "loss": 0.0, "num_input_tokens_seen": 6745224, "step": 11870 }, { "epoch": 208.3362831858407, "grad_norm": 3.9410454633070913e-07, "learning_rate": 0.23936435796406308, "loss": 0.0, "num_input_tokens_seen": 6748200, "step": 11875 }, { "epoch": 208.42477876106196, "grad_norm": 2.309620299456583e-07, "learning_rate": 0.23931704100179715, "loss": 0.0, "num_input_tokens_seen": 6750888, "step": 11880 }, { "epoch": 208.5132743362832, "grad_norm": 2.068713769176611e-07, "learning_rate": 0.2392697102657211, "loss": 0.0, "num_input_tokens_seen": 6753848, "step": 11885 }, { "epoch": 208.60176991150442, "grad_norm": 3.2528348015148367e-07, "learning_rate": 0.23922236576313388, "loss": 0.0, "num_input_tokens_seen": 6756264, "step": 11890 }, { "epoch": 208.69026548672565, "grad_norm": 2.4071962911875744e-07, "learning_rate": 0.2391750075013366, "loss": 0.0, "num_input_tokens_seen": 6759224, "step": 11895 }, { "epoch": 208.7787610619469, "grad_norm": 2.5311922513537866e-07, "learning_rate": 0.2391276354876326, "loss": 0.0, "num_input_tokens_seen": 6761736, "step": 11900 }, { "epoch": 208.86725663716814, "grad_norm": 4.633786545582552e-07, "learning_rate": 0.23908024972932707, "loss": 0.0, "num_input_tokens_seen": 6764824, "step": 11905 }, { "epoch": 208.95575221238937, "grad_norm": 1.3194527070936601e-07, "learning_rate": 0.2390328502337276, "loss": 0.0, "num_input_tokens_seen": 6768040, "step": 11910 }, { "epoch": 209.0353982300885, "grad_norm": 1.6709616090793133e-07, "learning_rate": 0.23898543700814376, "loss": 0.0, "num_input_tokens_seen": 6770584, "step": 11915 }, { "epoch": 209.12389380530973, "grad_norm": 6.593207046989846e-08, "learning_rate": 0.2389380100598873, "loss": 0.0, "num_input_tokens_seen": 6773256, "step": 11920 }, { "epoch": 209.21238938053096, "grad_norm": 2.7999178087156906e-07, "learning_rate": 0.23889056939627207, "loss": 0.0, "num_input_tokens_seen": 6775960, "step": 11925 }, { "epoch": 209.30088495575222, "grad_norm": 2.3850557795412897e-07, "learning_rate": 0.23884311502461386, "loss": 0.0, "num_input_tokens_seen": 6778840, "step": 11930 }, { "epoch": 209.38938053097345, "grad_norm": 8.749361057880378e-08, "learning_rate": 0.23879564695223088, "loss": 0.0, "num_input_tokens_seen": 6781656, "step": 11935 }, { "epoch": 209.47787610619469, "grad_norm": 6.715486478015009e-08, "learning_rate": 0.23874816518644332, "loss": 0.0, "num_input_tokens_seen": 6784824, "step": 11940 }, { "epoch": 209.56637168141592, "grad_norm": 2.8082251901651034e-07, "learning_rate": 0.23870066973457335, "loss": 0.0, "num_input_tokens_seen": 6787592, "step": 11945 }, { "epoch": 209.65486725663717, "grad_norm": 4.257181274169852e-07, "learning_rate": 0.23865316060394545, "loss": 0.0, "num_input_tokens_seen": 6790600, "step": 11950 }, { "epoch": 209.7433628318584, "grad_norm": 2.644700884957274e-07, "learning_rate": 0.2386056378018861, "loss": 0.0, "num_input_tokens_seen": 6793096, "step": 11955 }, { "epoch": 209.83185840707964, "grad_norm": 3.445805418778036e-07, "learning_rate": 0.2385581013357239, "loss": 0.0, "num_input_tokens_seen": 6796776, "step": 11960 }, { "epoch": 209.9203539823009, "grad_norm": 5.839167442900361e-07, "learning_rate": 0.23851055121278958, "loss": 0.0, "num_input_tokens_seen": 6799464, "step": 11965 }, { "epoch": 210.0, "grad_norm": 4.547521825770673e-07, "learning_rate": 0.23846298744041594, "loss": 0.0, "num_input_tokens_seen": 6802096, "step": 11970 }, { "epoch": 210.08849557522123, "grad_norm": 4.2158333712905005e-07, "learning_rate": 0.23841541002593802, "loss": 0.0, "num_input_tokens_seen": 6804736, "step": 11975 }, { "epoch": 210.1769911504425, "grad_norm": 2.382707009473961e-07, "learning_rate": 0.23836781897669276, "loss": 0.0, "num_input_tokens_seen": 6807408, "step": 11980 }, { "epoch": 210.26548672566372, "grad_norm": 2.783126262784208e-07, "learning_rate": 0.23832021430001926, "loss": 0.0, "num_input_tokens_seen": 6810368, "step": 11985 }, { "epoch": 210.35398230088495, "grad_norm": 1.1486446283015539e-07, "learning_rate": 0.2382725960032588, "loss": 0.0, "num_input_tokens_seen": 6813776, "step": 11990 }, { "epoch": 210.44247787610618, "grad_norm": 3.9150376096586115e-07, "learning_rate": 0.23822496409375482, "loss": 0.0, "num_input_tokens_seen": 6816432, "step": 11995 }, { "epoch": 210.53097345132744, "grad_norm": 3.304718632080039e-07, "learning_rate": 0.2381773185788526, "loss": 0.0, "num_input_tokens_seen": 6819568, "step": 12000 }, { "epoch": 210.53097345132744, "eval_loss": 0.49621883034706116, "eval_runtime": 0.9242, "eval_samples_per_second": 27.05, "eval_steps_per_second": 14.066, "num_input_tokens_seen": 6819568, "step": 12000 }, { "epoch": 210.61946902654867, "grad_norm": 2.0781872933639534e-07, "learning_rate": 0.2381296594658998, "loss": 0.0, "num_input_tokens_seen": 6822048, "step": 12005 }, { "epoch": 210.7079646017699, "grad_norm": 5.349688763089944e-07, "learning_rate": 0.238081986762246, "loss": 0.0, "num_input_tokens_seen": 6824960, "step": 12010 }, { "epoch": 210.79646017699116, "grad_norm": 2.2202142702099081e-07, "learning_rate": 0.23803430047524293, "loss": 0.0, "num_input_tokens_seen": 6827856, "step": 12015 }, { "epoch": 210.8849557522124, "grad_norm": 4.923164809156333e-08, "learning_rate": 0.23798660061224441, "loss": 0.0, "num_input_tokens_seen": 6831024, "step": 12020 }, { "epoch": 210.97345132743362, "grad_norm": 1.6533751079350623e-07, "learning_rate": 0.23793888718060632, "loss": 0.0, "num_input_tokens_seen": 6833952, "step": 12025 }, { "epoch": 211.05309734513276, "grad_norm": 2.785128856430674e-07, "learning_rate": 0.23789116018768675, "loss": 0.0, "num_input_tokens_seen": 6836016, "step": 12030 }, { "epoch": 211.141592920354, "grad_norm": 1.1303449554134204e-07, "learning_rate": 0.2378434196408458, "loss": 0.0, "num_input_tokens_seen": 6838512, "step": 12035 }, { "epoch": 211.23008849557522, "grad_norm": 1.3228979867108137e-07, "learning_rate": 0.23779566554744563, "loss": 0.0, "num_input_tokens_seen": 6841424, "step": 12040 }, { "epoch": 211.31858407079645, "grad_norm": 3.3929444498426165e-07, "learning_rate": 0.23774789791485051, "loss": 0.0, "num_input_tokens_seen": 6844480, "step": 12045 }, { "epoch": 211.4070796460177, "grad_norm": 1.4902263956173556e-07, "learning_rate": 0.2377001167504268, "loss": 0.0, "num_input_tokens_seen": 6847456, "step": 12050 }, { "epoch": 211.49557522123894, "grad_norm": 1.856122935350868e-07, "learning_rate": 0.23765232206154302, "loss": 0.0, "num_input_tokens_seen": 6850112, "step": 12055 }, { "epoch": 211.58407079646017, "grad_norm": 1.5214016002573771e-07, "learning_rate": 0.23760451385556966, "loss": 0.0, "num_input_tokens_seen": 6853136, "step": 12060 }, { "epoch": 211.67256637168143, "grad_norm": 9.710696957654363e-08, "learning_rate": 0.23755669213987932, "loss": 0.0, "num_input_tokens_seen": 6856240, "step": 12065 }, { "epoch": 211.76106194690266, "grad_norm": 5.941586778135388e-08, "learning_rate": 0.23750885692184676, "loss": 0.0, "num_input_tokens_seen": 6859168, "step": 12070 }, { "epoch": 211.8495575221239, "grad_norm": 1.0096700009398774e-07, "learning_rate": 0.23746100820884875, "loss": 0.0, "num_input_tokens_seen": 6862240, "step": 12075 }, { "epoch": 211.93805309734512, "grad_norm": 3.790553364524385e-07, "learning_rate": 0.23741314600826421, "loss": 0.0, "num_input_tokens_seen": 6865344, "step": 12080 }, { "epoch": 212.01769911504425, "grad_norm": 1.1807700417421074e-07, "learning_rate": 0.23736527032747406, "loss": 0.0, "num_input_tokens_seen": 6868176, "step": 12085 }, { "epoch": 212.10619469026548, "grad_norm": 6.57779537505121e-08, "learning_rate": 0.23731738117386128, "loss": 0.0, "num_input_tokens_seen": 6871296, "step": 12090 }, { "epoch": 212.1946902654867, "grad_norm": 2.130368557118345e-07, "learning_rate": 0.237269478554811, "loss": 0.0, "num_input_tokens_seen": 6873888, "step": 12095 }, { "epoch": 212.28318584070797, "grad_norm": 7.235352228462943e-08, "learning_rate": 0.23722156247771053, "loss": 0.0, "num_input_tokens_seen": 6876848, "step": 12100 }, { "epoch": 212.3716814159292, "grad_norm": 3.394007137558219e-07, "learning_rate": 0.23717363294994895, "loss": 0.0, "num_input_tokens_seen": 6880000, "step": 12105 }, { "epoch": 212.46017699115043, "grad_norm": 3.5401970421844453e-07, "learning_rate": 0.2371256899789177, "loss": 0.0, "num_input_tokens_seen": 6882928, "step": 12110 }, { "epoch": 212.5486725663717, "grad_norm": 1.1584475601011945e-07, "learning_rate": 0.23707773357201017, "loss": 0.0, "num_input_tokens_seen": 6886208, "step": 12115 }, { "epoch": 212.63716814159292, "grad_norm": 3.043722074380639e-07, "learning_rate": 0.2370297637366218, "loss": 0.0, "num_input_tokens_seen": 6888736, "step": 12120 }, { "epoch": 212.72566371681415, "grad_norm": 3.242463435526588e-07, "learning_rate": 0.23698178048015026, "loss": 0.0, "num_input_tokens_seen": 6891696, "step": 12125 }, { "epoch": 212.81415929203538, "grad_norm": 4.4308862356956524e-07, "learning_rate": 0.236933783809995, "loss": 0.0, "num_input_tokens_seen": 6894224, "step": 12130 }, { "epoch": 212.90265486725664, "grad_norm": 3.022300063548755e-07, "learning_rate": 0.23688577373355785, "loss": 0.0, "num_input_tokens_seen": 6896720, "step": 12135 }, { "epoch": 212.99115044247787, "grad_norm": 3.8581887906730117e-07, "learning_rate": 0.23683775025824247, "loss": 0.0, "num_input_tokens_seen": 6899408, "step": 12140 }, { "epoch": 213.07079646017698, "grad_norm": 1.3239019835964427e-07, "learning_rate": 0.2367897133914548, "loss": 0.0, "num_input_tokens_seen": 6902216, "step": 12145 }, { "epoch": 213.15929203539824, "grad_norm": 2.5055794594663894e-07, "learning_rate": 0.2367416631406026, "loss": 0.0, "num_input_tokens_seen": 6905544, "step": 12150 }, { "epoch": 213.24778761061947, "grad_norm": 3.6806500247621443e-07, "learning_rate": 0.23669359951309588, "loss": 0.0, "num_input_tokens_seen": 6907976, "step": 12155 }, { "epoch": 213.3362831858407, "grad_norm": 3.078547763379902e-07, "learning_rate": 0.23664552251634666, "loss": 0.0, "num_input_tokens_seen": 6911176, "step": 12160 }, { "epoch": 213.42477876106196, "grad_norm": 1.072717807915069e-07, "learning_rate": 0.23659743215776907, "loss": 0.0, "num_input_tokens_seen": 6913848, "step": 12165 }, { "epoch": 213.5132743362832, "grad_norm": 1.450858491125473e-07, "learning_rate": 0.23654932844477908, "loss": 0.0, "num_input_tokens_seen": 6916952, "step": 12170 }, { "epoch": 213.60176991150442, "grad_norm": 4.917617957289622e-07, "learning_rate": 0.23650121138479507, "loss": 0.0, "num_input_tokens_seen": 6919560, "step": 12175 }, { "epoch": 213.69026548672565, "grad_norm": 3.639090380147536e-07, "learning_rate": 0.23645308098523724, "loss": 0.0, "num_input_tokens_seen": 6922472, "step": 12180 }, { "epoch": 213.7787610619469, "grad_norm": 3.5976827916783805e-07, "learning_rate": 0.23640493725352785, "loss": 0.0, "num_input_tokens_seen": 6925528, "step": 12185 }, { "epoch": 213.86725663716814, "grad_norm": 1.562461733328746e-07, "learning_rate": 0.2363567801970913, "loss": 0.0, "num_input_tokens_seen": 6928072, "step": 12190 }, { "epoch": 213.95575221238937, "grad_norm": 2.707745068164513e-07, "learning_rate": 0.236308609823354, "loss": 0.0, "num_input_tokens_seen": 6930616, "step": 12195 }, { "epoch": 214.0353982300885, "grad_norm": 5.0929379824538046e-08, "learning_rate": 0.23626042613974452, "loss": 0.0, "num_input_tokens_seen": 6933264, "step": 12200 }, { "epoch": 214.0353982300885, "eval_loss": 0.4928906261920929, "eval_runtime": 0.9317, "eval_samples_per_second": 26.831, "eval_steps_per_second": 13.952, "num_input_tokens_seen": 6933264, "step": 12200 }, { "epoch": 214.12389380530973, "grad_norm": 4.867177949563484e-07, "learning_rate": 0.23621222915369325, "loss": 0.0, "num_input_tokens_seen": 6935952, "step": 12205 }, { "epoch": 214.21238938053096, "grad_norm": 2.3095871881650964e-07, "learning_rate": 0.23616401887263283, "loss": 0.0, "num_input_tokens_seen": 6938960, "step": 12210 }, { "epoch": 214.30088495575222, "grad_norm": 2.0928104049744434e-07, "learning_rate": 0.23611579530399793, "loss": 0.0, "num_input_tokens_seen": 6941936, "step": 12215 }, { "epoch": 214.38938053097345, "grad_norm": 3.0740426382180885e-07, "learning_rate": 0.23606755845522517, "loss": 0.0, "num_input_tokens_seen": 6944496, "step": 12220 }, { "epoch": 214.47787610619469, "grad_norm": 1.9969229470007122e-07, "learning_rate": 0.23601930833375329, "loss": 0.0, "num_input_tokens_seen": 6947264, "step": 12225 }, { "epoch": 214.56637168141592, "grad_norm": 1.302764616184504e-07, "learning_rate": 0.23597104494702312, "loss": 0.0, "num_input_tokens_seen": 6949744, "step": 12230 }, { "epoch": 214.65486725663717, "grad_norm": 1.2968605744845263e-07, "learning_rate": 0.23592276830247744, "loss": 0.0, "num_input_tokens_seen": 6952432, "step": 12235 }, { "epoch": 214.7433628318584, "grad_norm": 5.0475485124934494e-08, "learning_rate": 0.2358744784075611, "loss": 0.0, "num_input_tokens_seen": 6955008, "step": 12240 }, { "epoch": 214.83185840707964, "grad_norm": 4.78043091334257e-07, "learning_rate": 0.235826175269721, "loss": 0.0, "num_input_tokens_seen": 6957856, "step": 12245 }, { "epoch": 214.9203539823009, "grad_norm": 1.6798730939626694e-07, "learning_rate": 0.23577785889640612, "loss": 0.0, "num_input_tokens_seen": 6960784, "step": 12250 }, { "epoch": 215.0, "grad_norm": 1.3436637402719498e-07, "learning_rate": 0.23572952929506744, "loss": 0.0, "num_input_tokens_seen": 6963416, "step": 12255 }, { "epoch": 215.08849557522123, "grad_norm": 1.5469250058686157e-07, "learning_rate": 0.23568118647315803, "loss": 0.0, "num_input_tokens_seen": 6966216, "step": 12260 }, { "epoch": 215.1769911504425, "grad_norm": 1.9714543952886743e-07, "learning_rate": 0.23563283043813296, "loss": 0.0, "num_input_tokens_seen": 6968904, "step": 12265 }, { "epoch": 215.26548672566372, "grad_norm": 3.8524711953868973e-07, "learning_rate": 0.23558446119744922, "loss": 0.0, "num_input_tokens_seen": 6971272, "step": 12270 }, { "epoch": 215.35398230088495, "grad_norm": 1.973434962110332e-07, "learning_rate": 0.23553607875856608, "loss": 0.0, "num_input_tokens_seen": 6973976, "step": 12275 }, { "epoch": 215.44247787610618, "grad_norm": 2.0153437674252928e-07, "learning_rate": 0.2354876831289447, "loss": 0.0, "num_input_tokens_seen": 6977736, "step": 12280 }, { "epoch": 215.53097345132744, "grad_norm": 1.9678871865380643e-07, "learning_rate": 0.23543927431604827, "loss": 0.0, "num_input_tokens_seen": 6980936, "step": 12285 }, { "epoch": 215.61946902654867, "grad_norm": 2.2862732862449775e-07, "learning_rate": 0.23539085232734203, "loss": 0.0, "num_input_tokens_seen": 6984104, "step": 12290 }, { "epoch": 215.7079646017699, "grad_norm": 4.569604925563908e-07, "learning_rate": 0.2353424171702933, "loss": 0.0, "num_input_tokens_seen": 6986696, "step": 12295 }, { "epoch": 215.79646017699116, "grad_norm": 2.44468139953824e-07, "learning_rate": 0.23529396885237133, "loss": 0.0, "num_input_tokens_seen": 6989560, "step": 12300 }, { "epoch": 215.8849557522124, "grad_norm": 5.362099386729824e-07, "learning_rate": 0.2352455073810475, "loss": 0.0, "num_input_tokens_seen": 6992344, "step": 12305 }, { "epoch": 215.97345132743362, "grad_norm": 2.0994924909700785e-07, "learning_rate": 0.23519703276379517, "loss": 0.0, "num_input_tokens_seen": 6995064, "step": 12310 }, { "epoch": 216.05309734513276, "grad_norm": 5.53833388039493e-08, "learning_rate": 0.2351485450080897, "loss": 0.0, "num_input_tokens_seen": 6997408, "step": 12315 }, { "epoch": 216.141592920354, "grad_norm": 1.0898384772417558e-07, "learning_rate": 0.2351000441214086, "loss": 0.0, "num_input_tokens_seen": 7000240, "step": 12320 }, { "epoch": 216.23008849557522, "grad_norm": 4.014283092601545e-07, "learning_rate": 0.23505153011123125, "loss": 0.0, "num_input_tokens_seen": 7002960, "step": 12325 }, { "epoch": 216.31858407079645, "grad_norm": 2.1063546284949553e-07, "learning_rate": 0.23500300298503912, "loss": 0.0, "num_input_tokens_seen": 7005824, "step": 12330 }, { "epoch": 216.4070796460177, "grad_norm": 3.056090918107657e-07, "learning_rate": 0.23495446275031576, "loss": 0.0, "num_input_tokens_seen": 7008576, "step": 12335 }, { "epoch": 216.49557522123894, "grad_norm": 1.0643684333899728e-07, "learning_rate": 0.2349059094145466, "loss": 0.0, "num_input_tokens_seen": 7011344, "step": 12340 }, { "epoch": 216.58407079646017, "grad_norm": 2.2202651450697886e-07, "learning_rate": 0.2348573429852192, "loss": 0.0, "num_input_tokens_seen": 7014416, "step": 12345 }, { "epoch": 216.67256637168143, "grad_norm": 2.74347371487238e-07, "learning_rate": 0.23480876346982313, "loss": 0.0, "num_input_tokens_seen": 7017344, "step": 12350 }, { "epoch": 216.76106194690266, "grad_norm": 7.482920949541949e-08, "learning_rate": 0.23476017087585, "loss": 0.0, "num_input_tokens_seen": 7020400, "step": 12355 }, { "epoch": 216.8495575221239, "grad_norm": 2.5974929940275615e-07, "learning_rate": 0.23471156521079334, "loss": 0.0, "num_input_tokens_seen": 7023536, "step": 12360 }, { "epoch": 216.93805309734512, "grad_norm": 5.025373184253112e-07, "learning_rate": 0.23466294648214875, "loss": 0.0, "num_input_tokens_seen": 7026192, "step": 12365 }, { "epoch": 217.01769911504425, "grad_norm": 1.2951481664913445e-07, "learning_rate": 0.2346143146974139, "loss": 0.0, "num_input_tokens_seen": 7028648, "step": 12370 }, { "epoch": 217.10619469026548, "grad_norm": 8.34119617820761e-08, "learning_rate": 0.23456566986408836, "loss": 0.0, "num_input_tokens_seen": 7031272, "step": 12375 }, { "epoch": 217.1946902654867, "grad_norm": 1.7626679493787378e-07, "learning_rate": 0.23451701198967384, "loss": 0.0, "num_input_tokens_seen": 7033928, "step": 12380 }, { "epoch": 217.28318584070797, "grad_norm": 1.2963946005584148e-07, "learning_rate": 0.23446834108167397, "loss": 0.0, "num_input_tokens_seen": 7036984, "step": 12385 }, { "epoch": 217.3716814159292, "grad_norm": 1.960343922746688e-07, "learning_rate": 0.23441965714759438, "loss": 0.0, "num_input_tokens_seen": 7039480, "step": 12390 }, { "epoch": 217.46017699115043, "grad_norm": 6.779532668588217e-08, "learning_rate": 0.23437096019494277, "loss": 0.0, "num_input_tokens_seen": 7042536, "step": 12395 }, { "epoch": 217.5486725663717, "grad_norm": 9.813528123459037e-08, "learning_rate": 0.23432225023122885, "loss": 0.0, "num_input_tokens_seen": 7045688, "step": 12400 }, { "epoch": 217.5486725663717, "eval_loss": 0.49578797817230225, "eval_runtime": 0.9371, "eval_samples_per_second": 26.677, "eval_steps_per_second": 13.872, "num_input_tokens_seen": 7045688, "step": 12400 }, { "epoch": 217.63716814159292, "grad_norm": 1.702029379657688e-07, "learning_rate": 0.23427352726396428, "loss": 0.0, "num_input_tokens_seen": 7048344, "step": 12405 }, { "epoch": 217.72566371681415, "grad_norm": 1.6143071945862175e-07, "learning_rate": 0.2342247913006628, "loss": 0.0, "num_input_tokens_seen": 7051480, "step": 12410 }, { "epoch": 217.81415929203538, "grad_norm": 1.7043353750523238e-07, "learning_rate": 0.23417604234883999, "loss": 0.0, "num_input_tokens_seen": 7054264, "step": 12415 }, { "epoch": 217.90265486725664, "grad_norm": 1.3728536885082576e-07, "learning_rate": 0.23412728041601363, "loss": 0.0, "num_input_tokens_seen": 7057272, "step": 12420 }, { "epoch": 217.99115044247787, "grad_norm": 3.205842347142607e-07, "learning_rate": 0.23407850550970347, "loss": 0.0, "num_input_tokens_seen": 7060104, "step": 12425 }, { "epoch": 218.07079646017698, "grad_norm": 1.9919704641324643e-07, "learning_rate": 0.23402971763743116, "loss": 0.0, "num_input_tokens_seen": 7062728, "step": 12430 }, { "epoch": 218.15929203539824, "grad_norm": 3.390366885014373e-07, "learning_rate": 0.23398091680672037, "loss": 0.0, "num_input_tokens_seen": 7065416, "step": 12435 }, { "epoch": 218.24778761061947, "grad_norm": 2.4981272872537374e-07, "learning_rate": 0.23393210302509687, "loss": 0.0, "num_input_tokens_seen": 7068200, "step": 12440 }, { "epoch": 218.3362831858407, "grad_norm": 4.092896119800571e-07, "learning_rate": 0.23388327630008832, "loss": 0.0, "num_input_tokens_seen": 7070984, "step": 12445 }, { "epoch": 218.42477876106196, "grad_norm": 2.1598164323677338e-07, "learning_rate": 0.23383443663922443, "loss": 0.0, "num_input_tokens_seen": 7074312, "step": 12450 }, { "epoch": 218.5132743362832, "grad_norm": 2.551159923314117e-07, "learning_rate": 0.23378558405003685, "loss": 0.0, "num_input_tokens_seen": 7077048, "step": 12455 }, { "epoch": 218.60176991150442, "grad_norm": 3.0948223184168455e-07, "learning_rate": 0.2337367185400593, "loss": 0.0, "num_input_tokens_seen": 7079784, "step": 12460 }, { "epoch": 218.69026548672565, "grad_norm": 8.587687716499204e-08, "learning_rate": 0.23368784011682747, "loss": 0.0, "num_input_tokens_seen": 7083080, "step": 12465 }, { "epoch": 218.7787610619469, "grad_norm": 1.4631768863182515e-07, "learning_rate": 0.23363894878787902, "loss": 0.0, "num_input_tokens_seen": 7085848, "step": 12470 }, { "epoch": 218.86725663716814, "grad_norm": 4.377039601877186e-07, "learning_rate": 0.23359004456075352, "loss": 0.0, "num_input_tokens_seen": 7088760, "step": 12475 }, { "epoch": 218.95575221238937, "grad_norm": 3.220252438040916e-07, "learning_rate": 0.23354112744299277, "loss": 0.0, "num_input_tokens_seen": 7091816, "step": 12480 }, { "epoch": 219.0353982300885, "grad_norm": 1.4506402123970474e-07, "learning_rate": 0.2334921974421403, "loss": 0.0, "num_input_tokens_seen": 7094424, "step": 12485 }, { "epoch": 219.12389380530973, "grad_norm": 2.0734843531045044e-07, "learning_rate": 0.23344325456574178, "loss": 0.0, "num_input_tokens_seen": 7097208, "step": 12490 }, { "epoch": 219.21238938053096, "grad_norm": 3.7821507703483803e-07, "learning_rate": 0.23339429882134477, "loss": 0.0, "num_input_tokens_seen": 7100344, "step": 12495 }, { "epoch": 219.30088495575222, "grad_norm": 2.0057552774233045e-07, "learning_rate": 0.23334533021649884, "loss": 0.0, "num_input_tokens_seen": 7103320, "step": 12500 }, { "epoch": 219.38938053097345, "grad_norm": 1.8468847429176094e-07, "learning_rate": 0.23329634875875566, "loss": 0.0, "num_input_tokens_seen": 7106216, "step": 12505 }, { "epoch": 219.47787610619469, "grad_norm": 1.6738044905650895e-07, "learning_rate": 0.23324735445566874, "loss": 0.0, "num_input_tokens_seen": 7109080, "step": 12510 }, { "epoch": 219.56637168141592, "grad_norm": 1.2254810144440853e-07, "learning_rate": 0.2331983473147936, "loss": 0.0, "num_input_tokens_seen": 7112168, "step": 12515 }, { "epoch": 219.65486725663717, "grad_norm": 8.626222438579134e-07, "learning_rate": 0.23314932734368776, "loss": 0.0, "num_input_tokens_seen": 7115144, "step": 12520 }, { "epoch": 219.7433628318584, "grad_norm": 3.243315802592406e-07, "learning_rate": 0.2331002945499107, "loss": 0.0, "num_input_tokens_seen": 7117848, "step": 12525 }, { "epoch": 219.83185840707964, "grad_norm": 1.2940510885073309e-07, "learning_rate": 0.23305124894102397, "loss": 0.0, "num_input_tokens_seen": 7120664, "step": 12530 }, { "epoch": 219.9203539823009, "grad_norm": 8.2855230232326e-08, "learning_rate": 0.23300219052459092, "loss": 0.0, "num_input_tokens_seen": 7123208, "step": 12535 }, { "epoch": 220.0, "grad_norm": 4.7215301890446426e-08, "learning_rate": 0.23295311930817708, "loss": 0.0, "num_input_tokens_seen": 7125552, "step": 12540 }, { "epoch": 220.08849557522123, "grad_norm": 5.516052326015597e-08, "learning_rate": 0.23290403529934972, "loss": 0.0, "num_input_tokens_seen": 7128336, "step": 12545 }, { "epoch": 220.1769911504425, "grad_norm": 2.0745031292790372e-07, "learning_rate": 0.23285493850567832, "loss": 0.0, "num_input_tokens_seen": 7131072, "step": 12550 }, { "epoch": 220.26548672566372, "grad_norm": 2.7110854716738686e-07, "learning_rate": 0.23280582893473414, "loss": 0.0, "num_input_tokens_seen": 7133808, "step": 12555 }, { "epoch": 220.35398230088495, "grad_norm": 2.0625986962841125e-07, "learning_rate": 0.2327567065940906, "loss": 0.0, "num_input_tokens_seen": 7137520, "step": 12560 }, { "epoch": 220.44247787610618, "grad_norm": 1.8880187724334974e-07, "learning_rate": 0.23270757149132285, "loss": 0.0, "num_input_tokens_seen": 7140544, "step": 12565 }, { "epoch": 220.53097345132744, "grad_norm": 1.3934661069470167e-07, "learning_rate": 0.23265842363400827, "loss": 0.0, "num_input_tokens_seen": 7143216, "step": 12570 }, { "epoch": 220.61946902654867, "grad_norm": 3.14423658664964e-07, "learning_rate": 0.23260926302972595, "loss": 0.0, "num_input_tokens_seen": 7146096, "step": 12575 }, { "epoch": 220.7079646017699, "grad_norm": 9.249797017218953e-08, "learning_rate": 0.2325600896860572, "loss": 0.0, "num_input_tokens_seen": 7149184, "step": 12580 }, { "epoch": 220.79646017699116, "grad_norm": 6.314449763067387e-08, "learning_rate": 0.23251090361058505, "loss": 0.0, "num_input_tokens_seen": 7152144, "step": 12585 }, { "epoch": 220.8849557522124, "grad_norm": 8.207099000401286e-08, "learning_rate": 0.23246170481089476, "loss": 0.0, "num_input_tokens_seen": 7154704, "step": 12590 }, { "epoch": 220.97345132743362, "grad_norm": 2.479816032519011e-07, "learning_rate": 0.23241249329457317, "loss": 0.0, "num_input_tokens_seen": 7157568, "step": 12595 }, { "epoch": 221.05309734513276, "grad_norm": 2.1852331144600612e-07, "learning_rate": 0.23236326906920957, "loss": 0.0, "num_input_tokens_seen": 7159888, "step": 12600 }, { "epoch": 221.05309734513276, "eval_loss": 0.5160678625106812, "eval_runtime": 0.9363, "eval_samples_per_second": 26.701, "eval_steps_per_second": 13.885, "num_input_tokens_seen": 7159888, "step": 12600 }, { "epoch": 221.141592920354, "grad_norm": 8.579769428251893e-08, "learning_rate": 0.2323140321423948, "loss": 0.0, "num_input_tokens_seen": 7162768, "step": 12605 }, { "epoch": 221.23008849557522, "grad_norm": 1.3107484164720518e-07, "learning_rate": 0.23226478252172184, "loss": 0.0, "num_input_tokens_seen": 7165776, "step": 12610 }, { "epoch": 221.31858407079645, "grad_norm": 2.907549401243159e-07, "learning_rate": 0.23221552021478561, "loss": 0.0, "num_input_tokens_seen": 7168688, "step": 12615 }, { "epoch": 221.4070796460177, "grad_norm": 2.2111785824563412e-07, "learning_rate": 0.232166245229183, "loss": 0.0, "num_input_tokens_seen": 7171600, "step": 12620 }, { "epoch": 221.49557522123894, "grad_norm": 4.810115328268694e-08, "learning_rate": 0.2321169575725128, "loss": 0.0, "num_input_tokens_seen": 7174656, "step": 12625 }, { "epoch": 221.58407079646017, "grad_norm": 1.5246392592871416e-07, "learning_rate": 0.23206765725237577, "loss": 0.0, "num_input_tokens_seen": 7177648, "step": 12630 }, { "epoch": 221.67256637168143, "grad_norm": 2.4809074261611386e-07, "learning_rate": 0.2320183442763747, "loss": 0.0, "num_input_tokens_seen": 7180208, "step": 12635 }, { "epoch": 221.76106194690266, "grad_norm": 2.0712660386834614e-07, "learning_rate": 0.23196901865211422, "loss": 0.0, "num_input_tokens_seen": 7183504, "step": 12640 }, { "epoch": 221.8495575221239, "grad_norm": 2.2103148467067513e-07, "learning_rate": 0.231919680387201, "loss": 0.0, "num_input_tokens_seen": 7186512, "step": 12645 }, { "epoch": 221.93805309734512, "grad_norm": 3.521013809404394e-07, "learning_rate": 0.23187032948924358, "loss": 0.0, "num_input_tokens_seen": 7188912, "step": 12650 }, { "epoch": 222.01769911504425, "grad_norm": 3.639564170043741e-07, "learning_rate": 0.23182096596585247, "loss": 0.0, "num_input_tokens_seen": 7191504, "step": 12655 }, { "epoch": 222.10619469026548, "grad_norm": 3.236130226014211e-07, "learning_rate": 0.23177158982464025, "loss": 0.0, "num_input_tokens_seen": 7194384, "step": 12660 }, { "epoch": 222.1946902654867, "grad_norm": 3.971410365011252e-07, "learning_rate": 0.23172220107322122, "loss": 0.0, "num_input_tokens_seen": 7196960, "step": 12665 }, { "epoch": 222.28318584070797, "grad_norm": 1.2077116195996496e-07, "learning_rate": 0.23167279971921184, "loss": 0.0, "num_input_tokens_seen": 7200176, "step": 12670 }, { "epoch": 222.3716814159292, "grad_norm": 1.7236239102658146e-07, "learning_rate": 0.23162338577023034, "loss": 0.0, "num_input_tokens_seen": 7203392, "step": 12675 }, { "epoch": 222.46017699115043, "grad_norm": 2.6268776309734676e-07, "learning_rate": 0.23157395923389704, "loss": 0.0, "num_input_tokens_seen": 7206432, "step": 12680 }, { "epoch": 222.5486725663717, "grad_norm": 2.8908664262417005e-07, "learning_rate": 0.2315245201178341, "loss": 0.0, "num_input_tokens_seen": 7209296, "step": 12685 }, { "epoch": 222.63716814159292, "grad_norm": 2.0516149845661857e-07, "learning_rate": 0.23147506842966564, "loss": 0.0, "num_input_tokens_seen": 7212336, "step": 12690 }, { "epoch": 222.72566371681415, "grad_norm": 1.4075017418235802e-07, "learning_rate": 0.23142560417701774, "loss": 0.0, "num_input_tokens_seen": 7214960, "step": 12695 }, { "epoch": 222.81415929203538, "grad_norm": 4.5211596244598695e-08, "learning_rate": 0.23137612736751845, "loss": 0.0, "num_input_tokens_seen": 7217520, "step": 12700 }, { "epoch": 222.90265486725664, "grad_norm": 2.22119666659637e-07, "learning_rate": 0.23132663800879766, "loss": 0.0, "num_input_tokens_seen": 7220528, "step": 12705 }, { "epoch": 222.99115044247787, "grad_norm": 2.241112611045537e-07, "learning_rate": 0.2312771361084873, "loss": 0.0, "num_input_tokens_seen": 7223104, "step": 12710 }, { "epoch": 223.07079646017698, "grad_norm": 1.6822690440676524e-07, "learning_rate": 0.23122762167422112, "loss": 0.0, "num_input_tokens_seen": 7225568, "step": 12715 }, { "epoch": 223.15929203539824, "grad_norm": 3.1796182042853616e-07, "learning_rate": 0.23117809471363493, "loss": 0.0, "num_input_tokens_seen": 7228464, "step": 12720 }, { "epoch": 223.24778761061947, "grad_norm": 1.1719263426357429e-07, "learning_rate": 0.23112855523436637, "loss": 0.0, "num_input_tokens_seen": 7231024, "step": 12725 }, { "epoch": 223.3362831858407, "grad_norm": 1.4325594577258016e-07, "learning_rate": 0.23107900324405511, "loss": 0.0, "num_input_tokens_seen": 7233744, "step": 12730 }, { "epoch": 223.42477876106196, "grad_norm": 5.29597173226648e-07, "learning_rate": 0.2310294387503426, "loss": 0.0, "num_input_tokens_seen": 7237136, "step": 12735 }, { "epoch": 223.5132743362832, "grad_norm": 2.1905952962697484e-07, "learning_rate": 0.23097986176087237, "loss": 0.0, "num_input_tokens_seen": 7240288, "step": 12740 }, { "epoch": 223.60176991150442, "grad_norm": 5.510364786687205e-08, "learning_rate": 0.23093027228328986, "loss": 0.0, "num_input_tokens_seen": 7243232, "step": 12745 }, { "epoch": 223.69026548672565, "grad_norm": 2.2768264784644998e-07, "learning_rate": 0.23088067032524226, "loss": 0.0, "num_input_tokens_seen": 7246096, "step": 12750 }, { "epoch": 223.7787610619469, "grad_norm": 3.597275508582243e-07, "learning_rate": 0.23083105589437888, "loss": 0.0, "num_input_tokens_seen": 7249152, "step": 12755 }, { "epoch": 223.86725663716814, "grad_norm": 7.595461681830784e-08, "learning_rate": 0.23078142899835094, "loss": 0.0, "num_input_tokens_seen": 7252032, "step": 12760 }, { "epoch": 223.95575221238937, "grad_norm": 1.969575293969683e-07, "learning_rate": 0.23073178964481147, "loss": 0.0, "num_input_tokens_seen": 7254384, "step": 12765 }, { "epoch": 224.0353982300885, "grad_norm": 4.378934193027817e-07, "learning_rate": 0.2306821378414155, "loss": 0.0, "num_input_tokens_seen": 7256824, "step": 12770 }, { "epoch": 224.12389380530973, "grad_norm": 2.5744733989085944e-07, "learning_rate": 0.2306324735958199, "loss": 0.0, "num_input_tokens_seen": 7260104, "step": 12775 }, { "epoch": 224.21238938053096, "grad_norm": 1.6714179196242185e-07, "learning_rate": 0.23058279691568362, "loss": 0.0, "num_input_tokens_seen": 7263144, "step": 12780 }, { "epoch": 224.30088495575222, "grad_norm": 1.7217217873621848e-07, "learning_rate": 0.23053310780866745, "loss": 0.0, "num_input_tokens_seen": 7266136, "step": 12785 }, { "epoch": 224.38938053097345, "grad_norm": 8.126347950110357e-08, "learning_rate": 0.23048340628243397, "loss": 0.0, "num_input_tokens_seen": 7268968, "step": 12790 }, { "epoch": 224.47787610619469, "grad_norm": 3.05055408489352e-07, "learning_rate": 0.23043369234464783, "loss": 0.0, "num_input_tokens_seen": 7271496, "step": 12795 }, { "epoch": 224.56637168141592, "grad_norm": 8.278031060626745e-08, "learning_rate": 0.2303839660029755, "loss": 0.0, "num_input_tokens_seen": 7274296, "step": 12800 }, { "epoch": 224.56637168141592, "eval_loss": 0.507752001285553, "eval_runtime": 0.9413, "eval_samples_per_second": 26.558, "eval_steps_per_second": 13.81, "num_input_tokens_seen": 7274296, "step": 12800 }, { "epoch": 224.65486725663717, "grad_norm": 1.308851835801761e-07, "learning_rate": 0.23033422726508548, "loss": 0.0, "num_input_tokens_seen": 7277256, "step": 12805 }, { "epoch": 224.7433628318584, "grad_norm": 1.4374191437127593e-07, "learning_rate": 0.23028447613864808, "loss": 0.0, "num_input_tokens_seen": 7280152, "step": 12810 }, { "epoch": 224.83185840707964, "grad_norm": 1.5049036505843105e-07, "learning_rate": 0.2302347126313355, "loss": 0.0, "num_input_tokens_seen": 7282760, "step": 12815 }, { "epoch": 224.9203539823009, "grad_norm": 1.5567887601264374e-07, "learning_rate": 0.23018493675082197, "loss": 0.0, "num_input_tokens_seen": 7285448, "step": 12820 }, { "epoch": 225.0, "grad_norm": 3.368114676050027e-07, "learning_rate": 0.2301351485047835, "loss": 0.0, "num_input_tokens_seen": 7288280, "step": 12825 }, { "epoch": 225.08849557522123, "grad_norm": 2.876383007333061e-07, "learning_rate": 0.23008534790089813, "loss": 0.0, "num_input_tokens_seen": 7291208, "step": 12830 }, { "epoch": 225.1769911504425, "grad_norm": 8.366114201407981e-08, "learning_rate": 0.2300355349468457, "loss": 0.0, "num_input_tokens_seen": 7293928, "step": 12835 }, { "epoch": 225.26548672566372, "grad_norm": 2.457952632539673e-07, "learning_rate": 0.22998570965030793, "loss": 0.0, "num_input_tokens_seen": 7296728, "step": 12840 }, { "epoch": 225.35398230088495, "grad_norm": 2.9464618478414195e-07, "learning_rate": 0.22993587201896862, "loss": 0.0, "num_input_tokens_seen": 7299848, "step": 12845 }, { "epoch": 225.44247787610618, "grad_norm": 4.159136324233259e-07, "learning_rate": 0.2298860220605133, "loss": 0.0, "num_input_tokens_seen": 7303112, "step": 12850 }, { "epoch": 225.53097345132744, "grad_norm": 2.878837790376565e-07, "learning_rate": 0.22983615978262942, "loss": 0.0, "num_input_tokens_seen": 7305960, "step": 12855 }, { "epoch": 225.61946902654867, "grad_norm": 1.3703025558697846e-07, "learning_rate": 0.22978628519300648, "loss": 0.0, "num_input_tokens_seen": 7308984, "step": 12860 }, { "epoch": 225.7079646017699, "grad_norm": 2.308554769570037e-07, "learning_rate": 0.22973639829933568, "loss": 0.0, "num_input_tokens_seen": 7311832, "step": 12865 }, { "epoch": 225.79646017699116, "grad_norm": 1.1249062481510919e-07, "learning_rate": 0.22968649910931027, "loss": 0.0, "num_input_tokens_seen": 7314552, "step": 12870 }, { "epoch": 225.8849557522124, "grad_norm": 1.3017057653996744e-07, "learning_rate": 0.22963658763062528, "loss": 0.0, "num_input_tokens_seen": 7317736, "step": 12875 }, { "epoch": 225.97345132743362, "grad_norm": 1.5673307984798157e-07, "learning_rate": 0.22958666387097765, "loss": 0.0, "num_input_tokens_seen": 7320104, "step": 12880 }, { "epoch": 226.05309734513276, "grad_norm": 2.300128443266658e-07, "learning_rate": 0.22953672783806633, "loss": 0.0, "num_input_tokens_seen": 7322248, "step": 12885 }, { "epoch": 226.141592920354, "grad_norm": 1.1761974860746705e-07, "learning_rate": 0.22948677953959207, "loss": 0.0, "num_input_tokens_seen": 7325208, "step": 12890 }, { "epoch": 226.23008849557522, "grad_norm": 4.197062608568558e-08, "learning_rate": 0.2294368189832575, "loss": 0.0, "num_input_tokens_seen": 7327992, "step": 12895 }, { "epoch": 226.31858407079645, "grad_norm": 3.5693901168087905e-07, "learning_rate": 0.2293868461767672, "loss": 0.0, "num_input_tokens_seen": 7330936, "step": 12900 }, { "epoch": 226.4070796460177, "grad_norm": 1.2009093097731238e-07, "learning_rate": 0.22933686112782758, "loss": 0.0, "num_input_tokens_seen": 7333400, "step": 12905 }, { "epoch": 226.49557522123894, "grad_norm": 2.52236162623376e-07, "learning_rate": 0.22928686384414698, "loss": 0.0, "num_input_tokens_seen": 7336056, "step": 12910 }, { "epoch": 226.58407079646017, "grad_norm": 2.0431423308764352e-07, "learning_rate": 0.22923685433343552, "loss": 0.0, "num_input_tokens_seen": 7339064, "step": 12915 }, { "epoch": 226.67256637168143, "grad_norm": 8.888812175200655e-08, "learning_rate": 0.22918683260340542, "loss": 0.0, "num_input_tokens_seen": 7342264, "step": 12920 }, { "epoch": 226.76106194690266, "grad_norm": 3.0201735512491723e-07, "learning_rate": 0.2291367986617706, "loss": 0.0, "num_input_tokens_seen": 7344872, "step": 12925 }, { "epoch": 226.8495575221239, "grad_norm": 1.9705346687715064e-07, "learning_rate": 0.22908675251624697, "loss": 0.0, "num_input_tokens_seen": 7347752, "step": 12930 }, { "epoch": 226.93805309734512, "grad_norm": 1.715404494007089e-07, "learning_rate": 0.22903669417455216, "loss": 0.0, "num_input_tokens_seen": 7350632, "step": 12935 }, { "epoch": 227.01769911504425, "grad_norm": 1.8676725943578276e-08, "learning_rate": 0.22898662364440592, "loss": 0.0, "num_input_tokens_seen": 7353528, "step": 12940 }, { "epoch": 227.10619469026548, "grad_norm": 9.794651134598098e-08, "learning_rate": 0.2289365409335297, "loss": 0.0, "num_input_tokens_seen": 7356520, "step": 12945 }, { "epoch": 227.1946902654867, "grad_norm": 1.692681337317481e-07, "learning_rate": 0.2288864460496469, "loss": 0.0, "num_input_tokens_seen": 7359640, "step": 12950 }, { "epoch": 227.28318584070797, "grad_norm": 1.4435357797992765e-07, "learning_rate": 0.22883633900048272, "loss": 0.0, "num_input_tokens_seen": 7362504, "step": 12955 }, { "epoch": 227.3716814159292, "grad_norm": 3.7982462686159124e-07, "learning_rate": 0.2287862197937644, "loss": 0.0, "num_input_tokens_seen": 7365048, "step": 12960 }, { "epoch": 227.46017699115043, "grad_norm": 1.5413110077133751e-07, "learning_rate": 0.2287360884372209, "loss": 0.0, "num_input_tokens_seen": 7367976, "step": 12965 }, { "epoch": 227.5486725663717, "grad_norm": 2.7868668439623434e-07, "learning_rate": 0.22868594493858307, "loss": 0.0, "num_input_tokens_seen": 7370664, "step": 12970 }, { "epoch": 227.63716814159292, "grad_norm": 1.2751722522352793e-07, "learning_rate": 0.2286357893055837, "loss": 0.0, "num_input_tokens_seen": 7373832, "step": 12975 }, { "epoch": 227.72566371681415, "grad_norm": 1.1333040106364933e-07, "learning_rate": 0.22858562154595746, "loss": 0.0, "num_input_tokens_seen": 7376888, "step": 12980 }, { "epoch": 227.81415929203538, "grad_norm": 2.826891147833521e-07, "learning_rate": 0.22853544166744078, "loss": 0.0, "num_input_tokens_seen": 7379784, "step": 12985 }, { "epoch": 227.90265486725664, "grad_norm": 2.091875188625636e-07, "learning_rate": 0.22848524967777206, "loss": 0.0, "num_input_tokens_seen": 7382792, "step": 12990 }, { "epoch": 227.99115044247787, "grad_norm": 2.9369294907155563e-07, "learning_rate": 0.22843504558469152, "loss": 0.0, "num_input_tokens_seen": 7385400, "step": 12995 }, { "epoch": 228.07079646017698, "grad_norm": 9.852700344481491e-08, "learning_rate": 0.2283848293959413, "loss": 0.0, "num_input_tokens_seen": 7387544, "step": 13000 }, { "epoch": 228.07079646017698, "eval_loss": 0.5155118703842163, "eval_runtime": 0.9364, "eval_samples_per_second": 26.697, "eval_steps_per_second": 13.882, "num_input_tokens_seen": 7387544, "step": 13000 }, { "epoch": 228.15929203539824, "grad_norm": 2.473234985700401e-07, "learning_rate": 0.22833460111926532, "loss": 0.0, "num_input_tokens_seen": 7390040, "step": 13005 }, { "epoch": 228.24778761061947, "grad_norm": 1.5035975309274363e-07, "learning_rate": 0.22828436076240946, "loss": 0.0, "num_input_tokens_seen": 7392728, "step": 13010 }, { "epoch": 228.3362831858407, "grad_norm": 4.0559064018452773e-07, "learning_rate": 0.22823410833312135, "loss": 0.0, "num_input_tokens_seen": 7395432, "step": 13015 }, { "epoch": 228.42477876106196, "grad_norm": 1.3695444067707285e-07, "learning_rate": 0.2281838438391506, "loss": 0.0, "num_input_tokens_seen": 7398024, "step": 13020 }, { "epoch": 228.5132743362832, "grad_norm": 1.201549935103685e-07, "learning_rate": 0.22813356728824863, "loss": 0.0, "num_input_tokens_seen": 7401032, "step": 13025 }, { "epoch": 228.60176991150442, "grad_norm": 1.7291259268858994e-07, "learning_rate": 0.2280832786881687, "loss": 0.0, "num_input_tokens_seen": 7403880, "step": 13030 }, { "epoch": 228.69026548672565, "grad_norm": 3.930601266688427e-08, "learning_rate": 0.22803297804666592, "loss": 0.0, "num_input_tokens_seen": 7407064, "step": 13035 }, { "epoch": 228.7787610619469, "grad_norm": 1.4642513868068363e-08, "learning_rate": 0.22798266537149728, "loss": 0.0, "num_input_tokens_seen": 7409832, "step": 13040 }, { "epoch": 228.86725663716814, "grad_norm": 2.330566530872602e-07, "learning_rate": 0.22793234067042167, "loss": 0.0, "num_input_tokens_seen": 7412776, "step": 13045 }, { "epoch": 228.95575221238937, "grad_norm": 1.506413696006348e-07, "learning_rate": 0.22788200395119979, "loss": 0.0, "num_input_tokens_seen": 7415768, "step": 13050 }, { "epoch": 229.0353982300885, "grad_norm": 3.2726933341109543e-07, "learning_rate": 0.2278316552215942, "loss": 0.0, "num_input_tokens_seen": 7418296, "step": 13055 }, { "epoch": 229.12389380530973, "grad_norm": 1.5190136082310346e-07, "learning_rate": 0.22778129448936918, "loss": 0.0, "num_input_tokens_seen": 7420952, "step": 13060 }, { "epoch": 229.21238938053096, "grad_norm": 1.734777299589041e-07, "learning_rate": 0.22773092176229118, "loss": 0.0, "num_input_tokens_seen": 7423912, "step": 13065 }, { "epoch": 229.30088495575222, "grad_norm": 2.0497672892361152e-07, "learning_rate": 0.22768053704812816, "loss": 0.0, "num_input_tokens_seen": 7426264, "step": 13070 }, { "epoch": 229.38938053097345, "grad_norm": 2.3342381894053688e-07, "learning_rate": 0.22763014035465018, "loss": 0.0, "num_input_tokens_seen": 7429368, "step": 13075 }, { "epoch": 229.47787610619469, "grad_norm": 4.9142450109229685e-08, "learning_rate": 0.22757973168962892, "loss": 0.0, "num_input_tokens_seen": 7432120, "step": 13080 }, { "epoch": 229.56637168141592, "grad_norm": 1.5448104306869936e-07, "learning_rate": 0.22752931106083818, "loss": 0.0, "num_input_tokens_seen": 7435144, "step": 13085 }, { "epoch": 229.65486725663717, "grad_norm": 1.029226694981844e-07, "learning_rate": 0.22747887847605341, "loss": 0.0, "num_input_tokens_seen": 7438040, "step": 13090 }, { "epoch": 229.7433628318584, "grad_norm": 1.1244738118421083e-07, "learning_rate": 0.22742843394305184, "loss": 0.0, "num_input_tokens_seen": 7441160, "step": 13095 }, { "epoch": 229.83185840707964, "grad_norm": 2.1851153064744722e-07, "learning_rate": 0.22737797746961272, "loss": 0.0, "num_input_tokens_seen": 7444408, "step": 13100 }, { "epoch": 229.9203539823009, "grad_norm": 2.03382512609096e-07, "learning_rate": 0.22732750906351712, "loss": 0.0, "num_input_tokens_seen": 7447208, "step": 13105 }, { "epoch": 230.0, "grad_norm": 2.6268194730505456e-08, "learning_rate": 0.22727702873254785, "loss": 0.0, "num_input_tokens_seen": 7449656, "step": 13110 }, { "epoch": 230.08849557522123, "grad_norm": 2.272135901648653e-07, "learning_rate": 0.22722653648448968, "loss": 0.0, "num_input_tokens_seen": 7452312, "step": 13115 }, { "epoch": 230.1769911504425, "grad_norm": 1.3316589786427357e-07, "learning_rate": 0.22717603232712902, "loss": 0.0, "num_input_tokens_seen": 7454904, "step": 13120 }, { "epoch": 230.26548672566372, "grad_norm": 5.073731657034841e-08, "learning_rate": 0.22712551626825436, "loss": 0.0, "num_input_tokens_seen": 7457400, "step": 13125 }, { "epoch": 230.35398230088495, "grad_norm": 1.4682116500353004e-07, "learning_rate": 0.2270749883156559, "loss": 0.0, "num_input_tokens_seen": 7460424, "step": 13130 }, { "epoch": 230.44247787610618, "grad_norm": 5.7583520174375735e-08, "learning_rate": 0.22702444847712563, "loss": 0.0, "num_input_tokens_seen": 7463832, "step": 13135 }, { "epoch": 230.53097345132744, "grad_norm": 1.5088373572780256e-07, "learning_rate": 0.22697389676045743, "loss": 0.0, "num_input_tokens_seen": 7466520, "step": 13140 }, { "epoch": 230.61946902654867, "grad_norm": 4.2896863305941224e-07, "learning_rate": 0.22692333317344704, "loss": 0.0, "num_input_tokens_seen": 7469576, "step": 13145 }, { "epoch": 230.7079646017699, "grad_norm": 1.4654298752247996e-07, "learning_rate": 0.22687275772389198, "loss": 0.0, "num_input_tokens_seen": 7472120, "step": 13150 }, { "epoch": 230.79646017699116, "grad_norm": 1.0079144630026349e-07, "learning_rate": 0.22682217041959168, "loss": 0.0, "num_input_tokens_seen": 7475128, "step": 13155 }, { "epoch": 230.8849557522124, "grad_norm": 7.909792287819073e-08, "learning_rate": 0.2267715712683473, "loss": 0.0, "num_input_tokens_seen": 7478616, "step": 13160 }, { "epoch": 230.97345132743362, "grad_norm": 1.0788301096908981e-07, "learning_rate": 0.22672096027796182, "loss": 0.0, "num_input_tokens_seen": 7481336, "step": 13165 }, { "epoch": 231.05309734513276, "grad_norm": 1.3944898569206998e-07, "learning_rate": 0.22667033745624016, "loss": 0.0, "num_input_tokens_seen": 7483656, "step": 13170 }, { "epoch": 231.141592920354, "grad_norm": 8.761520575717441e-08, "learning_rate": 0.22661970281098895, "loss": 0.0, "num_input_tokens_seen": 7486552, "step": 13175 }, { "epoch": 231.23008849557522, "grad_norm": 1.9537120010681974e-07, "learning_rate": 0.22656905635001667, "loss": 0.0, "num_input_tokens_seen": 7489160, "step": 13180 }, { "epoch": 231.31858407079645, "grad_norm": 2.7822039783131913e-07, "learning_rate": 0.2265183980811337, "loss": 0.0, "num_input_tokens_seen": 7492088, "step": 13185 }, { "epoch": 231.4070796460177, "grad_norm": 2.3056364284457231e-07, "learning_rate": 0.22646772801215218, "loss": 0.0, "num_input_tokens_seen": 7494856, "step": 13190 }, { "epoch": 231.49557522123894, "grad_norm": 2.64628283730417e-07, "learning_rate": 0.22641704615088598, "loss": 0.0, "num_input_tokens_seen": 7497224, "step": 13195 }, { "epoch": 231.58407079646017, "grad_norm": 9.605379602817266e-08, "learning_rate": 0.22636635250515103, "loss": 0.0, "num_input_tokens_seen": 7500200, "step": 13200 }, { "epoch": 231.58407079646017, "eval_loss": 0.5223559737205505, "eval_runtime": 0.9387, "eval_samples_per_second": 26.632, "eval_steps_per_second": 13.849, "num_input_tokens_seen": 7500200, "step": 13200 }, { "epoch": 231.67256637168143, "grad_norm": 5.35792210598629e-08, "learning_rate": 0.2263156470827648, "loss": 0.0, "num_input_tokens_seen": 7503064, "step": 13205 }, { "epoch": 231.76106194690266, "grad_norm": 1.3816706712077576e-07, "learning_rate": 0.22626492989154678, "loss": 0.0, "num_input_tokens_seen": 7506248, "step": 13210 }, { "epoch": 231.8495575221239, "grad_norm": 1.2032234053549473e-07, "learning_rate": 0.22621420093931813, "loss": 0.0, "num_input_tokens_seen": 7509816, "step": 13215 }, { "epoch": 231.93805309734512, "grad_norm": 2.911886838319333e-07, "learning_rate": 0.22616346023390194, "loss": 0.0, "num_input_tokens_seen": 7512536, "step": 13220 }, { "epoch": 232.01769911504425, "grad_norm": 8.917180593925877e-08, "learning_rate": 0.22611270778312306, "loss": 0.0, "num_input_tokens_seen": 7514808, "step": 13225 }, { "epoch": 232.10619469026548, "grad_norm": 8.42208436324654e-08, "learning_rate": 0.2260619435948081, "loss": 0.0, "num_input_tokens_seen": 7517448, "step": 13230 }, { "epoch": 232.1946902654867, "grad_norm": 2.317940044349598e-07, "learning_rate": 0.22601116767678567, "loss": 0.0, "num_input_tokens_seen": 7520392, "step": 13235 }, { "epoch": 232.28318584070797, "grad_norm": 1.4000461590057967e-07, "learning_rate": 0.2259603800368859, "loss": 0.0, "num_input_tokens_seen": 7523544, "step": 13240 }, { "epoch": 232.3716814159292, "grad_norm": 2.2937180688131775e-07, "learning_rate": 0.22590958068294098, "loss": 0.0, "num_input_tokens_seen": 7526312, "step": 13245 }, { "epoch": 232.46017699115043, "grad_norm": 1.5351567128618626e-08, "learning_rate": 0.22585876962278478, "loss": 0.0, "num_input_tokens_seen": 7529512, "step": 13250 }, { "epoch": 232.5486725663717, "grad_norm": 2.931399762928777e-07, "learning_rate": 0.22580794686425298, "loss": 0.0, "num_input_tokens_seen": 7532680, "step": 13255 }, { "epoch": 232.63716814159292, "grad_norm": 1.6108195666220126e-07, "learning_rate": 0.22575711241518312, "loss": 0.0, "num_input_tokens_seen": 7535384, "step": 13260 }, { "epoch": 232.72566371681415, "grad_norm": 2.2461414062036056e-07, "learning_rate": 0.22570626628341453, "loss": 0.0, "num_input_tokens_seen": 7538376, "step": 13265 }, { "epoch": 232.81415929203538, "grad_norm": 2.2685225076202187e-07, "learning_rate": 0.22565540847678828, "loss": 0.0, "num_input_tokens_seen": 7540728, "step": 13270 }, { "epoch": 232.90265486725664, "grad_norm": 1.0076487200194606e-07, "learning_rate": 0.2256045390031473, "loss": 0.0, "num_input_tokens_seen": 7543848, "step": 13275 }, { "epoch": 232.99115044247787, "grad_norm": 4.863818858780178e-08, "learning_rate": 0.22555365787033627, "loss": 0.0, "num_input_tokens_seen": 7546680, "step": 13280 }, { "epoch": 233.07079646017698, "grad_norm": 9.012196500179925e-08, "learning_rate": 0.22550276508620173, "loss": 0.0, "num_input_tokens_seen": 7548792, "step": 13285 }, { "epoch": 233.15929203539824, "grad_norm": 8.210285074028434e-08, "learning_rate": 0.22545186065859202, "loss": 0.0, "num_input_tokens_seen": 7552184, "step": 13290 }, { "epoch": 233.24778761061947, "grad_norm": 9.337158246580657e-08, "learning_rate": 0.2254009445953572, "loss": 0.0, "num_input_tokens_seen": 7555480, "step": 13295 }, { "epoch": 233.3362831858407, "grad_norm": 2.3670577320444863e-07, "learning_rate": 0.22535001690434917, "loss": 0.0, "num_input_tokens_seen": 7558584, "step": 13300 }, { "epoch": 233.42477876106196, "grad_norm": 1.2158277229445957e-07, "learning_rate": 0.22529907759342163, "loss": 0.0, "num_input_tokens_seen": 7561160, "step": 13305 }, { "epoch": 233.5132743362832, "grad_norm": 5.333944486096698e-08, "learning_rate": 0.22524812667043007, "loss": 0.0, "num_input_tokens_seen": 7563464, "step": 13310 }, { "epoch": 233.60176991150442, "grad_norm": 1.5588577184644237e-07, "learning_rate": 0.22519716414323177, "loss": 0.0, "num_input_tokens_seen": 7566280, "step": 13315 }, { "epoch": 233.69026548672565, "grad_norm": 9.762036512483974e-08, "learning_rate": 0.22514619001968567, "loss": 0.0, "num_input_tokens_seen": 7569448, "step": 13320 }, { "epoch": 233.7787610619469, "grad_norm": 1.3770301166005083e-07, "learning_rate": 0.2250952043076528, "loss": 0.0, "num_input_tokens_seen": 7572344, "step": 13325 }, { "epoch": 233.86725663716814, "grad_norm": 3.9409073337992595e-07, "learning_rate": 0.2250442070149957, "loss": 0.0, "num_input_tokens_seen": 7575016, "step": 13330 }, { "epoch": 233.95575221238937, "grad_norm": 2.6318477353015624e-07, "learning_rate": 0.22499319814957885, "loss": 0.0, "num_input_tokens_seen": 7577656, "step": 13335 }, { "epoch": 234.0353982300885, "grad_norm": 4.177179491193783e-08, "learning_rate": 0.2249421777192684, "loss": 0.0, "num_input_tokens_seen": 7580016, "step": 13340 }, { "epoch": 234.12389380530973, "grad_norm": 1.7240311933619523e-07, "learning_rate": 0.22489114573193236, "loss": 0.0, "num_input_tokens_seen": 7583104, "step": 13345 }, { "epoch": 234.21238938053096, "grad_norm": 1.5972338474057324e-07, "learning_rate": 0.2248401021954405, "loss": 0.0, "num_input_tokens_seen": 7585952, "step": 13350 }, { "epoch": 234.30088495575222, "grad_norm": 1.933827320499404e-07, "learning_rate": 0.22478904711766443, "loss": 0.0, "num_input_tokens_seen": 7588480, "step": 13355 }, { "epoch": 234.38938053097345, "grad_norm": 7.857140360556514e-08, "learning_rate": 0.22473798050647734, "loss": 0.0, "num_input_tokens_seen": 7591248, "step": 13360 }, { "epoch": 234.47787610619469, "grad_norm": 1.0612455270120336e-07, "learning_rate": 0.22468690236975453, "loss": 0.0, "num_input_tokens_seen": 7594544, "step": 13365 }, { "epoch": 234.56637168141592, "grad_norm": 6.72360087605739e-08, "learning_rate": 0.22463581271537272, "loss": 0.0, "num_input_tokens_seen": 7597600, "step": 13370 }, { "epoch": 234.65486725663717, "grad_norm": 1.9408464879688836e-07, "learning_rate": 0.22458471155121076, "loss": 0.0, "num_input_tokens_seen": 7600336, "step": 13375 }, { "epoch": 234.7433628318584, "grad_norm": 3.3308771207885e-07, "learning_rate": 0.2245335988851489, "loss": 0.0, "num_input_tokens_seen": 7603536, "step": 13380 }, { "epoch": 234.83185840707964, "grad_norm": 1.9260458827830007e-07, "learning_rate": 0.2244824747250695, "loss": 0.0, "num_input_tokens_seen": 7606048, "step": 13385 }, { "epoch": 234.9203539823009, "grad_norm": 3.122568728031183e-07, "learning_rate": 0.22443133907885646, "loss": 0.0, "num_input_tokens_seen": 7609008, "step": 13390 }, { "epoch": 235.0, "grad_norm": 5.298866767589061e-08, "learning_rate": 0.22438019195439557, "loss": 0.0, "num_input_tokens_seen": 7611336, "step": 13395 }, { "epoch": 235.08849557522123, "grad_norm": 5.59909842934303e-08, "learning_rate": 0.22432903335957435, "loss": 0.0, "num_input_tokens_seen": 7614696, "step": 13400 }, { "epoch": 235.08849557522123, "eval_loss": 0.5160706043243408, "eval_runtime": 0.9512, "eval_samples_per_second": 26.284, "eval_steps_per_second": 13.667, "num_input_tokens_seen": 7614696, "step": 13400 }, { "epoch": 235.1769911504425, "grad_norm": 8.799434425554864e-08, "learning_rate": 0.22427786330228214, "loss": 0.0, "num_input_tokens_seen": 7617704, "step": 13405 }, { "epoch": 235.26548672566372, "grad_norm": 7.064246432264554e-08, "learning_rate": 0.22422668179040997, "loss": 0.0, "num_input_tokens_seen": 7620856, "step": 13410 }, { "epoch": 235.35398230088495, "grad_norm": 1.2515444325345015e-07, "learning_rate": 0.2241754888318507, "loss": 0.0, "num_input_tokens_seen": 7623880, "step": 13415 }, { "epoch": 235.44247787610618, "grad_norm": 2.0693806845883955e-07, "learning_rate": 0.22412428443449886, "loss": 0.0, "num_input_tokens_seen": 7626696, "step": 13420 }, { "epoch": 235.53097345132744, "grad_norm": 1.0405786099454417e-07, "learning_rate": 0.22407306860625087, "loss": 0.0, "num_input_tokens_seen": 7629208, "step": 13425 }, { "epoch": 235.61946902654867, "grad_norm": 2.2646235109391455e-08, "learning_rate": 0.22402184135500483, "loss": 0.0, "num_input_tokens_seen": 7631656, "step": 13430 }, { "epoch": 235.7079646017699, "grad_norm": 2.432120709272567e-07, "learning_rate": 0.22397060268866067, "loss": 0.0, "num_input_tokens_seen": 7634376, "step": 13435 }, { "epoch": 235.79646017699116, "grad_norm": 1.0972663488928447e-07, "learning_rate": 0.22391935261511994, "loss": 0.0, "num_input_tokens_seen": 7637336, "step": 13440 }, { "epoch": 235.8849557522124, "grad_norm": 9.786468524453085e-08, "learning_rate": 0.22386809114228615, "loss": 0.0, "num_input_tokens_seen": 7640328, "step": 13445 }, { "epoch": 235.97345132743362, "grad_norm": 1.4379567403466353e-07, "learning_rate": 0.22381681827806446, "loss": 0.0, "num_input_tokens_seen": 7643384, "step": 13450 }, { "epoch": 236.05309734513276, "grad_norm": 2.1530219385113014e-07, "learning_rate": 0.22376553403036173, "loss": 0.0, "num_input_tokens_seen": 7645784, "step": 13455 }, { "epoch": 236.141592920354, "grad_norm": 2.0641188314129977e-07, "learning_rate": 0.22371423840708662, "loss": 0.0, "num_input_tokens_seen": 7649064, "step": 13460 }, { "epoch": 236.23008849557522, "grad_norm": 1.4548278670645232e-07, "learning_rate": 0.22366293141614962, "loss": 0.0, "num_input_tokens_seen": 7651896, "step": 13465 }, { "epoch": 236.31858407079645, "grad_norm": 1.2186619358089956e-07, "learning_rate": 0.22361161306546287, "loss": 0.0, "num_input_tokens_seen": 7655032, "step": 13470 }, { "epoch": 236.4070796460177, "grad_norm": 2.2030299362540973e-07, "learning_rate": 0.22356028336294037, "loss": 0.0, "num_input_tokens_seen": 7657464, "step": 13475 }, { "epoch": 236.49557522123894, "grad_norm": 2.1448151699132723e-07, "learning_rate": 0.2235089423164977, "loss": 0.0, "num_input_tokens_seen": 7660840, "step": 13480 }, { "epoch": 236.58407079646017, "grad_norm": 1.9818853047581797e-07, "learning_rate": 0.22345758993405243, "loss": 0.0, "num_input_tokens_seen": 7663400, "step": 13485 }, { "epoch": 236.67256637168143, "grad_norm": 1.9654122240808647e-07, "learning_rate": 0.2234062262235236, "loss": 0.0, "num_input_tokens_seen": 7666120, "step": 13490 }, { "epoch": 236.76106194690266, "grad_norm": 5.6811575888104926e-08, "learning_rate": 0.22335485119283222, "loss": 0.0, "num_input_tokens_seen": 7668920, "step": 13495 }, { "epoch": 236.8495575221239, "grad_norm": 1.7074833635888353e-07, "learning_rate": 0.22330346484990093, "loss": 0.0, "num_input_tokens_seen": 7671784, "step": 13500 }, { "epoch": 236.93805309734512, "grad_norm": 1.4003654769112472e-07, "learning_rate": 0.22325206720265425, "loss": 0.0, "num_input_tokens_seen": 7674408, "step": 13505 }, { "epoch": 237.01769911504425, "grad_norm": 1.3728376302424294e-07, "learning_rate": 0.2232006582590182, "loss": 0.0, "num_input_tokens_seen": 7676832, "step": 13510 }, { "epoch": 237.10619469026548, "grad_norm": 2.2931004650672548e-07, "learning_rate": 0.22314923802692077, "loss": 0.0, "num_input_tokens_seen": 7679792, "step": 13515 }, { "epoch": 237.1946902654867, "grad_norm": 9.959889268884581e-08, "learning_rate": 0.22309780651429156, "loss": 0.0, "num_input_tokens_seen": 7682752, "step": 13520 }, { "epoch": 237.28318584070797, "grad_norm": 3.415758840219496e-07, "learning_rate": 0.22304636372906203, "loss": 0.0, "num_input_tokens_seen": 7685760, "step": 13525 }, { "epoch": 237.3716814159292, "grad_norm": 2.431520442769397e-07, "learning_rate": 0.22299490967916522, "loss": 0.0, "num_input_tokens_seen": 7689168, "step": 13530 }, { "epoch": 237.46017699115043, "grad_norm": 1.1407337296986952e-07, "learning_rate": 0.22294344437253602, "loss": 0.0, "num_input_tokens_seen": 7691856, "step": 13535 }, { "epoch": 237.5486725663717, "grad_norm": 1.829622391369412e-07, "learning_rate": 0.22289196781711101, "loss": 0.0, "num_input_tokens_seen": 7694384, "step": 13540 }, { "epoch": 237.63716814159292, "grad_norm": 1.09208642129488e-07, "learning_rate": 0.2228404800208286, "loss": 0.0, "num_input_tokens_seen": 7697440, "step": 13545 }, { "epoch": 237.72566371681415, "grad_norm": 1.0060184507665326e-07, "learning_rate": 0.22278898099162875, "loss": 0.0, "num_input_tokens_seen": 7700336, "step": 13550 }, { "epoch": 237.81415929203538, "grad_norm": 1.0756939161638002e-07, "learning_rate": 0.22273747073745337, "loss": 0.0, "num_input_tokens_seen": 7703616, "step": 13555 }, { "epoch": 237.90265486725664, "grad_norm": 5.3694403590043294e-08, "learning_rate": 0.22268594926624588, "loss": 0.0, "num_input_tokens_seen": 7706144, "step": 13560 }, { "epoch": 237.99115044247787, "grad_norm": 1.2821382711081242e-07, "learning_rate": 0.22263441658595162, "loss": 0.0, "num_input_tokens_seen": 7708704, "step": 13565 }, { "epoch": 238.07079646017698, "grad_norm": 2.1772461877844762e-07, "learning_rate": 0.2225828727045175, "loss": 0.0, "num_input_tokens_seen": 7710984, "step": 13570 }, { "epoch": 238.15929203539824, "grad_norm": 7.493192555330097e-08, "learning_rate": 0.22253131762989228, "loss": 0.0, "num_input_tokens_seen": 7713608, "step": 13575 }, { "epoch": 238.24778761061947, "grad_norm": 1.4854119001483923e-07, "learning_rate": 0.2224797513700264, "loss": 0.0, "num_input_tokens_seen": 7716488, "step": 13580 }, { "epoch": 238.3362831858407, "grad_norm": 1.404228697765575e-07, "learning_rate": 0.22242817393287204, "loss": 0.0, "num_input_tokens_seen": 7719640, "step": 13585 }, { "epoch": 238.42477876106196, "grad_norm": 1.1873232352854757e-07, "learning_rate": 0.22237658532638305, "loss": 0.0, "num_input_tokens_seen": 7722200, "step": 13590 }, { "epoch": 238.5132743362832, "grad_norm": 5.755083165581709e-08, "learning_rate": 0.22232498555851513, "loss": 0.0, "num_input_tokens_seen": 7724840, "step": 13595 }, { "epoch": 238.60176991150442, "grad_norm": 2.927222908510885e-07, "learning_rate": 0.22227337463722546, "loss": 0.0, "num_input_tokens_seen": 7727608, "step": 13600 }, { "epoch": 238.60176991150442, "eval_loss": 0.5257773399353027, "eval_runtime": 0.9416, "eval_samples_per_second": 26.55, "eval_steps_per_second": 13.806, "num_input_tokens_seen": 7727608, "step": 13600 }, { "epoch": 238.69026548672565, "grad_norm": 1.5359999849806627e-07, "learning_rate": 0.2222217525704732, "loss": 0.0, "num_input_tokens_seen": 7730376, "step": 13605 }, { "epoch": 238.7787610619469, "grad_norm": 1.1623088624901357e-07, "learning_rate": 0.22217011936621908, "loss": 0.0, "num_input_tokens_seen": 7733128, "step": 13610 }, { "epoch": 238.86725663716814, "grad_norm": 1.3574191370935296e-07, "learning_rate": 0.22211847503242566, "loss": 0.0, "num_input_tokens_seen": 7736056, "step": 13615 }, { "epoch": 238.95575221238937, "grad_norm": 4.7832873661945996e-08, "learning_rate": 0.22206681957705704, "loss": 0.0, "num_input_tokens_seen": 7739048, "step": 13620 }, { "epoch": 239.0353982300885, "grad_norm": 1.9300378184539113e-08, "learning_rate": 0.2220151530080792, "loss": 0.0, "num_input_tokens_seen": 7741696, "step": 13625 }, { "epoch": 239.12389380530973, "grad_norm": 1.7185854517265398e-07, "learning_rate": 0.2219634753334598, "loss": 0.0, "num_input_tokens_seen": 7744384, "step": 13630 }, { "epoch": 239.21238938053096, "grad_norm": 1.3765111361863092e-07, "learning_rate": 0.22191178656116817, "loss": 0.0, "num_input_tokens_seen": 7747776, "step": 13635 }, { "epoch": 239.30088495575222, "grad_norm": 7.344385011265331e-08, "learning_rate": 0.2218600866991753, "loss": 0.0, "num_input_tokens_seen": 7750400, "step": 13640 }, { "epoch": 239.38938053097345, "grad_norm": 1.611604147910839e-07, "learning_rate": 0.221808375755454, "loss": 0.0, "num_input_tokens_seen": 7753376, "step": 13645 }, { "epoch": 239.47787610619469, "grad_norm": 2.5138470860497364e-08, "learning_rate": 0.22175665373797881, "loss": 0.0, "num_input_tokens_seen": 7756048, "step": 13650 }, { "epoch": 239.56637168141592, "grad_norm": 9.119344923647077e-08, "learning_rate": 0.22170492065472583, "loss": 0.0, "num_input_tokens_seen": 7758720, "step": 13655 }, { "epoch": 239.65486725663717, "grad_norm": 3.137144233278377e-08, "learning_rate": 0.221653176513673, "loss": 0.0, "num_input_tokens_seen": 7762096, "step": 13660 }, { "epoch": 239.7433628318584, "grad_norm": 1.8929809186829516e-07, "learning_rate": 0.2216014213227999, "loss": 0.0, "num_input_tokens_seen": 7764944, "step": 13665 }, { "epoch": 239.83185840707964, "grad_norm": 1.9127828920773027e-07, "learning_rate": 0.22154965509008784, "loss": 0.0, "num_input_tokens_seen": 7767696, "step": 13670 }, { "epoch": 239.9203539823009, "grad_norm": 1.0361125646340952e-07, "learning_rate": 0.2214978778235198, "loss": 0.0, "num_input_tokens_seen": 7770096, "step": 13675 }, { "epoch": 240.0, "grad_norm": 8.150314556587546e-07, "learning_rate": 0.2214460895310805, "loss": 0.0, "num_input_tokens_seen": 7772536, "step": 13680 }, { "epoch": 240.08849557522123, "grad_norm": 1.164557446031722e-07, "learning_rate": 0.22139429022075635, "loss": 0.0, "num_input_tokens_seen": 7775784, "step": 13685 }, { "epoch": 240.1769911504425, "grad_norm": 1.4205002685230284e-07, "learning_rate": 0.22134247990053546, "loss": 0.0, "num_input_tokens_seen": 7778312, "step": 13690 }, { "epoch": 240.26548672566372, "grad_norm": 1.675143579404903e-07, "learning_rate": 0.2212906585784076, "loss": 0.0, "num_input_tokens_seen": 7781112, "step": 13695 }, { "epoch": 240.35398230088495, "grad_norm": 1.510160885231926e-08, "learning_rate": 0.22123882626236432, "loss": 0.0, "num_input_tokens_seen": 7783944, "step": 13700 }, { "epoch": 240.44247787610618, "grad_norm": 1.6188612050882512e-07, "learning_rate": 0.2211869829603988, "loss": 0.0, "num_input_tokens_seen": 7787192, "step": 13705 }, { "epoch": 240.53097345132744, "grad_norm": 1.7247224093352997e-07, "learning_rate": 0.22113512868050592, "loss": 0.0, "num_input_tokens_seen": 7790280, "step": 13710 }, { "epoch": 240.61946902654867, "grad_norm": 1.758469068136037e-07, "learning_rate": 0.2210832634306822, "loss": 0.0, "num_input_tokens_seen": 7793176, "step": 13715 }, { "epoch": 240.7079646017699, "grad_norm": 1.0901798930262885e-07, "learning_rate": 0.22103138721892598, "loss": 0.0, "num_input_tokens_seen": 7796072, "step": 13720 }, { "epoch": 240.79646017699116, "grad_norm": 1.222950203327855e-07, "learning_rate": 0.22097950005323724, "loss": 0.0, "num_input_tokens_seen": 7798968, "step": 13725 }, { "epoch": 240.8849557522124, "grad_norm": 1.4041638962680736e-07, "learning_rate": 0.22092760194161762, "loss": 0.0, "num_input_tokens_seen": 7801640, "step": 13730 }, { "epoch": 240.97345132743362, "grad_norm": 2.1586514264981815e-07, "learning_rate": 0.2208756928920704, "loss": 0.0, "num_input_tokens_seen": 7804472, "step": 13735 }, { "epoch": 241.05309734513276, "grad_norm": 1.2975438323792332e-07, "learning_rate": 0.22082377291260072, "loss": 0.0, "num_input_tokens_seen": 7806696, "step": 13740 }, { "epoch": 241.141592920354, "grad_norm": 1.0007765638420096e-07, "learning_rate": 0.2207718420112152, "loss": 0.0, "num_input_tokens_seen": 7809304, "step": 13745 }, { "epoch": 241.23008849557522, "grad_norm": 5.561296845257857e-08, "learning_rate": 0.22071990019592228, "loss": 0.0, "num_input_tokens_seen": 7811912, "step": 13750 }, { "epoch": 241.31858407079645, "grad_norm": 1.0997740673701628e-07, "learning_rate": 0.22066794747473198, "loss": 0.0, "num_input_tokens_seen": 7815256, "step": 13755 }, { "epoch": 241.4070796460177, "grad_norm": 1.307185186760762e-07, "learning_rate": 0.2206159838556562, "loss": 0.0, "num_input_tokens_seen": 7818152, "step": 13760 }, { "epoch": 241.49557522123894, "grad_norm": 1.361951404987849e-07, "learning_rate": 0.2205640093467082, "loss": 0.0, "num_input_tokens_seen": 7821752, "step": 13765 }, { "epoch": 241.58407079646017, "grad_norm": 7.977958205174218e-08, "learning_rate": 0.22051202395590322, "loss": 0.0, "num_input_tokens_seen": 7824344, "step": 13770 }, { "epoch": 241.67256637168143, "grad_norm": 2.0460380767417519e-07, "learning_rate": 0.22046002769125808, "loss": 0.0, "num_input_tokens_seen": 7827464, "step": 13775 }, { "epoch": 241.76106194690266, "grad_norm": 8.143172891550421e-08, "learning_rate": 0.2204080205607912, "loss": 0.0, "num_input_tokens_seen": 7830376, "step": 13780 }, { "epoch": 241.8495575221239, "grad_norm": 9.609262008325459e-08, "learning_rate": 0.22035600257252272, "loss": 0.0, "num_input_tokens_seen": 7832856, "step": 13785 }, { "epoch": 241.93805309734512, "grad_norm": 6.787737305558039e-08, "learning_rate": 0.2203039737344745, "loss": 0.0, "num_input_tokens_seen": 7835800, "step": 13790 }, { "epoch": 242.01769911504425, "grad_norm": 2.448912539421144e-07, "learning_rate": 0.22025193405467003, "loss": 0.0, "num_input_tokens_seen": 7838072, "step": 13795 }, { "epoch": 242.10619469026548, "grad_norm": 1.9076000512541214e-07, "learning_rate": 0.2201998835411345, "loss": 0.0, "num_input_tokens_seen": 7840696, "step": 13800 }, { "epoch": 242.10619469026548, "eval_loss": 0.5332645773887634, "eval_runtime": 0.9356, "eval_samples_per_second": 26.721, "eval_steps_per_second": 13.895, "num_input_tokens_seen": 7840696, "step": 13800 }, { "epoch": 242.1946902654867, "grad_norm": 4.217205784584621e-08, "learning_rate": 0.22014782220189474, "loss": 0.0, "num_input_tokens_seen": 7843656, "step": 13805 }, { "epoch": 242.28318584070797, "grad_norm": 7.804183610460314e-08, "learning_rate": 0.2200957500449793, "loss": 0.0, "num_input_tokens_seen": 7846568, "step": 13810 }, { "epoch": 242.3716814159292, "grad_norm": 1.5613738924002973e-07, "learning_rate": 0.22004366707841827, "loss": 0.0, "num_input_tokens_seen": 7849208, "step": 13815 }, { "epoch": 242.46017699115043, "grad_norm": 1.8025227177531633e-07, "learning_rate": 0.21999157331024358, "loss": 0.0, "num_input_tokens_seen": 7851832, "step": 13820 }, { "epoch": 242.5486725663717, "grad_norm": 1.671162266347892e-07, "learning_rate": 0.21993946874848871, "loss": 0.0, "num_input_tokens_seen": 7854456, "step": 13825 }, { "epoch": 242.63716814159292, "grad_norm": 1.339509623221602e-07, "learning_rate": 0.2198873534011888, "loss": 0.0, "num_input_tokens_seen": 7857464, "step": 13830 }, { "epoch": 242.72566371681415, "grad_norm": 2.430681718124106e-07, "learning_rate": 0.2198352272763808, "loss": 0.0, "num_input_tokens_seen": 7860760, "step": 13835 }, { "epoch": 242.81415929203538, "grad_norm": 1.359424715019486e-07, "learning_rate": 0.2197830903821031, "loss": 0.0, "num_input_tokens_seen": 7863448, "step": 13840 }, { "epoch": 242.90265486725664, "grad_norm": 1.8141631130674796e-07, "learning_rate": 0.21973094272639598, "loss": 0.0, "num_input_tokens_seen": 7866360, "step": 13845 }, { "epoch": 242.99115044247787, "grad_norm": 2.608825298011652e-07, "learning_rate": 0.21967878431730117, "loss": 0.0, "num_input_tokens_seen": 7870088, "step": 13850 }, { "epoch": 243.07079646017698, "grad_norm": 3.5753551230754965e-08, "learning_rate": 0.21962661516286217, "loss": 0.0, "num_input_tokens_seen": 7872152, "step": 13855 }, { "epoch": 243.15929203539824, "grad_norm": 2.0584251103628048e-07, "learning_rate": 0.21957443527112414, "loss": 0.0, "num_input_tokens_seen": 7874824, "step": 13860 }, { "epoch": 243.24778761061947, "grad_norm": 9.247877841289665e-08, "learning_rate": 0.21952224465013384, "loss": 0.0, "num_input_tokens_seen": 7877576, "step": 13865 }, { "epoch": 243.3362831858407, "grad_norm": 1.1881395067803169e-07, "learning_rate": 0.21947004330793976, "loss": 0.0, "num_input_tokens_seen": 7880216, "step": 13870 }, { "epoch": 243.42477876106196, "grad_norm": 1.821852322336781e-07, "learning_rate": 0.21941783125259198, "loss": 0.0, "num_input_tokens_seen": 7883096, "step": 13875 }, { "epoch": 243.5132743362832, "grad_norm": 5.999988417215718e-08, "learning_rate": 0.21936560849214226, "loss": 0.0, "num_input_tokens_seen": 7886120, "step": 13880 }, { "epoch": 243.60176991150442, "grad_norm": 1.2058698928285594e-07, "learning_rate": 0.21931337503464404, "loss": 0.0, "num_input_tokens_seen": 7889000, "step": 13885 }, { "epoch": 243.69026548672565, "grad_norm": 1.394865734027917e-07, "learning_rate": 0.21926113088815233, "loss": 0.0, "num_input_tokens_seen": 7892168, "step": 13890 }, { "epoch": 243.7787610619469, "grad_norm": 4.558276600619138e-08, "learning_rate": 0.2192088760607238, "loss": 0.0, "num_input_tokens_seen": 7895192, "step": 13895 }, { "epoch": 243.86725663716814, "grad_norm": 8.53177866133592e-08, "learning_rate": 0.2191566105604169, "loss": 0.0, "num_input_tokens_seen": 7898216, "step": 13900 }, { "epoch": 243.95575221238937, "grad_norm": 1.7587866807389219e-07, "learning_rate": 0.21910433439529153, "loss": 0.0, "num_input_tokens_seen": 7901288, "step": 13905 }, { "epoch": 244.0353982300885, "grad_norm": 5.652385226539991e-08, "learning_rate": 0.2190520475734094, "loss": 0.0, "num_input_tokens_seen": 7903720, "step": 13910 }, { "epoch": 244.12389380530973, "grad_norm": 1.0745360157216055e-07, "learning_rate": 0.2189997501028338, "loss": 0.0, "num_input_tokens_seen": 7906184, "step": 13915 }, { "epoch": 244.21238938053096, "grad_norm": 2.114705068834155e-07, "learning_rate": 0.2189474419916296, "loss": 0.0, "num_input_tokens_seen": 7908888, "step": 13920 }, { "epoch": 244.30088495575222, "grad_norm": 1.0678497375238294e-07, "learning_rate": 0.21889512324786342, "loss": 0.0, "num_input_tokens_seen": 7911736, "step": 13925 }, { "epoch": 244.38938053097345, "grad_norm": 2.984749514212126e-08, "learning_rate": 0.21884279387960345, "loss": 0.0, "num_input_tokens_seen": 7914840, "step": 13930 }, { "epoch": 244.47787610619469, "grad_norm": 1.0421387486303502e-07, "learning_rate": 0.2187904538949195, "loss": 0.0, "num_input_tokens_seen": 7917976, "step": 13935 }, { "epoch": 244.56637168141592, "grad_norm": 2.5855086960291374e-07, "learning_rate": 0.2187381033018831, "loss": 0.0, "num_input_tokens_seen": 7920792, "step": 13940 }, { "epoch": 244.65486725663717, "grad_norm": 2.515216834808598e-08, "learning_rate": 0.2186857421085673, "loss": 0.0, "num_input_tokens_seen": 7923960, "step": 13945 }, { "epoch": 244.7433628318584, "grad_norm": 1.3160094169961667e-07, "learning_rate": 0.21863337032304697, "loss": 0.0, "num_input_tokens_seen": 7926728, "step": 13950 }, { "epoch": 244.83185840707964, "grad_norm": 3.6961058214046716e-08, "learning_rate": 0.21858098795339845, "loss": 0.0, "num_input_tokens_seen": 7929432, "step": 13955 }, { "epoch": 244.9203539823009, "grad_norm": 1.7482962277881597e-07, "learning_rate": 0.21852859500769975, "loss": 0.0, "num_input_tokens_seen": 7932200, "step": 13960 }, { "epoch": 245.0, "grad_norm": 8.843733922958563e-08, "learning_rate": 0.21847619149403044, "loss": 0.0, "num_input_tokens_seen": 7934424, "step": 13965 }, { "epoch": 245.08849557522123, "grad_norm": 9.31732060394097e-08, "learning_rate": 0.21842377742047195, "loss": 0.0, "num_input_tokens_seen": 7937624, "step": 13970 }, { "epoch": 245.1769911504425, "grad_norm": 1.5332994962591329e-07, "learning_rate": 0.21837135279510705, "loss": 0.0, "num_input_tokens_seen": 7940504, "step": 13975 }, { "epoch": 245.26548672566372, "grad_norm": 1.1206294203702782e-07, "learning_rate": 0.21831891762602038, "loss": 0.0, "num_input_tokens_seen": 7943528, "step": 13980 }, { "epoch": 245.35398230088495, "grad_norm": 2.2777787478389655e-07, "learning_rate": 0.21826647192129806, "loss": 0.0, "num_input_tokens_seen": 7946728, "step": 13985 }, { "epoch": 245.44247787610618, "grad_norm": 1.203353861001233e-07, "learning_rate": 0.21821401568902787, "loss": 0.0, "num_input_tokens_seen": 7949480, "step": 13990 }, { "epoch": 245.53097345132744, "grad_norm": 1.4154991845316545e-07, "learning_rate": 0.21816154893729925, "loss": 0.0, "num_input_tokens_seen": 7952056, "step": 13995 }, { "epoch": 245.61946902654867, "grad_norm": 1.3212302008014376e-07, "learning_rate": 0.2181090716742032, "loss": 0.0, "num_input_tokens_seen": 7954632, "step": 14000 }, { "epoch": 245.61946902654867, "eval_loss": 0.5419710874557495, "eval_runtime": 0.936, "eval_samples_per_second": 26.71, "eval_steps_per_second": 13.889, "num_input_tokens_seen": 7954632, "step": 14000 }, { "epoch": 245.7079646017699, "grad_norm": 4.98760002187737e-08, "learning_rate": 0.21805658390783236, "loss": 0.0, "num_input_tokens_seen": 7957656, "step": 14005 }, { "epoch": 245.79646017699116, "grad_norm": 5.831762450725364e-08, "learning_rate": 0.21800408564628107, "loss": 0.0, "num_input_tokens_seen": 7960280, "step": 14010 }, { "epoch": 245.8849557522124, "grad_norm": 1.240344005282168e-07, "learning_rate": 0.21795157689764516, "loss": 0.0, "num_input_tokens_seen": 7962648, "step": 14015 }, { "epoch": 245.97345132743362, "grad_norm": 1.0722148147124244e-07, "learning_rate": 0.21789905767002216, "loss": 0.0, "num_input_tokens_seen": 7965832, "step": 14020 }, { "epoch": 246.05309734513276, "grad_norm": 1.968376750483003e-07, "learning_rate": 0.2178465279715112, "loss": 0.0, "num_input_tokens_seen": 7968384, "step": 14025 }, { "epoch": 246.141592920354, "grad_norm": 9.992005089998202e-08, "learning_rate": 0.21779398781021303, "loss": 0.0, "num_input_tokens_seen": 7971264, "step": 14030 }, { "epoch": 246.23008849557522, "grad_norm": 1.2191790688120818e-07, "learning_rate": 0.21774143719422998, "loss": 0.0, "num_input_tokens_seen": 7974224, "step": 14035 }, { "epoch": 246.31858407079645, "grad_norm": 6.926147477770428e-08, "learning_rate": 0.21768887613166601, "loss": 0.0, "num_input_tokens_seen": 7977392, "step": 14040 }, { "epoch": 246.4070796460177, "grad_norm": 1.6060921836924535e-07, "learning_rate": 0.2176363046306267, "loss": 0.0, "num_input_tokens_seen": 7980240, "step": 14045 }, { "epoch": 246.49557522123894, "grad_norm": 1.259788007246243e-07, "learning_rate": 0.21758372269921925, "loss": 0.0, "num_input_tokens_seen": 7982688, "step": 14050 }, { "epoch": 246.58407079646017, "grad_norm": 1.180038111670001e-07, "learning_rate": 0.21753113034555244, "loss": 0.0, "num_input_tokens_seen": 7985552, "step": 14055 }, { "epoch": 246.67256637168143, "grad_norm": 1.3127177567184845e-07, "learning_rate": 0.2174785275777367, "loss": 0.0, "num_input_tokens_seen": 7988720, "step": 14060 }, { "epoch": 246.76106194690266, "grad_norm": 8.332231260510525e-08, "learning_rate": 0.21742591440388404, "loss": 0.0, "num_input_tokens_seen": 7991920, "step": 14065 }, { "epoch": 246.8495575221239, "grad_norm": 8.98556322681543e-08, "learning_rate": 0.21737329083210802, "loss": 0.0, "num_input_tokens_seen": 7994512, "step": 14070 }, { "epoch": 246.93805309734512, "grad_norm": 5.770801791982194e-08, "learning_rate": 0.2173206568705239, "loss": 0.0, "num_input_tokens_seen": 7997504, "step": 14075 }, { "epoch": 247.01769911504425, "grad_norm": 1.670896807581812e-07, "learning_rate": 0.2172680125272485, "loss": 0.0, "num_input_tokens_seen": 7999840, "step": 14080 }, { "epoch": 247.10619469026548, "grad_norm": 1.3387490582772443e-07, "learning_rate": 0.2172153578104002, "loss": 0.0, "num_input_tokens_seen": 8002880, "step": 14085 }, { "epoch": 247.1946902654867, "grad_norm": 6.32661922850275e-08, "learning_rate": 0.21716269272809902, "loss": 0.0, "num_input_tokens_seen": 8005616, "step": 14090 }, { "epoch": 247.28318584070797, "grad_norm": 2.1322611587493157e-07, "learning_rate": 0.21711001728846666, "loss": 0.0, "num_input_tokens_seen": 8008224, "step": 14095 }, { "epoch": 247.3716814159292, "grad_norm": 1.073001527629458e-07, "learning_rate": 0.21705733149962628, "loss": 0.0, "num_input_tokens_seen": 8010832, "step": 14100 }, { "epoch": 247.46017699115043, "grad_norm": 1.2538926341676415e-07, "learning_rate": 0.21700463536970263, "loss": 0.0, "num_input_tokens_seen": 8013952, "step": 14105 }, { "epoch": 247.5486725663717, "grad_norm": 2.1105446990077326e-07, "learning_rate": 0.21695192890682222, "loss": 0.0, "num_input_tokens_seen": 8016352, "step": 14110 }, { "epoch": 247.63716814159292, "grad_norm": 2.702610117921722e-07, "learning_rate": 0.21689921211911298, "loss": 0.0, "num_input_tokens_seen": 8019968, "step": 14115 }, { "epoch": 247.72566371681415, "grad_norm": 1.4195252617810183e-07, "learning_rate": 0.21684648501470452, "loss": 0.0, "num_input_tokens_seen": 8022880, "step": 14120 }, { "epoch": 247.81415929203538, "grad_norm": 8.601875123304126e-08, "learning_rate": 0.216793747601728, "loss": 0.0, "num_input_tokens_seen": 8025632, "step": 14125 }, { "epoch": 247.90265486725664, "grad_norm": 9.098774711446822e-08, "learning_rate": 0.21674099988831627, "loss": 0.0, "num_input_tokens_seen": 8028320, "step": 14130 }, { "epoch": 247.99115044247787, "grad_norm": 1.0906324376946941e-07, "learning_rate": 0.21668824188260363, "loss": 0.0, "num_input_tokens_seen": 8031440, "step": 14135 }, { "epoch": 248.07079646017698, "grad_norm": 7.774207233524066e-08, "learning_rate": 0.21663547359272606, "loss": 0.0, "num_input_tokens_seen": 8034088, "step": 14140 }, { "epoch": 248.15929203539824, "grad_norm": 9.011811386017143e-08, "learning_rate": 0.216582695026821, "loss": 0.0, "num_input_tokens_seen": 8036616, "step": 14145 }, { "epoch": 248.24778761061947, "grad_norm": 2.532333098770323e-07, "learning_rate": 0.21652990619302767, "loss": 0.0, "num_input_tokens_seen": 8040040, "step": 14150 }, { "epoch": 248.3362831858407, "grad_norm": 2.9666697543007103e-08, "learning_rate": 0.21647710709948673, "loss": 0.0, "num_input_tokens_seen": 8042968, "step": 14155 }, { "epoch": 248.42477876106196, "grad_norm": 7.960321823929917e-08, "learning_rate": 0.2164242977543405, "loss": 0.0, "num_input_tokens_seen": 8045512, "step": 14160 }, { "epoch": 248.5132743362832, "grad_norm": 7.413921565557757e-08, "learning_rate": 0.21637147816573277, "loss": 0.0, "num_input_tokens_seen": 8048696, "step": 14165 }, { "epoch": 248.60176991150442, "grad_norm": 9.485130902930905e-08, "learning_rate": 0.21631864834180908, "loss": 0.0, "num_input_tokens_seen": 8051192, "step": 14170 }, { "epoch": 248.69026548672565, "grad_norm": 4.1016342322564014e-08, "learning_rate": 0.21626580829071637, "loss": 0.0, "num_input_tokens_seen": 8054232, "step": 14175 }, { "epoch": 248.7787610619469, "grad_norm": 9.267129286172349e-08, "learning_rate": 0.21621295802060328, "loss": 0.0, "num_input_tokens_seen": 8056824, "step": 14180 }, { "epoch": 248.86725663716814, "grad_norm": 9.988360005763752e-08, "learning_rate": 0.21616009753961996, "loss": 0.0, "num_input_tokens_seen": 8059928, "step": 14185 }, { "epoch": 248.95575221238937, "grad_norm": 1.7210163605341222e-07, "learning_rate": 0.2161072268559182, "loss": 0.0, "num_input_tokens_seen": 8062632, "step": 14190 }, { "epoch": 249.0353982300885, "grad_norm": 5.5678292198990675e-08, "learning_rate": 0.21605434597765133, "loss": 0.0, "num_input_tokens_seen": 8065512, "step": 14195 }, { "epoch": 249.12389380530973, "grad_norm": 1.870575090379134e-07, "learning_rate": 0.21600145491297418, "loss": 0.0, "num_input_tokens_seen": 8068648, "step": 14200 }, { "epoch": 249.12389380530973, "eval_loss": 0.5439008474349976, "eval_runtime": 0.9137, "eval_samples_per_second": 27.361, "eval_steps_per_second": 14.228, "num_input_tokens_seen": 8068648, "step": 14200 }, { "epoch": 249.21238938053096, "grad_norm": 1.2205622113015124e-07, "learning_rate": 0.21594855367004326, "loss": 0.0, "num_input_tokens_seen": 8071144, "step": 14205 }, { "epoch": 249.30088495575222, "grad_norm": 7.311970051659955e-08, "learning_rate": 0.21589564225701663, "loss": 0.0, "num_input_tokens_seen": 8073992, "step": 14210 }, { "epoch": 249.38938053097345, "grad_norm": 7.432397097772991e-08, "learning_rate": 0.21584272068205385, "loss": 0.0, "num_input_tokens_seen": 8076584, "step": 14215 }, { "epoch": 249.47787610619469, "grad_norm": 1.7361314519348525e-07, "learning_rate": 0.2157897889533161, "loss": 0.0, "num_input_tokens_seen": 8079144, "step": 14220 }, { "epoch": 249.56637168141592, "grad_norm": 5.1349960727975485e-08, "learning_rate": 0.21573684707896612, "loss": 0.0, "num_input_tokens_seen": 8082184, "step": 14225 }, { "epoch": 249.65486725663717, "grad_norm": 5.518693768635785e-08, "learning_rate": 0.21568389506716826, "loss": 0.0, "num_input_tokens_seen": 8085208, "step": 14230 }, { "epoch": 249.7433628318584, "grad_norm": 3.939409509712277e-08, "learning_rate": 0.21563093292608831, "loss": 0.0, "num_input_tokens_seen": 8088440, "step": 14235 }, { "epoch": 249.83185840707964, "grad_norm": 1.0374082393127537e-07, "learning_rate": 0.21557796066389376, "loss": 0.0, "num_input_tokens_seen": 8090952, "step": 14240 }, { "epoch": 249.9203539823009, "grad_norm": 5.765467747664843e-08, "learning_rate": 0.21552497828875353, "loss": 0.0, "num_input_tokens_seen": 8093816, "step": 14245 }, { "epoch": 250.0, "grad_norm": 1.025200973003848e-07, "learning_rate": 0.21547198580883828, "loss": 0.0, "num_input_tokens_seen": 8096192, "step": 14250 }, { "epoch": 250.08849557522123, "grad_norm": 1.0608835765424374e-07, "learning_rate": 0.21541898323232, "loss": 0.0, "num_input_tokens_seen": 8098672, "step": 14255 }, { "epoch": 250.1769911504425, "grad_norm": 1.2697438478426193e-07, "learning_rate": 0.2153659705673724, "loss": 0.0, "num_input_tokens_seen": 8101504, "step": 14260 }, { "epoch": 250.26548672566372, "grad_norm": 9.45214608805145e-08, "learning_rate": 0.2153129478221707, "loss": 0.0, "num_input_tokens_seen": 8104944, "step": 14265 }, { "epoch": 250.35398230088495, "grad_norm": 6.674330421674313e-08, "learning_rate": 0.21525991500489164, "loss": 0.0, "num_input_tokens_seen": 8107904, "step": 14270 }, { "epoch": 250.44247787610618, "grad_norm": 1.1776819519582205e-07, "learning_rate": 0.21520687212371362, "loss": 0.0, "num_input_tokens_seen": 8110656, "step": 14275 }, { "epoch": 250.53097345132744, "grad_norm": 1.417420776306244e-07, "learning_rate": 0.21515381918681648, "loss": 0.0, "num_input_tokens_seen": 8113280, "step": 14280 }, { "epoch": 250.61946902654867, "grad_norm": 2.4396086928391014e-07, "learning_rate": 0.21510075620238167, "loss": 0.0, "num_input_tokens_seen": 8115984, "step": 14285 }, { "epoch": 250.7079646017699, "grad_norm": 1.107048888115969e-07, "learning_rate": 0.21504768317859208, "loss": 0.0, "num_input_tokens_seen": 8119008, "step": 14290 }, { "epoch": 250.79646017699116, "grad_norm": 2.448301756885485e-07, "learning_rate": 0.2149946001236323, "loss": 0.0, "num_input_tokens_seen": 8121792, "step": 14295 }, { "epoch": 250.8849557522124, "grad_norm": 3.4540327931154025e-08, "learning_rate": 0.21494150704568848, "loss": 0.0, "num_input_tokens_seen": 8124832, "step": 14300 }, { "epoch": 250.97345132743362, "grad_norm": 1.2766338386427378e-07, "learning_rate": 0.21488840395294811, "loss": 0.0, "num_input_tokens_seen": 8127536, "step": 14305 }, { "epoch": 251.05309734513276, "grad_norm": 1.3328262582490424e-07, "learning_rate": 0.21483529085360042, "loss": 0.0, "num_input_tokens_seen": 8129992, "step": 14310 }, { "epoch": 251.141592920354, "grad_norm": 1.1606729088953216e-07, "learning_rate": 0.2147821677558361, "loss": 0.0, "num_input_tokens_seen": 8133000, "step": 14315 }, { "epoch": 251.23008849557522, "grad_norm": 8.224424163927324e-08, "learning_rate": 0.2147290346678475, "loss": 0.0, "num_input_tokens_seen": 8135608, "step": 14320 }, { "epoch": 251.31858407079645, "grad_norm": 2.80529235396898e-07, "learning_rate": 0.21467589159782827, "loss": 0.0, "num_input_tokens_seen": 8138792, "step": 14325 }, { "epoch": 251.4070796460177, "grad_norm": 6.835380617076225e-08, "learning_rate": 0.21462273855397374, "loss": 0.0, "num_input_tokens_seen": 8141400, "step": 14330 }, { "epoch": 251.49557522123894, "grad_norm": 1.2037759233862744e-07, "learning_rate": 0.21456957554448083, "loss": 0.0, "num_input_tokens_seen": 8144280, "step": 14335 }, { "epoch": 251.58407079646017, "grad_norm": 8.790140526571122e-08, "learning_rate": 0.21451640257754795, "loss": 0.0, "num_input_tokens_seen": 8147272, "step": 14340 }, { "epoch": 251.67256637168143, "grad_norm": 4.162171762800426e-08, "learning_rate": 0.21446321966137508, "loss": 0.0, "num_input_tokens_seen": 8150232, "step": 14345 }, { "epoch": 251.76106194690266, "grad_norm": 2.9392221989610334e-08, "learning_rate": 0.21441002680416354, "loss": 0.0, "num_input_tokens_seen": 8153000, "step": 14350 }, { "epoch": 251.8495575221239, "grad_norm": 5.572619343752194e-08, "learning_rate": 0.21435682401411654, "loss": 0.0, "num_input_tokens_seen": 8155768, "step": 14355 }, { "epoch": 251.93805309734512, "grad_norm": 1.7154786746687023e-07, "learning_rate": 0.2143036112994385, "loss": 0.0, "num_input_tokens_seen": 8158712, "step": 14360 }, { "epoch": 252.01769911504425, "grad_norm": 5.814758807787257e-08, "learning_rate": 0.21425038866833548, "loss": 0.0, "num_input_tokens_seen": 8161616, "step": 14365 }, { "epoch": 252.10619469026548, "grad_norm": 8.532948925221717e-08, "learning_rate": 0.21419715612901508, "loss": 0.0, "num_input_tokens_seen": 8164576, "step": 14370 }, { "epoch": 252.1946902654867, "grad_norm": 1.344986770845935e-07, "learning_rate": 0.21414391368968652, "loss": 0.0, "num_input_tokens_seen": 8167376, "step": 14375 }, { "epoch": 252.28318584070797, "grad_norm": 5.007839476434128e-08, "learning_rate": 0.21409066135856034, "loss": 0.0, "num_input_tokens_seen": 8170064, "step": 14380 }, { "epoch": 252.3716814159292, "grad_norm": 1.0373684489195512e-07, "learning_rate": 0.21403739914384878, "loss": 0.0, "num_input_tokens_seen": 8172304, "step": 14385 }, { "epoch": 252.46017699115043, "grad_norm": 1.4202471732005506e-07, "learning_rate": 0.21398412705376554, "loss": 0.0, "num_input_tokens_seen": 8175664, "step": 14390 }, { "epoch": 252.5486725663717, "grad_norm": 1.9772045334320865e-08, "learning_rate": 0.2139308450965258, "loss": 0.0, "num_input_tokens_seen": 8178800, "step": 14395 }, { "epoch": 252.63716814159292, "grad_norm": 1.0337406308735808e-07, "learning_rate": 0.21387755328034638, "loss": 0.0, "num_input_tokens_seen": 8181840, "step": 14400 }, { "epoch": 252.63716814159292, "eval_loss": 0.5526756048202515, "eval_runtime": 0.9216, "eval_samples_per_second": 27.127, "eval_steps_per_second": 14.106, "num_input_tokens_seen": 8181840, "step": 14400 }, { "epoch": 252.72566371681415, "grad_norm": 4.2634709984668007e-08, "learning_rate": 0.2138242516134455, "loss": 0.0, "num_input_tokens_seen": 8185568, "step": 14405 }, { "epoch": 252.81415929203538, "grad_norm": 1.1562883628357667e-07, "learning_rate": 0.2137709401040429, "loss": 0.0, "num_input_tokens_seen": 8188272, "step": 14410 }, { "epoch": 252.90265486725664, "grad_norm": 1.4140744042379083e-07, "learning_rate": 0.21371761876036, "loss": 0.0, "num_input_tokens_seen": 8190960, "step": 14415 }, { "epoch": 252.99115044247787, "grad_norm": 7.573427041052128e-08, "learning_rate": 0.21366428759061956, "loss": 0.0, "num_input_tokens_seen": 8193312, "step": 14420 }, { "epoch": 253.07079646017698, "grad_norm": 5.2482327816960606e-08, "learning_rate": 0.2136109466030459, "loss": 0.0, "num_input_tokens_seen": 8195488, "step": 14425 }, { "epoch": 253.15929203539824, "grad_norm": 6.561331389320912e-08, "learning_rate": 0.2135575958058649, "loss": 0.0, "num_input_tokens_seen": 8198384, "step": 14430 }, { "epoch": 253.24778761061947, "grad_norm": 8.939981199773683e-08, "learning_rate": 0.2135042352073039, "loss": 0.0, "num_input_tokens_seen": 8200880, "step": 14435 }, { "epoch": 253.3362831858407, "grad_norm": 8.621416469623e-08, "learning_rate": 0.2134508648155918, "loss": 0.0, "num_input_tokens_seen": 8203680, "step": 14440 }, { "epoch": 253.42477876106196, "grad_norm": 1.8726748862718523e-07, "learning_rate": 0.213397484638959, "loss": 0.0, "num_input_tokens_seen": 8206480, "step": 14445 }, { "epoch": 253.5132743362832, "grad_norm": 3.243851409706622e-08, "learning_rate": 0.21334409468563728, "loss": 0.0, "num_input_tokens_seen": 8209584, "step": 14450 }, { "epoch": 253.60176991150442, "grad_norm": 7.378606881047745e-08, "learning_rate": 0.2132906949638602, "loss": 0.0, "num_input_tokens_seen": 8212720, "step": 14455 }, { "epoch": 253.69026548672565, "grad_norm": 1.7226251713964302e-07, "learning_rate": 0.21323728548186255, "loss": 0.0, "num_input_tokens_seen": 8215296, "step": 14460 }, { "epoch": 253.7787610619469, "grad_norm": 1.5365351657692372e-07, "learning_rate": 0.21318386624788088, "loss": 0.0, "num_input_tokens_seen": 8218864, "step": 14465 }, { "epoch": 253.86725663716814, "grad_norm": 1.6260379709365225e-07, "learning_rate": 0.21313043727015288, "loss": 0.0, "num_input_tokens_seen": 8221824, "step": 14470 }, { "epoch": 253.95575221238937, "grad_norm": 1.3820306321576936e-07, "learning_rate": 0.2130769985569182, "loss": 0.0, "num_input_tokens_seen": 8224288, "step": 14475 }, { "epoch": 254.0353982300885, "grad_norm": 2.1093188706799992e-07, "learning_rate": 0.21302355011641766, "loss": 0.0, "num_input_tokens_seen": 8226832, "step": 14480 }, { "epoch": 254.12389380530973, "grad_norm": 4.0449709359791086e-08, "learning_rate": 0.21297009195689365, "loss": 0.0, "num_input_tokens_seen": 8229712, "step": 14485 }, { "epoch": 254.21238938053096, "grad_norm": 1.220660834633236e-07, "learning_rate": 0.21291662408659015, "loss": 0.0, "num_input_tokens_seen": 8232016, "step": 14490 }, { "epoch": 254.30088495575222, "grad_norm": 1.9121267769151018e-07, "learning_rate": 0.21286314651375254, "loss": 0.0, "num_input_tokens_seen": 8235040, "step": 14495 }, { "epoch": 254.38938053097345, "grad_norm": 3.1719356030635026e-08, "learning_rate": 0.2128096592466278, "loss": 0.0, "num_input_tokens_seen": 8238784, "step": 14500 }, { "epoch": 254.47787610619469, "grad_norm": 6.530673601901071e-08, "learning_rate": 0.21275616229346428, "loss": 0.0, "num_input_tokens_seen": 8241408, "step": 14505 }, { "epoch": 254.56637168141592, "grad_norm": 8.79495729577684e-08, "learning_rate": 0.21270265566251184, "loss": 0.0, "num_input_tokens_seen": 8244320, "step": 14510 }, { "epoch": 254.65486725663717, "grad_norm": 6.666812879529971e-08, "learning_rate": 0.21264913936202193, "loss": 0.0, "num_input_tokens_seen": 8246896, "step": 14515 }, { "epoch": 254.7433628318584, "grad_norm": 8.969332299102462e-08, "learning_rate": 0.2125956134002475, "loss": 0.0, "num_input_tokens_seen": 8249840, "step": 14520 }, { "epoch": 254.83185840707964, "grad_norm": 1.0848515330508235e-07, "learning_rate": 0.2125420777854428, "loss": 0.0, "num_input_tokens_seen": 8252672, "step": 14525 }, { "epoch": 254.9203539823009, "grad_norm": 1.9587515964758495e-07, "learning_rate": 0.21248853252586372, "loss": 0.0, "num_input_tokens_seen": 8255904, "step": 14530 }, { "epoch": 255.0, "grad_norm": 3.4515304037086025e-07, "learning_rate": 0.21243497762976774, "loss": 0.0, "num_input_tokens_seen": 8258352, "step": 14535 }, { "epoch": 255.08849557522123, "grad_norm": 7.929283896146444e-08, "learning_rate": 0.21238141310541356, "loss": 0.0, "num_input_tokens_seen": 8261040, "step": 14540 }, { "epoch": 255.1769911504425, "grad_norm": 5.647235212791202e-08, "learning_rate": 0.21232783896106153, "loss": 0.0, "num_input_tokens_seen": 8263712, "step": 14545 }, { "epoch": 255.26548672566372, "grad_norm": 1.2173144625649002e-07, "learning_rate": 0.21227425520497345, "loss": 0.0, "num_input_tokens_seen": 8266016, "step": 14550 }, { "epoch": 255.35398230088495, "grad_norm": 8.870767942426028e-08, "learning_rate": 0.2122206618454127, "loss": 0.0, "num_input_tokens_seen": 8268928, "step": 14555 }, { "epoch": 255.44247787610618, "grad_norm": 6.754464010327865e-08, "learning_rate": 0.2121670588906439, "loss": 0.0, "num_input_tokens_seen": 8272432, "step": 14560 }, { "epoch": 255.53097345132744, "grad_norm": 6.132388818969048e-08, "learning_rate": 0.21211344634893345, "loss": 0.0, "num_input_tokens_seen": 8274912, "step": 14565 }, { "epoch": 255.61946902654867, "grad_norm": 2.404086956175888e-07, "learning_rate": 0.21205982422854897, "loss": 0.0, "num_input_tokens_seen": 8278032, "step": 14570 }, { "epoch": 255.7079646017699, "grad_norm": 7.480979036245117e-08, "learning_rate": 0.21200619253775974, "loss": 0.0, "num_input_tokens_seen": 8280896, "step": 14575 }, { "epoch": 255.79646017699116, "grad_norm": 7.515888711395746e-08, "learning_rate": 0.21195255128483637, "loss": 0.0, "num_input_tokens_seen": 8283616, "step": 14580 }, { "epoch": 255.8849557522124, "grad_norm": 6.464463808697474e-08, "learning_rate": 0.21189890047805102, "loss": 0.0, "num_input_tokens_seen": 8286352, "step": 14585 }, { "epoch": 255.97345132743362, "grad_norm": 6.445137046284799e-08, "learning_rate": 0.21184524012567735, "loss": 0.0, "num_input_tokens_seen": 8289728, "step": 14590 }, { "epoch": 256.05309734513276, "grad_norm": 6.145142350533206e-08, "learning_rate": 0.2117915702359905, "loss": 0.0, "num_input_tokens_seen": 8291776, "step": 14595 }, { "epoch": 256.14159292035396, "grad_norm": 9.628042363374334e-08, "learning_rate": 0.211737890817267, "loss": 0.0, "num_input_tokens_seen": 8294896, "step": 14600 }, { "epoch": 256.14159292035396, "eval_loss": 0.5454229712486267, "eval_runtime": 0.9417, "eval_samples_per_second": 26.548, "eval_steps_per_second": 13.805, "num_input_tokens_seen": 8294896, "step": 14600 }, { "epoch": 256.2300884955752, "grad_norm": 4.978720369308576e-08, "learning_rate": 0.21168420187778483, "loss": 0.0, "num_input_tokens_seen": 8297696, "step": 14605 }, { "epoch": 256.3185840707965, "grad_norm": 3.830402661719745e-08, "learning_rate": 0.21163050342582362, "loss": 0.0, "num_input_tokens_seen": 8300496, "step": 14610 }, { "epoch": 256.4070796460177, "grad_norm": 7.984025529594874e-08, "learning_rate": 0.21157679546966426, "loss": 0.0, "num_input_tokens_seen": 8303488, "step": 14615 }, { "epoch": 256.49557522123894, "grad_norm": 2.237832319451627e-07, "learning_rate": 0.2115230780175892, "loss": 0.0, "num_input_tokens_seen": 8306624, "step": 14620 }, { "epoch": 256.5840707964602, "grad_norm": 1.1179960068830042e-07, "learning_rate": 0.21146935107788237, "loss": 0.0, "num_input_tokens_seen": 8309264, "step": 14625 }, { "epoch": 256.6725663716814, "grad_norm": 1.0081719636900743e-07, "learning_rate": 0.21141561465882916, "loss": 0.0, "num_input_tokens_seen": 8312208, "step": 14630 }, { "epoch": 256.76106194690266, "grad_norm": 7.300442916857719e-08, "learning_rate": 0.21136186876871635, "loss": 0.0, "num_input_tokens_seen": 8314656, "step": 14635 }, { "epoch": 256.8495575221239, "grad_norm": 3.458485053897675e-08, "learning_rate": 0.21130811341583225, "loss": 0.0, "num_input_tokens_seen": 8317728, "step": 14640 }, { "epoch": 256.9380530973451, "grad_norm": 2.430682570775389e-07, "learning_rate": 0.21125434860846667, "loss": 0.0, "num_input_tokens_seen": 8321104, "step": 14645 }, { "epoch": 257.01769911504425, "grad_norm": 9.837411596436141e-08, "learning_rate": 0.2112005743549107, "loss": 0.0, "num_input_tokens_seen": 8323296, "step": 14650 }, { "epoch": 257.1061946902655, "grad_norm": 8.138203355656515e-08, "learning_rate": 0.21114679066345707, "loss": 0.0, "num_input_tokens_seen": 8325968, "step": 14655 }, { "epoch": 257.1946902654867, "grad_norm": 7.265523294108789e-08, "learning_rate": 0.21109299754239993, "loss": 0.0, "num_input_tokens_seen": 8328688, "step": 14660 }, { "epoch": 257.283185840708, "grad_norm": 3.6527833202626425e-08, "learning_rate": 0.21103919500003482, "loss": 0.0, "num_input_tokens_seen": 8331392, "step": 14665 }, { "epoch": 257.37168141592923, "grad_norm": 2.464424575521207e-08, "learning_rate": 0.21098538304465872, "loss": 0.0, "num_input_tokens_seen": 8334592, "step": 14670 }, { "epoch": 257.46017699115043, "grad_norm": 1.980818922220351e-07, "learning_rate": 0.2109315616845702, "loss": 0.0, "num_input_tokens_seen": 8337504, "step": 14675 }, { "epoch": 257.5486725663717, "grad_norm": 3.516291258165438e-08, "learning_rate": 0.21087773092806925, "loss": 0.0, "num_input_tokens_seen": 8340528, "step": 14680 }, { "epoch": 257.6371681415929, "grad_norm": 8.908239834681808e-08, "learning_rate": 0.21082389078345704, "loss": 0.0, "num_input_tokens_seen": 8343168, "step": 14685 }, { "epoch": 257.72566371681415, "grad_norm": 1.701280751831291e-07, "learning_rate": 0.2107700412590365, "loss": 0.0, "num_input_tokens_seen": 8346304, "step": 14690 }, { "epoch": 257.8141592920354, "grad_norm": 1.2315182118527446e-07, "learning_rate": 0.210716182363112, "loss": 0.0, "num_input_tokens_seen": 8349152, "step": 14695 }, { "epoch": 257.9026548672566, "grad_norm": 1.7134304641786002e-08, "learning_rate": 0.2106623141039891, "loss": 0.0, "num_input_tokens_seen": 8351952, "step": 14700 }, { "epoch": 257.9911504424779, "grad_norm": 9.37928845701208e-08, "learning_rate": 0.21060843648997507, "loss": 0.0, "num_input_tokens_seen": 8355408, "step": 14705 }, { "epoch": 258.070796460177, "grad_norm": 5.810087344571002e-08, "learning_rate": 0.21055454952937844, "loss": 0.0, "num_input_tokens_seen": 8357384, "step": 14710 }, { "epoch": 258.1592920353982, "grad_norm": 1.4977747753164294e-07, "learning_rate": 0.21050065323050937, "loss": 0.0, "num_input_tokens_seen": 8360360, "step": 14715 }, { "epoch": 258.24778761061947, "grad_norm": 5.9190643497686324e-08, "learning_rate": 0.21044674760167928, "loss": 0.0, "num_input_tokens_seen": 8363064, "step": 14720 }, { "epoch": 258.3362831858407, "grad_norm": 8.061144995963332e-08, "learning_rate": 0.210392832651201, "loss": 0.0, "num_input_tokens_seen": 8366088, "step": 14725 }, { "epoch": 258.42477876106193, "grad_norm": 2.3237036828049895e-07, "learning_rate": 0.210338908387389, "loss": 0.0, "num_input_tokens_seen": 8368568, "step": 14730 }, { "epoch": 258.5132743362832, "grad_norm": 1.0883340451073309e-07, "learning_rate": 0.21028497481855912, "loss": 0.0, "num_input_tokens_seen": 8372072, "step": 14735 }, { "epoch": 258.60176991150445, "grad_norm": 1.2009915906219248e-07, "learning_rate": 0.21023103195302847, "loss": 0.0, "num_input_tokens_seen": 8375512, "step": 14740 }, { "epoch": 258.69026548672565, "grad_norm": 8.373530846483845e-08, "learning_rate": 0.21017707979911582, "loss": 0.0, "num_input_tokens_seen": 8378376, "step": 14745 }, { "epoch": 258.7787610619469, "grad_norm": 8.831920439433816e-08, "learning_rate": 0.21012311836514122, "loss": 0.0, "num_input_tokens_seen": 8380824, "step": 14750 }, { "epoch": 258.86725663716817, "grad_norm": 7.827964054740733e-08, "learning_rate": 0.21006914765942622, "loss": 0.0, "num_input_tokens_seen": 8383688, "step": 14755 }, { "epoch": 258.95575221238937, "grad_norm": 4.0830013148251965e-08, "learning_rate": 0.2100151676902938, "loss": 0.0, "num_input_tokens_seen": 8386616, "step": 14760 }, { "epoch": 259.0353982300885, "grad_norm": 4.3068737909379706e-08, "learning_rate": 0.2099611784660683, "loss": 0.0, "num_input_tokens_seen": 8389072, "step": 14765 }, { "epoch": 259.12389380530976, "grad_norm": 1.1273470335027014e-07, "learning_rate": 0.20990717999507552, "loss": 0.0, "num_input_tokens_seen": 8392064, "step": 14770 }, { "epoch": 259.21238938053096, "grad_norm": 5.541773262507377e-08, "learning_rate": 0.20985317228564276, "loss": 0.0, "num_input_tokens_seen": 8394864, "step": 14775 }, { "epoch": 259.3008849557522, "grad_norm": 1.1699106039486651e-07, "learning_rate": 0.20979915534609872, "loss": 0.0, "num_input_tokens_seen": 8397840, "step": 14780 }, { "epoch": 259.3893805309734, "grad_norm": 6.097998550558259e-08, "learning_rate": 0.20974512918477342, "loss": 0.0, "num_input_tokens_seen": 8400288, "step": 14785 }, { "epoch": 259.4778761061947, "grad_norm": 1.4602970566102158e-07, "learning_rate": 0.2096910938099984, "loss": 0.0, "num_input_tokens_seen": 8402816, "step": 14790 }, { "epoch": 259.56637168141594, "grad_norm": 8.850250310388219e-08, "learning_rate": 0.2096370492301066, "loss": 0.0, "num_input_tokens_seen": 8405616, "step": 14795 }, { "epoch": 259.65486725663715, "grad_norm": 9.490360497466099e-08, "learning_rate": 0.2095829954534323, "loss": 0.0, "num_input_tokens_seen": 8408512, "step": 14800 }, { "epoch": 259.65486725663715, "eval_loss": 0.5406162142753601, "eval_runtime": 0.9371, "eval_samples_per_second": 26.679, "eval_steps_per_second": 13.873, "num_input_tokens_seen": 8408512, "step": 14800 }, { "epoch": 259.7433628318584, "grad_norm": 1.3778387142338033e-07, "learning_rate": 0.2095289324883114, "loss": 0.0, "num_input_tokens_seen": 8411120, "step": 14805 }, { "epoch": 259.83185840707966, "grad_norm": 1.0958025598029053e-07, "learning_rate": 0.20947486034308097, "loss": 0.0, "num_input_tokens_seen": 8414224, "step": 14810 }, { "epoch": 259.92035398230087, "grad_norm": 5.157104610020724e-08, "learning_rate": 0.2094207790260797, "loss": 0.0, "num_input_tokens_seen": 8417392, "step": 14815 }, { "epoch": 260.0, "grad_norm": 3.8772668631281704e-08, "learning_rate": 0.20936668854564758, "loss": 0.0, "num_input_tokens_seen": 8420256, "step": 14820 }, { "epoch": 260.08849557522126, "grad_norm": 7.08759486656163e-08, "learning_rate": 0.20931258891012602, "loss": 0.0, "num_input_tokens_seen": 8423168, "step": 14825 }, { "epoch": 260.17699115044246, "grad_norm": 8.172844445653027e-08, "learning_rate": 0.20925848012785792, "loss": 0.0, "num_input_tokens_seen": 8425760, "step": 14830 }, { "epoch": 260.2654867256637, "grad_norm": 9.575229142910757e-08, "learning_rate": 0.20920436220718747, "loss": 0.0, "num_input_tokens_seen": 8428720, "step": 14835 }, { "epoch": 260.353982300885, "grad_norm": 3.9533677664849165e-08, "learning_rate": 0.20915023515646033, "loss": 0.0, "num_input_tokens_seen": 8431392, "step": 14840 }, { "epoch": 260.4424778761062, "grad_norm": 2.0674222867000935e-07, "learning_rate": 0.20909609898402368, "loss": 0.0, "num_input_tokens_seen": 8434208, "step": 14845 }, { "epoch": 260.53097345132744, "grad_norm": 5.632215049899969e-08, "learning_rate": 0.2090419536982258, "loss": 0.0, "num_input_tokens_seen": 8436864, "step": 14850 }, { "epoch": 260.6194690265487, "grad_norm": 8.015777552827785e-08, "learning_rate": 0.2089877993074168, "loss": 0.0, "num_input_tokens_seen": 8440288, "step": 14855 }, { "epoch": 260.7079646017699, "grad_norm": 1.8263547119090617e-08, "learning_rate": 0.20893363581994784, "loss": 0.0, "num_input_tokens_seen": 8442832, "step": 14860 }, { "epoch": 260.79646017699116, "grad_norm": 6.974161692596681e-08, "learning_rate": 0.2088794632441716, "loss": 0.0, "num_input_tokens_seen": 8446208, "step": 14865 }, { "epoch": 260.88495575221236, "grad_norm": 1.297104557096418e-08, "learning_rate": 0.20882528158844219, "loss": 0.0, "num_input_tokens_seen": 8448752, "step": 14870 }, { "epoch": 260.9734513274336, "grad_norm": 8.777006144100596e-08, "learning_rate": 0.20877109086111514, "loss": 0.0, "num_input_tokens_seen": 8451504, "step": 14875 }, { "epoch": 261.05309734513276, "grad_norm": 3.226301004133347e-08, "learning_rate": 0.2087168910705473, "loss": 0.0, "num_input_tokens_seen": 8454168, "step": 14880 }, { "epoch": 261.14159292035396, "grad_norm": 4.3140033767485875e-08, "learning_rate": 0.208662682225097, "loss": 0.0, "num_input_tokens_seen": 8457336, "step": 14885 }, { "epoch": 261.2300884955752, "grad_norm": 6.398609997404492e-08, "learning_rate": 0.2086084643331239, "loss": 0.0, "num_input_tokens_seen": 8460856, "step": 14890 }, { "epoch": 261.3185840707965, "grad_norm": 1.2208509758693253e-07, "learning_rate": 0.20855423740298906, "loss": 0.0, "num_input_tokens_seen": 8464136, "step": 14895 }, { "epoch": 261.4070796460177, "grad_norm": 7.810146485098812e-08, "learning_rate": 0.208500001443055, "loss": 0.0, "num_input_tokens_seen": 8466488, "step": 14900 }, { "epoch": 261.49557522123894, "grad_norm": 1.784162861895311e-07, "learning_rate": 0.20844575646168553, "loss": 0.0, "num_input_tokens_seen": 8469592, "step": 14905 }, { "epoch": 261.5840707964602, "grad_norm": 1.1425645141116547e-07, "learning_rate": 0.20839150246724594, "loss": 0.0, "num_input_tokens_seen": 8472168, "step": 14910 }, { "epoch": 261.6725663716814, "grad_norm": 3.754313837589507e-08, "learning_rate": 0.20833723946810287, "loss": 0.0, "num_input_tokens_seen": 8474760, "step": 14915 }, { "epoch": 261.76106194690266, "grad_norm": 6.753247561164244e-08, "learning_rate": 0.20828296747262437, "loss": 0.0, "num_input_tokens_seen": 8477768, "step": 14920 }, { "epoch": 261.8495575221239, "grad_norm": 2.206452087705202e-08, "learning_rate": 0.20822868648917986, "loss": 0.0, "num_input_tokens_seen": 8480360, "step": 14925 }, { "epoch": 261.9380530973451, "grad_norm": 9.415782642463455e-08, "learning_rate": 0.20817439652614017, "loss": 0.0, "num_input_tokens_seen": 8483336, "step": 14930 }, { "epoch": 262.01769911504425, "grad_norm": 8.295224773746668e-08, "learning_rate": 0.20812009759187744, "loss": 0.0, "num_input_tokens_seen": 8485600, "step": 14935 }, { "epoch": 262.1061946902655, "grad_norm": 1.2563631912598794e-07, "learning_rate": 0.2080657896947653, "loss": 0.0, "num_input_tokens_seen": 8488384, "step": 14940 }, { "epoch": 262.1946902654867, "grad_norm": 4.9378890309981216e-08, "learning_rate": 0.2080114728431787, "loss": 0.0, "num_input_tokens_seen": 8491600, "step": 14945 }, { "epoch": 262.283185840708, "grad_norm": 9.07646793280037e-08, "learning_rate": 0.20795714704549392, "loss": 0.0, "num_input_tokens_seen": 8494528, "step": 14950 }, { "epoch": 262.37168141592923, "grad_norm": 4.9053326733883296e-08, "learning_rate": 0.20790281231008875, "loss": 0.0, "num_input_tokens_seen": 8497312, "step": 14955 }, { "epoch": 262.46017699115043, "grad_norm": 1.212995641708403e-07, "learning_rate": 0.20784846864534226, "loss": 0.0, "num_input_tokens_seen": 8500192, "step": 14960 }, { "epoch": 262.5486725663717, "grad_norm": 2.826561562585539e-08, "learning_rate": 0.20779411605963496, "loss": 0.0, "num_input_tokens_seen": 8503280, "step": 14965 }, { "epoch": 262.6371681415929, "grad_norm": 4.655237617612329e-08, "learning_rate": 0.2077397545613487, "loss": 0.0, "num_input_tokens_seen": 8506448, "step": 14970 }, { "epoch": 262.72566371681415, "grad_norm": 7.445149918794414e-08, "learning_rate": 0.20768538415886661, "loss": 0.0, "num_input_tokens_seen": 8508960, "step": 14975 }, { "epoch": 262.8141592920354, "grad_norm": 6.373876715315419e-08, "learning_rate": 0.20763100486057343, "loss": 0.0, "num_input_tokens_seen": 8511840, "step": 14980 }, { "epoch": 262.9026548672566, "grad_norm": 8.458541600475655e-08, "learning_rate": 0.20757661667485502, "loss": 0.0, "num_input_tokens_seen": 8514992, "step": 14985 }, { "epoch": 262.9911504424779, "grad_norm": 9.126129185688114e-08, "learning_rate": 0.2075222196100988, "loss": 0.0, "num_input_tokens_seen": 8517376, "step": 14990 }, { "epoch": 263.070796460177, "grad_norm": 6.500360427708074e-08, "learning_rate": 0.20746781367469344, "loss": 0.0, "num_input_tokens_seen": 8520168, "step": 14995 }, { "epoch": 263.1592920353982, "grad_norm": 1.350842353531334e-07, "learning_rate": 0.207413398877029, "loss": 0.0, "num_input_tokens_seen": 8522664, "step": 15000 }, { "epoch": 263.1592920353982, "eval_loss": 0.5593261122703552, "eval_runtime": 0.9358, "eval_samples_per_second": 26.714, "eval_steps_per_second": 13.891, "num_input_tokens_seen": 8522664, "step": 15000 }, { "epoch": 263.24778761061947, "grad_norm": 4.053318747310186e-08, "learning_rate": 0.20735897522549698, "loss": 0.0, "num_input_tokens_seen": 8525480, "step": 15005 }, { "epoch": 263.3362831858407, "grad_norm": 1.0382632353866939e-07, "learning_rate": 0.2073045427284902, "loss": 0.0, "num_input_tokens_seen": 8528440, "step": 15010 }, { "epoch": 263.42477876106193, "grad_norm": 1.2137867599903984e-07, "learning_rate": 0.2072501013944027, "loss": 0.0, "num_input_tokens_seen": 8531224, "step": 15015 }, { "epoch": 263.5132743362832, "grad_norm": 7.086700293257309e-08, "learning_rate": 0.20719565123163017, "loss": 0.0, "num_input_tokens_seen": 8534152, "step": 15020 }, { "epoch": 263.60176991150445, "grad_norm": 1.0657812055114846e-07, "learning_rate": 0.20714119224856944, "loss": 0.0, "num_input_tokens_seen": 8537240, "step": 15025 }, { "epoch": 263.69026548672565, "grad_norm": 5.5095522810688635e-08, "learning_rate": 0.2070867244536188, "loss": 0.0, "num_input_tokens_seen": 8539960, "step": 15030 }, { "epoch": 263.7787610619469, "grad_norm": 6.964447862856105e-08, "learning_rate": 0.20703224785517785, "loss": 0.0, "num_input_tokens_seen": 8543400, "step": 15035 }, { "epoch": 263.86725663716817, "grad_norm": 1.4728864528024133e-07, "learning_rate": 0.20697776246164754, "loss": 0.0, "num_input_tokens_seen": 8545784, "step": 15040 }, { "epoch": 263.95575221238937, "grad_norm": 3.917111257578654e-08, "learning_rate": 0.2069232682814303, "loss": 0.0, "num_input_tokens_seen": 8548536, "step": 15045 }, { "epoch": 264.0353982300885, "grad_norm": 1.5698725519541767e-07, "learning_rate": 0.20686876532292972, "loss": 0.0, "num_input_tokens_seen": 8550744, "step": 15050 }, { "epoch": 264.12389380530976, "grad_norm": 2.8254461881260795e-08, "learning_rate": 0.20681425359455083, "loss": 0.0, "num_input_tokens_seen": 8553656, "step": 15055 }, { "epoch": 264.21238938053096, "grad_norm": 3.659360814367574e-08, "learning_rate": 0.20675973310470008, "loss": 0.0, "num_input_tokens_seen": 8556616, "step": 15060 }, { "epoch": 264.3008849557522, "grad_norm": 4.326602365267718e-08, "learning_rate": 0.2067052038617852, "loss": 0.0, "num_input_tokens_seen": 8559176, "step": 15065 }, { "epoch": 264.3893805309734, "grad_norm": 7.578302074762178e-08, "learning_rate": 0.2066506658742153, "loss": 0.0, "num_input_tokens_seen": 8562088, "step": 15070 }, { "epoch": 264.4778761061947, "grad_norm": 1.4896217237492237e-07, "learning_rate": 0.20659611915040077, "loss": 0.0, "num_input_tokens_seen": 8564936, "step": 15075 }, { "epoch": 264.56637168141594, "grad_norm": 1.4910073176110927e-08, "learning_rate": 0.20654156369875348, "loss": 0.0, "num_input_tokens_seen": 8567544, "step": 15080 }, { "epoch": 264.65486725663715, "grad_norm": 1.0175042319815475e-07, "learning_rate": 0.20648699952768648, "loss": 0.0, "num_input_tokens_seen": 8570008, "step": 15085 }, { "epoch": 264.7433628318584, "grad_norm": 3.7698999477697726e-08, "learning_rate": 0.20643242664561437, "loss": 0.0, "num_input_tokens_seen": 8572808, "step": 15090 }, { "epoch": 264.83185840707966, "grad_norm": 7.0585308264981e-08, "learning_rate": 0.20637784506095277, "loss": 0.0, "num_input_tokens_seen": 8575864, "step": 15095 }, { "epoch": 264.92035398230087, "grad_norm": 1.223796175509051e-07, "learning_rate": 0.20632325478211908, "loss": 0.0, "num_input_tokens_seen": 8578712, "step": 15100 }, { "epoch": 265.0, "grad_norm": 1.1112658881984316e-07, "learning_rate": 0.20626865581753165, "loss": 0.0, "num_input_tokens_seen": 8581640, "step": 15105 }, { "epoch": 265.08849557522126, "grad_norm": 8.534235007573443e-08, "learning_rate": 0.2062140481756104, "loss": 0.0, "num_input_tokens_seen": 8584232, "step": 15110 }, { "epoch": 265.17699115044246, "grad_norm": 4.245593032692341e-08, "learning_rate": 0.20615943186477648, "loss": 0.0, "num_input_tokens_seen": 8586904, "step": 15115 }, { "epoch": 265.2654867256637, "grad_norm": 9.996076499874107e-08, "learning_rate": 0.20610480689345242, "loss": 0.0, "num_input_tokens_seen": 8589480, "step": 15120 }, { "epoch": 265.353982300885, "grad_norm": 4.443929313424633e-08, "learning_rate": 0.2060501732700621, "loss": 0.0, "num_input_tokens_seen": 8592392, "step": 15125 }, { "epoch": 265.4424778761062, "grad_norm": 7.049123951219372e-08, "learning_rate": 0.20599553100303067, "loss": 0.0, "num_input_tokens_seen": 8595688, "step": 15130 }, { "epoch": 265.53097345132744, "grad_norm": 3.611452115137581e-08, "learning_rate": 0.20594088010078465, "loss": 0.0, "num_input_tokens_seen": 8598392, "step": 15135 }, { "epoch": 265.6194690265487, "grad_norm": 4.93326730577337e-08, "learning_rate": 0.20588622057175196, "loss": 0.0, "num_input_tokens_seen": 8601064, "step": 15140 }, { "epoch": 265.7079646017699, "grad_norm": 1.1810502797970912e-07, "learning_rate": 0.20583155242436177, "loss": 0.0, "num_input_tokens_seen": 8603752, "step": 15145 }, { "epoch": 265.79646017699116, "grad_norm": 3.872182574582439e-08, "learning_rate": 0.20577687566704453, "loss": 0.0, "num_input_tokens_seen": 8606504, "step": 15150 }, { "epoch": 265.88495575221236, "grad_norm": 9.34594410750833e-08, "learning_rate": 0.20572219030823213, "loss": 0.0, "num_input_tokens_seen": 8609864, "step": 15155 }, { "epoch": 265.9734513274336, "grad_norm": 2.8684421948810268e-08, "learning_rate": 0.20566749635635775, "loss": 0.0, "num_input_tokens_seen": 8613224, "step": 15160 }, { "epoch": 266.05309734513276, "grad_norm": 9.351485630304524e-08, "learning_rate": 0.20561279381985587, "loss": 0.0, "num_input_tokens_seen": 8615648, "step": 15165 }, { "epoch": 266.14159292035396, "grad_norm": 6.399483964969477e-08, "learning_rate": 0.2055580827071623, "loss": 0.0, "num_input_tokens_seen": 8618576, "step": 15170 }, { "epoch": 266.2300884955752, "grad_norm": 5.349578913182995e-08, "learning_rate": 0.20550336302671418, "loss": 0.0, "num_input_tokens_seen": 8621280, "step": 15175 }, { "epoch": 266.3185840707965, "grad_norm": 5.0094911330234027e-08, "learning_rate": 0.20544863478695, "loss": 0.0, "num_input_tokens_seen": 8624064, "step": 15180 }, { "epoch": 266.4070796460177, "grad_norm": 7.176794269980746e-08, "learning_rate": 0.20539389799630953, "loss": 0.0, "num_input_tokens_seen": 8626768, "step": 15185 }, { "epoch": 266.49557522123894, "grad_norm": 2.4955779664992406e-08, "learning_rate": 0.20533915266323388, "loss": 0.0, "num_input_tokens_seen": 8630224, "step": 15190 }, { "epoch": 266.5840707964602, "grad_norm": 1.7261923446199035e-08, "learning_rate": 0.20528439879616542, "loss": 0.0, "num_input_tokens_seen": 8633296, "step": 15195 }, { "epoch": 266.6725663716814, "grad_norm": 4.766123140598211e-08, "learning_rate": 0.20522963640354794, "loss": 0.0, "num_input_tokens_seen": 8636032, "step": 15200 }, { "epoch": 266.6725663716814, "eval_loss": 0.5593429803848267, "eval_runtime": 0.9141, "eval_samples_per_second": 27.35, "eval_steps_per_second": 14.222, "num_input_tokens_seen": 8636032, "step": 15200 }, { "epoch": 266.76106194690266, "grad_norm": 5.91427848917192e-08, "learning_rate": 0.20517486549382644, "loss": 0.0, "num_input_tokens_seen": 8638768, "step": 15205 }, { "epoch": 266.8495575221239, "grad_norm": 2.9621311625760427e-08, "learning_rate": 0.20512008607544735, "loss": 0.0, "num_input_tokens_seen": 8641520, "step": 15210 }, { "epoch": 266.9380530973451, "grad_norm": 5.509615519372346e-08, "learning_rate": 0.20506529815685826, "loss": 0.0, "num_input_tokens_seen": 8644592, "step": 15215 }, { "epoch": 267.01769911504425, "grad_norm": 3.450385221981378e-08, "learning_rate": 0.2050105017465082, "loss": 0.0, "num_input_tokens_seen": 8647352, "step": 15220 }, { "epoch": 267.1061946902655, "grad_norm": 1.7605766799988487e-07, "learning_rate": 0.20495569685284754, "loss": 0.0, "num_input_tokens_seen": 8650632, "step": 15225 }, { "epoch": 267.1946902654867, "grad_norm": 9.82971286589418e-08, "learning_rate": 0.20490088348432778, "loss": 0.0, "num_input_tokens_seen": 8653288, "step": 15230 }, { "epoch": 267.283185840708, "grad_norm": 1.0193586774676078e-07, "learning_rate": 0.2048460616494018, "loss": 0.0, "num_input_tokens_seen": 8656376, "step": 15235 }, { "epoch": 267.37168141592923, "grad_norm": 5.908017897127138e-08, "learning_rate": 0.2047912313565239, "loss": 0.0, "num_input_tokens_seen": 8659080, "step": 15240 }, { "epoch": 267.46017699115043, "grad_norm": 3.535107850893837e-08, "learning_rate": 0.20473639261414958, "loss": 0.0, "num_input_tokens_seen": 8662184, "step": 15245 }, { "epoch": 267.5486725663717, "grad_norm": 5.832183447296302e-08, "learning_rate": 0.2046815454307357, "loss": 0.0, "num_input_tokens_seen": 8664328, "step": 15250 }, { "epoch": 267.6371681415929, "grad_norm": 7.983323513371943e-08, "learning_rate": 0.20462668981474028, "loss": 0.0, "num_input_tokens_seen": 8667192, "step": 15255 }, { "epoch": 267.72566371681415, "grad_norm": 1.239865525803907e-07, "learning_rate": 0.20457182577462288, "loss": 0.0, "num_input_tokens_seen": 8669912, "step": 15260 }, { "epoch": 267.8141592920354, "grad_norm": 1.1480042871880869e-07, "learning_rate": 0.2045169533188441, "loss": 0.0, "num_input_tokens_seen": 8672904, "step": 15265 }, { "epoch": 267.9026548672566, "grad_norm": 5.778873557460429e-08, "learning_rate": 0.20446207245586603, "loss": 0.0, "num_input_tokens_seen": 8675608, "step": 15270 }, { "epoch": 267.9911504424779, "grad_norm": 7.050958572563104e-08, "learning_rate": 0.20440718319415196, "loss": 0.0, "num_input_tokens_seen": 8678808, "step": 15275 }, { "epoch": 268.070796460177, "grad_norm": 2.941243337772903e-08, "learning_rate": 0.20435228554216653, "loss": 0.0, "num_input_tokens_seen": 8681520, "step": 15280 }, { "epoch": 268.1592920353982, "grad_norm": 5.803963176731486e-08, "learning_rate": 0.20429737950837565, "loss": 0.0, "num_input_tokens_seen": 8684240, "step": 15285 }, { "epoch": 268.24778761061947, "grad_norm": 6.733184676477322e-08, "learning_rate": 0.20424246510124647, "loss": 0.0, "num_input_tokens_seen": 8687232, "step": 15290 }, { "epoch": 268.3362831858407, "grad_norm": 6.794768836471121e-08, "learning_rate": 0.20418754232924755, "loss": 0.0, "num_input_tokens_seen": 8690064, "step": 15295 }, { "epoch": 268.42477876106193, "grad_norm": 3.334180576075596e-08, "learning_rate": 0.20413261120084863, "loss": 0.0, "num_input_tokens_seen": 8692912, "step": 15300 }, { "epoch": 268.5132743362832, "grad_norm": 4.93289356029436e-08, "learning_rate": 0.2040776717245208, "loss": 0.0, "num_input_tokens_seen": 8695648, "step": 15305 }, { "epoch": 268.60176991150445, "grad_norm": 1.9532598116711597e-07, "learning_rate": 0.2040227239087364, "loss": 0.0, "num_input_tokens_seen": 8698512, "step": 15310 }, { "epoch": 268.69026548672565, "grad_norm": 7.273843039001804e-08, "learning_rate": 0.20396776776196904, "loss": 0.0, "num_input_tokens_seen": 8701232, "step": 15315 }, { "epoch": 268.7787610619469, "grad_norm": 5.405801317692749e-08, "learning_rate": 0.20391280329269373, "loss": 0.0, "num_input_tokens_seen": 8704144, "step": 15320 }, { "epoch": 268.86725663716817, "grad_norm": 1.1916811537560079e-08, "learning_rate": 0.20385783050938663, "loss": 0.0, "num_input_tokens_seen": 8707104, "step": 15325 }, { "epoch": 268.95575221238937, "grad_norm": 1.3493524875229923e-07, "learning_rate": 0.20380284942052526, "loss": 0.0, "num_input_tokens_seen": 8709664, "step": 15330 }, { "epoch": 269.0353982300885, "grad_norm": 1.2628824208604783e-07, "learning_rate": 0.2037478600345884, "loss": 0.0, "num_input_tokens_seen": 8711928, "step": 15335 }, { "epoch": 269.12389380530976, "grad_norm": 1.1048690851112042e-07, "learning_rate": 0.20369286236005604, "loss": 0.0, "num_input_tokens_seen": 8714520, "step": 15340 }, { "epoch": 269.21238938053096, "grad_norm": 6.680797781655201e-08, "learning_rate": 0.20363785640540957, "loss": 0.0, "num_input_tokens_seen": 8717688, "step": 15345 }, { "epoch": 269.3008849557522, "grad_norm": 6.453123546634743e-08, "learning_rate": 0.2035828421791316, "loss": 0.0, "num_input_tokens_seen": 8720408, "step": 15350 }, { "epoch": 269.3893805309734, "grad_norm": 1.6381722645064656e-08, "learning_rate": 0.20352781968970599, "loss": 0.0, "num_input_tokens_seen": 8723752, "step": 15355 }, { "epoch": 269.4778761061947, "grad_norm": 3.405954984714299e-08, "learning_rate": 0.2034727889456179, "loss": 0.0, "num_input_tokens_seen": 8726568, "step": 15360 }, { "epoch": 269.56637168141594, "grad_norm": 9.664385913765727e-08, "learning_rate": 0.2034177499553538, "loss": 0.0, "num_input_tokens_seen": 8729064, "step": 15365 }, { "epoch": 269.65486725663715, "grad_norm": 1.0605712219557972e-07, "learning_rate": 0.2033627027274014, "loss": 0.0, "num_input_tokens_seen": 8731960, "step": 15370 }, { "epoch": 269.7433628318584, "grad_norm": 7.534888624149971e-08, "learning_rate": 0.20330764727024955, "loss": 0.0, "num_input_tokens_seen": 8734936, "step": 15375 }, { "epoch": 269.83185840707966, "grad_norm": 6.250710526956027e-08, "learning_rate": 0.20325258359238868, "loss": 0.0, "num_input_tokens_seen": 8738168, "step": 15380 }, { "epoch": 269.92035398230087, "grad_norm": 8.147846131123515e-08, "learning_rate": 0.20319751170231018, "loss": 0.0, "num_input_tokens_seen": 8740680, "step": 15385 }, { "epoch": 270.0, "grad_norm": 3.005828830282553e-08, "learning_rate": 0.2031424316085068, "loss": 0.0, "num_input_tokens_seen": 8743392, "step": 15390 }, { "epoch": 270.08849557522126, "grad_norm": 1.2045488517742342e-07, "learning_rate": 0.20308734331947265, "loss": 0.0, "num_input_tokens_seen": 8746000, "step": 15395 }, { "epoch": 270.17699115044246, "grad_norm": 3.535718562375223e-08, "learning_rate": 0.20303224684370305, "loss": 0.0, "num_input_tokens_seen": 8748624, "step": 15400 }, { "epoch": 270.17699115044246, "eval_loss": 0.55808424949646, "eval_runtime": 0.9357, "eval_samples_per_second": 26.718, "eval_steps_per_second": 13.893, "num_input_tokens_seen": 8748624, "step": 15400 }, { "epoch": 270.2654867256637, "grad_norm": 9.544882573209179e-08, "learning_rate": 0.20297714218969456, "loss": 0.0, "num_input_tokens_seen": 8751984, "step": 15405 }, { "epoch": 270.353982300885, "grad_norm": 7.714422878279947e-08, "learning_rate": 0.20292202936594497, "loss": 0.0, "num_input_tokens_seen": 8754560, "step": 15410 }, { "epoch": 270.4424778761062, "grad_norm": 1.3567562007210654e-07, "learning_rate": 0.2028669083809534, "loss": 0.0, "num_input_tokens_seen": 8757600, "step": 15415 }, { "epoch": 270.53097345132744, "grad_norm": 2.639728613473835e-08, "learning_rate": 0.20281177924322016, "loss": 0.0, "num_input_tokens_seen": 8760640, "step": 15420 }, { "epoch": 270.6194690265487, "grad_norm": 6.162782284491186e-08, "learning_rate": 0.2027566419612469, "loss": 0.0, "num_input_tokens_seen": 8763744, "step": 15425 }, { "epoch": 270.7079646017699, "grad_norm": 1.0672608397044314e-07, "learning_rate": 0.20270149654353647, "loss": 0.0, "num_input_tokens_seen": 8766160, "step": 15430 }, { "epoch": 270.79646017699116, "grad_norm": 6.111816475140586e-08, "learning_rate": 0.202646342998593, "loss": 0.0, "num_input_tokens_seen": 8768832, "step": 15435 }, { "epoch": 270.88495575221236, "grad_norm": 9.849783566551196e-08, "learning_rate": 0.20259118133492185, "loss": 0.0, "num_input_tokens_seen": 8771968, "step": 15440 }, { "epoch": 270.9734513274336, "grad_norm": 4.267842612648565e-08, "learning_rate": 0.20253601156102966, "loss": 0.0, "num_input_tokens_seen": 8775184, "step": 15445 }, { "epoch": 271.05309734513276, "grad_norm": 3.863820907668014e-08, "learning_rate": 0.20248083368542422, "loss": 0.0, "num_input_tokens_seen": 8777984, "step": 15450 }, { "epoch": 271.14159292035396, "grad_norm": 2.3924330960767293e-08, "learning_rate": 0.2024256477166147, "loss": 0.0, "num_input_tokens_seen": 8780960, "step": 15455 }, { "epoch": 271.2300884955752, "grad_norm": 6.320313872265615e-08, "learning_rate": 0.2023704536631115, "loss": 0.0, "num_input_tokens_seen": 8784320, "step": 15460 }, { "epoch": 271.3185840707965, "grad_norm": 6.806480001841919e-08, "learning_rate": 0.20231525153342625, "loss": 0.0, "num_input_tokens_seen": 8787312, "step": 15465 }, { "epoch": 271.4070796460177, "grad_norm": 1.2528064985417586e-07, "learning_rate": 0.20226004133607173, "loss": 0.0, "num_input_tokens_seen": 8790288, "step": 15470 }, { "epoch": 271.49557522123894, "grad_norm": 4.099985773109438e-08, "learning_rate": 0.20220482307956214, "loss": 0.0, "num_input_tokens_seen": 8792736, "step": 15475 }, { "epoch": 271.5840707964602, "grad_norm": 1.6849686090836258e-08, "learning_rate": 0.20214959677241276, "loss": 0.0, "num_input_tokens_seen": 8795216, "step": 15480 }, { "epoch": 271.6725663716814, "grad_norm": 3.920475677432478e-08, "learning_rate": 0.20209436242314022, "loss": 0.0, "num_input_tokens_seen": 8798064, "step": 15485 }, { "epoch": 271.76106194690266, "grad_norm": 1.9954629237872723e-07, "learning_rate": 0.2020391200402623, "loss": 0.0, "num_input_tokens_seen": 8800928, "step": 15490 }, { "epoch": 271.8495575221239, "grad_norm": 6.408408381730624e-08, "learning_rate": 0.2019838696322981, "loss": 0.0, "num_input_tokens_seen": 8803616, "step": 15495 }, { "epoch": 271.9380530973451, "grad_norm": 8.908627791015533e-08, "learning_rate": 0.20192861120776798, "loss": 0.0, "num_input_tokens_seen": 8806624, "step": 15500 }, { "epoch": 272.01769911504425, "grad_norm": 1.2983086605800054e-07, "learning_rate": 0.20187334477519345, "loss": 0.0, "num_input_tokens_seen": 8808880, "step": 15505 }, { "epoch": 272.1061946902655, "grad_norm": 1.769193502809685e-08, "learning_rate": 0.20181807034309726, "loss": 0.0, "num_input_tokens_seen": 8812192, "step": 15510 }, { "epoch": 272.1946902654867, "grad_norm": 1.1228719642986107e-07, "learning_rate": 0.2017627879200034, "loss": 0.0, "num_input_tokens_seen": 8814656, "step": 15515 }, { "epoch": 272.283185840708, "grad_norm": 2.186459724384804e-08, "learning_rate": 0.2017074975144372, "loss": 0.0, "num_input_tokens_seen": 8817760, "step": 15520 }, { "epoch": 272.37168141592923, "grad_norm": 8.692725828041148e-08, "learning_rate": 0.20165219913492508, "loss": 0.0, "num_input_tokens_seen": 8820352, "step": 15525 }, { "epoch": 272.46017699115043, "grad_norm": 6.793039375452281e-08, "learning_rate": 0.20159689278999468, "loss": 0.0, "num_input_tokens_seen": 8823968, "step": 15530 }, { "epoch": 272.5486725663717, "grad_norm": 2.9531625145295948e-08, "learning_rate": 0.20154157848817508, "loss": 0.0, "num_input_tokens_seen": 8826720, "step": 15535 }, { "epoch": 272.6371681415929, "grad_norm": 3.627345535051063e-08, "learning_rate": 0.20148625623799632, "loss": 0.0, "num_input_tokens_seen": 8829792, "step": 15540 }, { "epoch": 272.72566371681415, "grad_norm": 1.3397683851223974e-08, "learning_rate": 0.20143092604798984, "loss": 0.0, "num_input_tokens_seen": 8832256, "step": 15545 }, { "epoch": 272.8141592920354, "grad_norm": 4.097962147397993e-08, "learning_rate": 0.2013755879266883, "loss": 0.0, "num_input_tokens_seen": 8835088, "step": 15550 }, { "epoch": 272.9026548672566, "grad_norm": 3.8378686895157443e-08, "learning_rate": 0.20132024188262543, "loss": 0.0, "num_input_tokens_seen": 8837888, "step": 15555 }, { "epoch": 272.9911504424779, "grad_norm": 3.2294874330318635e-08, "learning_rate": 0.2012648879243363, "loss": 0.0, "num_input_tokens_seen": 8840656, "step": 15560 }, { "epoch": 273.070796460177, "grad_norm": 7.933802947945878e-08, "learning_rate": 0.20120952606035725, "loss": 0.0, "num_input_tokens_seen": 8842656, "step": 15565 }, { "epoch": 273.1592920353982, "grad_norm": 5.204551101201105e-08, "learning_rate": 0.20115415629922576, "loss": 0.0, "num_input_tokens_seen": 8845168, "step": 15570 }, { "epoch": 273.24778761061947, "grad_norm": 8.269056905874095e-08, "learning_rate": 0.20109877864948048, "loss": 0.0, "num_input_tokens_seen": 8848032, "step": 15575 }, { "epoch": 273.3362831858407, "grad_norm": 1.4811506332534918e-07, "learning_rate": 0.20104339311966138, "loss": 0.0, "num_input_tokens_seen": 8851328, "step": 15580 }, { "epoch": 273.42477876106193, "grad_norm": 5.473173203540682e-08, "learning_rate": 0.2009879997183097, "loss": 0.0, "num_input_tokens_seen": 8854064, "step": 15585 }, { "epoch": 273.5132743362832, "grad_norm": 8.103808113446576e-08, "learning_rate": 0.20093259845396763, "loss": 0.0, "num_input_tokens_seen": 8856848, "step": 15590 }, { "epoch": 273.60176991150445, "grad_norm": 6.404443553265082e-08, "learning_rate": 0.20087718933517884, "loss": 0.0, "num_input_tokens_seen": 8860368, "step": 15595 }, { "epoch": 273.69026548672565, "grad_norm": 7.48431503438951e-08, "learning_rate": 0.20082177237048807, "loss": 0.0, "num_input_tokens_seen": 8863248, "step": 15600 }, { "epoch": 273.69026548672565, "eval_loss": 0.5557603240013123, "eval_runtime": 0.9213, "eval_samples_per_second": 27.137, "eval_steps_per_second": 14.111, "num_input_tokens_seen": 8863248, "step": 15600 }, { "epoch": 273.7787610619469, "grad_norm": 7.866319862159799e-08, "learning_rate": 0.20076634756844133, "loss": 0.0, "num_input_tokens_seen": 8865936, "step": 15605 }, { "epoch": 273.86725663716817, "grad_norm": 1.6804195368536057e-08, "learning_rate": 0.20071091493758586, "loss": 0.0, "num_input_tokens_seen": 8868976, "step": 15610 }, { "epoch": 273.95575221238937, "grad_norm": 4.504391171167299e-08, "learning_rate": 0.20065547448647003, "loss": 0.0, "num_input_tokens_seen": 8871712, "step": 15615 }, { "epoch": 274.0353982300885, "grad_norm": 6.991735546080236e-08, "learning_rate": 0.20060002622364348, "loss": 0.0, "num_input_tokens_seen": 8874184, "step": 15620 }, { "epoch": 274.12389380530976, "grad_norm": 7.701447657382232e-08, "learning_rate": 0.20054457015765695, "loss": 0.0, "num_input_tokens_seen": 8877032, "step": 15625 }, { "epoch": 274.21238938053096, "grad_norm": 1.316049065280822e-07, "learning_rate": 0.20048910629706254, "loss": 0.0, "num_input_tokens_seen": 8879656, "step": 15630 }, { "epoch": 274.3008849557522, "grad_norm": 6.343760361460227e-08, "learning_rate": 0.20043363465041347, "loss": 0.0, "num_input_tokens_seen": 8882664, "step": 15635 }, { "epoch": 274.3893805309734, "grad_norm": 2.9718540517365e-08, "learning_rate": 0.2003781552262641, "loss": 0.0, "num_input_tokens_seen": 8885672, "step": 15640 }, { "epoch": 274.4778761061947, "grad_norm": 5.785954115822278e-08, "learning_rate": 0.20032266803317014, "loss": 0.0, "num_input_tokens_seen": 8888840, "step": 15645 }, { "epoch": 274.56637168141594, "grad_norm": 3.259883385453577e-08, "learning_rate": 0.2002671730796884, "loss": 0.0, "num_input_tokens_seen": 8891448, "step": 15650 }, { "epoch": 274.65486725663715, "grad_norm": 7.861389406116359e-08, "learning_rate": 0.20021167037437684, "loss": 0.0, "num_input_tokens_seen": 8894760, "step": 15655 }, { "epoch": 274.7433628318584, "grad_norm": 1.1214579132001745e-07, "learning_rate": 0.20015615992579472, "loss": 0.0, "num_input_tokens_seen": 8897688, "step": 15660 }, { "epoch": 274.83185840707966, "grad_norm": 7.662499967864278e-08, "learning_rate": 0.20010064174250244, "loss": 0.0, "num_input_tokens_seen": 8900424, "step": 15665 }, { "epoch": 274.92035398230087, "grad_norm": 8.410760443666732e-08, "learning_rate": 0.2000451158330616, "loss": 0.0, "num_input_tokens_seen": 8903096, "step": 15670 }, { "epoch": 275.0, "grad_norm": 4.934129549383215e-08, "learning_rate": 0.199989582206035, "loss": 0.0, "num_input_tokens_seen": 8905392, "step": 15675 }, { "epoch": 275.08849557522126, "grad_norm": 1.7556903486593e-08, "learning_rate": 0.1999340408699866, "loss": 0.0, "num_input_tokens_seen": 8907856, "step": 15680 }, { "epoch": 275.17699115044246, "grad_norm": 2.980762303650408e-08, "learning_rate": 0.19987849183348155, "loss": 0.0, "num_input_tokens_seen": 8910768, "step": 15685 }, { "epoch": 275.2654867256637, "grad_norm": 2.8074124358568042e-08, "learning_rate": 0.19982293510508628, "loss": 0.0, "num_input_tokens_seen": 8913536, "step": 15690 }, { "epoch": 275.353982300885, "grad_norm": 5.7987250556834624e-08, "learning_rate": 0.19976737069336833, "loss": 0.0, "num_input_tokens_seen": 8916816, "step": 15695 }, { "epoch": 275.4424778761062, "grad_norm": 8.304599674602287e-08, "learning_rate": 0.1997117986068964, "loss": 0.0, "num_input_tokens_seen": 8919168, "step": 15700 }, { "epoch": 275.53097345132744, "grad_norm": 4.163000966173058e-08, "learning_rate": 0.19965621885424037, "loss": 0.0, "num_input_tokens_seen": 8922000, "step": 15705 }, { "epoch": 275.6194690265487, "grad_norm": 1.0364058944389853e-08, "learning_rate": 0.19960063144397142, "loss": 0.0, "num_input_tokens_seen": 8924960, "step": 15710 }, { "epoch": 275.7079646017699, "grad_norm": 8.263473461056492e-08, "learning_rate": 0.19954503638466176, "loss": 0.0, "num_input_tokens_seen": 8928016, "step": 15715 }, { "epoch": 275.79646017699116, "grad_norm": 4.879960968651176e-08, "learning_rate": 0.1994894336848848, "loss": 0.0, "num_input_tokens_seen": 8931008, "step": 15720 }, { "epoch": 275.88495575221236, "grad_norm": 5.52937144959742e-08, "learning_rate": 0.1994338233532153, "loss": 0.0, "num_input_tokens_seen": 8933776, "step": 15725 }, { "epoch": 275.9734513274336, "grad_norm": 6.170581201558889e-08, "learning_rate": 0.19937820539822904, "loss": 0.0, "num_input_tokens_seen": 8936688, "step": 15730 }, { "epoch": 276.05309734513276, "grad_norm": 7.043544769658183e-08, "learning_rate": 0.199322579828503, "loss": 0.0, "num_input_tokens_seen": 8938744, "step": 15735 }, { "epoch": 276.14159292035396, "grad_norm": 3.5735393311142616e-08, "learning_rate": 0.19926694665261527, "loss": 0.0, "num_input_tokens_seen": 8941256, "step": 15740 }, { "epoch": 276.2300884955752, "grad_norm": 1.9407226048429038e-08, "learning_rate": 0.19921130587914526, "loss": 0.0, "num_input_tokens_seen": 8943992, "step": 15745 }, { "epoch": 276.3185840707965, "grad_norm": 4.5293635508869556e-08, "learning_rate": 0.19915565751667344, "loss": 0.0, "num_input_tokens_seen": 8946424, "step": 15750 }, { "epoch": 276.4070796460177, "grad_norm": 3.445737206675403e-08, "learning_rate": 0.19910000157378152, "loss": 0.0, "num_input_tokens_seen": 8949752, "step": 15755 }, { "epoch": 276.49557522123894, "grad_norm": 5.6084775934550635e-08, "learning_rate": 0.1990443380590523, "loss": 0.0, "num_input_tokens_seen": 8953256, "step": 15760 }, { "epoch": 276.5840707964602, "grad_norm": 5.4348820555105704e-08, "learning_rate": 0.19898866698106984, "loss": 0.0, "num_input_tokens_seen": 8956360, "step": 15765 }, { "epoch": 276.6725663716814, "grad_norm": 4.5476980403691414e-08, "learning_rate": 0.19893298834841933, "loss": 0.0, "num_input_tokens_seen": 8959352, "step": 15770 }, { "epoch": 276.76106194690266, "grad_norm": 5.5120846553791125e-08, "learning_rate": 0.19887730216968705, "loss": 0.0, "num_input_tokens_seen": 8962104, "step": 15775 }, { "epoch": 276.8495575221239, "grad_norm": 1.0280210460678063e-07, "learning_rate": 0.19882160845346053, "loss": 0.0, "num_input_tokens_seen": 8964984, "step": 15780 }, { "epoch": 276.9380530973451, "grad_norm": 2.0166586978120904e-07, "learning_rate": 0.1987659072083285, "loss": 0.0, "num_input_tokens_seen": 8968008, "step": 15785 }, { "epoch": 277.01769911504425, "grad_norm": 3.310387342025933e-08, "learning_rate": 0.1987101984428807, "loss": 0.0, "num_input_tokens_seen": 8970792, "step": 15790 }, { "epoch": 277.1061946902655, "grad_norm": 3.0108864734756935e-08, "learning_rate": 0.19865448216570822, "loss": 0.0, "num_input_tokens_seen": 8973832, "step": 15795 }, { "epoch": 277.1946902654867, "grad_norm": 2.2687256517883725e-08, "learning_rate": 0.19859875838540317, "loss": 0.0, "num_input_tokens_seen": 8976424, "step": 15800 }, { "epoch": 277.1946902654867, "eval_loss": 0.5764188170433044, "eval_runtime": 0.917, "eval_samples_per_second": 27.263, "eval_steps_per_second": 14.177, "num_input_tokens_seen": 8976424, "step": 15800 }, { "epoch": 277.283185840708, "grad_norm": 4.697780298101861e-08, "learning_rate": 0.1985430271105588, "loss": 0.0, "num_input_tokens_seen": 8978760, "step": 15805 }, { "epoch": 277.37168141592923, "grad_norm": 1.1015589329588238e-08, "learning_rate": 0.19848728834976961, "loss": 0.0, "num_input_tokens_seen": 8981528, "step": 15810 }, { "epoch": 277.46017699115043, "grad_norm": 8.907077386766105e-08, "learning_rate": 0.19843154211163128, "loss": 0.0, "num_input_tokens_seen": 8984568, "step": 15815 }, { "epoch": 277.5486725663717, "grad_norm": 1.0746138912054448e-07, "learning_rate": 0.1983757884047405, "loss": 0.0, "num_input_tokens_seen": 8987656, "step": 15820 }, { "epoch": 277.6371681415929, "grad_norm": 4.0184335858839404e-08, "learning_rate": 0.1983200272376952, "loss": 0.0, "num_input_tokens_seen": 8990296, "step": 15825 }, { "epoch": 277.72566371681415, "grad_norm": 4.0687016422680244e-08, "learning_rate": 0.1982642586190945, "loss": 0.0, "num_input_tokens_seen": 8993656, "step": 15830 }, { "epoch": 277.8141592920354, "grad_norm": 1.8733274487203744e-08, "learning_rate": 0.1982084825575386, "loss": 0.0, "num_input_tokens_seen": 8996840, "step": 15835 }, { "epoch": 277.9026548672566, "grad_norm": 5.365781063915165e-08, "learning_rate": 0.19815269906162883, "loss": 0.0, "num_input_tokens_seen": 8999512, "step": 15840 }, { "epoch": 277.9911504424779, "grad_norm": 1.0786343551671962e-07, "learning_rate": 0.19809690813996775, "loss": 0.0, "num_input_tokens_seen": 9001832, "step": 15845 }, { "epoch": 278.070796460177, "grad_norm": 3.340190701806023e-08, "learning_rate": 0.19804110980115905, "loss": 0.0, "num_input_tokens_seen": 9004264, "step": 15850 }, { "epoch": 278.1592920353982, "grad_norm": 3.488537458906649e-08, "learning_rate": 0.19798530405380746, "loss": 0.0, "num_input_tokens_seen": 9006920, "step": 15855 }, { "epoch": 278.24778761061947, "grad_norm": 2.9722139416321625e-08, "learning_rate": 0.19792949090651893, "loss": 0.0, "num_input_tokens_seen": 9009544, "step": 15860 }, { "epoch": 278.3362831858407, "grad_norm": 1.0514196446820279e-07, "learning_rate": 0.19787367036790066, "loss": 0.0, "num_input_tokens_seen": 9012472, "step": 15865 }, { "epoch": 278.42477876106193, "grad_norm": 9.628074337797443e-08, "learning_rate": 0.19781784244656075, "loss": 0.0, "num_input_tokens_seen": 9015304, "step": 15870 }, { "epoch": 278.5132743362832, "grad_norm": 4.822276267191228e-08, "learning_rate": 0.19776200715110864, "loss": 0.0, "num_input_tokens_seen": 9017928, "step": 15875 }, { "epoch": 278.60176991150445, "grad_norm": 9.086424768156576e-08, "learning_rate": 0.1977061644901548, "loss": 0.0, "num_input_tokens_seen": 9021272, "step": 15880 }, { "epoch": 278.69026548672565, "grad_norm": 4.902606676182586e-08, "learning_rate": 0.1976503144723109, "loss": 0.0, "num_input_tokens_seen": 9024472, "step": 15885 }, { "epoch": 278.7787610619469, "grad_norm": 5.230311472814719e-08, "learning_rate": 0.19759445710618967, "loss": 0.0, "num_input_tokens_seen": 9027288, "step": 15890 }, { "epoch": 278.86725663716817, "grad_norm": 9.063401762432477e-08, "learning_rate": 0.19753859240040508, "loss": 0.0, "num_input_tokens_seen": 9030024, "step": 15895 }, { "epoch": 278.95575221238937, "grad_norm": 5.581898676609853e-08, "learning_rate": 0.1974827203635721, "loss": 0.0, "num_input_tokens_seen": 9032984, "step": 15900 }, { "epoch": 279.0353982300885, "grad_norm": 6.188042789290193e-08, "learning_rate": 0.19742684100430694, "loss": 0.0, "num_input_tokens_seen": 9035232, "step": 15905 }, { "epoch": 279.12389380530976, "grad_norm": 4.417390897515361e-08, "learning_rate": 0.19737095433122692, "loss": 0.0, "num_input_tokens_seen": 9037920, "step": 15910 }, { "epoch": 279.21238938053096, "grad_norm": 3.180096541655075e-08, "learning_rate": 0.19731506035295046, "loss": 0.0, "num_input_tokens_seen": 9040560, "step": 15915 }, { "epoch": 279.3008849557522, "grad_norm": 5.5686051325665176e-08, "learning_rate": 0.19725915907809702, "loss": 0.0, "num_input_tokens_seen": 9043664, "step": 15920 }, { "epoch": 279.3893805309734, "grad_norm": 2.550174649229575e-08, "learning_rate": 0.1972032505152874, "loss": 0.0, "num_input_tokens_seen": 9046544, "step": 15925 }, { "epoch": 279.4778761061947, "grad_norm": 7.3959760982234e-08, "learning_rate": 0.19714733467314338, "loss": 0.0, "num_input_tokens_seen": 9049456, "step": 15930 }, { "epoch": 279.56637168141594, "grad_norm": 4.703490930069165e-08, "learning_rate": 0.19709141156028784, "loss": 0.0, "num_input_tokens_seen": 9052560, "step": 15935 }, { "epoch": 279.65486725663715, "grad_norm": 7.445943595030258e-08, "learning_rate": 0.1970354811853448, "loss": 0.0, "num_input_tokens_seen": 9055312, "step": 15940 }, { "epoch": 279.7433628318584, "grad_norm": 3.6241516454538214e-08, "learning_rate": 0.19697954355693953, "loss": 0.0, "num_input_tokens_seen": 9058080, "step": 15945 }, { "epoch": 279.83185840707966, "grad_norm": 6.715493583442367e-08, "learning_rate": 0.19692359868369827, "loss": 0.0, "num_input_tokens_seen": 9061280, "step": 15950 }, { "epoch": 279.92035398230087, "grad_norm": 5.176018547103922e-08, "learning_rate": 0.1968676465742484, "loss": 0.0, "num_input_tokens_seen": 9064128, "step": 15955 }, { "epoch": 280.0, "grad_norm": 1.3714140045806289e-08, "learning_rate": 0.19681168723721845, "loss": 0.0, "num_input_tokens_seen": 9066520, "step": 15960 }, { "epoch": 280.08849557522126, "grad_norm": 1.2863401011031783e-08, "learning_rate": 0.19675572068123803, "loss": 0.0, "num_input_tokens_seen": 9069192, "step": 15965 }, { "epoch": 280.17699115044246, "grad_norm": 7.397404289122278e-08, "learning_rate": 0.19669974691493794, "loss": 0.0, "num_input_tokens_seen": 9072264, "step": 15970 }, { "epoch": 280.2654867256637, "grad_norm": 4.458270197460479e-08, "learning_rate": 0.19664376594695002, "loss": 0.0, "num_input_tokens_seen": 9074808, "step": 15975 }, { "epoch": 280.353982300885, "grad_norm": 9.256390853806806e-08, "learning_rate": 0.19658777778590722, "loss": 0.0, "num_input_tokens_seen": 9077592, "step": 15980 }, { "epoch": 280.4424778761062, "grad_norm": 2.4110882179684268e-08, "learning_rate": 0.19653178244044364, "loss": 0.0, "num_input_tokens_seen": 9080632, "step": 15985 }, { "epoch": 280.53097345132744, "grad_norm": 6.495034199360816e-08, "learning_rate": 0.19647577991919443, "loss": 0.0, "num_input_tokens_seen": 9083816, "step": 15990 }, { "epoch": 280.6194690265487, "grad_norm": 8.03336988042247e-08, "learning_rate": 0.1964197702307959, "loss": 0.0, "num_input_tokens_seen": 9086312, "step": 15995 }, { "epoch": 280.7079646017699, "grad_norm": 5.919824275224528e-08, "learning_rate": 0.19636375338388545, "loss": 0.0, "num_input_tokens_seen": 9088984, "step": 16000 }, { "epoch": 280.7079646017699, "eval_loss": 0.5791025161743164, "eval_runtime": 0.9334, "eval_samples_per_second": 26.783, "eval_steps_per_second": 13.927, "num_input_tokens_seen": 9088984, "step": 16000 }, { "epoch": 280.79646017699116, "grad_norm": 7.141792934817204e-08, "learning_rate": 0.1963077293871016, "loss": 0.0, "num_input_tokens_seen": 9091704, "step": 16005 }, { "epoch": 280.88495575221236, "grad_norm": 4.188337499044792e-08, "learning_rate": 0.19625169824908395, "loss": 0.0, "num_input_tokens_seen": 9095176, "step": 16010 }, { "epoch": 280.9734513274336, "grad_norm": 7.238379851060017e-08, "learning_rate": 0.19619565997847319, "loss": 0.0, "num_input_tokens_seen": 9098424, "step": 16015 }, { "epoch": 281.05309734513276, "grad_norm": 7.557277115211036e-08, "learning_rate": 0.19613961458391113, "loss": 0.0, "num_input_tokens_seen": 9100800, "step": 16020 }, { "epoch": 281.14159292035396, "grad_norm": 8.267761586466804e-08, "learning_rate": 0.19608356207404065, "loss": 0.0, "num_input_tokens_seen": 9103552, "step": 16025 }, { "epoch": 281.2300884955752, "grad_norm": 2.0887615193032616e-08, "learning_rate": 0.1960275024575058, "loss": 0.0, "num_input_tokens_seen": 9106368, "step": 16030 }, { "epoch": 281.3185840707965, "grad_norm": 8.878649282451079e-08, "learning_rate": 0.19597143574295164, "loss": 0.0, "num_input_tokens_seen": 9109008, "step": 16035 }, { "epoch": 281.4070796460177, "grad_norm": 4.3165886864926506e-08, "learning_rate": 0.1959153619390244, "loss": 0.0, "num_input_tokens_seen": 9112544, "step": 16040 }, { "epoch": 281.49557522123894, "grad_norm": 2.933246356917607e-08, "learning_rate": 0.1958592810543713, "loss": 0.0, "num_input_tokens_seen": 9115280, "step": 16045 }, { "epoch": 281.5840707964602, "grad_norm": 7.311957261890711e-08, "learning_rate": 0.19580319309764077, "loss": 0.0, "num_input_tokens_seen": 9117760, "step": 16050 }, { "epoch": 281.6725663716814, "grad_norm": 6.512665606805967e-08, "learning_rate": 0.1957470980774823, "loss": 0.0, "num_input_tokens_seen": 9120688, "step": 16055 }, { "epoch": 281.76106194690266, "grad_norm": 1.1208196326606412e-07, "learning_rate": 0.19569099600254639, "loss": 0.0, "num_input_tokens_seen": 9123408, "step": 16060 }, { "epoch": 281.8495575221239, "grad_norm": 3.310541529799593e-08, "learning_rate": 0.1956348868814847, "loss": 0.0, "num_input_tokens_seen": 9126368, "step": 16065 }, { "epoch": 281.9380530973451, "grad_norm": 6.28610052899603e-08, "learning_rate": 0.19557877072295, "loss": 0.0, "num_input_tokens_seen": 9129552, "step": 16070 }, { "epoch": 282.01769911504425, "grad_norm": 3.8616189357298936e-08, "learning_rate": 0.19552264753559603, "loss": 0.0, "num_input_tokens_seen": 9132240, "step": 16075 }, { "epoch": 282.1061946902655, "grad_norm": 5.18208551625321e-08, "learning_rate": 0.19546651732807774, "loss": 0.0, "num_input_tokens_seen": 9134768, "step": 16080 }, { "epoch": 282.1946902654867, "grad_norm": 1.1138522992837352e-08, "learning_rate": 0.19541038010905112, "loss": 0.0, "num_input_tokens_seen": 9137856, "step": 16085 }, { "epoch": 282.283185840708, "grad_norm": 5.721481599607614e-08, "learning_rate": 0.19535423588717324, "loss": 0.0, "num_input_tokens_seen": 9140448, "step": 16090 }, { "epoch": 282.37168141592923, "grad_norm": 4.434350486803851e-08, "learning_rate": 0.19529808467110224, "loss": 0.0, "num_input_tokens_seen": 9143200, "step": 16095 }, { "epoch": 282.46017699115043, "grad_norm": 3.773866907863521e-08, "learning_rate": 0.19524192646949734, "loss": 0.0, "num_input_tokens_seen": 9146016, "step": 16100 }, { "epoch": 282.5486725663717, "grad_norm": 4.351641180733168e-08, "learning_rate": 0.19518576129101878, "loss": 0.0, "num_input_tokens_seen": 9148576, "step": 16105 }, { "epoch": 282.6371681415929, "grad_norm": 3.5214704041663936e-08, "learning_rate": 0.19512958914432804, "loss": 0.0, "num_input_tokens_seen": 9151776, "step": 16110 }, { "epoch": 282.72566371681415, "grad_norm": 4.7451951701305006e-08, "learning_rate": 0.1950734100380875, "loss": 0.0, "num_input_tokens_seen": 9154896, "step": 16115 }, { "epoch": 282.8141592920354, "grad_norm": 7.380545241630898e-08, "learning_rate": 0.19501722398096066, "loss": 0.0, "num_input_tokens_seen": 9157744, "step": 16120 }, { "epoch": 282.9026548672566, "grad_norm": 6.765228732774631e-08, "learning_rate": 0.1949610309816122, "loss": 0.0, "num_input_tokens_seen": 9160704, "step": 16125 }, { "epoch": 282.9911504424779, "grad_norm": 1.9251709559853225e-08, "learning_rate": 0.1949048310487078, "loss": 0.0, "num_input_tokens_seen": 9164096, "step": 16130 }, { "epoch": 283.070796460177, "grad_norm": 6.415272224558066e-08, "learning_rate": 0.19484862419091406, "loss": 0.0, "num_input_tokens_seen": 9166512, "step": 16135 }, { "epoch": 283.1592920353982, "grad_norm": 4.660168428927136e-08, "learning_rate": 0.19479241041689893, "loss": 0.0, "num_input_tokens_seen": 9169456, "step": 16140 }, { "epoch": 283.24778761061947, "grad_norm": 4.0298804293570356e-08, "learning_rate": 0.19473618973533116, "loss": 0.0, "num_input_tokens_seen": 9172336, "step": 16145 }, { "epoch": 283.3362831858407, "grad_norm": 7.046748606853726e-08, "learning_rate": 0.19467996215488076, "loss": 0.0, "num_input_tokens_seen": 9174784, "step": 16150 }, { "epoch": 283.42477876106193, "grad_norm": 3.330206865825858e-08, "learning_rate": 0.1946237276842187, "loss": 0.0, "num_input_tokens_seen": 9177456, "step": 16155 }, { "epoch": 283.5132743362832, "grad_norm": 9.651753174466648e-08, "learning_rate": 0.19456748633201712, "loss": 0.0, "num_input_tokens_seen": 9179952, "step": 16160 }, { "epoch": 283.60176991150445, "grad_norm": 3.0168543219133426e-08, "learning_rate": 0.194511238106949, "loss": 0.0, "num_input_tokens_seen": 9182992, "step": 16165 }, { "epoch": 283.69026548672565, "grad_norm": 1.4637682177465194e-08, "learning_rate": 0.19445498301768863, "loss": 0.0, "num_input_tokens_seen": 9186336, "step": 16170 }, { "epoch": 283.7787610619469, "grad_norm": 4.6742393067233934e-08, "learning_rate": 0.19439872107291126, "loss": 0.0, "num_input_tokens_seen": 9189264, "step": 16175 }, { "epoch": 283.86725663716817, "grad_norm": 4.3774665670071045e-08, "learning_rate": 0.1943424522812931, "loss": 0.0, "num_input_tokens_seen": 9192480, "step": 16180 }, { "epoch": 283.95575221238937, "grad_norm": 5.2659473226412956e-08, "learning_rate": 0.19428617665151157, "loss": 0.0, "num_input_tokens_seen": 9195264, "step": 16185 }, { "epoch": 284.0353982300885, "grad_norm": 3.986876606631995e-08, "learning_rate": 0.19422989419224507, "loss": 0.0, "num_input_tokens_seen": 9197744, "step": 16190 }, { "epoch": 284.12389380530976, "grad_norm": 4.466793157575921e-08, "learning_rate": 0.19417360491217303, "loss": 0.0, "num_input_tokens_seen": 9200960, "step": 16195 }, { "epoch": 284.21238938053096, "grad_norm": 1.771601887412544e-08, "learning_rate": 0.19411730881997605, "loss": 0.0, "num_input_tokens_seen": 9204128, "step": 16200 }, { "epoch": 284.21238938053096, "eval_loss": 0.5731778740882874, "eval_runtime": 0.9362, "eval_samples_per_second": 26.703, "eval_steps_per_second": 13.886, "num_input_tokens_seen": 9204128, "step": 16200 }, { "epoch": 284.3008849557522, "grad_norm": 5.0735177836713774e-08, "learning_rate": 0.1940610059243356, "loss": 0.0, "num_input_tokens_seen": 9207184, "step": 16205 }, { "epoch": 284.3893805309734, "grad_norm": 3.561446959565728e-08, "learning_rate": 0.19400469623393435, "loss": 0.0, "num_input_tokens_seen": 9210048, "step": 16210 }, { "epoch": 284.4778761061947, "grad_norm": 4.1122635963120047e-08, "learning_rate": 0.1939483797574559, "loss": 0.0, "num_input_tokens_seen": 9212576, "step": 16215 }, { "epoch": 284.56637168141594, "grad_norm": 4.0147543955981746e-08, "learning_rate": 0.19389205650358504, "loss": 0.0, "num_input_tokens_seen": 9215312, "step": 16220 }, { "epoch": 284.65486725663715, "grad_norm": 8.266813011914564e-08, "learning_rate": 0.19383572648100747, "loss": 0.0, "num_input_tokens_seen": 9217968, "step": 16225 }, { "epoch": 284.7433628318584, "grad_norm": 2.298250656451728e-08, "learning_rate": 0.19377938969841, "loss": 0.0, "num_input_tokens_seen": 9220736, "step": 16230 }, { "epoch": 284.83185840707966, "grad_norm": 5.238841538357519e-08, "learning_rate": 0.1937230461644805, "loss": 0.0, "num_input_tokens_seen": 9223760, "step": 16235 }, { "epoch": 284.92035398230087, "grad_norm": 5.51482841615325e-08, "learning_rate": 0.19366669588790777, "loss": 0.0, "num_input_tokens_seen": 9226512, "step": 16240 }, { "epoch": 285.0, "grad_norm": 2.00334717703754e-08, "learning_rate": 0.19361033887738185, "loss": 0.0, "num_input_tokens_seen": 9229264, "step": 16245 }, { "epoch": 285.08849557522126, "grad_norm": 7.492477038795187e-08, "learning_rate": 0.19355397514159361, "loss": 0.0, "num_input_tokens_seen": 9232288, "step": 16250 }, { "epoch": 285.17699115044246, "grad_norm": 9.965557268287739e-08, "learning_rate": 0.19349760468923508, "loss": 0.0, "num_input_tokens_seen": 9235472, "step": 16255 }, { "epoch": 285.2654867256637, "grad_norm": 9.543219192664765e-08, "learning_rate": 0.19344122752899925, "loss": 0.0, "num_input_tokens_seen": 9238064, "step": 16260 }, { "epoch": 285.353982300885, "grad_norm": 5.619471110662744e-08, "learning_rate": 0.1933848436695802, "loss": 0.0, "num_input_tokens_seen": 9241216, "step": 16265 }, { "epoch": 285.4424778761062, "grad_norm": 4.9771390564501417e-08, "learning_rate": 0.1933284531196731, "loss": 0.0, "num_input_tokens_seen": 9243888, "step": 16270 }, { "epoch": 285.53097345132744, "grad_norm": 1.0878910217115845e-08, "learning_rate": 0.19327205588797403, "loss": 0.0, "num_input_tokens_seen": 9247008, "step": 16275 }, { "epoch": 285.6194690265487, "grad_norm": 3.8934945933988274e-08, "learning_rate": 0.19321565198318014, "loss": 0.0, "num_input_tokens_seen": 9250032, "step": 16280 }, { "epoch": 285.7079646017699, "grad_norm": 1.8918038691140282e-08, "learning_rate": 0.1931592414139896, "loss": 0.0, "num_input_tokens_seen": 9252544, "step": 16285 }, { "epoch": 285.79646017699116, "grad_norm": 1.8219845188127692e-08, "learning_rate": 0.19310282418910169, "loss": 0.0, "num_input_tokens_seen": 9255056, "step": 16290 }, { "epoch": 285.88495575221236, "grad_norm": 4.0929524658395167e-08, "learning_rate": 0.1930464003172166, "loss": 0.0, "num_input_tokens_seen": 9257888, "step": 16295 }, { "epoch": 285.9734513274336, "grad_norm": 3.653700986205877e-08, "learning_rate": 0.19298996980703567, "loss": 0.0, "num_input_tokens_seen": 9260832, "step": 16300 }, { "epoch": 286.05309734513276, "grad_norm": 3.16213402129506e-08, "learning_rate": 0.19293353266726113, "loss": 0.0, "num_input_tokens_seen": 9263472, "step": 16305 }, { "epoch": 286.14159292035396, "grad_norm": 6.699293919609772e-08, "learning_rate": 0.19287708890659633, "loss": 0.0, "num_input_tokens_seen": 9266176, "step": 16310 }, { "epoch": 286.2300884955752, "grad_norm": 8.353413960549005e-08, "learning_rate": 0.19282063853374556, "loss": 0.0, "num_input_tokens_seen": 9269056, "step": 16315 }, { "epoch": 286.3185840707965, "grad_norm": 3.03939486911986e-08, "learning_rate": 0.19276418155741423, "loss": 0.0, "num_input_tokens_seen": 9271840, "step": 16320 }, { "epoch": 286.4070796460177, "grad_norm": 8.881924173920197e-08, "learning_rate": 0.19270771798630867, "loss": 0.0, "num_input_tokens_seen": 9274848, "step": 16325 }, { "epoch": 286.49557522123894, "grad_norm": 2.9505274667940284e-08, "learning_rate": 0.1926512478291363, "loss": 0.0, "num_input_tokens_seen": 9277568, "step": 16330 }, { "epoch": 286.5840707964602, "grad_norm": 4.3635957069909637e-08, "learning_rate": 0.19259477109460557, "loss": 0.0, "num_input_tokens_seen": 9280544, "step": 16335 }, { "epoch": 286.6725663716814, "grad_norm": 4.4941984356228204e-08, "learning_rate": 0.19253828779142584, "loss": 0.0, "num_input_tokens_seen": 9283040, "step": 16340 }, { "epoch": 286.76106194690266, "grad_norm": 9.56378265470903e-09, "learning_rate": 0.19248179792830755, "loss": 0.0, "num_input_tokens_seen": 9286048, "step": 16345 }, { "epoch": 286.8495575221239, "grad_norm": 3.0640425308092745e-08, "learning_rate": 0.19242530151396217, "loss": 0.0, "num_input_tokens_seen": 9289392, "step": 16350 }, { "epoch": 286.9380530973451, "grad_norm": 7.545553160070995e-08, "learning_rate": 0.19236879855710215, "loss": 0.0, "num_input_tokens_seen": 9291952, "step": 16355 }, { "epoch": 287.01769911504425, "grad_norm": 6.334239088801041e-08, "learning_rate": 0.19231228906644096, "loss": 0.0, "num_input_tokens_seen": 9294184, "step": 16360 }, { "epoch": 287.1061946902655, "grad_norm": 7.876735708123306e-08, "learning_rate": 0.19225577305069302, "loss": 0.0, "num_input_tokens_seen": 9297176, "step": 16365 }, { "epoch": 287.1946902654867, "grad_norm": 4.1219756496957416e-08, "learning_rate": 0.1921992505185739, "loss": 0.0, "num_input_tokens_seen": 9299880, "step": 16370 }, { "epoch": 287.283185840708, "grad_norm": 5.33434718619219e-09, "learning_rate": 0.19214272147880004, "loss": 0.0, "num_input_tokens_seen": 9302536, "step": 16375 }, { "epoch": 287.37168141592923, "grad_norm": 4.432377309626645e-08, "learning_rate": 0.19208618594008892, "loss": 0.0, "num_input_tokens_seen": 9305528, "step": 16380 }, { "epoch": 287.46017699115043, "grad_norm": 3.71861439418808e-08, "learning_rate": 0.19202964391115904, "loss": 0.0, "num_input_tokens_seen": 9308488, "step": 16385 }, { "epoch": 287.5486725663717, "grad_norm": 6.090736803798791e-08, "learning_rate": 0.1919730954007299, "loss": 0.0, "num_input_tokens_seen": 9311608, "step": 16390 }, { "epoch": 287.6371681415929, "grad_norm": 2.255715791932289e-08, "learning_rate": 0.19191654041752199, "loss": 0.0, "num_input_tokens_seen": 9314376, "step": 16395 }, { "epoch": 287.72566371681415, "grad_norm": 2.6625951221603827e-08, "learning_rate": 0.19185997897025678, "loss": 0.0, "num_input_tokens_seen": 9317208, "step": 16400 }, { "epoch": 287.72566371681415, "eval_loss": 0.5829854607582092, "eval_runtime": 0.9206, "eval_samples_per_second": 27.157, "eval_steps_per_second": 14.122, "num_input_tokens_seen": 9317208, "step": 16400 }, { "epoch": 287.8141592920354, "grad_norm": 4.3530043569717236e-08, "learning_rate": 0.19180341106765672, "loss": 0.0, "num_input_tokens_seen": 9320456, "step": 16405 }, { "epoch": 287.9026548672566, "grad_norm": 1.5380811291265672e-08, "learning_rate": 0.19174683671844536, "loss": 0.0, "num_input_tokens_seen": 9323400, "step": 16410 }, { "epoch": 287.9911504424779, "grad_norm": 2.4110809349053852e-08, "learning_rate": 0.19169025593134717, "loss": 0.0, "num_input_tokens_seen": 9325992, "step": 16415 }, { "epoch": 288.070796460177, "grad_norm": 5.3318331083573867e-08, "learning_rate": 0.19163366871508764, "loss": 0.0, "num_input_tokens_seen": 9328424, "step": 16420 }, { "epoch": 288.1592920353982, "grad_norm": 2.6718598888919587e-08, "learning_rate": 0.19157707507839317, "loss": 0.0, "num_input_tokens_seen": 9331688, "step": 16425 }, { "epoch": 288.24778761061947, "grad_norm": 4.200950698418637e-08, "learning_rate": 0.19152047502999123, "loss": 0.0, "num_input_tokens_seen": 9334360, "step": 16430 }, { "epoch": 288.3362831858407, "grad_norm": 4.48682975218162e-08, "learning_rate": 0.19146386857861025, "loss": 0.0, "num_input_tokens_seen": 9337096, "step": 16435 }, { "epoch": 288.42477876106193, "grad_norm": 1.650240832873351e-08, "learning_rate": 0.19140725573297968, "loss": 0.0, "num_input_tokens_seen": 9340024, "step": 16440 }, { "epoch": 288.5132743362832, "grad_norm": 3.880736443306887e-08, "learning_rate": 0.19135063650182987, "loss": 0.0, "num_input_tokens_seen": 9343224, "step": 16445 }, { "epoch": 288.60176991150445, "grad_norm": 3.528749914494256e-08, "learning_rate": 0.19129401089389234, "loss": 0.0, "num_input_tokens_seen": 9345896, "step": 16450 }, { "epoch": 288.69026548672565, "grad_norm": 2.3178204244800327e-08, "learning_rate": 0.19123737891789938, "loss": 0.0, "num_input_tokens_seen": 9349160, "step": 16455 }, { "epoch": 288.7787610619469, "grad_norm": 4.84892694885275e-08, "learning_rate": 0.19118074058258439, "loss": 0.0, "num_input_tokens_seen": 9351720, "step": 16460 }, { "epoch": 288.86725663716817, "grad_norm": 6.294360588299241e-08, "learning_rate": 0.1911240958966816, "loss": 0.0, "num_input_tokens_seen": 9353976, "step": 16465 }, { "epoch": 288.95575221238937, "grad_norm": 1.9192977873672135e-08, "learning_rate": 0.19106744486892652, "loss": 0.0, "num_input_tokens_seen": 9356632, "step": 16470 }, { "epoch": 289.0353982300885, "grad_norm": 7.14319625672033e-08, "learning_rate": 0.1910107875080553, "loss": 0.0, "num_input_tokens_seen": 9359328, "step": 16475 }, { "epoch": 289.12389380530976, "grad_norm": 8.615845814574641e-08, "learning_rate": 0.19095412382280533, "loss": 0.0, "num_input_tokens_seen": 9362272, "step": 16480 }, { "epoch": 289.21238938053096, "grad_norm": 1.8070384300017395e-08, "learning_rate": 0.19089745382191473, "loss": 0.0, "num_input_tokens_seen": 9364960, "step": 16485 }, { "epoch": 289.3008849557522, "grad_norm": 4.7998472751942245e-08, "learning_rate": 0.19084077751412284, "loss": 0.0, "num_input_tokens_seen": 9368096, "step": 16490 }, { "epoch": 289.3893805309734, "grad_norm": 5.000238090246967e-08, "learning_rate": 0.19078409490816986, "loss": 0.0, "num_input_tokens_seen": 9371136, "step": 16495 }, { "epoch": 289.4778761061947, "grad_norm": 4.792290297928048e-08, "learning_rate": 0.19072740601279686, "loss": 0.0, "num_input_tokens_seen": 9373904, "step": 16500 }, { "epoch": 289.56637168141594, "grad_norm": 3.454715979955836e-08, "learning_rate": 0.19067071083674605, "loss": 0.0, "num_input_tokens_seen": 9376608, "step": 16505 }, { "epoch": 289.65486725663715, "grad_norm": 4.6097582639959e-08, "learning_rate": 0.19061400938876052, "loss": 0.0, "num_input_tokens_seen": 9379040, "step": 16510 }, { "epoch": 289.7433628318584, "grad_norm": 9.730552363862444e-08, "learning_rate": 0.1905573016775844, "loss": 0.0, "num_input_tokens_seen": 9381904, "step": 16515 }, { "epoch": 289.83185840707966, "grad_norm": 3.659082281615156e-08, "learning_rate": 0.19050058771196263, "loss": 0.0, "num_input_tokens_seen": 9384944, "step": 16520 }, { "epoch": 289.92035398230087, "grad_norm": 4.0630474984482134e-08, "learning_rate": 0.19044386750064132, "loss": 0.0, "num_input_tokens_seen": 9388400, "step": 16525 }, { "epoch": 290.0, "grad_norm": 4.197710978814939e-08, "learning_rate": 0.19038714105236737, "loss": 0.0, "num_input_tokens_seen": 9390648, "step": 16530 }, { "epoch": 290.08849557522126, "grad_norm": 6.408247088529606e-08, "learning_rate": 0.19033040837588874, "loss": 0.0, "num_input_tokens_seen": 9393784, "step": 16535 }, { "epoch": 290.17699115044246, "grad_norm": 3.0754986113379346e-08, "learning_rate": 0.1902736694799543, "loss": 0.0, "num_input_tokens_seen": 9396680, "step": 16540 }, { "epoch": 290.2654867256637, "grad_norm": 3.7041477440880044e-08, "learning_rate": 0.19021692437331392, "loss": 0.0, "num_input_tokens_seen": 9399784, "step": 16545 }, { "epoch": 290.353982300885, "grad_norm": 6.338833458130466e-08, "learning_rate": 0.1901601730647184, "loss": 0.0, "num_input_tokens_seen": 9402632, "step": 16550 }, { "epoch": 290.4424778761062, "grad_norm": 4.488640925615073e-08, "learning_rate": 0.19010341556291954, "loss": 0.0, "num_input_tokens_seen": 9405368, "step": 16555 }, { "epoch": 290.53097345132744, "grad_norm": 3.6203488207320333e-08, "learning_rate": 0.19004665187667, "loss": 0.0, "num_input_tokens_seen": 9408456, "step": 16560 }, { "epoch": 290.6194690265487, "grad_norm": 4.4784055575064485e-08, "learning_rate": 0.1899898820147235, "loss": 0.0, "num_input_tokens_seen": 9410920, "step": 16565 }, { "epoch": 290.7079646017699, "grad_norm": 1.9964266684269205e-08, "learning_rate": 0.18993310598583465, "loss": 0.0, "num_input_tokens_seen": 9414024, "step": 16570 }, { "epoch": 290.79646017699116, "grad_norm": 4.2441460124109653e-08, "learning_rate": 0.18987632379875904, "loss": 0.0, "num_input_tokens_seen": 9417144, "step": 16575 }, { "epoch": 290.88495575221236, "grad_norm": 7.196140927590022e-08, "learning_rate": 0.18981953546225314, "loss": 0.0, "num_input_tokens_seen": 9420024, "step": 16580 }, { "epoch": 290.9734513274336, "grad_norm": 3.026540440487224e-08, "learning_rate": 0.18976274098507445, "loss": 0.0, "num_input_tokens_seen": 9422488, "step": 16585 }, { "epoch": 291.05309734513276, "grad_norm": 3.0443668919133415e-08, "learning_rate": 0.18970594037598146, "loss": 0.0, "num_input_tokens_seen": 9424856, "step": 16590 }, { "epoch": 291.14159292035396, "grad_norm": 2.210692606752218e-08, "learning_rate": 0.1896491336437335, "loss": 0.0, "num_input_tokens_seen": 9427880, "step": 16595 }, { "epoch": 291.2300884955752, "grad_norm": 1.7064269997035808e-08, "learning_rate": 0.18959232079709085, "loss": 0.0, "num_input_tokens_seen": 9431208, "step": 16600 }, { "epoch": 291.2300884955752, "eval_loss": 0.589658260345459, "eval_runtime": 0.9454, "eval_samples_per_second": 26.444, "eval_steps_per_second": 13.751, "num_input_tokens_seen": 9431208, "step": 16600 }, { "epoch": 291.3185840707965, "grad_norm": 5.584525908375326e-08, "learning_rate": 0.18953550184481477, "loss": 0.0, "num_input_tokens_seen": 9433976, "step": 16605 }, { "epoch": 291.4070796460177, "grad_norm": 2.7258566959176278e-08, "learning_rate": 0.18947867679566752, "loss": 0.0, "num_input_tokens_seen": 9436232, "step": 16610 }, { "epoch": 291.49557522123894, "grad_norm": 4.647998963491773e-08, "learning_rate": 0.18942184565841216, "loss": 0.0, "num_input_tokens_seen": 9439112, "step": 16615 }, { "epoch": 291.5840707964602, "grad_norm": 4.213920945517202e-08, "learning_rate": 0.18936500844181278, "loss": 0.0, "num_input_tokens_seen": 9441736, "step": 16620 }, { "epoch": 291.6725663716814, "grad_norm": 4.8096300275801696e-08, "learning_rate": 0.18930816515463436, "loss": 0.0, "num_input_tokens_seen": 9444504, "step": 16625 }, { "epoch": 291.76106194690266, "grad_norm": 3.193757081021431e-08, "learning_rate": 0.18925131580564297, "loss": 0.0, "num_input_tokens_seen": 9447496, "step": 16630 }, { "epoch": 291.8495575221239, "grad_norm": 2.579761115839574e-08, "learning_rate": 0.1891944604036054, "loss": 0.0, "num_input_tokens_seen": 9450648, "step": 16635 }, { "epoch": 291.9380530973451, "grad_norm": 5.6573117745983836e-08, "learning_rate": 0.1891375989572895, "loss": 0.0, "num_input_tokens_seen": 9453880, "step": 16640 }, { "epoch": 292.01769911504425, "grad_norm": 3.78931339639621e-08, "learning_rate": 0.18908073147546398, "loss": 0.0, "num_input_tokens_seen": 9455992, "step": 16645 }, { "epoch": 292.1061946902655, "grad_norm": 2.329855952609705e-08, "learning_rate": 0.18902385796689858, "loss": 0.0, "num_input_tokens_seen": 9458872, "step": 16650 }, { "epoch": 292.1946902654867, "grad_norm": 2.74485287832249e-08, "learning_rate": 0.18896697844036384, "loss": 0.0, "num_input_tokens_seen": 9461512, "step": 16655 }, { "epoch": 292.283185840708, "grad_norm": 6.326374091258913e-08, "learning_rate": 0.18891009290463137, "loss": 0.0, "num_input_tokens_seen": 9464216, "step": 16660 }, { "epoch": 292.37168141592923, "grad_norm": 2.018120959235148e-08, "learning_rate": 0.18885320136847353, "loss": 0.0, "num_input_tokens_seen": 9467160, "step": 16665 }, { "epoch": 292.46017699115043, "grad_norm": 6.190057177946073e-08, "learning_rate": 0.1887963038406639, "loss": 0.0, "num_input_tokens_seen": 9469880, "step": 16670 }, { "epoch": 292.5486725663717, "grad_norm": 5.467841646122906e-08, "learning_rate": 0.18873940032997658, "loss": 0.0, "num_input_tokens_seen": 9472392, "step": 16675 }, { "epoch": 292.6371681415929, "grad_norm": 3.2032961172490104e-08, "learning_rate": 0.18868249084518693, "loss": 0.0, "num_input_tokens_seen": 9475096, "step": 16680 }, { "epoch": 292.72566371681415, "grad_norm": 2.7805063140817765e-08, "learning_rate": 0.18862557539507102, "loss": 0.0, "num_input_tokens_seen": 9478024, "step": 16685 }, { "epoch": 292.8141592920354, "grad_norm": 4.291906208209184e-08, "learning_rate": 0.18856865398840605, "loss": 0.0, "num_input_tokens_seen": 9481416, "step": 16690 }, { "epoch": 292.9026548672566, "grad_norm": 2.9684994018452926e-08, "learning_rate": 0.18851172663396995, "loss": 0.0, "num_input_tokens_seen": 9484360, "step": 16695 }, { "epoch": 292.9911504424779, "grad_norm": 2.3631660184264547e-08, "learning_rate": 0.1884547933405416, "loss": 0.0, "num_input_tokens_seen": 9487320, "step": 16700 }, { "epoch": 293.070796460177, "grad_norm": 4.2839293001861734e-08, "learning_rate": 0.1883978541169009, "loss": 0.0, "num_input_tokens_seen": 9489576, "step": 16705 }, { "epoch": 293.1592920353982, "grad_norm": 6.895344029089756e-08, "learning_rate": 0.18834090897182854, "loss": 0.0, "num_input_tokens_seen": 9492840, "step": 16710 }, { "epoch": 293.24778761061947, "grad_norm": 6.088077952881576e-08, "learning_rate": 0.1882839579141062, "loss": 0.0, "num_input_tokens_seen": 9495496, "step": 16715 }, { "epoch": 293.3362831858407, "grad_norm": 5.0252399574901574e-08, "learning_rate": 0.18822700095251646, "loss": 0.0, "num_input_tokens_seen": 9498504, "step": 16720 }, { "epoch": 293.42477876106193, "grad_norm": 3.512106871994547e-08, "learning_rate": 0.18817003809584273, "loss": 0.0, "num_input_tokens_seen": 9501544, "step": 16725 }, { "epoch": 293.5132743362832, "grad_norm": 4.2326153248950504e-08, "learning_rate": 0.1881130693528695, "loss": 0.0, "num_input_tokens_seen": 9504392, "step": 16730 }, { "epoch": 293.60176991150445, "grad_norm": 5.897478771998976e-08, "learning_rate": 0.18805609473238197, "loss": 0.0, "num_input_tokens_seen": 9507256, "step": 16735 }, { "epoch": 293.69026548672565, "grad_norm": 1.4322976582548108e-08, "learning_rate": 0.18799911424316643, "loss": 0.0, "num_input_tokens_seen": 9510008, "step": 16740 }, { "epoch": 293.7787610619469, "grad_norm": 4.178708579161139e-08, "learning_rate": 0.18794212789400994, "loss": 0.0, "num_input_tokens_seen": 9513064, "step": 16745 }, { "epoch": 293.86725663716817, "grad_norm": 3.618806587724066e-08, "learning_rate": 0.18788513569370052, "loss": 0.0, "num_input_tokens_seen": 9516232, "step": 16750 }, { "epoch": 293.95575221238937, "grad_norm": 3.3374025321109e-08, "learning_rate": 0.1878281376510271, "loss": 0.0, "num_input_tokens_seen": 9518824, "step": 16755 }, { "epoch": 294.0353982300885, "grad_norm": 3.581173757538636e-08, "learning_rate": 0.18777113377477941, "loss": 0.0, "num_input_tokens_seen": 9521096, "step": 16760 }, { "epoch": 294.12389380530976, "grad_norm": 2.4314308788575545e-08, "learning_rate": 0.1877141240737483, "loss": 0.0, "num_input_tokens_seen": 9524568, "step": 16765 }, { "epoch": 294.21238938053096, "grad_norm": 7.729803996880946e-08, "learning_rate": 0.18765710855672527, "loss": 0.0, "num_input_tokens_seen": 9527400, "step": 16770 }, { "epoch": 294.3008849557522, "grad_norm": 4.1474628176274564e-08, "learning_rate": 0.18760008723250288, "loss": 0.0, "num_input_tokens_seen": 9530360, "step": 16775 }, { "epoch": 294.3893805309734, "grad_norm": 2.6699408905983546e-08, "learning_rate": 0.18754306010987457, "loss": 0.0, "num_input_tokens_seen": 9533032, "step": 16780 }, { "epoch": 294.4778761061947, "grad_norm": 3.722354335877753e-08, "learning_rate": 0.18748602719763457, "loss": 0.0, "num_input_tokens_seen": 9535704, "step": 16785 }, { "epoch": 294.56637168141594, "grad_norm": 1.1901504137767915e-07, "learning_rate": 0.18742898850457804, "loss": 0.0, "num_input_tokens_seen": 9538488, "step": 16790 }, { "epoch": 294.65486725663715, "grad_norm": 2.572314627968808e-08, "learning_rate": 0.1873719440395012, "loss": 0.0, "num_input_tokens_seen": 9541448, "step": 16795 }, { "epoch": 294.7433628318584, "grad_norm": 3.163159334462762e-08, "learning_rate": 0.1873148938112009, "loss": 0.0, "num_input_tokens_seen": 9544328, "step": 16800 }, { "epoch": 294.7433628318584, "eval_loss": 0.5792176723480225, "eval_runtime": 0.9357, "eval_samples_per_second": 26.719, "eval_steps_per_second": 13.894, "num_input_tokens_seen": 9544328, "step": 16800 }, { "epoch": 294.83185840707966, "grad_norm": 2.961683165381146e-08, "learning_rate": 0.18725783782847508, "loss": 0.0, "num_input_tokens_seen": 9547144, "step": 16805 }, { "epoch": 294.92035398230087, "grad_norm": 3.014040217408365e-08, "learning_rate": 0.1872007761001224, "loss": 0.0, "num_input_tokens_seen": 9549912, "step": 16810 }, { "epoch": 295.0, "grad_norm": 1.2850875918957172e-08, "learning_rate": 0.1871437086349426, "loss": 0.0, "num_input_tokens_seen": 9552440, "step": 16815 }, { "epoch": 295.08849557522126, "grad_norm": 5.006890191339153e-08, "learning_rate": 0.18708663544173615, "loss": 0.0, "num_input_tokens_seen": 9554984, "step": 16820 }, { "epoch": 295.17699115044246, "grad_norm": 3.834453821127681e-08, "learning_rate": 0.18702955652930442, "loss": 0.0, "num_input_tokens_seen": 9557992, "step": 16825 }, { "epoch": 295.2654867256637, "grad_norm": 2.581457714256885e-08, "learning_rate": 0.18697247190644972, "loss": 0.0, "num_input_tokens_seen": 9560760, "step": 16830 }, { "epoch": 295.353982300885, "grad_norm": 1.1647167674766479e-08, "learning_rate": 0.18691538158197527, "loss": 0.0, "num_input_tokens_seen": 9563624, "step": 16835 }, { "epoch": 295.4424778761062, "grad_norm": 6.591552903500997e-08, "learning_rate": 0.1868582855646851, "loss": 0.0, "num_input_tokens_seen": 9566248, "step": 16840 }, { "epoch": 295.53097345132744, "grad_norm": 5.6345562882142985e-08, "learning_rate": 0.18680118386338404, "loss": 0.0, "num_input_tokens_seen": 9568904, "step": 16845 }, { "epoch": 295.6194690265487, "grad_norm": 1.3678167931630014e-08, "learning_rate": 0.18674407648687794, "loss": 0.0, "num_input_tokens_seen": 9571656, "step": 16850 }, { "epoch": 295.7079646017699, "grad_norm": 4.091856098398239e-08, "learning_rate": 0.1866869634439736, "loss": 0.0, "num_input_tokens_seen": 9574584, "step": 16855 }, { "epoch": 295.79646017699116, "grad_norm": 3.691071626121811e-08, "learning_rate": 0.18662984474347838, "loss": 0.0, "num_input_tokens_seen": 9577592, "step": 16860 }, { "epoch": 295.88495575221236, "grad_norm": 3.5944648146823965e-08, "learning_rate": 0.1865727203942008, "loss": 0.0, "num_input_tokens_seen": 9580920, "step": 16865 }, { "epoch": 295.9734513274336, "grad_norm": 2.9852337490865466e-08, "learning_rate": 0.1865155904049501, "loss": 0.0, "num_input_tokens_seen": 9583784, "step": 16870 }, { "epoch": 296.05309734513276, "grad_norm": 4.145040932712618e-08, "learning_rate": 0.1864584547845365, "loss": 0.0, "num_input_tokens_seen": 9586176, "step": 16875 }, { "epoch": 296.14159292035396, "grad_norm": 3.149389371515099e-08, "learning_rate": 0.186401313541771, "loss": 0.0, "num_input_tokens_seen": 9588896, "step": 16880 }, { "epoch": 296.2300884955752, "grad_norm": 2.6964842803067768e-08, "learning_rate": 0.18634416668546552, "loss": 0.0, "num_input_tokens_seen": 9591328, "step": 16885 }, { "epoch": 296.3185840707965, "grad_norm": 2.046340874528596e-08, "learning_rate": 0.1862870142244328, "loss": 0.0, "num_input_tokens_seen": 9593808, "step": 16890 }, { "epoch": 296.4070796460177, "grad_norm": 5.55764465559605e-08, "learning_rate": 0.1862298561674865, "loss": 0.0, "num_input_tokens_seen": 9596912, "step": 16895 }, { "epoch": 296.49557522123894, "grad_norm": 3.77847406696219e-08, "learning_rate": 0.18617269252344104, "loss": 0.0, "num_input_tokens_seen": 9600096, "step": 16900 }, { "epoch": 296.5840707964602, "grad_norm": 5.608920972122178e-08, "learning_rate": 0.18611552330111186, "loss": 0.0, "num_input_tokens_seen": 9602528, "step": 16905 }, { "epoch": 296.6725663716814, "grad_norm": 1.719443787351338e-08, "learning_rate": 0.18605834850931507, "loss": 0.0, "num_input_tokens_seen": 9605296, "step": 16910 }, { "epoch": 296.76106194690266, "grad_norm": 2.9844613891327754e-08, "learning_rate": 0.18600116815686787, "loss": 0.0, "num_input_tokens_seen": 9608256, "step": 16915 }, { "epoch": 296.8495575221239, "grad_norm": 5.350101162093779e-08, "learning_rate": 0.1859439822525881, "loss": 0.0, "num_input_tokens_seen": 9611536, "step": 16920 }, { "epoch": 296.9380530973451, "grad_norm": 8.16066716424757e-08, "learning_rate": 0.18588679080529455, "loss": 0.0, "num_input_tokens_seen": 9614832, "step": 16925 }, { "epoch": 297.01769911504425, "grad_norm": 4.64931666499524e-08, "learning_rate": 0.1858295938238069, "loss": 0.0, "num_input_tokens_seen": 9617344, "step": 16930 }, { "epoch": 297.1061946902655, "grad_norm": 6.300354726818114e-08, "learning_rate": 0.18577239131694562, "loss": 0.0, "num_input_tokens_seen": 9620544, "step": 16935 }, { "epoch": 297.1946902654867, "grad_norm": 4.9674369506647054e-08, "learning_rate": 0.18571518329353204, "loss": 0.0, "num_input_tokens_seen": 9623280, "step": 16940 }, { "epoch": 297.283185840708, "grad_norm": 4.554261678890725e-08, "learning_rate": 0.18565796976238838, "loss": 0.0, "num_input_tokens_seen": 9626464, "step": 16945 }, { "epoch": 297.37168141592923, "grad_norm": 5.000198655125132e-08, "learning_rate": 0.18560075073233764, "loss": 0.0, "num_input_tokens_seen": 9629072, "step": 16950 }, { "epoch": 297.46017699115043, "grad_norm": 7.067796303772411e-08, "learning_rate": 0.18554352621220377, "loss": 0.0, "num_input_tokens_seen": 9631872, "step": 16955 }, { "epoch": 297.5486725663717, "grad_norm": 3.767374323615513e-08, "learning_rate": 0.18548629621081153, "loss": 0.0, "num_input_tokens_seen": 9634640, "step": 16960 }, { "epoch": 297.6371681415929, "grad_norm": 1.493044443634517e-08, "learning_rate": 0.18542906073698645, "loss": 0.0, "num_input_tokens_seen": 9637360, "step": 16965 }, { "epoch": 297.72566371681415, "grad_norm": 8.849911381503262e-08, "learning_rate": 0.18537181979955494, "loss": 0.0, "num_input_tokens_seen": 9640224, "step": 16970 }, { "epoch": 297.8141592920354, "grad_norm": 2.0982536597102808e-08, "learning_rate": 0.18531457340734434, "loss": 0.0, "num_input_tokens_seen": 9642800, "step": 16975 }, { "epoch": 297.9026548672566, "grad_norm": 3.7881065395595215e-08, "learning_rate": 0.1852573215691827, "loss": 0.0, "num_input_tokens_seen": 9645600, "step": 16980 }, { "epoch": 297.9911504424779, "grad_norm": 1.8675574864346345e-08, "learning_rate": 0.18520006429389904, "loss": 0.0, "num_input_tokens_seen": 9648912, "step": 16985 }, { "epoch": 298.070796460177, "grad_norm": 4.1193338518041855e-08, "learning_rate": 0.1851428015903231, "loss": 0.0, "num_input_tokens_seen": 9651656, "step": 16990 }, { "epoch": 298.1592920353982, "grad_norm": 1.529139659339762e-08, "learning_rate": 0.1850855334672855, "loss": 0.0, "num_input_tokens_seen": 9654552, "step": 16995 }, { "epoch": 298.24778761061947, "grad_norm": 2.4684826627208167e-08, "learning_rate": 0.1850282599336178, "loss": 0.0, "num_input_tokens_seen": 9657432, "step": 17000 }, { "epoch": 298.24778761061947, "eval_loss": 0.5745419859886169, "eval_runtime": 0.9411, "eval_samples_per_second": 26.565, "eval_steps_per_second": 13.814, "num_input_tokens_seen": 9657432, "step": 17000 }, { "epoch": 298.3362831858407, "grad_norm": 2.0465824590587545e-08, "learning_rate": 0.18497098099815215, "loss": 0.0, "num_input_tokens_seen": 9660344, "step": 17005 }, { "epoch": 298.42477876106193, "grad_norm": 3.763595302075373e-08, "learning_rate": 0.18491369666972174, "loss": 0.0, "num_input_tokens_seen": 9662776, "step": 17010 }, { "epoch": 298.5132743362832, "grad_norm": 5.158343441280522e-08, "learning_rate": 0.1848564069571606, "loss": 0.0, "num_input_tokens_seen": 9665656, "step": 17015 }, { "epoch": 298.60176991150445, "grad_norm": 2.339233340364899e-08, "learning_rate": 0.18479911186930348, "loss": 0.0, "num_input_tokens_seen": 9668296, "step": 17020 }, { "epoch": 298.69026548672565, "grad_norm": 2.935281351312824e-08, "learning_rate": 0.18474181141498597, "loss": 0.0, "num_input_tokens_seen": 9671640, "step": 17025 }, { "epoch": 298.7787610619469, "grad_norm": 5.3919677611702355e-08, "learning_rate": 0.18468450560304453, "loss": 0.0, "num_input_tokens_seen": 9674872, "step": 17030 }, { "epoch": 298.86725663716817, "grad_norm": 4.663074903987763e-08, "learning_rate": 0.1846271944423165, "loss": 0.0, "num_input_tokens_seen": 9677672, "step": 17035 }, { "epoch": 298.95575221238937, "grad_norm": 4.397637098918494e-08, "learning_rate": 0.18456987794163993, "loss": 0.0, "num_input_tokens_seen": 9680264, "step": 17040 }, { "epoch": 299.0353982300885, "grad_norm": 1.6805094205096793e-08, "learning_rate": 0.18451255610985373, "loss": 0.0, "num_input_tokens_seen": 9682552, "step": 17045 }, { "epoch": 299.12389380530976, "grad_norm": 5.1185374161377695e-08, "learning_rate": 0.18445522895579766, "loss": 0.0, "num_input_tokens_seen": 9685464, "step": 17050 }, { "epoch": 299.21238938053096, "grad_norm": 3.78581255233712e-08, "learning_rate": 0.1843978964883123, "loss": 0.0, "num_input_tokens_seen": 9688632, "step": 17055 }, { "epoch": 299.3008849557522, "grad_norm": 3.35241985283119e-08, "learning_rate": 0.18434055871623906, "loss": 0.0, "num_input_tokens_seen": 9691288, "step": 17060 }, { "epoch": 299.3893805309734, "grad_norm": 6.846974542895623e-08, "learning_rate": 0.18428321564842007, "loss": 0.0, "num_input_tokens_seen": 9694168, "step": 17065 }, { "epoch": 299.4778761061947, "grad_norm": 3.927053171537409e-08, "learning_rate": 0.18422586729369841, "loss": 0.0, "num_input_tokens_seen": 9697080, "step": 17070 }, { "epoch": 299.56637168141594, "grad_norm": 4.05815789861208e-08, "learning_rate": 0.1841685136609179, "loss": 0.0, "num_input_tokens_seen": 9699832, "step": 17075 }, { "epoch": 299.65486725663715, "grad_norm": 1.8075443364296007e-08, "learning_rate": 0.18411115475892326, "loss": 0.0, "num_input_tokens_seen": 9702712, "step": 17080 }, { "epoch": 299.7433628318584, "grad_norm": 8.423398867307696e-08, "learning_rate": 0.18405379059655982, "loss": 0.0, "num_input_tokens_seen": 9705416, "step": 17085 }, { "epoch": 299.83185840707966, "grad_norm": 2.2795122234242626e-08, "learning_rate": 0.1839964211826739, "loss": 0.0, "num_input_tokens_seen": 9708824, "step": 17090 }, { "epoch": 299.92035398230087, "grad_norm": 3.6615862342159744e-08, "learning_rate": 0.18393904652611265, "loss": 0.0, "num_input_tokens_seen": 9711688, "step": 17095 }, { "epoch": 300.0, "grad_norm": 1.481422273741373e-08, "learning_rate": 0.18388166663572392, "loss": 0.0, "num_input_tokens_seen": 9713848, "step": 17100 }, { "epoch": 300.08849557522126, "grad_norm": 2.0962501068311212e-08, "learning_rate": 0.18382428152035643, "loss": 0.0, "num_input_tokens_seen": 9716568, "step": 17105 }, { "epoch": 300.17699115044246, "grad_norm": 4.435221256926525e-08, "learning_rate": 0.1837668911888596, "loss": 0.0, "num_input_tokens_seen": 9719288, "step": 17110 }, { "epoch": 300.2654867256637, "grad_norm": 3.290381300757872e-08, "learning_rate": 0.18370949565008388, "loss": 0.0, "num_input_tokens_seen": 9722344, "step": 17115 }, { "epoch": 300.353982300885, "grad_norm": 1.6830760785069288e-08, "learning_rate": 0.1836520949128803, "loss": 0.0, "num_input_tokens_seen": 9725496, "step": 17120 }, { "epoch": 300.4424778761062, "grad_norm": 2.8398792650818905e-08, "learning_rate": 0.18359468898610076, "loss": 0.0, "num_input_tokens_seen": 9728456, "step": 17125 }, { "epoch": 300.53097345132744, "grad_norm": 3.014494609487883e-08, "learning_rate": 0.18353727787859797, "loss": 0.0, "num_input_tokens_seen": 9731016, "step": 17130 }, { "epoch": 300.6194690265487, "grad_norm": 1.9530506989440255e-08, "learning_rate": 0.18347986159922552, "loss": 0.0, "num_input_tokens_seen": 9734408, "step": 17135 }, { "epoch": 300.7079646017699, "grad_norm": 4.811677456473262e-08, "learning_rate": 0.1834224401568377, "loss": 0.0, "num_input_tokens_seen": 9737176, "step": 17140 }, { "epoch": 300.79646017699116, "grad_norm": 5.697454596997886e-08, "learning_rate": 0.1833650135602896, "loss": 0.0, "num_input_tokens_seen": 9739896, "step": 17145 }, { "epoch": 300.88495575221236, "grad_norm": 2.2096170226859613e-08, "learning_rate": 0.18330758181843707, "loss": 0.0, "num_input_tokens_seen": 9742952, "step": 17150 }, { "epoch": 300.9734513274336, "grad_norm": 1.1858986681545503e-07, "learning_rate": 0.18325014494013686, "loss": 0.0, "num_input_tokens_seen": 9745896, "step": 17155 }, { "epoch": 301.05309734513276, "grad_norm": 2.6347242609858768e-08, "learning_rate": 0.18319270293424647, "loss": 0.0, "num_input_tokens_seen": 9748024, "step": 17160 }, { "epoch": 301.14159292035396, "grad_norm": 3.522952951584557e-08, "learning_rate": 0.18313525580962417, "loss": 0.0, "num_input_tokens_seen": 9750664, "step": 17165 }, { "epoch": 301.2300884955752, "grad_norm": 3.230042011637124e-08, "learning_rate": 0.18307780357512896, "loss": 0.0, "num_input_tokens_seen": 9753512, "step": 17170 }, { "epoch": 301.3185840707965, "grad_norm": 3.341066800999215e-08, "learning_rate": 0.1830203462396208, "loss": 0.0, "num_input_tokens_seen": 9756664, "step": 17175 }, { "epoch": 301.4070796460177, "grad_norm": 2.9485537567097708e-08, "learning_rate": 0.18296288381196033, "loss": 0.0, "num_input_tokens_seen": 9759736, "step": 17180 }, { "epoch": 301.49557522123894, "grad_norm": 2.7112134759477158e-08, "learning_rate": 0.1829054163010089, "loss": 0.0, "num_input_tokens_seen": 9762728, "step": 17185 }, { "epoch": 301.5840707964602, "grad_norm": 3.825106276167389e-08, "learning_rate": 0.18284794371562874, "loss": 0.0, "num_input_tokens_seen": 9765288, "step": 17190 }, { "epoch": 301.6725663716814, "grad_norm": 1.2838841101370235e-08, "learning_rate": 0.18279046606468288, "loss": 0.0, "num_input_tokens_seen": 9768312, "step": 17195 }, { "epoch": 301.76106194690266, "grad_norm": 2.753484551476504e-08, "learning_rate": 0.1827329833570351, "loss": 0.0, "num_input_tokens_seen": 9770824, "step": 17200 }, { "epoch": 301.76106194690266, "eval_loss": 0.5907140374183655, "eval_runtime": 0.9299, "eval_samples_per_second": 26.884, "eval_steps_per_second": 13.98, "num_input_tokens_seen": 9770824, "step": 17200 }, { "epoch": 301.8495575221239, "grad_norm": 8.460879996619042e-08, "learning_rate": 0.18267549560154991, "loss": 0.0, "num_input_tokens_seen": 9773640, "step": 17205 }, { "epoch": 301.9380530973451, "grad_norm": 3.8869423235610157e-08, "learning_rate": 0.18261800280709267, "loss": 0.0, "num_input_tokens_seen": 9777448, "step": 17210 }, { "epoch": 302.01769911504425, "grad_norm": 2.1254525250924416e-08, "learning_rate": 0.18256050498252957, "loss": 0.0, "num_input_tokens_seen": 9779976, "step": 17215 }, { "epoch": 302.1061946902655, "grad_norm": 3.7917409656529344e-08, "learning_rate": 0.18250300213672735, "loss": 0.0, "num_input_tokens_seen": 9782440, "step": 17220 }, { "epoch": 302.1946902654867, "grad_norm": 2.702996582115702e-08, "learning_rate": 0.18244549427855378, "loss": 0.0, "num_input_tokens_seen": 9785128, "step": 17225 }, { "epoch": 302.283185840708, "grad_norm": 4.234401984604119e-08, "learning_rate": 0.1823879814168772, "loss": 0.0, "num_input_tokens_seen": 9788056, "step": 17230 }, { "epoch": 302.37168141592923, "grad_norm": 2.25520366825549e-08, "learning_rate": 0.18233046356056692, "loss": 0.0, "num_input_tokens_seen": 9791032, "step": 17235 }, { "epoch": 302.46017699115043, "grad_norm": 3.6480916065784186e-08, "learning_rate": 0.18227294071849284, "loss": 0.0, "num_input_tokens_seen": 9793784, "step": 17240 }, { "epoch": 302.5486725663717, "grad_norm": 6.592506451852387e-08, "learning_rate": 0.18221541289952578, "loss": 0.0, "num_input_tokens_seen": 9796808, "step": 17245 }, { "epoch": 302.6371681415929, "grad_norm": 2.178947688946664e-08, "learning_rate": 0.18215788011253717, "loss": 0.0, "num_input_tokens_seen": 9799528, "step": 17250 }, { "epoch": 302.72566371681415, "grad_norm": 2.1903499458630904e-08, "learning_rate": 0.18210034236639935, "loss": 0.0, "num_input_tokens_seen": 9802040, "step": 17255 }, { "epoch": 302.8141592920354, "grad_norm": 5.4742944399777116e-08, "learning_rate": 0.1820427996699853, "loss": 0.0, "num_input_tokens_seen": 9804648, "step": 17260 }, { "epoch": 302.9026548672566, "grad_norm": 3.224980815730305e-08, "learning_rate": 0.1819852520321689, "loss": 0.0, "num_input_tokens_seen": 9807784, "step": 17265 }, { "epoch": 302.9911504424779, "grad_norm": 2.3666672177569126e-08, "learning_rate": 0.18192769946182466, "loss": 0.0, "num_input_tokens_seen": 9811080, "step": 17270 }, { "epoch": 303.070796460177, "grad_norm": 2.979492919052973e-08, "learning_rate": 0.18187014196782794, "loss": 0.0, "num_input_tokens_seen": 9813280, "step": 17275 }, { "epoch": 303.1592920353982, "grad_norm": 1.67226161806866e-08, "learning_rate": 0.18181257955905486, "loss": 0.0, "num_input_tokens_seen": 9816496, "step": 17280 }, { "epoch": 303.24778761061947, "grad_norm": 2.1843913344810062e-08, "learning_rate": 0.18175501224438217, "loss": 0.0, "num_input_tokens_seen": 9819424, "step": 17285 }, { "epoch": 303.3362831858407, "grad_norm": 3.538334425456924e-08, "learning_rate": 0.18169744003268756, "loss": 0.0, "num_input_tokens_seen": 9822464, "step": 17290 }, { "epoch": 303.42477876106193, "grad_norm": 3.8928011036887256e-08, "learning_rate": 0.18163986293284937, "loss": 0.0, "num_input_tokens_seen": 9825504, "step": 17295 }, { "epoch": 303.5132743362832, "grad_norm": 3.043370710997806e-08, "learning_rate": 0.18158228095374673, "loss": 0.0, "num_input_tokens_seen": 9828240, "step": 17300 }, { "epoch": 303.60176991150445, "grad_norm": 4.5588105734850615e-08, "learning_rate": 0.18152469410425945, "loss": 0.0, "num_input_tokens_seen": 9831120, "step": 17305 }, { "epoch": 303.69026548672565, "grad_norm": 3.518137248192943e-08, "learning_rate": 0.18146710239326813, "loss": 0.0, "num_input_tokens_seen": 9834368, "step": 17310 }, { "epoch": 303.7787610619469, "grad_norm": 5.079579068478779e-08, "learning_rate": 0.18140950582965423, "loss": 0.0, "num_input_tokens_seen": 9836976, "step": 17315 }, { "epoch": 303.86725663716817, "grad_norm": 5.085959386974537e-08, "learning_rate": 0.1813519044222998, "loss": 0.0, "num_input_tokens_seen": 9839680, "step": 17320 }, { "epoch": 303.95575221238937, "grad_norm": 9.627172659065764e-08, "learning_rate": 0.18129429818008772, "loss": 0.0, "num_input_tokens_seen": 9842240, "step": 17325 }, { "epoch": 304.0353982300885, "grad_norm": 3.881357457657941e-08, "learning_rate": 0.18123668711190163, "loss": 0.0, "num_input_tokens_seen": 9844824, "step": 17330 }, { "epoch": 304.12389380530976, "grad_norm": 2.155466738429368e-08, "learning_rate": 0.18117907122662583, "loss": 0.0, "num_input_tokens_seen": 9847880, "step": 17335 }, { "epoch": 304.21238938053096, "grad_norm": 2.201389648348595e-08, "learning_rate": 0.1811214505331454, "loss": 0.0, "num_input_tokens_seen": 9850632, "step": 17340 }, { "epoch": 304.3008849557522, "grad_norm": 2.2083453288246346e-08, "learning_rate": 0.1810638250403462, "loss": 0.0, "num_input_tokens_seen": 9853176, "step": 17345 }, { "epoch": 304.3893805309734, "grad_norm": 2.8793548878525144e-08, "learning_rate": 0.1810061947571148, "loss": 0.0, "num_input_tokens_seen": 9855736, "step": 17350 }, { "epoch": 304.4778761061947, "grad_norm": 4.892988769711337e-08, "learning_rate": 0.1809485596923385, "loss": 0.0, "num_input_tokens_seen": 9858520, "step": 17355 }, { "epoch": 304.56637168141594, "grad_norm": 3.909457646500414e-08, "learning_rate": 0.18089091985490546, "loss": 0.0, "num_input_tokens_seen": 9861288, "step": 17360 }, { "epoch": 304.65486725663715, "grad_norm": 3.358822198151756e-08, "learning_rate": 0.18083327525370432, "loss": 0.0, "num_input_tokens_seen": 9863976, "step": 17365 }, { "epoch": 304.7433628318584, "grad_norm": 4.301829292785442e-08, "learning_rate": 0.18077562589762464, "loss": 0.0, "num_input_tokens_seen": 9867192, "step": 17370 }, { "epoch": 304.83185840707966, "grad_norm": 9.283578350505195e-08, "learning_rate": 0.1807179717955567, "loss": 0.0, "num_input_tokens_seen": 9870552, "step": 17375 }, { "epoch": 304.92035398230087, "grad_norm": 8.00100252718039e-09, "learning_rate": 0.1806603129563915, "loss": 0.0, "num_input_tokens_seen": 9873416, "step": 17380 }, { "epoch": 305.0, "grad_norm": 2.9843127435924544e-07, "learning_rate": 0.1806026493890208, "loss": 0.0, "num_input_tokens_seen": 9875816, "step": 17385 }, { "epoch": 305.08849557522126, "grad_norm": 2.2709354396965864e-08, "learning_rate": 0.18054498110233688, "loss": 0.0, "num_input_tokens_seen": 9879064, "step": 17390 }, { "epoch": 305.17699115044246, "grad_norm": 2.366227747074845e-08, "learning_rate": 0.1804873081052331, "loss": 0.0, "num_input_tokens_seen": 9881928, "step": 17395 }, { "epoch": 305.2654867256637, "grad_norm": 1.9995521682858453e-08, "learning_rate": 0.18042963040660326, "loss": 0.0, "num_input_tokens_seen": 9884648, "step": 17400 }, { "epoch": 305.2654867256637, "eval_loss": 0.5804447531700134, "eval_runtime": 0.9527, "eval_samples_per_second": 26.24, "eval_steps_per_second": 13.645, "num_input_tokens_seen": 9884648, "step": 17400 }, { "epoch": 305.353982300885, "grad_norm": 4.283429788642934e-08, "learning_rate": 0.180371948015342, "loss": 0.0, "num_input_tokens_seen": 9887384, "step": 17405 }, { "epoch": 305.4424778761062, "grad_norm": 3.3788996489647616e-08, "learning_rate": 0.18031426094034472, "loss": 0.0, "num_input_tokens_seen": 9890344, "step": 17410 }, { "epoch": 305.53097345132744, "grad_norm": 5.2805820160983785e-08, "learning_rate": 0.18025656919050737, "loss": 0.0, "num_input_tokens_seen": 9893144, "step": 17415 }, { "epoch": 305.6194690265487, "grad_norm": 2.5231379652268515e-08, "learning_rate": 0.18019887277472688, "loss": 0.0, "num_input_tokens_seen": 9895848, "step": 17420 }, { "epoch": 305.7079646017699, "grad_norm": 2.0794196586848557e-08, "learning_rate": 0.18014117170190067, "loss": 0.0, "num_input_tokens_seen": 9899160, "step": 17425 }, { "epoch": 305.79646017699116, "grad_norm": 4.026223976438814e-08, "learning_rate": 0.18008346598092703, "loss": 0.0, "num_input_tokens_seen": 9902024, "step": 17430 }, { "epoch": 305.88495575221236, "grad_norm": 3.763779687915303e-08, "learning_rate": 0.18002575562070489, "loss": 0.0, "num_input_tokens_seen": 9904728, "step": 17435 }, { "epoch": 305.9734513274336, "grad_norm": 2.472095239625105e-08, "learning_rate": 0.1799680406301339, "loss": 0.0, "num_input_tokens_seen": 9907576, "step": 17440 }, { "epoch": 306.05309734513276, "grad_norm": 1.954628103817413e-08, "learning_rate": 0.17991032101811447, "loss": 0.0, "num_input_tokens_seen": 9909608, "step": 17445 }, { "epoch": 306.14159292035396, "grad_norm": 3.244623059117657e-08, "learning_rate": 0.1798525967935476, "loss": 0.0, "num_input_tokens_seen": 9912584, "step": 17450 }, { "epoch": 306.2300884955752, "grad_norm": 2.853486869014432e-08, "learning_rate": 0.17979486796533517, "loss": 0.0, "num_input_tokens_seen": 9915208, "step": 17455 }, { "epoch": 306.3185840707965, "grad_norm": 2.4397111886287348e-08, "learning_rate": 0.1797371345423797, "loss": 0.0, "num_input_tokens_seen": 9918184, "step": 17460 }, { "epoch": 306.4070796460177, "grad_norm": 4.546855691955898e-08, "learning_rate": 0.17967939653358436, "loss": 0.0, "num_input_tokens_seen": 9920776, "step": 17465 }, { "epoch": 306.49557522123894, "grad_norm": 2.1557857721177243e-08, "learning_rate": 0.17962165394785315, "loss": 0.0, "num_input_tokens_seen": 9923400, "step": 17470 }, { "epoch": 306.5840707964602, "grad_norm": 3.0174735599075575e-08, "learning_rate": 0.17956390679409057, "loss": 0.0, "num_input_tokens_seen": 9927256, "step": 17475 }, { "epoch": 306.6725663716814, "grad_norm": 1.5208980741476807e-08, "learning_rate": 0.1795061550812021, "loss": 0.0, "num_input_tokens_seen": 9929704, "step": 17480 }, { "epoch": 306.76106194690266, "grad_norm": 4.5653084868035876e-08, "learning_rate": 0.1794483988180937, "loss": 0.0, "num_input_tokens_seen": 9932872, "step": 17485 }, { "epoch": 306.8495575221239, "grad_norm": 6.080500725147431e-08, "learning_rate": 0.17939063801367214, "loss": 0.0, "num_input_tokens_seen": 9935608, "step": 17490 }, { "epoch": 306.9380530973451, "grad_norm": 2.7800538759947813e-08, "learning_rate": 0.17933287267684483, "loss": 0.0, "num_input_tokens_seen": 9938632, "step": 17495 }, { "epoch": 307.01769911504425, "grad_norm": 3.716049334911986e-08, "learning_rate": 0.17927510281651995, "loss": 0.0, "num_input_tokens_seen": 9940824, "step": 17500 }, { "epoch": 307.1061946902655, "grad_norm": 3.454014319004273e-08, "learning_rate": 0.17921732844160634, "loss": 0.0, "num_input_tokens_seen": 9943704, "step": 17505 }, { "epoch": 307.1946902654867, "grad_norm": 2.6029793431803228e-08, "learning_rate": 0.17915954956101351, "loss": 0.0, "num_input_tokens_seen": 9946184, "step": 17510 }, { "epoch": 307.283185840708, "grad_norm": 2.8860005940600786e-08, "learning_rate": 0.17910176618365165, "loss": 0.0, "num_input_tokens_seen": 9949000, "step": 17515 }, { "epoch": 307.37168141592923, "grad_norm": 4.0711775284307805e-08, "learning_rate": 0.17904397831843177, "loss": 0.0, "num_input_tokens_seen": 9952072, "step": 17520 }, { "epoch": 307.46017699115043, "grad_norm": 2.943823318446448e-08, "learning_rate": 0.17898618597426547, "loss": 0.0, "num_input_tokens_seen": 9954440, "step": 17525 }, { "epoch": 307.5486725663717, "grad_norm": 2.0201213146719965e-08, "learning_rate": 0.17892838916006495, "loss": 0.0, "num_input_tokens_seen": 9957560, "step": 17530 }, { "epoch": 307.6371681415929, "grad_norm": 3.506389845142621e-08, "learning_rate": 0.17887058788474333, "loss": 0.0, "num_input_tokens_seen": 9960216, "step": 17535 }, { "epoch": 307.72566371681415, "grad_norm": 2.2905688012997416e-08, "learning_rate": 0.17881278215721427, "loss": 0.0, "num_input_tokens_seen": 9963288, "step": 17540 }, { "epoch": 307.8141592920354, "grad_norm": 2.3333022625138256e-08, "learning_rate": 0.1787549719863921, "loss": 0.0, "num_input_tokens_seen": 9965960, "step": 17545 }, { "epoch": 307.9026548672566, "grad_norm": 2.6960890409100102e-08, "learning_rate": 0.17869715738119188, "loss": 0.0, "num_input_tokens_seen": 9969000, "step": 17550 }, { "epoch": 307.9911504424779, "grad_norm": 2.1280564865833185e-08, "learning_rate": 0.17863933835052936, "loss": 0.0, "num_input_tokens_seen": 9971928, "step": 17555 }, { "epoch": 308.070796460177, "grad_norm": 1.5566447686410356e-08, "learning_rate": 0.17858151490332097, "loss": 0.0, "num_input_tokens_seen": 9974680, "step": 17560 }, { "epoch": 308.1592920353982, "grad_norm": 3.775135937189589e-08, "learning_rate": 0.17852368704848381, "loss": 0.0, "num_input_tokens_seen": 9977352, "step": 17565 }, { "epoch": 308.24778761061947, "grad_norm": 1.871382160345547e-08, "learning_rate": 0.17846585479493565, "loss": 0.0, "num_input_tokens_seen": 9980584, "step": 17570 }, { "epoch": 308.3362831858407, "grad_norm": 5.1029221737053376e-08, "learning_rate": 0.178408018151595, "loss": 0.0, "num_input_tokens_seen": 9983064, "step": 17575 }, { "epoch": 308.42477876106193, "grad_norm": 1.6652190737431738e-08, "learning_rate": 0.17835017712738085, "loss": 0.0, "num_input_tokens_seen": 9985768, "step": 17580 }, { "epoch": 308.5132743362832, "grad_norm": 1.4837170603243521e-08, "learning_rate": 0.17829233173121323, "loss": 0.0, "num_input_tokens_seen": 9988856, "step": 17585 }, { "epoch": 308.60176991150445, "grad_norm": 3.310266905032222e-08, "learning_rate": 0.17823448197201244, "loss": 0.0, "num_input_tokens_seen": 9991352, "step": 17590 }, { "epoch": 308.69026548672565, "grad_norm": 3.6944566517149724e-08, "learning_rate": 0.1781766278586997, "loss": 0.0, "num_input_tokens_seen": 9994584, "step": 17595 }, { "epoch": 308.7787610619469, "grad_norm": 1.957417516962323e-08, "learning_rate": 0.1781187694001969, "loss": 0.0, "num_input_tokens_seen": 9997288, "step": 17600 }, { "epoch": 308.7787610619469, "eval_loss": 0.6051491498947144, "eval_runtime": 0.9375, "eval_samples_per_second": 26.666, "eval_steps_per_second": 13.866, "num_input_tokens_seen": 9997288, "step": 17600 }, { "epoch": 308.86725663716817, "grad_norm": 1.4702422390655556e-08, "learning_rate": 0.1780609066054265, "loss": 0.0, "num_input_tokens_seen": 9999976, "step": 17605 }, { "epoch": 308.95575221238937, "grad_norm": 3.413185112322026e-08, "learning_rate": 0.17800303948331164, "loss": 0.0, "num_input_tokens_seen": 10003320, "step": 17610 }, { "epoch": 309.0353982300885, "grad_norm": 3.325484598803996e-08, "learning_rate": 0.1779451680427762, "loss": 0.0, "num_input_tokens_seen": 10005664, "step": 17615 }, { "epoch": 309.12389380530976, "grad_norm": 3.8090522735956256e-08, "learning_rate": 0.17788729229274464, "loss": 0.0, "num_input_tokens_seen": 10008288, "step": 17620 }, { "epoch": 309.21238938053096, "grad_norm": 3.1150396040402484e-08, "learning_rate": 0.17782941224214222, "loss": 0.0, "num_input_tokens_seen": 10011440, "step": 17625 }, { "epoch": 309.3008849557522, "grad_norm": 3.800900927330986e-08, "learning_rate": 0.17777152789989464, "loss": 0.0, "num_input_tokens_seen": 10014720, "step": 17630 }, { "epoch": 309.3893805309734, "grad_norm": 3.4252494174324966e-08, "learning_rate": 0.17771363927492845, "loss": 0.0, "num_input_tokens_seen": 10017408, "step": 17635 }, { "epoch": 309.4778761061947, "grad_norm": 1.5553482057839574e-08, "learning_rate": 0.17765574637617085, "loss": 0.0, "num_input_tokens_seen": 10020096, "step": 17640 }, { "epoch": 309.56637168141594, "grad_norm": 3.755023669782531e-08, "learning_rate": 0.17759784921254962, "loss": 0.0, "num_input_tokens_seen": 10022944, "step": 17645 }, { "epoch": 309.65486725663715, "grad_norm": 3.344444365893651e-08, "learning_rate": 0.1775399477929932, "loss": 0.0, "num_input_tokens_seen": 10025344, "step": 17650 }, { "epoch": 309.7433628318584, "grad_norm": 3.3115984621190364e-08, "learning_rate": 0.17748204212643076, "loss": 0.0, "num_input_tokens_seen": 10028656, "step": 17655 }, { "epoch": 309.83185840707966, "grad_norm": 2.402643772825286e-08, "learning_rate": 0.17742413222179204, "loss": 0.0, "num_input_tokens_seen": 10031616, "step": 17660 }, { "epoch": 309.92035398230087, "grad_norm": 2.2982078462518984e-08, "learning_rate": 0.17736621808800754, "loss": 0.0, "num_input_tokens_seen": 10034416, "step": 17665 }, { "epoch": 310.0, "grad_norm": 1.1491189333412422e-08, "learning_rate": 0.17730829973400827, "loss": 0.0, "num_input_tokens_seen": 10036912, "step": 17670 }, { "epoch": 310.08849557522126, "grad_norm": 7.406769952922332e-08, "learning_rate": 0.17725037716872602, "loss": 0.0, "num_input_tokens_seen": 10039408, "step": 17675 }, { "epoch": 310.17699115044246, "grad_norm": 4.5581735719224525e-08, "learning_rate": 0.17719245040109313, "loss": 0.0, "num_input_tokens_seen": 10042304, "step": 17680 }, { "epoch": 310.2654867256637, "grad_norm": 3.112058166720999e-08, "learning_rate": 0.17713451944004271, "loss": 0.0, "num_input_tokens_seen": 10045232, "step": 17685 }, { "epoch": 310.353982300885, "grad_norm": 2.4843648915862104e-08, "learning_rate": 0.17707658429450843, "loss": 0.0, "num_input_tokens_seen": 10048608, "step": 17690 }, { "epoch": 310.4424778761062, "grad_norm": 7.288542036576473e-08, "learning_rate": 0.1770186449734245, "loss": 0.0, "num_input_tokens_seen": 10051568, "step": 17695 }, { "epoch": 310.53097345132744, "grad_norm": 2.948867283691925e-08, "learning_rate": 0.17696070148572599, "loss": 0.0, "num_input_tokens_seen": 10054544, "step": 17700 }, { "epoch": 310.6194690265487, "grad_norm": 3.889022792691321e-08, "learning_rate": 0.17690275384034856, "loss": 0.0, "num_input_tokens_seen": 10057088, "step": 17705 }, { "epoch": 310.7079646017699, "grad_norm": 1.5657384722089773e-08, "learning_rate": 0.17684480204622835, "loss": 0.0, "num_input_tokens_seen": 10059776, "step": 17710 }, { "epoch": 310.79646017699116, "grad_norm": 4.207302595204965e-08, "learning_rate": 0.1767868461123023, "loss": 0.0, "num_input_tokens_seen": 10062576, "step": 17715 }, { "epoch": 310.88495575221236, "grad_norm": 2.8579403732464925e-08, "learning_rate": 0.176728886047508, "loss": 0.0, "num_input_tokens_seen": 10065632, "step": 17720 }, { "epoch": 310.9734513274336, "grad_norm": 1.3303099066774848e-08, "learning_rate": 0.17667092186078362, "loss": 0.0, "num_input_tokens_seen": 10068256, "step": 17725 }, { "epoch": 311.05309734513276, "grad_norm": 1.2292377782330277e-08, "learning_rate": 0.17661295356106785, "loss": 0.0, "num_input_tokens_seen": 10070480, "step": 17730 }, { "epoch": 311.14159292035396, "grad_norm": 2.2120381970580638e-08, "learning_rate": 0.1765549811573002, "loss": 0.0, "num_input_tokens_seen": 10073136, "step": 17735 }, { "epoch": 311.2300884955752, "grad_norm": 2.4424201328088202e-08, "learning_rate": 0.17649700465842078, "loss": 0.0, "num_input_tokens_seen": 10076160, "step": 17740 }, { "epoch": 311.3185840707965, "grad_norm": 2.0671638623070976e-08, "learning_rate": 0.17643902407337023, "loss": 0.0, "num_input_tokens_seen": 10079312, "step": 17745 }, { "epoch": 311.4070796460177, "grad_norm": 4.1152375729325286e-08, "learning_rate": 0.17638103941108993, "loss": 0.0, "num_input_tokens_seen": 10082208, "step": 17750 }, { "epoch": 311.49557522123894, "grad_norm": 2.4225277783784804e-08, "learning_rate": 0.1763230506805218, "loss": 0.0, "num_input_tokens_seen": 10085312, "step": 17755 }, { "epoch": 311.5840707964602, "grad_norm": 2.5195948438749838e-08, "learning_rate": 0.1762650578906085, "loss": 0.0, "num_input_tokens_seen": 10088352, "step": 17760 }, { "epoch": 311.6725663716814, "grad_norm": 1.4683208426902183e-08, "learning_rate": 0.1762070610502932, "loss": 0.0, "num_input_tokens_seen": 10091216, "step": 17765 }, { "epoch": 311.76106194690266, "grad_norm": 2.5868082786928426e-08, "learning_rate": 0.17614906016851975, "loss": 0.0, "num_input_tokens_seen": 10093760, "step": 17770 }, { "epoch": 311.8495575221239, "grad_norm": 3.312248963993625e-08, "learning_rate": 0.17609105525423258, "loss": 0.0, "num_input_tokens_seen": 10097136, "step": 17775 }, { "epoch": 311.9380530973451, "grad_norm": 4.0875747231439163e-08, "learning_rate": 0.1760330463163768, "loss": 0.0, "num_input_tokens_seen": 10099984, "step": 17780 }, { "epoch": 312.01769911504425, "grad_norm": 1.0893040247594854e-08, "learning_rate": 0.17597503336389816, "loss": 0.0, "num_input_tokens_seen": 10102272, "step": 17785 }, { "epoch": 312.1061946902655, "grad_norm": 4.0469526396691435e-08, "learning_rate": 0.17591701640574298, "loss": 0.0, "num_input_tokens_seen": 10104768, "step": 17790 }, { "epoch": 312.1946902654867, "grad_norm": 2.211688077125018e-08, "learning_rate": 0.17585899545085815, "loss": 0.0, "num_input_tokens_seen": 10108304, "step": 17795 }, { "epoch": 312.283185840708, "grad_norm": 3.5944978549196094e-08, "learning_rate": 0.17580097050819124, "loss": 0.0, "num_input_tokens_seen": 10111472, "step": 17800 }, { "epoch": 312.283185840708, "eval_loss": 0.5829068422317505, "eval_runtime": 0.9161, "eval_samples_per_second": 27.29, "eval_steps_per_second": 14.191, "num_input_tokens_seen": 10111472, "step": 17800 }, { "epoch": 312.37168141592923, "grad_norm": 4.848090284781392e-08, "learning_rate": 0.17574294158669046, "loss": 0.0, "num_input_tokens_seen": 10113968, "step": 17805 }, { "epoch": 312.46017699115043, "grad_norm": 2.23792397946454e-08, "learning_rate": 0.17568490869530456, "loss": 0.0, "num_input_tokens_seen": 10116528, "step": 17810 }, { "epoch": 312.5486725663717, "grad_norm": 4.8029367150093094e-08, "learning_rate": 0.17562687184298295, "loss": 0.0, "num_input_tokens_seen": 10119520, "step": 17815 }, { "epoch": 312.6371681415929, "grad_norm": 2.6664874752668766e-08, "learning_rate": 0.1755688310386757, "loss": 0.0, "num_input_tokens_seen": 10122592, "step": 17820 }, { "epoch": 312.72566371681415, "grad_norm": 2.75298663865442e-08, "learning_rate": 0.17551078629133335, "loss": 0.0, "num_input_tokens_seen": 10125376, "step": 17825 }, { "epoch": 312.8141592920354, "grad_norm": 2.757562178601347e-08, "learning_rate": 0.17545273760990718, "loss": 0.0, "num_input_tokens_seen": 10127920, "step": 17830 }, { "epoch": 312.9026548672566, "grad_norm": 2.9937258005929834e-08, "learning_rate": 0.17539468500334904, "loss": 0.0, "num_input_tokens_seen": 10131136, "step": 17835 }, { "epoch": 312.9911504424779, "grad_norm": 2.5270885828376777e-08, "learning_rate": 0.17533662848061132, "loss": 0.0, "num_input_tokens_seen": 10133664, "step": 17840 }, { "epoch": 313.070796460177, "grad_norm": 6.40913455640657e-08, "learning_rate": 0.1752785680506471, "loss": 0.0, "num_input_tokens_seen": 10136024, "step": 17845 }, { "epoch": 313.1592920353982, "grad_norm": 2.8228601678392806e-08, "learning_rate": 0.17522050372241, "loss": 0.0, "num_input_tokens_seen": 10139064, "step": 17850 }, { "epoch": 313.24778761061947, "grad_norm": 1.9679985641118947e-08, "learning_rate": 0.17516243550485425, "loss": 0.0, "num_input_tokens_seen": 10141944, "step": 17855 }, { "epoch": 313.3362831858407, "grad_norm": 1.7207266722607528e-08, "learning_rate": 0.17510436340693478, "loss": 0.0, "num_input_tokens_seen": 10144392, "step": 17860 }, { "epoch": 313.42477876106193, "grad_norm": 4.036898459958138e-08, "learning_rate": 0.175046287437607, "loss": 0.0, "num_input_tokens_seen": 10146840, "step": 17865 }, { "epoch": 313.5132743362832, "grad_norm": 1.0488474089243027e-08, "learning_rate": 0.17498820760582695, "loss": 0.0, "num_input_tokens_seen": 10150104, "step": 17870 }, { "epoch": 313.60176991150445, "grad_norm": 2.1419173990011586e-08, "learning_rate": 0.1749301239205512, "loss": 0.0, "num_input_tokens_seen": 10152888, "step": 17875 }, { "epoch": 313.69026548672565, "grad_norm": 2.4286030964049132e-08, "learning_rate": 0.1748720363907371, "loss": 0.0, "num_input_tokens_seen": 10155192, "step": 17880 }, { "epoch": 313.7787610619469, "grad_norm": 2.9850330207636944e-08, "learning_rate": 0.17481394502534242, "loss": 0.0, "num_input_tokens_seen": 10157976, "step": 17885 }, { "epoch": 313.86725663716817, "grad_norm": 5.9233379090528615e-08, "learning_rate": 0.17475584983332562, "loss": 0.0, "num_input_tokens_seen": 10161144, "step": 17890 }, { "epoch": 313.95575221238937, "grad_norm": 3.2806294569809324e-08, "learning_rate": 0.17469775082364558, "loss": 0.0, "num_input_tokens_seen": 10164328, "step": 17895 }, { "epoch": 314.0353982300885, "grad_norm": 3.494783484825348e-08, "learning_rate": 0.17463964800526205, "loss": 0.0, "num_input_tokens_seen": 10167128, "step": 17900 }, { "epoch": 314.12389380530976, "grad_norm": 4.042754397914905e-08, "learning_rate": 0.17458154138713522, "loss": 0.0, "num_input_tokens_seen": 10169848, "step": 17905 }, { "epoch": 314.21238938053096, "grad_norm": 1.1233067809257591e-08, "learning_rate": 0.17452343097822576, "loss": 0.0, "num_input_tokens_seen": 10172920, "step": 17910 }, { "epoch": 314.3008849557522, "grad_norm": 4.779021267609096e-08, "learning_rate": 0.17446531678749497, "loss": 0.0, "num_input_tokens_seen": 10175576, "step": 17915 }, { "epoch": 314.3893805309734, "grad_norm": 1.8469007656563008e-08, "learning_rate": 0.17440719882390496, "loss": 0.0, "num_input_tokens_seen": 10178056, "step": 17920 }, { "epoch": 314.4778761061947, "grad_norm": 2.601538895419253e-08, "learning_rate": 0.17434907709641814, "loss": 0.0, "num_input_tokens_seen": 10181608, "step": 17925 }, { "epoch": 314.56637168141594, "grad_norm": 2.621306727235151e-08, "learning_rate": 0.17429095161399769, "loss": 0.0, "num_input_tokens_seen": 10184792, "step": 17930 }, { "epoch": 314.65486725663715, "grad_norm": 1.7393782414387715e-08, "learning_rate": 0.1742328223856072, "loss": 0.0, "num_input_tokens_seen": 10187576, "step": 17935 }, { "epoch": 314.7433628318584, "grad_norm": 2.6154546972634307e-08, "learning_rate": 0.174174689420211, "loss": 0.0, "num_input_tokens_seen": 10189928, "step": 17940 }, { "epoch": 314.83185840707966, "grad_norm": 3.161093786729907e-08, "learning_rate": 0.1741165527267739, "loss": 0.0, "num_input_tokens_seen": 10192616, "step": 17945 }, { "epoch": 314.92035398230087, "grad_norm": 2.1355209156581623e-08, "learning_rate": 0.17405841231426125, "loss": 0.0, "num_input_tokens_seen": 10195192, "step": 17950 }, { "epoch": 315.0, "grad_norm": 9.695042280100097e-08, "learning_rate": 0.1740002681916391, "loss": 0.0, "num_input_tokens_seen": 10197744, "step": 17955 }, { "epoch": 315.08849557522126, "grad_norm": 5.1600359540771024e-08, "learning_rate": 0.17394212036787401, "loss": 0.0, "num_input_tokens_seen": 10200736, "step": 17960 }, { "epoch": 315.17699115044246, "grad_norm": 2.231849371980843e-08, "learning_rate": 0.1738839688519331, "loss": 0.0, "num_input_tokens_seen": 10203952, "step": 17965 }, { "epoch": 315.2654867256637, "grad_norm": 5.298907623796367e-08, "learning_rate": 0.17382581365278402, "loss": 0.0, "num_input_tokens_seen": 10206896, "step": 17970 }, { "epoch": 315.353982300885, "grad_norm": 6.470151703297233e-08, "learning_rate": 0.17376765477939507, "loss": 0.0, "num_input_tokens_seen": 10210000, "step": 17975 }, { "epoch": 315.4424778761062, "grad_norm": 3.503372170143848e-08, "learning_rate": 0.1737094922407351, "loss": 0.0, "num_input_tokens_seen": 10212704, "step": 17980 }, { "epoch": 315.53097345132744, "grad_norm": 2.40850770438783e-08, "learning_rate": 0.1736513260457734, "loss": 0.0, "num_input_tokens_seen": 10215136, "step": 17985 }, { "epoch": 315.6194690265487, "grad_norm": 3.802251669071666e-08, "learning_rate": 0.17359315620348006, "loss": 0.0, "num_input_tokens_seen": 10218080, "step": 17990 }, { "epoch": 315.7079646017699, "grad_norm": 4.741281500741934e-08, "learning_rate": 0.17353498272282547, "loss": 0.0, "num_input_tokens_seen": 10220752, "step": 17995 }, { "epoch": 315.79646017699116, "grad_norm": 2.1896131130461072e-08, "learning_rate": 0.17347680561278087, "loss": 0.0, "num_input_tokens_seen": 10223648, "step": 18000 }, { "epoch": 315.79646017699116, "eval_loss": 0.6007505059242249, "eval_runtime": 0.9243, "eval_samples_per_second": 27.048, "eval_steps_per_second": 14.065, "num_input_tokens_seen": 10223648, "step": 18000 }, { "epoch": 315.88495575221236, "grad_norm": 2.2873168248338516e-08, "learning_rate": 0.1734186248823178, "loss": 0.0, "num_input_tokens_seen": 10226400, "step": 18005 }, { "epoch": 315.9734513274336, "grad_norm": 1.7366234672522296e-08, "learning_rate": 0.17336044054040844, "loss": 0.0, "num_input_tokens_seen": 10229536, "step": 18010 }, { "epoch": 316.05309734513276, "grad_norm": 1.516602843310011e-08, "learning_rate": 0.1733022525960256, "loss": 0.0, "num_input_tokens_seen": 10231656, "step": 18015 }, { "epoch": 316.14159292035396, "grad_norm": 2.2598372950710655e-08, "learning_rate": 0.1732440610581426, "loss": 0.0, "num_input_tokens_seen": 10234248, "step": 18020 }, { "epoch": 316.2300884955752, "grad_norm": 3.17091881640863e-08, "learning_rate": 0.17318586593573326, "loss": 0.0, "num_input_tokens_seen": 10236808, "step": 18025 }, { "epoch": 316.3185840707965, "grad_norm": 2.218844308288226e-08, "learning_rate": 0.17312766723777204, "loss": 0.0, "num_input_tokens_seen": 10239496, "step": 18030 }, { "epoch": 316.4070796460177, "grad_norm": 2.6775033745707333e-08, "learning_rate": 0.1730694649732339, "loss": 0.0, "num_input_tokens_seen": 10242392, "step": 18035 }, { "epoch": 316.49557522123894, "grad_norm": 1.6012176473623185e-08, "learning_rate": 0.17301125915109428, "loss": 0.0, "num_input_tokens_seen": 10245304, "step": 18040 }, { "epoch": 316.5840707964602, "grad_norm": 2.3501193879837956e-08, "learning_rate": 0.17295304978032938, "loss": 0.0, "num_input_tokens_seen": 10248088, "step": 18045 }, { "epoch": 316.6725663716814, "grad_norm": 3.90217174128793e-08, "learning_rate": 0.17289483686991577, "loss": 0.0, "num_input_tokens_seen": 10251464, "step": 18050 }, { "epoch": 316.76106194690266, "grad_norm": 1.768485269337816e-08, "learning_rate": 0.1728366204288306, "loss": 0.0, "num_input_tokens_seen": 10254424, "step": 18055 }, { "epoch": 316.8495575221239, "grad_norm": 2.1883304057723763e-08, "learning_rate": 0.17277840046605153, "loss": 0.0, "num_input_tokens_seen": 10257896, "step": 18060 }, { "epoch": 316.9380530973451, "grad_norm": 4.811251841374542e-08, "learning_rate": 0.17272017699055686, "loss": 0.0, "num_input_tokens_seen": 10260424, "step": 18065 }, { "epoch": 317.01769911504425, "grad_norm": 1.7620070735802074e-08, "learning_rate": 0.17266195001132542, "loss": 0.0, "num_input_tokens_seen": 10263120, "step": 18070 }, { "epoch": 317.1061946902655, "grad_norm": 1.9182470722967082e-08, "learning_rate": 0.17260371953733647, "loss": 0.0, "num_input_tokens_seen": 10265568, "step": 18075 }, { "epoch": 317.1946902654867, "grad_norm": 3.03300993209632e-08, "learning_rate": 0.1725454855775699, "loss": 0.0, "num_input_tokens_seen": 10268208, "step": 18080 }, { "epoch": 317.283185840708, "grad_norm": 4.363844041677112e-08, "learning_rate": 0.17248724814100616, "loss": 0.0, "num_input_tokens_seen": 10271376, "step": 18085 }, { "epoch": 317.37168141592923, "grad_norm": 1.250117787066074e-08, "learning_rate": 0.17242900723662619, "loss": 0.0, "num_input_tokens_seen": 10274176, "step": 18090 }, { "epoch": 317.46017699115043, "grad_norm": 1.922675707533017e-08, "learning_rate": 0.1723707628734114, "loss": 0.0, "num_input_tokens_seen": 10277264, "step": 18095 }, { "epoch": 317.5486725663717, "grad_norm": 3.803106096711417e-08, "learning_rate": 0.1723125150603438, "loss": 0.0, "num_input_tokens_seen": 10279744, "step": 18100 }, { "epoch": 317.6371681415929, "grad_norm": 3.4897112755061244e-08, "learning_rate": 0.1722542638064061, "loss": 0.0, "num_input_tokens_seen": 10283040, "step": 18105 }, { "epoch": 317.72566371681415, "grad_norm": 1.4367214973276532e-08, "learning_rate": 0.17219600912058117, "loss": 0.0, "num_input_tokens_seen": 10285536, "step": 18110 }, { "epoch": 317.8141592920354, "grad_norm": 2.9428907311057628e-08, "learning_rate": 0.17213775101185272, "loss": 0.0, "num_input_tokens_seen": 10288672, "step": 18115 }, { "epoch": 317.9026548672566, "grad_norm": 4.240757789375493e-08, "learning_rate": 0.17207948948920485, "loss": 0.0, "num_input_tokens_seen": 10291456, "step": 18120 }, { "epoch": 317.9911504424779, "grad_norm": 3.164161910262919e-08, "learning_rate": 0.17202122456162228, "loss": 0.0, "num_input_tokens_seen": 10294480, "step": 18125 }, { "epoch": 318.070796460177, "grad_norm": 3.797866554577922e-08, "learning_rate": 0.17196295623809013, "loss": 0.0, "num_input_tokens_seen": 10296688, "step": 18130 }, { "epoch": 318.1592920353982, "grad_norm": 3.1976064462924114e-08, "learning_rate": 0.1719046845275941, "loss": 0.0, "num_input_tokens_seen": 10299552, "step": 18135 }, { "epoch": 318.24778761061947, "grad_norm": 2.7210514730313662e-08, "learning_rate": 0.17184640943912044, "loss": 0.0, "num_input_tokens_seen": 10302240, "step": 18140 }, { "epoch": 318.3362831858407, "grad_norm": 1.3284125799373214e-08, "learning_rate": 0.1717881309816559, "loss": 0.0, "num_input_tokens_seen": 10305200, "step": 18145 }, { "epoch": 318.42477876106193, "grad_norm": 1.9616726021354225e-08, "learning_rate": 0.1717298491641878, "loss": 0.0, "num_input_tokens_seen": 10308688, "step": 18150 }, { "epoch": 318.5132743362832, "grad_norm": 2.881006189170421e-08, "learning_rate": 0.17167156399570385, "loss": 0.0, "num_input_tokens_seen": 10311328, "step": 18155 }, { "epoch": 318.60176991150445, "grad_norm": 2.909943219719935e-08, "learning_rate": 0.17161327548519242, "loss": 0.0, "num_input_tokens_seen": 10313952, "step": 18160 }, { "epoch": 318.69026548672565, "grad_norm": 2.3145776850697075e-08, "learning_rate": 0.1715549836416423, "loss": 0.0, "num_input_tokens_seen": 10316656, "step": 18165 }, { "epoch": 318.7787610619469, "grad_norm": 2.911355245771574e-08, "learning_rate": 0.17149668847404279, "loss": 0.0, "num_input_tokens_seen": 10319808, "step": 18170 }, { "epoch": 318.86725663716817, "grad_norm": 3.6916578238788134e-08, "learning_rate": 0.1714383899913838, "loss": 0.0, "num_input_tokens_seen": 10322336, "step": 18175 }, { "epoch": 318.95575221238937, "grad_norm": 4.312478552037646e-08, "learning_rate": 0.17138008820265563, "loss": 0.0, "num_input_tokens_seen": 10325248, "step": 18180 }, { "epoch": 319.0353982300885, "grad_norm": 1.5272995312898274e-08, "learning_rate": 0.17132178311684917, "loss": 0.0, "num_input_tokens_seen": 10327968, "step": 18185 }, { "epoch": 319.12389380530976, "grad_norm": 9.147719026714185e-09, "learning_rate": 0.1712634747429559, "loss": 0.0, "num_input_tokens_seen": 10330800, "step": 18190 }, { "epoch": 319.21238938053096, "grad_norm": 3.3911376817741257e-08, "learning_rate": 0.17120516308996753, "loss": 0.0, "num_input_tokens_seen": 10333424, "step": 18195 }, { "epoch": 319.3008849557522, "grad_norm": 6.868776836199686e-08, "learning_rate": 0.17114684816687653, "loss": 0.0, "num_input_tokens_seen": 10336864, "step": 18200 }, { "epoch": 319.3008849557522, "eval_loss": 0.6037719249725342, "eval_runtime": 0.9305, "eval_samples_per_second": 26.868, "eval_steps_per_second": 13.971, "num_input_tokens_seen": 10336864, "step": 18200 }, { "epoch": 319.3893805309734, "grad_norm": 2.157516654222036e-08, "learning_rate": 0.17108852998267585, "loss": 0.0, "num_input_tokens_seen": 10339680, "step": 18205 }, { "epoch": 319.4778761061947, "grad_norm": 2.9330811557315428e-08, "learning_rate": 0.17103020854635878, "loss": 0.0, "num_input_tokens_seen": 10342224, "step": 18210 }, { "epoch": 319.56637168141594, "grad_norm": 1.686703932080036e-08, "learning_rate": 0.1709718838669193, "loss": 0.0, "num_input_tokens_seen": 10344816, "step": 18215 }, { "epoch": 319.65486725663715, "grad_norm": 3.20881312632082e-08, "learning_rate": 0.17091355595335173, "loss": 0.0, "num_input_tokens_seen": 10347456, "step": 18220 }, { "epoch": 319.7433628318584, "grad_norm": 2.0023383839884445e-08, "learning_rate": 0.17085522481465107, "loss": 0.0, "num_input_tokens_seen": 10350592, "step": 18225 }, { "epoch": 319.83185840707966, "grad_norm": 2.1429780616699645e-08, "learning_rate": 0.17079689045981264, "loss": 0.0, "num_input_tokens_seen": 10353440, "step": 18230 }, { "epoch": 319.92035398230087, "grad_norm": 1.6587575757398554e-08, "learning_rate": 0.17073855289783238, "loss": 0.0, "num_input_tokens_seen": 10356976, "step": 18235 }, { "epoch": 320.0, "grad_norm": 1.5252583196456726e-08, "learning_rate": 0.1706802121377066, "loss": 0.0, "num_input_tokens_seen": 10359368, "step": 18240 }, { "epoch": 320.08849557522126, "grad_norm": 2.9072761975612593e-08, "learning_rate": 0.17062186818843225, "loss": 0.0, "num_input_tokens_seen": 10362504, "step": 18245 }, { "epoch": 320.17699115044246, "grad_norm": 1.779663705292478e-08, "learning_rate": 0.17056352105900668, "loss": 0.0, "num_input_tokens_seen": 10364920, "step": 18250 }, { "epoch": 320.2654867256637, "grad_norm": 4.430547662082063e-08, "learning_rate": 0.17050517075842772, "loss": 0.0, "num_input_tokens_seen": 10368104, "step": 18255 }, { "epoch": 320.353982300885, "grad_norm": 5.272287495472483e-08, "learning_rate": 0.17044681729569375, "loss": 0.0, "num_input_tokens_seen": 10370952, "step": 18260 }, { "epoch": 320.4424778761062, "grad_norm": 2.184851233266727e-08, "learning_rate": 0.17038846067980365, "loss": 0.0, "num_input_tokens_seen": 10373432, "step": 18265 }, { "epoch": 320.53097345132744, "grad_norm": 8.326046518902785e-09, "learning_rate": 0.17033010091975664, "loss": 0.0, "num_input_tokens_seen": 10376280, "step": 18270 }, { "epoch": 320.6194690265487, "grad_norm": 4.247291585102175e-08, "learning_rate": 0.17027173802455262, "loss": 0.0, "num_input_tokens_seen": 10378824, "step": 18275 }, { "epoch": 320.7079646017699, "grad_norm": 2.9208614193976246e-08, "learning_rate": 0.1702133720031918, "loss": 0.0, "num_input_tokens_seen": 10381816, "step": 18280 }, { "epoch": 320.79646017699116, "grad_norm": 4.420769172952532e-08, "learning_rate": 0.17015500286467503, "loss": 0.0, "num_input_tokens_seen": 10384568, "step": 18285 }, { "epoch": 320.88495575221236, "grad_norm": 2.2921271991549474e-08, "learning_rate": 0.17009663061800354, "loss": 0.0, "num_input_tokens_seen": 10387528, "step": 18290 }, { "epoch": 320.9734513274336, "grad_norm": 2.6509443529221244e-08, "learning_rate": 0.17003825527217903, "loss": 0.0, "num_input_tokens_seen": 10390376, "step": 18295 }, { "epoch": 321.05309734513276, "grad_norm": 4.469436021281581e-08, "learning_rate": 0.16997987683620377, "loss": 0.0, "num_input_tokens_seen": 10393016, "step": 18300 }, { "epoch": 321.14159292035396, "grad_norm": 2.14803232978511e-08, "learning_rate": 0.16992149531908043, "loss": 0.0, "num_input_tokens_seen": 10396008, "step": 18305 }, { "epoch": 321.2300884955752, "grad_norm": 3.721454788774281e-08, "learning_rate": 0.16986311072981214, "loss": 0.0, "num_input_tokens_seen": 10399080, "step": 18310 }, { "epoch": 321.3185840707965, "grad_norm": 4.749123760916518e-08, "learning_rate": 0.16980472307740255, "loss": 0.0, "num_input_tokens_seen": 10402072, "step": 18315 }, { "epoch": 321.4070796460177, "grad_norm": 2.9910861343296347e-08, "learning_rate": 0.1697463323708558, "loss": 0.0, "num_input_tokens_seen": 10405032, "step": 18320 }, { "epoch": 321.49557522123894, "grad_norm": 3.643105372930222e-08, "learning_rate": 0.16968793861917641, "loss": 0.0, "num_input_tokens_seen": 10407528, "step": 18325 }, { "epoch": 321.5840707964602, "grad_norm": 2.713463942427552e-08, "learning_rate": 0.16962954183136952, "loss": 0.0, "num_input_tokens_seen": 10410184, "step": 18330 }, { "epoch": 321.6725663716814, "grad_norm": 2.501503182372744e-08, "learning_rate": 0.16957114201644058, "loss": 0.0, "num_input_tokens_seen": 10413288, "step": 18335 }, { "epoch": 321.76106194690266, "grad_norm": 1.3412101651510966e-08, "learning_rate": 0.16951273918339563, "loss": 0.0, "num_input_tokens_seen": 10416376, "step": 18340 }, { "epoch": 321.8495575221239, "grad_norm": 3.0019773333833655e-08, "learning_rate": 0.16945433334124105, "loss": 0.0, "num_input_tokens_seen": 10419656, "step": 18345 }, { "epoch": 321.9380530973451, "grad_norm": 2.1327151600303296e-08, "learning_rate": 0.1693959244989838, "loss": 0.0, "num_input_tokens_seen": 10422376, "step": 18350 }, { "epoch": 322.01769911504425, "grad_norm": 3.289980909926271e-08, "learning_rate": 0.16933751266563127, "loss": 0.0, "num_input_tokens_seen": 10424320, "step": 18355 }, { "epoch": 322.1061946902655, "grad_norm": 1.595761034423049e-08, "learning_rate": 0.16927909785019118, "loss": 0.0, "num_input_tokens_seen": 10427376, "step": 18360 }, { "epoch": 322.1946902654867, "grad_norm": 1.6576729322537176e-08, "learning_rate": 0.169220680061672, "loss": 0.0, "num_input_tokens_seen": 10430240, "step": 18365 }, { "epoch": 322.283185840708, "grad_norm": 2.0480182882920417e-08, "learning_rate": 0.16916225930908244, "loss": 0.0, "num_input_tokens_seen": 10433248, "step": 18370 }, { "epoch": 322.37168141592923, "grad_norm": 1.5170508405049077e-08, "learning_rate": 0.16910383560143163, "loss": 0.0, "num_input_tokens_seen": 10436400, "step": 18375 }, { "epoch": 322.46017699115043, "grad_norm": 1.777240754563536e-08, "learning_rate": 0.16904540894772935, "loss": 0.0, "num_input_tokens_seen": 10439600, "step": 18380 }, { "epoch": 322.5486725663717, "grad_norm": 2.9498496090241133e-08, "learning_rate": 0.16898697935698562, "loss": 0.0, "num_input_tokens_seen": 10442448, "step": 18385 }, { "epoch": 322.6371681415929, "grad_norm": 1.5407220388397036e-08, "learning_rate": 0.1689285468382111, "loss": 0.0, "num_input_tokens_seen": 10444864, "step": 18390 }, { "epoch": 322.72566371681415, "grad_norm": 1.279815986521271e-08, "learning_rate": 0.16887011140041677, "loss": 0.0, "num_input_tokens_seen": 10447424, "step": 18395 }, { "epoch": 322.8141592920354, "grad_norm": 2.8369344207135327e-08, "learning_rate": 0.1688116730526141, "loss": 0.0, "num_input_tokens_seen": 10450688, "step": 18400 }, { "epoch": 322.8141592920354, "eval_loss": 0.6033881902694702, "eval_runtime": 0.9271, "eval_samples_per_second": 26.965, "eval_steps_per_second": 14.022, "num_input_tokens_seen": 10450688, "step": 18400 }, { "epoch": 322.9026548672566, "grad_norm": 1.8517864575073872e-08, "learning_rate": 0.1687532318038151, "loss": 0.0, "num_input_tokens_seen": 10453200, "step": 18405 }, { "epoch": 322.9911504424779, "grad_norm": 8.757428560102198e-09, "learning_rate": 0.16869478766303206, "loss": 0.0, "num_input_tokens_seen": 10456016, "step": 18410 }, { "epoch": 323.070796460177, "grad_norm": 3.0667496986325204e-08, "learning_rate": 0.16863634063927788, "loss": 0.0, "num_input_tokens_seen": 10458352, "step": 18415 }, { "epoch": 323.1592920353982, "grad_norm": 1.9337992540613413e-08, "learning_rate": 0.16857789074156568, "loss": 0.0, "num_input_tokens_seen": 10461456, "step": 18420 }, { "epoch": 323.24778761061947, "grad_norm": 2.2829510726296576e-08, "learning_rate": 0.16851943797890928, "loss": 0.0, "num_input_tokens_seen": 10464256, "step": 18425 }, { "epoch": 323.3362831858407, "grad_norm": 2.4196689096811497e-08, "learning_rate": 0.16846098236032284, "loss": 0.0, "num_input_tokens_seen": 10467488, "step": 18430 }, { "epoch": 323.42477876106193, "grad_norm": 3.079833277297439e-08, "learning_rate": 0.16840252389482097, "loss": 0.0, "num_input_tokens_seen": 10470112, "step": 18435 }, { "epoch": 323.5132743362832, "grad_norm": 6.676538788497055e-08, "learning_rate": 0.16834406259141857, "loss": 0.0, "num_input_tokens_seen": 10473312, "step": 18440 }, { "epoch": 323.60176991150445, "grad_norm": 2.845042956778343e-08, "learning_rate": 0.16828559845913124, "loss": 0.0, "num_input_tokens_seen": 10476192, "step": 18445 }, { "epoch": 323.69026548672565, "grad_norm": 1.6741799058195284e-08, "learning_rate": 0.16822713150697488, "loss": 0.0, "num_input_tokens_seen": 10478960, "step": 18450 }, { "epoch": 323.7787610619469, "grad_norm": 2.862888415222642e-08, "learning_rate": 0.16816866174396575, "loss": 0.0, "num_input_tokens_seen": 10481904, "step": 18455 }, { "epoch": 323.86725663716817, "grad_norm": 2.8268834384448382e-08, "learning_rate": 0.16811018917912057, "loss": 0.0, "num_input_tokens_seen": 10484672, "step": 18460 }, { "epoch": 323.95575221238937, "grad_norm": 2.943059307369822e-08, "learning_rate": 0.16805171382145673, "loss": 0.0, "num_input_tokens_seen": 10487344, "step": 18465 }, { "epoch": 324.0353982300885, "grad_norm": 5.1053955729685185e-08, "learning_rate": 0.16799323567999175, "loss": 0.0, "num_input_tokens_seen": 10489664, "step": 18470 }, { "epoch": 324.12389380530976, "grad_norm": 1.8258516476521436e-08, "learning_rate": 0.16793475476374367, "loss": 0.0, "num_input_tokens_seen": 10492464, "step": 18475 }, { "epoch": 324.21238938053096, "grad_norm": 3.0729133015938714e-08, "learning_rate": 0.1678762710817311, "loss": 0.0, "num_input_tokens_seen": 10495344, "step": 18480 }, { "epoch": 324.3008849557522, "grad_norm": 2.1398889771262475e-08, "learning_rate": 0.1678177846429728, "loss": 0.0, "num_input_tokens_seen": 10497984, "step": 18485 }, { "epoch": 324.3893805309734, "grad_norm": 1.4857591601469267e-08, "learning_rate": 0.16775929545648827, "loss": 0.0, "num_input_tokens_seen": 10501120, "step": 18490 }, { "epoch": 324.4778761061947, "grad_norm": 3.281862248627476e-08, "learning_rate": 0.16770080353129715, "loss": 0.0, "num_input_tokens_seen": 10503968, "step": 18495 }, { "epoch": 324.56637168141594, "grad_norm": 2.8873810009599765e-08, "learning_rate": 0.16764230887641968, "loss": 0.0, "num_input_tokens_seen": 10506880, "step": 18500 }, { "epoch": 324.65486725663715, "grad_norm": 2.3578916596989075e-08, "learning_rate": 0.1675838115008765, "loss": 0.0, "num_input_tokens_seen": 10509520, "step": 18505 }, { "epoch": 324.7433628318584, "grad_norm": 3.416533900235663e-08, "learning_rate": 0.1675253114136886, "loss": 0.0, "num_input_tokens_seen": 10512192, "step": 18510 }, { "epoch": 324.83185840707966, "grad_norm": 1.9839870191162845e-08, "learning_rate": 0.16746680862387747, "loss": 0.0, "num_input_tokens_seen": 10514640, "step": 18515 }, { "epoch": 324.92035398230087, "grad_norm": 3.1701414826557084e-08, "learning_rate": 0.16740830314046493, "loss": 0.0, "num_input_tokens_seen": 10517840, "step": 18520 }, { "epoch": 325.0, "grad_norm": 1.905334201524056e-08, "learning_rate": 0.1673497949724733, "loss": 0.0, "num_input_tokens_seen": 10520464, "step": 18525 }, { "epoch": 325.08849557522126, "grad_norm": 1.7232489213370172e-08, "learning_rate": 0.16729128412892522, "loss": 0.0, "num_input_tokens_seen": 10523584, "step": 18530 }, { "epoch": 325.17699115044246, "grad_norm": 1.5284872034726504e-08, "learning_rate": 0.16723277061884384, "loss": 0.0, "num_input_tokens_seen": 10526144, "step": 18535 }, { "epoch": 325.2654867256637, "grad_norm": 4.376538598194202e-08, "learning_rate": 0.16717425445125267, "loss": 0.0, "num_input_tokens_seen": 10528656, "step": 18540 }, { "epoch": 325.353982300885, "grad_norm": 1.6088705478978227e-08, "learning_rate": 0.16711573563517565, "loss": 0.0, "num_input_tokens_seen": 10531824, "step": 18545 }, { "epoch": 325.4424778761062, "grad_norm": 3.7275132314107395e-08, "learning_rate": 0.1670572141796371, "loss": 0.0, "num_input_tokens_seen": 10534576, "step": 18550 }, { "epoch": 325.53097345132744, "grad_norm": 4.1745451540009526e-08, "learning_rate": 0.16699869009366175, "loss": 0.0, "num_input_tokens_seen": 10537520, "step": 18555 }, { "epoch": 325.6194690265487, "grad_norm": 2.419516675900013e-08, "learning_rate": 0.1669401633862748, "loss": 0.0, "num_input_tokens_seen": 10540640, "step": 18560 }, { "epoch": 325.7079646017699, "grad_norm": 3.436367279618935e-08, "learning_rate": 0.16688163406650178, "loss": 0.0, "num_input_tokens_seen": 10543808, "step": 18565 }, { "epoch": 325.79646017699116, "grad_norm": 3.65207988295424e-08, "learning_rate": 0.1668231021433686, "loss": 0.0, "num_input_tokens_seen": 10546896, "step": 18570 }, { "epoch": 325.88495575221236, "grad_norm": 3.108631929649164e-08, "learning_rate": 0.1667645676259017, "loss": 0.0, "num_input_tokens_seen": 10549776, "step": 18575 }, { "epoch": 325.9734513274336, "grad_norm": 1.7208764191423143e-08, "learning_rate": 0.1667060305231277, "loss": 0.0, "num_input_tokens_seen": 10552192, "step": 18580 }, { "epoch": 326.05309734513276, "grad_norm": 3.619719635139518e-08, "learning_rate": 0.16664749084407396, "loss": 0.0, "num_input_tokens_seen": 10554360, "step": 18585 }, { "epoch": 326.14159292035396, "grad_norm": 3.5454466029705145e-08, "learning_rate": 0.16658894859776788, "loss": 0.0, "num_input_tokens_seen": 10557224, "step": 18590 }, { "epoch": 326.2300884955752, "grad_norm": 3.0140228091113386e-08, "learning_rate": 0.16653040379323752, "loss": 0.0, "num_input_tokens_seen": 10559768, "step": 18595 }, { "epoch": 326.3185840707965, "grad_norm": 2.39052919681626e-08, "learning_rate": 0.16647185643951107, "loss": 0.0, "num_input_tokens_seen": 10563128, "step": 18600 }, { "epoch": 326.3185840707965, "eval_loss": 0.6052086353302002, "eval_runtime": 0.9402, "eval_samples_per_second": 26.589, "eval_steps_per_second": 13.826, "num_input_tokens_seen": 10563128, "step": 18600 }, { "epoch": 326.4070796460177, "grad_norm": 7.47773576392774e-09, "learning_rate": 0.1664133065456174, "loss": 0.0, "num_input_tokens_seen": 10566088, "step": 18605 }, { "epoch": 326.49557522123894, "grad_norm": 2.7061195950750516e-08, "learning_rate": 0.1663547541205856, "loss": 0.0, "num_input_tokens_seen": 10569016, "step": 18610 }, { "epoch": 326.5840707964602, "grad_norm": 3.464355557980525e-08, "learning_rate": 0.16629619917344518, "loss": 0.0, "num_input_tokens_seen": 10572008, "step": 18615 }, { "epoch": 326.6725663716814, "grad_norm": 2.0922890087149426e-08, "learning_rate": 0.16623764171322605, "loss": 0.0, "num_input_tokens_seen": 10574696, "step": 18620 }, { "epoch": 326.76106194690266, "grad_norm": 4.027326028221978e-08, "learning_rate": 0.1661790817489585, "loss": 0.0, "num_input_tokens_seen": 10577720, "step": 18625 }, { "epoch": 326.8495575221239, "grad_norm": 2.403479015811172e-08, "learning_rate": 0.16612051928967328, "loss": 0.0, "num_input_tokens_seen": 10580696, "step": 18630 }, { "epoch": 326.9380530973451, "grad_norm": 5.503138567064525e-08, "learning_rate": 0.16606195434440138, "loss": 0.0, "num_input_tokens_seen": 10583208, "step": 18635 }, { "epoch": 327.01769911504425, "grad_norm": 2.0568158731748554e-08, "learning_rate": 0.16600338692217426, "loss": 0.0, "num_input_tokens_seen": 10586016, "step": 18640 }, { "epoch": 327.1061946902655, "grad_norm": 2.5329068620294493e-08, "learning_rate": 0.16594481703202374, "loss": 0.0, "num_input_tokens_seen": 10588768, "step": 18645 }, { "epoch": 327.1946902654867, "grad_norm": 2.5592937547003203e-08, "learning_rate": 0.1658862446829821, "loss": 0.0, "num_input_tokens_seen": 10592224, "step": 18650 }, { "epoch": 327.283185840708, "grad_norm": 2.4911503970770354e-08, "learning_rate": 0.16582766988408187, "loss": 0.0, "num_input_tokens_seen": 10595264, "step": 18655 }, { "epoch": 327.37168141592923, "grad_norm": 2.9429919834456086e-08, "learning_rate": 0.16576909264435608, "loss": 0.0, "num_input_tokens_seen": 10597792, "step": 18660 }, { "epoch": 327.46017699115043, "grad_norm": 3.623468813884756e-08, "learning_rate": 0.16571051297283798, "loss": 0.0, "num_input_tokens_seen": 10600688, "step": 18665 }, { "epoch": 327.5486725663717, "grad_norm": 3.173963136759994e-08, "learning_rate": 0.16565193087856137, "loss": 0.0, "num_input_tokens_seen": 10603504, "step": 18670 }, { "epoch": 327.6371681415929, "grad_norm": 1.4884436794204703e-08, "learning_rate": 0.16559334637056033, "loss": 0.0, "num_input_tokens_seen": 10606112, "step": 18675 }, { "epoch": 327.72566371681415, "grad_norm": 2.6731184377126738e-08, "learning_rate": 0.16553475945786933, "loss": 0.0, "num_input_tokens_seen": 10609136, "step": 18680 }, { "epoch": 327.8141592920354, "grad_norm": 2.9496682429908105e-08, "learning_rate": 0.16547617014952318, "loss": 0.0, "num_input_tokens_seen": 10611664, "step": 18685 }, { "epoch": 327.9026548672566, "grad_norm": 1.2481264022312644e-08, "learning_rate": 0.1654175784545571, "loss": 0.0, "num_input_tokens_seen": 10614624, "step": 18690 }, { "epoch": 327.9911504424779, "grad_norm": 3.401573422934234e-08, "learning_rate": 0.1653589843820067, "loss": 0.0, "num_input_tokens_seen": 10618032, "step": 18695 }, { "epoch": 328.070796460177, "grad_norm": 2.8303857035893998e-08, "learning_rate": 0.1653003879409079, "loss": 0.0, "num_input_tokens_seen": 10619992, "step": 18700 }, { "epoch": 328.1592920353982, "grad_norm": 1.689766548906846e-08, "learning_rate": 0.165241789140297, "loss": 0.0, "num_input_tokens_seen": 10622616, "step": 18705 }, { "epoch": 328.24778761061947, "grad_norm": 4.515500862112276e-08, "learning_rate": 0.16518318798921064, "loss": 0.0, "num_input_tokens_seen": 10625608, "step": 18710 }, { "epoch": 328.3362831858407, "grad_norm": 3.6354894206169774e-08, "learning_rate": 0.16512458449668593, "loss": 0.0, "num_input_tokens_seen": 10629160, "step": 18715 }, { "epoch": 328.42477876106193, "grad_norm": 2.0987142690387373e-08, "learning_rate": 0.1650659786717602, "loss": 0.0, "num_input_tokens_seen": 10632072, "step": 18720 }, { "epoch": 328.5132743362832, "grad_norm": 1.5597187541516178e-08, "learning_rate": 0.1650073705234712, "loss": 0.0, "num_input_tokens_seen": 10634872, "step": 18725 }, { "epoch": 328.60176991150445, "grad_norm": 2.249777786289542e-08, "learning_rate": 0.16494876006085712, "loss": 0.0, "num_input_tokens_seen": 10637656, "step": 18730 }, { "epoch": 328.69026548672565, "grad_norm": 5.32590460622373e-09, "learning_rate": 0.16489014729295634, "loss": 0.0, "num_input_tokens_seen": 10640456, "step": 18735 }, { "epoch": 328.7787610619469, "grad_norm": 9.699520298056541e-09, "learning_rate": 0.16483153222880775, "loss": 0.0, "num_input_tokens_seen": 10643512, "step": 18740 }, { "epoch": 328.86725663716817, "grad_norm": 2.450302361012291e-08, "learning_rate": 0.16477291487745052, "loss": 0.0, "num_input_tokens_seen": 10646280, "step": 18745 }, { "epoch": 328.95575221238937, "grad_norm": 2.507936613938e-08, "learning_rate": 0.16471429524792416, "loss": 0.0, "num_input_tokens_seen": 10649288, "step": 18750 }, { "epoch": 329.0353982300885, "grad_norm": 2.77280101101951e-08, "learning_rate": 0.16465567334926856, "loss": 0.0, "num_input_tokens_seen": 10651880, "step": 18755 }, { "epoch": 329.12389380530976, "grad_norm": 2.1497992719332615e-08, "learning_rate": 0.16459704919052395, "loss": 0.0, "num_input_tokens_seen": 10654648, "step": 18760 }, { "epoch": 329.21238938053096, "grad_norm": 1.4224113442651287e-08, "learning_rate": 0.16453842278073086, "loss": 0.0, "num_input_tokens_seen": 10657496, "step": 18765 }, { "epoch": 329.3008849557522, "grad_norm": 2.60869779111772e-08, "learning_rate": 0.16447979412893038, "loss": 0.0, "num_input_tokens_seen": 10660216, "step": 18770 }, { "epoch": 329.3893805309734, "grad_norm": 3.541189386169208e-08, "learning_rate": 0.16442116324416367, "loss": 0.0, "num_input_tokens_seen": 10663112, "step": 18775 }, { "epoch": 329.4778761061947, "grad_norm": 1.625785017722592e-08, "learning_rate": 0.1643625301354723, "loss": 0.0, "num_input_tokens_seen": 10665928, "step": 18780 }, { "epoch": 329.56637168141594, "grad_norm": 9.734854700127471e-09, "learning_rate": 0.16430389481189828, "loss": 0.0, "num_input_tokens_seen": 10669080, "step": 18785 }, { "epoch": 329.65486725663715, "grad_norm": 2.31233077130355e-08, "learning_rate": 0.164245257282484, "loss": 0.0, "num_input_tokens_seen": 10672296, "step": 18790 }, { "epoch": 329.7433628318584, "grad_norm": 8.971987597305997e-09, "learning_rate": 0.16418661755627195, "loss": 0.0, "num_input_tokens_seen": 10674904, "step": 18795 }, { "epoch": 329.83185840707966, "grad_norm": 6.985271738813026e-08, "learning_rate": 0.16412797564230527, "loss": 0.0, "num_input_tokens_seen": 10677928, "step": 18800 }, { "epoch": 329.83185840707966, "eval_loss": 0.6042895317077637, "eval_runtime": 0.9431, "eval_samples_per_second": 26.508, "eval_steps_per_second": 13.784, "num_input_tokens_seen": 10677928, "step": 18800 }, { "epoch": 329.92035398230087, "grad_norm": 2.2507819608108548e-08, "learning_rate": 0.16406933154962713, "loss": 0.0, "num_input_tokens_seen": 10681080, "step": 18805 }, { "epoch": 330.0, "grad_norm": 8.662195938313744e-08, "learning_rate": 0.16401068528728133, "loss": 0.0, "num_input_tokens_seen": 10683296, "step": 18810 }, { "epoch": 330.08849557522126, "grad_norm": 1.6302728056416527e-08, "learning_rate": 0.16395203686431173, "loss": 0.0, "num_input_tokens_seen": 10686096, "step": 18815 }, { "epoch": 330.17699115044246, "grad_norm": 2.054228609438269e-08, "learning_rate": 0.16389338628976277, "loss": 0.0, "num_input_tokens_seen": 10689152, "step": 18820 }, { "epoch": 330.2654867256637, "grad_norm": 2.999177795004471e-08, "learning_rate": 0.163834733572679, "loss": 0.0, "num_input_tokens_seen": 10691856, "step": 18825 }, { "epoch": 330.353982300885, "grad_norm": 1.6097574828677352e-08, "learning_rate": 0.16377607872210545, "loss": 0.0, "num_input_tokens_seen": 10694768, "step": 18830 }, { "epoch": 330.4424778761062, "grad_norm": 3.54420386372567e-08, "learning_rate": 0.16371742174708748, "loss": 0.0, "num_input_tokens_seen": 10697328, "step": 18835 }, { "epoch": 330.53097345132744, "grad_norm": 2.2753216200044335e-08, "learning_rate": 0.16365876265667065, "loss": 0.0, "num_input_tokens_seen": 10699616, "step": 18840 }, { "epoch": 330.6194690265487, "grad_norm": 2.75057594478767e-08, "learning_rate": 0.163600101459901, "loss": 0.0, "num_input_tokens_seen": 10702736, "step": 18845 }, { "epoch": 330.7079646017699, "grad_norm": 1.7950901209928816e-08, "learning_rate": 0.16354143816582484, "loss": 0.0, "num_input_tokens_seen": 10705728, "step": 18850 }, { "epoch": 330.79646017699116, "grad_norm": 9.76764447102596e-09, "learning_rate": 0.1634827727834887, "loss": 0.0, "num_input_tokens_seen": 10708784, "step": 18855 }, { "epoch": 330.88495575221236, "grad_norm": 8.428438391661075e-09, "learning_rate": 0.16342410532193954, "loss": 0.0, "num_input_tokens_seen": 10711296, "step": 18860 }, { "epoch": 330.9734513274336, "grad_norm": 1.0533491412445528e-08, "learning_rate": 0.16336543579022464, "loss": 0.0, "num_input_tokens_seen": 10714576, "step": 18865 }, { "epoch": 331.05309734513276, "grad_norm": 2.7337248909020673e-08, "learning_rate": 0.16330676419739157, "loss": 0.0, "num_input_tokens_seen": 10717048, "step": 18870 }, { "epoch": 331.14159292035396, "grad_norm": 4.3176875408335036e-08, "learning_rate": 0.1632480905524883, "loss": 0.0, "num_input_tokens_seen": 10719976, "step": 18875 }, { "epoch": 331.2300884955752, "grad_norm": 1.4761477373781418e-08, "learning_rate": 0.16318941486456293, "loss": 0.0, "num_input_tokens_seen": 10722696, "step": 18880 }, { "epoch": 331.3185840707965, "grad_norm": 1.800334459289843e-08, "learning_rate": 0.16313073714266405, "loss": 0.0, "num_input_tokens_seen": 10725816, "step": 18885 }, { "epoch": 331.4070796460177, "grad_norm": 2.4001693077480013e-08, "learning_rate": 0.16307205739584052, "loss": 0.0, "num_input_tokens_seen": 10728728, "step": 18890 }, { "epoch": 331.49557522123894, "grad_norm": 2.090265383003498e-08, "learning_rate": 0.16301337563314144, "loss": 0.0, "num_input_tokens_seen": 10732424, "step": 18895 }, { "epoch": 331.5840707964602, "grad_norm": 2.8104771843118215e-08, "learning_rate": 0.1629546918636163, "loss": 0.0, "num_input_tokens_seen": 10735064, "step": 18900 }, { "epoch": 331.6725663716814, "grad_norm": 1.9228640013579934e-08, "learning_rate": 0.16289600609631485, "loss": 0.0, "num_input_tokens_seen": 10737944, "step": 18905 }, { "epoch": 331.76106194690266, "grad_norm": 1.2864041387672387e-08, "learning_rate": 0.16283731834028722, "loss": 0.0, "num_input_tokens_seen": 10740568, "step": 18910 }, { "epoch": 331.8495575221239, "grad_norm": 1.2928751402796479e-08, "learning_rate": 0.16277862860458378, "loss": 0.0, "num_input_tokens_seen": 10743384, "step": 18915 }, { "epoch": 331.9380530973451, "grad_norm": 3.7404952024644444e-08, "learning_rate": 0.16271993689825526, "loss": 0.0, "num_input_tokens_seen": 10745736, "step": 18920 }, { "epoch": 332.01769911504425, "grad_norm": 1.4463913622364544e-08, "learning_rate": 0.1626612432303526, "loss": 0.0, "num_input_tokens_seen": 10748000, "step": 18925 }, { "epoch": 332.1061946902655, "grad_norm": 2.20449081211882e-08, "learning_rate": 0.1626025476099271, "loss": 0.0, "num_input_tokens_seen": 10750432, "step": 18930 }, { "epoch": 332.1946902654867, "grad_norm": 2.639871254928039e-08, "learning_rate": 0.1625438500460304, "loss": 0.0, "num_input_tokens_seen": 10753488, "step": 18935 }, { "epoch": 332.283185840708, "grad_norm": 2.6042364709155663e-08, "learning_rate": 0.16248515054771442, "loss": 0.0, "num_input_tokens_seen": 10756592, "step": 18940 }, { "epoch": 332.37168141592923, "grad_norm": 1.2616030886647422e-08, "learning_rate": 0.16242644912403123, "loss": 0.0, "num_input_tokens_seen": 10759424, "step": 18945 }, { "epoch": 332.46017699115043, "grad_norm": 1.9444636123466807e-08, "learning_rate": 0.1623677457840335, "loss": 0.0, "num_input_tokens_seen": 10762192, "step": 18950 }, { "epoch": 332.5486725663717, "grad_norm": 1.3134291876326643e-08, "learning_rate": 0.16230904053677397, "loss": 0.0, "num_input_tokens_seen": 10764912, "step": 18955 }, { "epoch": 332.6371681415929, "grad_norm": 1.075432098929241e-08, "learning_rate": 0.16225033339130568, "loss": 0.0, "num_input_tokens_seen": 10767872, "step": 18960 }, { "epoch": 332.72566371681415, "grad_norm": 2.923427722123506e-08, "learning_rate": 0.16219162435668197, "loss": 0.0, "num_input_tokens_seen": 10771152, "step": 18965 }, { "epoch": 332.8141592920354, "grad_norm": 4.91202420960235e-08, "learning_rate": 0.16213291344195666, "loss": 0.0, "num_input_tokens_seen": 10774000, "step": 18970 }, { "epoch": 332.9026548672566, "grad_norm": 2.3349350897206023e-08, "learning_rate": 0.16207420065618358, "loss": 0.0, "num_input_tokens_seen": 10776688, "step": 18975 }, { "epoch": 332.9911504424779, "grad_norm": 2.0367577846513996e-08, "learning_rate": 0.16201548600841706, "loss": 0.0, "num_input_tokens_seen": 10779600, "step": 18980 }, { "epoch": 333.070796460177, "grad_norm": 1.9519244887078457e-08, "learning_rate": 0.16195676950771154, "loss": 0.0, "num_input_tokens_seen": 10781936, "step": 18985 }, { "epoch": 333.1592920353982, "grad_norm": 1.3237437812563257e-08, "learning_rate": 0.16189805116312198, "loss": 0.0, "num_input_tokens_seen": 10784768, "step": 18990 }, { "epoch": 333.24778761061947, "grad_norm": 1.146504224891487e-08, "learning_rate": 0.16183933098370337, "loss": 0.0, "num_input_tokens_seen": 10788048, "step": 18995 }, { "epoch": 333.3362831858407, "grad_norm": 1.7447741029741337e-08, "learning_rate": 0.16178060897851115, "loss": 0.0, "num_input_tokens_seen": 10790896, "step": 19000 }, { "epoch": 333.3362831858407, "eval_loss": 0.6169363260269165, "eval_runtime": 0.9341, "eval_samples_per_second": 26.763, "eval_steps_per_second": 13.917, "num_input_tokens_seen": 10790896, "step": 19000 }, { "epoch": 333.42477876106193, "grad_norm": 1.712177954971139e-08, "learning_rate": 0.16172188515660096, "loss": 0.0, "num_input_tokens_seen": 10793696, "step": 19005 }, { "epoch": 333.5132743362832, "grad_norm": 1.1515526310290625e-08, "learning_rate": 0.16166315952702878, "loss": 0.0, "num_input_tokens_seen": 10796880, "step": 19010 }, { "epoch": 333.60176991150445, "grad_norm": 5.3441581826518814e-08, "learning_rate": 0.16160443209885084, "loss": 0.0, "num_input_tokens_seen": 10799472, "step": 19015 }, { "epoch": 333.69026548672565, "grad_norm": 2.322113878960863e-08, "learning_rate": 0.16154570288112363, "loss": 0.0, "num_input_tokens_seen": 10801856, "step": 19020 }, { "epoch": 333.7787610619469, "grad_norm": 3.452591812447281e-08, "learning_rate": 0.16148697188290395, "loss": 0.0, "num_input_tokens_seen": 10804864, "step": 19025 }, { "epoch": 333.86725663716817, "grad_norm": 4.346452087133912e-08, "learning_rate": 0.16142823911324888, "loss": 0.0, "num_input_tokens_seen": 10807968, "step": 19030 }, { "epoch": 333.95575221238937, "grad_norm": 1.8056674377930904e-08, "learning_rate": 0.16136950458121568, "loss": 0.0, "num_input_tokens_seen": 10811104, "step": 19035 }, { "epoch": 334.0353982300885, "grad_norm": 9.695314773239261e-09, "learning_rate": 0.16131076829586205, "loss": 0.0, "num_input_tokens_seen": 10813792, "step": 19040 }, { "epoch": 334.12389380530976, "grad_norm": 3.456059971540526e-08, "learning_rate": 0.1612520302662457, "loss": 0.0, "num_input_tokens_seen": 10816576, "step": 19045 }, { "epoch": 334.21238938053096, "grad_norm": 1.5560857491436764e-08, "learning_rate": 0.16119329050142497, "loss": 0.0, "num_input_tokens_seen": 10819632, "step": 19050 }, { "epoch": 334.3008849557522, "grad_norm": 3.725168795654099e-08, "learning_rate": 0.16113454901045818, "loss": 0.0, "num_input_tokens_seen": 10822992, "step": 19055 }, { "epoch": 334.3893805309734, "grad_norm": 2.8653236228137757e-08, "learning_rate": 0.16107580580240397, "loss": 0.0, "num_input_tokens_seen": 10825696, "step": 19060 }, { "epoch": 334.4778761061947, "grad_norm": 2.8585825262439357e-08, "learning_rate": 0.16101706088632134, "loss": 0.0, "num_input_tokens_seen": 10828432, "step": 19065 }, { "epoch": 334.56637168141594, "grad_norm": 1.8479852315067546e-08, "learning_rate": 0.16095831427126947, "loss": 0.0, "num_input_tokens_seen": 10831520, "step": 19070 }, { "epoch": 334.65486725663715, "grad_norm": 1.965136320336569e-08, "learning_rate": 0.16089956596630783, "loss": 0.0, "num_input_tokens_seen": 10833904, "step": 19075 }, { "epoch": 334.7433628318584, "grad_norm": 1.7765525939239524e-08, "learning_rate": 0.16084081598049618, "loss": 0.0, "num_input_tokens_seen": 10836848, "step": 19080 }, { "epoch": 334.83185840707966, "grad_norm": 1.0501519653871583e-08, "learning_rate": 0.1607820643228944, "loss": 0.0, "num_input_tokens_seen": 10839440, "step": 19085 }, { "epoch": 334.92035398230087, "grad_norm": 2.1693297824754154e-08, "learning_rate": 0.16072331100256285, "loss": 0.0, "num_input_tokens_seen": 10842144, "step": 19090 }, { "epoch": 335.0, "grad_norm": 1.0613177892082604e-08, "learning_rate": 0.16066455602856197, "loss": 0.0, "num_input_tokens_seen": 10844392, "step": 19095 }, { "epoch": 335.08849557522126, "grad_norm": 2.5091472011240512e-08, "learning_rate": 0.16060579940995257, "loss": 0.0, "num_input_tokens_seen": 10847320, "step": 19100 }, { "epoch": 335.17699115044246, "grad_norm": 3.494075073717795e-08, "learning_rate": 0.16054704115579557, "loss": 0.0, "num_input_tokens_seen": 10850632, "step": 19105 }, { "epoch": 335.2654867256637, "grad_norm": 4.471290537821915e-08, "learning_rate": 0.1604882812751523, "loss": 0.0, "num_input_tokens_seen": 10853352, "step": 19110 }, { "epoch": 335.353982300885, "grad_norm": 4.138549769550082e-08, "learning_rate": 0.16042951977708425, "loss": 0.0, "num_input_tokens_seen": 10855960, "step": 19115 }, { "epoch": 335.4424778761062, "grad_norm": 3.909099177690223e-08, "learning_rate": 0.16037075667065318, "loss": 0.0, "num_input_tokens_seen": 10858568, "step": 19120 }, { "epoch": 335.53097345132744, "grad_norm": 1.2282856509671092e-08, "learning_rate": 0.1603119919649211, "loss": 0.0, "num_input_tokens_seen": 10861432, "step": 19125 }, { "epoch": 335.6194690265487, "grad_norm": 2.1421820761702293e-08, "learning_rate": 0.16025322566895028, "loss": 0.0, "num_input_tokens_seen": 10864664, "step": 19130 }, { "epoch": 335.7079646017699, "grad_norm": 4.600947178801107e-09, "learning_rate": 0.16019445779180322, "loss": 0.0, "num_input_tokens_seen": 10867512, "step": 19135 }, { "epoch": 335.79646017699116, "grad_norm": 2.4813884280661114e-08, "learning_rate": 0.16013568834254271, "loss": 0.0, "num_input_tokens_seen": 10870888, "step": 19140 }, { "epoch": 335.88495575221236, "grad_norm": 1.515654624029139e-08, "learning_rate": 0.1600769173302316, "loss": 0.0, "num_input_tokens_seen": 10873768, "step": 19145 }, { "epoch": 335.9734513274336, "grad_norm": 5.063694175078126e-08, "learning_rate": 0.16001814476393322, "loss": 0.0, "num_input_tokens_seen": 10876248, "step": 19150 }, { "epoch": 336.05309734513276, "grad_norm": 2.102964913319738e-08, "learning_rate": 0.15995937065271104, "loss": 0.0, "num_input_tokens_seen": 10878952, "step": 19155 }, { "epoch": 336.14159292035396, "grad_norm": 1.4781268653507595e-08, "learning_rate": 0.15990059500562873, "loss": 0.0, "num_input_tokens_seen": 10881896, "step": 19160 }, { "epoch": 336.2300884955752, "grad_norm": 1.9366163783729462e-08, "learning_rate": 0.15984181783175025, "loss": 0.0, "num_input_tokens_seen": 10885224, "step": 19165 }, { "epoch": 336.3185840707965, "grad_norm": 1.1827260060215394e-08, "learning_rate": 0.1597830391401398, "loss": 0.0, "num_input_tokens_seen": 10888120, "step": 19170 }, { "epoch": 336.4070796460177, "grad_norm": 1.7759303361231105e-08, "learning_rate": 0.15972425893986178, "loss": 0.0, "num_input_tokens_seen": 10890808, "step": 19175 }, { "epoch": 336.49557522123894, "grad_norm": 2.9346018948217534e-08, "learning_rate": 0.15966547723998084, "loss": 0.0, "num_input_tokens_seen": 10893752, "step": 19180 }, { "epoch": 336.5840707964602, "grad_norm": 3.681959626078424e-08, "learning_rate": 0.15960669404956176, "loss": 0.0, "num_input_tokens_seen": 10896536, "step": 19185 }, { "epoch": 336.6725663716814, "grad_norm": 1.6344456454930878e-08, "learning_rate": 0.1595479093776698, "loss": 0.0, "num_input_tokens_seen": 10899192, "step": 19190 }, { "epoch": 336.76106194690266, "grad_norm": 8.803603179785568e-09, "learning_rate": 0.15948912323337022, "loss": 0.0, "num_input_tokens_seen": 10902008, "step": 19195 }, { "epoch": 336.8495575221239, "grad_norm": 1.953277362076733e-08, "learning_rate": 0.1594303356257286, "loss": 0.0, "num_input_tokens_seen": 10904600, "step": 19200 }, { "epoch": 336.8495575221239, "eval_loss": 0.6139432787895203, "eval_runtime": 0.9369, "eval_samples_per_second": 26.683, "eval_steps_per_second": 13.875, "num_input_tokens_seen": 10904600, "step": 19200 }, { "epoch": 336.9380530973451, "grad_norm": 1.0277122264312766e-08, "learning_rate": 0.15937154656381072, "loss": 0.0, "num_input_tokens_seen": 10907928, "step": 19205 }, { "epoch": 337.01769911504425, "grad_norm": 1.3423130162948382e-08, "learning_rate": 0.15931275605668258, "loss": 0.0, "num_input_tokens_seen": 10910000, "step": 19210 }, { "epoch": 337.1061946902655, "grad_norm": 2.5080714394221104e-08, "learning_rate": 0.1592539641134104, "loss": 0.0, "num_input_tokens_seen": 10912960, "step": 19215 }, { "epoch": 337.1946902654867, "grad_norm": 3.823129191005137e-08, "learning_rate": 0.1591951707430607, "loss": 0.0, "num_input_tokens_seen": 10915760, "step": 19220 }, { "epoch": 337.283185840708, "grad_norm": 4.2422207968684233e-08, "learning_rate": 0.15913637595470007, "loss": 0.0, "num_input_tokens_seen": 10919184, "step": 19225 }, { "epoch": 337.37168141592923, "grad_norm": 1.2117606473793785e-08, "learning_rate": 0.15907757975739548, "loss": 0.0, "num_input_tokens_seen": 10922128, "step": 19230 }, { "epoch": 337.46017699115043, "grad_norm": 1.880126099251811e-08, "learning_rate": 0.159018782160214, "loss": 0.0, "num_input_tokens_seen": 10924864, "step": 19235 }, { "epoch": 337.5486725663717, "grad_norm": 2.8546976338361674e-08, "learning_rate": 0.158959983172223, "loss": 0.0, "num_input_tokens_seen": 10927936, "step": 19240 }, { "epoch": 337.6371681415929, "grad_norm": 1.688643713748661e-08, "learning_rate": 0.15890118280249, "loss": 0.0, "num_input_tokens_seen": 10930928, "step": 19245 }, { "epoch": 337.72566371681415, "grad_norm": 8.83623840763903e-09, "learning_rate": 0.15884238106008275, "loss": 0.0, "num_input_tokens_seen": 10933792, "step": 19250 }, { "epoch": 337.8141592920354, "grad_norm": 2.998285353328356e-08, "learning_rate": 0.15878357795406922, "loss": 0.0, "num_input_tokens_seen": 10936432, "step": 19255 }, { "epoch": 337.9026548672566, "grad_norm": 1.776307989587167e-08, "learning_rate": 0.15872477349351757, "loss": 0.0, "num_input_tokens_seen": 10939152, "step": 19260 }, { "epoch": 337.9911504424779, "grad_norm": 1.2952593664294909e-08, "learning_rate": 0.15866596768749622, "loss": 0.0, "num_input_tokens_seen": 10941792, "step": 19265 }, { "epoch": 338.070796460177, "grad_norm": 2.2288478618293084e-08, "learning_rate": 0.15860716054507373, "loss": 0.0, "num_input_tokens_seen": 10944112, "step": 19270 }, { "epoch": 338.1592920353982, "grad_norm": 3.8557551818030333e-08, "learning_rate": 0.1585483520753189, "loss": 0.0, "num_input_tokens_seen": 10947408, "step": 19275 }, { "epoch": 338.24778761061947, "grad_norm": 4.5522348557369696e-08, "learning_rate": 0.1584895422873008, "loss": 0.0, "num_input_tokens_seen": 10950048, "step": 19280 }, { "epoch": 338.3362831858407, "grad_norm": 1.1729605731147785e-08, "learning_rate": 0.1584307311900886, "loss": 0.0, "num_input_tokens_seen": 10953424, "step": 19285 }, { "epoch": 338.42477876106193, "grad_norm": 4.8105718519764196e-08, "learning_rate": 0.1583719187927517, "loss": 0.0, "num_input_tokens_seen": 10956016, "step": 19290 }, { "epoch": 338.5132743362832, "grad_norm": 1.662078652486798e-08, "learning_rate": 0.15831310510435967, "loss": 0.0, "num_input_tokens_seen": 10958960, "step": 19295 }, { "epoch": 338.60176991150445, "grad_norm": 2.2359538220939612e-08, "learning_rate": 0.15825429013398243, "loss": 0.0, "num_input_tokens_seen": 10961824, "step": 19300 }, { "epoch": 338.69026548672565, "grad_norm": 1.1256501508682959e-08, "learning_rate": 0.15819547389068986, "loss": 0.0, "num_input_tokens_seen": 10964368, "step": 19305 }, { "epoch": 338.7787610619469, "grad_norm": 2.226046902364942e-08, "learning_rate": 0.1581366563835522, "loss": 0.0, "num_input_tokens_seen": 10967520, "step": 19310 }, { "epoch": 338.86725663716817, "grad_norm": 3.578966811801365e-08, "learning_rate": 0.15807783762163993, "loss": 0.0, "num_input_tokens_seen": 10970288, "step": 19315 }, { "epoch": 338.95575221238937, "grad_norm": 1.7430570764531694e-08, "learning_rate": 0.15801901761402365, "loss": 0.0, "num_input_tokens_seen": 10973344, "step": 19320 }, { "epoch": 339.0353982300885, "grad_norm": 2.0082385532305125e-08, "learning_rate": 0.157960196369774, "loss": 0.0, "num_input_tokens_seen": 10975544, "step": 19325 }, { "epoch": 339.12389380530976, "grad_norm": 1.963833362594869e-08, "learning_rate": 0.157901373897962, "loss": 0.0, "num_input_tokens_seen": 10978328, "step": 19330 }, { "epoch": 339.21238938053096, "grad_norm": 2.0540603884455777e-08, "learning_rate": 0.15784255020765892, "loss": 0.0, "num_input_tokens_seen": 10980952, "step": 19335 }, { "epoch": 339.3008849557522, "grad_norm": 1.5691943744400305e-08, "learning_rate": 0.157783725307936, "loss": 0.0, "num_input_tokens_seen": 10983928, "step": 19340 }, { "epoch": 339.3893805309734, "grad_norm": 2.02158041417988e-08, "learning_rate": 0.15772489920786484, "loss": 0.0, "num_input_tokens_seen": 10987000, "step": 19345 }, { "epoch": 339.4778761061947, "grad_norm": 1.8629103593070795e-08, "learning_rate": 0.15766607191651713, "loss": 0.0, "num_input_tokens_seen": 10990520, "step": 19350 }, { "epoch": 339.56637168141594, "grad_norm": 2.3975617935434457e-08, "learning_rate": 0.1576072434429648, "loss": 0.0, "num_input_tokens_seen": 10993288, "step": 19355 }, { "epoch": 339.65486725663715, "grad_norm": 1.4793481994956892e-08, "learning_rate": 0.15754841379627998, "loss": 0.0, "num_input_tokens_seen": 10996008, "step": 19360 }, { "epoch": 339.7433628318584, "grad_norm": 9.334805817218239e-09, "learning_rate": 0.15748958298553484, "loss": 0.0, "num_input_tokens_seen": 10999176, "step": 19365 }, { "epoch": 339.83185840707966, "grad_norm": 7.770646348603805e-09, "learning_rate": 0.1574307510198019, "loss": 0.0, "num_input_tokens_seen": 11001864, "step": 19370 }, { "epoch": 339.92035398230087, "grad_norm": 2.655172082199897e-08, "learning_rate": 0.15737191790815375, "loss": 0.0, "num_input_tokens_seen": 11004408, "step": 19375 }, { "epoch": 340.0, "grad_norm": 2.5776570211633043e-08, "learning_rate": 0.15731308365966323, "loss": 0.0, "num_input_tokens_seen": 11006656, "step": 19380 }, { "epoch": 340.08849557522126, "grad_norm": 2.184219916046004e-08, "learning_rate": 0.15725424828340331, "loss": 0.0, "num_input_tokens_seen": 11009664, "step": 19385 }, { "epoch": 340.17699115044246, "grad_norm": 1.2979357144615733e-08, "learning_rate": 0.15719541178844715, "loss": 0.0, "num_input_tokens_seen": 11012592, "step": 19390 }, { "epoch": 340.2654867256637, "grad_norm": 2.179431035642665e-08, "learning_rate": 0.15713657418386806, "loss": 0.0, "num_input_tokens_seen": 11015328, "step": 19395 }, { "epoch": 340.353982300885, "grad_norm": 2.4013679933432286e-08, "learning_rate": 0.15707773547873957, "loss": 0.0, "num_input_tokens_seen": 11018112, "step": 19400 }, { "epoch": 340.353982300885, "eval_loss": 0.6213271617889404, "eval_runtime": 0.9383, "eval_samples_per_second": 26.644, "eval_steps_per_second": 13.855, "num_input_tokens_seen": 11018112, "step": 19400 }, { "epoch": 340.4424778761062, "grad_norm": 5.659961743731401e-08, "learning_rate": 0.1570188956821353, "loss": 0.0, "num_input_tokens_seen": 11021200, "step": 19405 }, { "epoch": 340.53097345132744, "grad_norm": 2.9240696974852654e-08, "learning_rate": 0.1569600548031291, "loss": 0.0, "num_input_tokens_seen": 11024320, "step": 19410 }, { "epoch": 340.6194690265487, "grad_norm": 2.9712312610286062e-08, "learning_rate": 0.156901212850795, "loss": 0.0, "num_input_tokens_seen": 11027104, "step": 19415 }, { "epoch": 340.7079646017699, "grad_norm": 1.9466442680027285e-08, "learning_rate": 0.15684236983420716, "loss": 0.0, "num_input_tokens_seen": 11029856, "step": 19420 }, { "epoch": 340.79646017699116, "grad_norm": 3.4599061393691954e-08, "learning_rate": 0.1567835257624399, "loss": 0.0, "num_input_tokens_seen": 11032640, "step": 19425 }, { "epoch": 340.88495575221236, "grad_norm": 2.9608376195255914e-08, "learning_rate": 0.1567246806445677, "loss": 0.0, "num_input_tokens_seen": 11035648, "step": 19430 }, { "epoch": 340.9734513274336, "grad_norm": 1.4636702516668265e-08, "learning_rate": 0.15666583448966526, "loss": 0.0, "num_input_tokens_seen": 11038416, "step": 19435 }, { "epoch": 341.05309734513276, "grad_norm": 2.1981334086262905e-08, "learning_rate": 0.1566069873068074, "loss": 0.0, "num_input_tokens_seen": 11040640, "step": 19440 }, { "epoch": 341.14159292035396, "grad_norm": 2.9149621383339763e-08, "learning_rate": 0.156548139105069, "loss": 0.0, "num_input_tokens_seen": 11043136, "step": 19445 }, { "epoch": 341.2300884955752, "grad_norm": 2.1244167314193874e-08, "learning_rate": 0.15648928989352529, "loss": 0.0, "num_input_tokens_seen": 11046432, "step": 19450 }, { "epoch": 341.3185840707965, "grad_norm": 1.7466796009557584e-08, "learning_rate": 0.15643043968125156, "loss": 0.0, "num_input_tokens_seen": 11049520, "step": 19455 }, { "epoch": 341.4070796460177, "grad_norm": 1.759700474224246e-08, "learning_rate": 0.15637158847732316, "loss": 0.0, "num_input_tokens_seen": 11052496, "step": 19460 }, { "epoch": 341.49557522123894, "grad_norm": 2.002167853731862e-08, "learning_rate": 0.15631273629081582, "loss": 0.0, "num_input_tokens_seen": 11055472, "step": 19465 }, { "epoch": 341.5840707964602, "grad_norm": 1.6389485324452835e-08, "learning_rate": 0.15625388313080518, "loss": 0.0, "num_input_tokens_seen": 11058000, "step": 19470 }, { "epoch": 341.6725663716814, "grad_norm": 1.341398636611757e-08, "learning_rate": 0.15619502900636714, "loss": 0.0, "num_input_tokens_seen": 11060640, "step": 19475 }, { "epoch": 341.76106194690266, "grad_norm": 1.9540433271458824e-08, "learning_rate": 0.15613617392657783, "loss": 0.0, "num_input_tokens_seen": 11064000, "step": 19480 }, { "epoch": 341.8495575221239, "grad_norm": 2.485287531328595e-08, "learning_rate": 0.15607731790051335, "loss": 0.0, "num_input_tokens_seen": 11067120, "step": 19485 }, { "epoch": 341.9380530973451, "grad_norm": 1.7041770661307964e-08, "learning_rate": 0.15601846093725008, "loss": 0.0, "num_input_tokens_seen": 11069984, "step": 19490 }, { "epoch": 342.01769911504425, "grad_norm": 2.1504101610503312e-08, "learning_rate": 0.1559596030458645, "loss": 0.0, "num_input_tokens_seen": 11072296, "step": 19495 }, { "epoch": 342.1061946902655, "grad_norm": 1.1361498408746229e-08, "learning_rate": 0.1559007442354333, "loss": 0.0, "num_input_tokens_seen": 11075384, "step": 19500 }, { "epoch": 342.1946902654867, "grad_norm": 2.0563241776017094e-08, "learning_rate": 0.15584188451503314, "loss": 0.0, "num_input_tokens_seen": 11078440, "step": 19505 }, { "epoch": 342.283185840708, "grad_norm": 2.5246661650157876e-08, "learning_rate": 0.15578302389374094, "loss": 0.0, "num_input_tokens_seen": 11081256, "step": 19510 }, { "epoch": 342.37168141592923, "grad_norm": 2.2373413344212167e-08, "learning_rate": 0.1557241623806338, "loss": 0.0, "num_input_tokens_seen": 11084200, "step": 19515 }, { "epoch": 342.46017699115043, "grad_norm": 1.2857360509599403e-08, "learning_rate": 0.15566529998478887, "loss": 0.0, "num_input_tokens_seen": 11087176, "step": 19520 }, { "epoch": 342.5486725663717, "grad_norm": 2.5074287535176154e-08, "learning_rate": 0.15560643671528354, "loss": 0.0, "num_input_tokens_seen": 11089848, "step": 19525 }, { "epoch": 342.6371681415929, "grad_norm": 1.3175473156934459e-08, "learning_rate": 0.15554757258119514, "loss": 0.0, "num_input_tokens_seen": 11092376, "step": 19530 }, { "epoch": 342.72566371681415, "grad_norm": 2.702178747426842e-08, "learning_rate": 0.1554887075916014, "loss": 0.0, "num_input_tokens_seen": 11095128, "step": 19535 }, { "epoch": 342.8141592920354, "grad_norm": 1.5285881005411284e-08, "learning_rate": 0.15542984175558, "loss": 0.0, "num_input_tokens_seen": 11098024, "step": 19540 }, { "epoch": 342.9026548672566, "grad_norm": 8.54390425075735e-09, "learning_rate": 0.1553709750822087, "loss": 0.0, "num_input_tokens_seen": 11100968, "step": 19545 }, { "epoch": 342.9911504424779, "grad_norm": 1.1288265433506695e-08, "learning_rate": 0.15531210758056554, "loss": 0.0, "num_input_tokens_seen": 11103816, "step": 19550 }, { "epoch": 343.070796460177, "grad_norm": 2.508608787366029e-08, "learning_rate": 0.15525323925972867, "loss": 0.0, "num_input_tokens_seen": 11106368, "step": 19555 }, { "epoch": 343.1592920353982, "grad_norm": 1.7419495179638034e-08, "learning_rate": 0.15519437012877627, "loss": 0.0, "num_input_tokens_seen": 11109088, "step": 19560 }, { "epoch": 343.24778761061947, "grad_norm": 3.9332448409368226e-08, "learning_rate": 0.15513550019678676, "loss": 0.0, "num_input_tokens_seen": 11111712, "step": 19565 }, { "epoch": 343.3362831858407, "grad_norm": 2.485559846832075e-08, "learning_rate": 0.15507662947283854, "loss": 0.0, "num_input_tokens_seen": 11114560, "step": 19570 }, { "epoch": 343.42477876106193, "grad_norm": 5.5668074594450445e-08, "learning_rate": 0.15501775796601028, "loss": 0.0, "num_input_tokens_seen": 11117568, "step": 19575 }, { "epoch": 343.5132743362832, "grad_norm": 1.198928689660761e-08, "learning_rate": 0.15495888568538066, "loss": 0.0, "num_input_tokens_seen": 11120320, "step": 19580 }, { "epoch": 343.60176991150445, "grad_norm": 2.1079257450651312e-08, "learning_rate": 0.1549000126400286, "loss": 0.0, "num_input_tokens_seen": 11123248, "step": 19585 }, { "epoch": 343.69026548672565, "grad_norm": 1.4345579835151057e-08, "learning_rate": 0.15484113883903294, "loss": 0.0, "num_input_tokens_seen": 11125840, "step": 19590 }, { "epoch": 343.7787610619469, "grad_norm": 1.3653644437283674e-08, "learning_rate": 0.15478226429147288, "loss": 0.0, "num_input_tokens_seen": 11128672, "step": 19595 }, { "epoch": 343.86725663716817, "grad_norm": 1.8581886251922697e-08, "learning_rate": 0.15472338900642757, "loss": 0.0, "num_input_tokens_seen": 11131712, "step": 19600 }, { "epoch": 343.86725663716817, "eval_loss": 0.6173127889633179, "eval_runtime": 0.9391, "eval_samples_per_second": 26.62, "eval_steps_per_second": 13.843, "num_input_tokens_seen": 11131712, "step": 19600 }, { "epoch": 343.95575221238937, "grad_norm": 1.731320686815252e-08, "learning_rate": 0.15466451299297632, "loss": 0.0, "num_input_tokens_seen": 11134928, "step": 19605 }, { "epoch": 344.0353982300885, "grad_norm": 1.7944948638159985e-08, "learning_rate": 0.15460563626019852, "loss": 0.0, "num_input_tokens_seen": 11137648, "step": 19610 }, { "epoch": 344.12389380530976, "grad_norm": 2.3812226856989582e-08, "learning_rate": 0.15454675881717375, "loss": 0.0, "num_input_tokens_seen": 11140272, "step": 19615 }, { "epoch": 344.21238938053096, "grad_norm": 3.3350289641020936e-08, "learning_rate": 0.1544878806729816, "loss": 0.0, "num_input_tokens_seen": 11142896, "step": 19620 }, { "epoch": 344.3008849557522, "grad_norm": 2.3984101815699432e-08, "learning_rate": 0.1544290018367019, "loss": 0.0, "num_input_tokens_seen": 11146208, "step": 19625 }, { "epoch": 344.3893805309734, "grad_norm": 1.4134702297496915e-08, "learning_rate": 0.15437012231741445, "loss": 0.0, "num_input_tokens_seen": 11149584, "step": 19630 }, { "epoch": 344.4778761061947, "grad_norm": 1.702203178410855e-08, "learning_rate": 0.1543112421241992, "loss": 0.0, "num_input_tokens_seen": 11152160, "step": 19635 }, { "epoch": 344.56637168141594, "grad_norm": 1.7721630385381104e-08, "learning_rate": 0.15425236126613626, "loss": 0.0, "num_input_tokens_seen": 11154768, "step": 19640 }, { "epoch": 344.65486725663715, "grad_norm": 1.619093481508571e-08, "learning_rate": 0.15419347975230577, "loss": 0.0, "num_input_tokens_seen": 11157456, "step": 19645 }, { "epoch": 344.7433628318584, "grad_norm": 9.433186676233163e-09, "learning_rate": 0.154134597591788, "loss": 0.0, "num_input_tokens_seen": 11160576, "step": 19650 }, { "epoch": 344.83185840707966, "grad_norm": 2.232643403488055e-08, "learning_rate": 0.1540757147936633, "loss": 0.0, "num_input_tokens_seen": 11163520, "step": 19655 }, { "epoch": 344.92035398230087, "grad_norm": 1.3529916742527348e-08, "learning_rate": 0.1540168313670122, "loss": 0.0, "num_input_tokens_seen": 11166160, "step": 19660 }, { "epoch": 345.0, "grad_norm": 9.568119452296742e-08, "learning_rate": 0.1539579473209152, "loss": 0.0, "num_input_tokens_seen": 11168448, "step": 19665 }, { "epoch": 345.08849557522126, "grad_norm": 1.3423012035218562e-08, "learning_rate": 0.15389906266445294, "loss": 0.0, "num_input_tokens_seen": 11170944, "step": 19670 }, { "epoch": 345.17699115044246, "grad_norm": 1.3072654958534713e-08, "learning_rate": 0.15384017740670627, "loss": 0.0, "num_input_tokens_seen": 11173616, "step": 19675 }, { "epoch": 345.2654867256637, "grad_norm": 2.2589210502133028e-08, "learning_rate": 0.15378129155675602, "loss": 0.0, "num_input_tokens_seen": 11176368, "step": 19680 }, { "epoch": 345.353982300885, "grad_norm": 2.5367063116732425e-08, "learning_rate": 0.15372240512368307, "loss": 0.0, "num_input_tokens_seen": 11179408, "step": 19685 }, { "epoch": 345.4424778761062, "grad_norm": 1.9143579166325253e-08, "learning_rate": 0.1536635181165684, "loss": 0.0, "num_input_tokens_seen": 11182304, "step": 19690 }, { "epoch": 345.53097345132744, "grad_norm": 2.174751401184949e-08, "learning_rate": 0.15360463054449328, "loss": 0.0, "num_input_tokens_seen": 11185616, "step": 19695 }, { "epoch": 345.6194690265487, "grad_norm": 2.037923430009414e-08, "learning_rate": 0.1535457424165388, "loss": 0.0, "num_input_tokens_seen": 11188656, "step": 19700 }, { "epoch": 345.7079646017699, "grad_norm": 1.679571504098476e-08, "learning_rate": 0.15348685374178628, "loss": 0.0, "num_input_tokens_seen": 11191712, "step": 19705 }, { "epoch": 345.79646017699116, "grad_norm": 8.979041510315255e-09, "learning_rate": 0.1534279645293171, "loss": 0.0, "num_input_tokens_seen": 11194480, "step": 19710 }, { "epoch": 345.88495575221236, "grad_norm": 1.1783996889391801e-08, "learning_rate": 0.1533690747882127, "loss": 0.0, "num_input_tokens_seen": 11197456, "step": 19715 }, { "epoch": 345.9734513274336, "grad_norm": 3.792064262597705e-08, "learning_rate": 0.15331018452755465, "loss": 0.0, "num_input_tokens_seen": 11200352, "step": 19720 }, { "epoch": 346.05309734513276, "grad_norm": 2.3152407990778556e-08, "learning_rate": 0.15325129375642457, "loss": 0.0, "num_input_tokens_seen": 11202480, "step": 19725 }, { "epoch": 346.14159292035396, "grad_norm": 2.0203787087780256e-08, "learning_rate": 0.15319240248390406, "loss": 0.0, "num_input_tokens_seen": 11204992, "step": 19730 }, { "epoch": 346.2300884955752, "grad_norm": 2.3706277829660394e-08, "learning_rate": 0.153133510719075, "loss": 0.0, "num_input_tokens_seen": 11208208, "step": 19735 }, { "epoch": 346.3185840707965, "grad_norm": 2.5576639473001705e-08, "learning_rate": 0.15307461847101922, "loss": 0.0, "num_input_tokens_seen": 11211472, "step": 19740 }, { "epoch": 346.4070796460177, "grad_norm": 2.164278534166897e-08, "learning_rate": 0.15301572574881864, "loss": 0.0, "num_input_tokens_seen": 11214624, "step": 19745 }, { "epoch": 346.49557522123894, "grad_norm": 1.7428677168140894e-08, "learning_rate": 0.15295683256155523, "loss": 0.0, "num_input_tokens_seen": 11217072, "step": 19750 }, { "epoch": 346.5840707964602, "grad_norm": 1.8294086245873586e-08, "learning_rate": 0.15289793891831113, "loss": 0.0, "num_input_tokens_seen": 11220288, "step": 19755 }, { "epoch": 346.6725663716814, "grad_norm": 1.531275195532089e-08, "learning_rate": 0.15283904482816837, "loss": 0.0, "num_input_tokens_seen": 11222928, "step": 19760 }, { "epoch": 346.76106194690266, "grad_norm": 3.167113504787267e-08, "learning_rate": 0.15278015030020928, "loss": 0.0, "num_input_tokens_seen": 11225856, "step": 19765 }, { "epoch": 346.8495575221239, "grad_norm": 1.8796445289126495e-08, "learning_rate": 0.152721255343516, "loss": 0.0, "num_input_tokens_seen": 11228656, "step": 19770 }, { "epoch": 346.9380530973451, "grad_norm": 3.922051572402552e-09, "learning_rate": 0.15266235996717098, "loss": 0.0, "num_input_tokens_seen": 11231744, "step": 19775 }, { "epoch": 347.01769911504425, "grad_norm": 1.8081482977549967e-08, "learning_rate": 0.15260346418025664, "loss": 0.0, "num_input_tokens_seen": 11233888, "step": 19780 }, { "epoch": 347.1061946902655, "grad_norm": 1.9048654209541382e-08, "learning_rate": 0.15254456799185537, "loss": 0.0, "num_input_tokens_seen": 11236544, "step": 19785 }, { "epoch": 347.1946902654867, "grad_norm": 2.109490537804959e-08, "learning_rate": 0.15248567141104974, "loss": 0.0, "num_input_tokens_seen": 11239392, "step": 19790 }, { "epoch": 347.283185840708, "grad_norm": 1.963394424819853e-08, "learning_rate": 0.15242677444692232, "loss": 0.0, "num_input_tokens_seen": 11242960, "step": 19795 }, { "epoch": 347.37168141592923, "grad_norm": 2.583110614295947e-08, "learning_rate": 0.15236787710855584, "loss": 0.0, "num_input_tokens_seen": 11245728, "step": 19800 }, { "epoch": 347.37168141592923, "eval_loss": 0.6093894243240356, "eval_runtime": 0.9347, "eval_samples_per_second": 26.748, "eval_steps_per_second": 13.909, "num_input_tokens_seen": 11245728, "step": 19800 }, { "epoch": 347.46017699115043, "grad_norm": 3.744959542473225e-08, "learning_rate": 0.1523089794050329, "loss": 0.0, "num_input_tokens_seen": 11248576, "step": 19805 }, { "epoch": 347.5486725663717, "grad_norm": 9.21286513744235e-09, "learning_rate": 0.15225008134543633, "loss": 0.0, "num_input_tokens_seen": 11251264, "step": 19810 }, { "epoch": 347.6371681415929, "grad_norm": 1.8884090735582504e-08, "learning_rate": 0.15219118293884895, "loss": 0.0, "num_input_tokens_seen": 11254272, "step": 19815 }, { "epoch": 347.72566371681415, "grad_norm": 4.1639449221975156e-08, "learning_rate": 0.15213228419435362, "loss": 0.0, "num_input_tokens_seen": 11257008, "step": 19820 }, { "epoch": 347.8141592920354, "grad_norm": 2.6339314729284524e-08, "learning_rate": 0.15207338512103327, "loss": 0.0, "num_input_tokens_seen": 11259760, "step": 19825 }, { "epoch": 347.9026548672566, "grad_norm": 2.030880175141192e-08, "learning_rate": 0.1520144857279709, "loss": 0.0, "num_input_tokens_seen": 11262688, "step": 19830 }, { "epoch": 347.9911504424779, "grad_norm": 2.2875488170370772e-08, "learning_rate": 0.1519555860242495, "loss": 0.0, "num_input_tokens_seen": 11265312, "step": 19835 }, { "epoch": 348.070796460177, "grad_norm": 3.7658036688981156e-08, "learning_rate": 0.15189668601895218, "loss": 0.0, "num_input_tokens_seen": 11267912, "step": 19840 }, { "epoch": 348.1592920353982, "grad_norm": 1.1165218083419859e-08, "learning_rate": 0.151837785721162, "loss": 0.0, "num_input_tokens_seen": 11270536, "step": 19845 }, { "epoch": 348.24778761061947, "grad_norm": 1.9594860845018047e-08, "learning_rate": 0.15177888513996218, "loss": 0.0, "num_input_tokens_seen": 11273512, "step": 19850 }, { "epoch": 348.3362831858407, "grad_norm": 2.6501448147087103e-08, "learning_rate": 0.15171998428443592, "loss": 0.0, "num_input_tokens_seen": 11276392, "step": 19855 }, { "epoch": 348.42477876106193, "grad_norm": 2.46508431445136e-08, "learning_rate": 0.1516610831636665, "loss": 0.0, "num_input_tokens_seen": 11279480, "step": 19860 }, { "epoch": 348.5132743362832, "grad_norm": 1.497066648425971e-08, "learning_rate": 0.15160218178673715, "loss": 0.0, "num_input_tokens_seen": 11282088, "step": 19865 }, { "epoch": 348.60176991150445, "grad_norm": 2.2987851622247035e-08, "learning_rate": 0.15154328016273122, "loss": 0.0, "num_input_tokens_seen": 11285400, "step": 19870 }, { "epoch": 348.69026548672565, "grad_norm": 1.2522995973540674e-08, "learning_rate": 0.1514843783007321, "loss": 0.0, "num_input_tokens_seen": 11288392, "step": 19875 }, { "epoch": 348.7787610619469, "grad_norm": 2.1935191441002644e-08, "learning_rate": 0.15142547620982322, "loss": 0.0, "num_input_tokens_seen": 11291192, "step": 19880 }, { "epoch": 348.86725663716817, "grad_norm": 1.0540124328883849e-08, "learning_rate": 0.15136657389908797, "loss": 0.0, "num_input_tokens_seen": 11293768, "step": 19885 }, { "epoch": 348.95575221238937, "grad_norm": 1.754111700336125e-08, "learning_rate": 0.15130767137760986, "loss": 0.0, "num_input_tokens_seen": 11296952, "step": 19890 }, { "epoch": 349.0353982300885, "grad_norm": 1.5903479422263445e-08, "learning_rate": 0.15124876865447243, "loss": 0.0, "num_input_tokens_seen": 11299280, "step": 19895 }, { "epoch": 349.12389380530976, "grad_norm": 2.12195612192545e-08, "learning_rate": 0.15118986573875912, "loss": 0.0, "num_input_tokens_seen": 11302400, "step": 19900 }, { "epoch": 349.21238938053096, "grad_norm": 1.8483973462934955e-08, "learning_rate": 0.15113096263955358, "loss": 0.0, "num_input_tokens_seen": 11305104, "step": 19905 }, { "epoch": 349.3008849557522, "grad_norm": 3.3185944658953304e-08, "learning_rate": 0.1510720593659394, "loss": 0.0, "num_input_tokens_seen": 11308352, "step": 19910 }, { "epoch": 349.3893805309734, "grad_norm": 2.1092908752962103e-08, "learning_rate": 0.15101315592700015, "loss": 0.0, "num_input_tokens_seen": 11311328, "step": 19915 }, { "epoch": 349.4778761061947, "grad_norm": 1.3774053897464e-08, "learning_rate": 0.15095425233181956, "loss": 0.0, "num_input_tokens_seen": 11313552, "step": 19920 }, { "epoch": 349.56637168141594, "grad_norm": 1.319958009560196e-08, "learning_rate": 0.15089534858948128, "loss": 0.0, "num_input_tokens_seen": 11316496, "step": 19925 }, { "epoch": 349.65486725663715, "grad_norm": 1.371769364766351e-08, "learning_rate": 0.15083644470906898, "loss": 0.0, "num_input_tokens_seen": 11318912, "step": 19930 }, { "epoch": 349.7433628318584, "grad_norm": 1.1879137673531659e-08, "learning_rate": 0.1507775406996664, "loss": 0.0, "num_input_tokens_seen": 11322416, "step": 19935 }, { "epoch": 349.83185840707966, "grad_norm": 7.435073001715864e-09, "learning_rate": 0.15071863657035725, "loss": 0.0, "num_input_tokens_seen": 11325248, "step": 19940 }, { "epoch": 349.92035398230087, "grad_norm": 1.2842073182639524e-08, "learning_rate": 0.15065973233022534, "loss": 0.0, "num_input_tokens_seen": 11327872, "step": 19945 }, { "epoch": 350.0, "grad_norm": 6.898752236139671e-09, "learning_rate": 0.15060082798835442, "loss": 0.0, "num_input_tokens_seen": 11330576, "step": 19950 }, { "epoch": 350.08849557522126, "grad_norm": 2.050330571989889e-08, "learning_rate": 0.15054192355382823, "loss": 0.0, "num_input_tokens_seen": 11333232, "step": 19955 }, { "epoch": 350.17699115044246, "grad_norm": 1.883869948926531e-08, "learning_rate": 0.15048301903573066, "loss": 0.0, "num_input_tokens_seen": 11335952, "step": 19960 }, { "epoch": 350.2654867256637, "grad_norm": 2.6548921283620075e-08, "learning_rate": 0.15042411444314546, "loss": 0.0, "num_input_tokens_seen": 11339136, "step": 19965 }, { "epoch": 350.353982300885, "grad_norm": 2.8345725766598662e-08, "learning_rate": 0.1503652097851565, "loss": 0.0, "num_input_tokens_seen": 11341648, "step": 19970 }, { "epoch": 350.4424778761062, "grad_norm": 1.2640960278531566e-08, "learning_rate": 0.15030630507084758, "loss": 0.0, "num_input_tokens_seen": 11344272, "step": 19975 }, { "epoch": 350.53097345132744, "grad_norm": 1.07394830806129e-08, "learning_rate": 0.1502474003093026, "loss": 0.0, "num_input_tokens_seen": 11346896, "step": 19980 }, { "epoch": 350.6194690265487, "grad_norm": 8.41992253697299e-09, "learning_rate": 0.15018849550960536, "loss": 0.0, "num_input_tokens_seen": 11349904, "step": 19985 }, { "epoch": 350.7079646017699, "grad_norm": 2.4330423897822584e-08, "learning_rate": 0.15012959068083975, "loss": 0.0, "num_input_tokens_seen": 11352720, "step": 19990 }, { "epoch": 350.79646017699116, "grad_norm": 1.13100506737851e-08, "learning_rate": 0.1500706858320896, "loss": 0.0, "num_input_tokens_seen": 11356320, "step": 19995 }, { "epoch": 350.88495575221236, "grad_norm": 1.6672924374461218e-08, "learning_rate": 0.15001178097243886, "loss": 0.0, "num_input_tokens_seen": 11358800, "step": 20000 }, { "epoch": 350.88495575221236, "eval_loss": 0.6228640675544739, "eval_runtime": 0.9433, "eval_samples_per_second": 26.501, "eval_steps_per_second": 13.781, "num_input_tokens_seen": 11358800, "step": 20000 }, { "epoch": 350.9734513274336, "grad_norm": 1.2977445784656538e-08, "learning_rate": 0.1499528761109713, "loss": 0.0, "num_input_tokens_seen": 11362048, "step": 20005 }, { "epoch": 351.05309734513276, "grad_norm": 2.023635126136014e-08, "learning_rate": 0.14989397125677087, "loss": 0.0, "num_input_tokens_seen": 11364464, "step": 20010 }, { "epoch": 351.14159292035396, "grad_norm": 5.89170490172819e-09, "learning_rate": 0.14983506641892141, "loss": 0.0, "num_input_tokens_seen": 11367328, "step": 20015 }, { "epoch": 351.2300884955752, "grad_norm": 3.4013510230579413e-08, "learning_rate": 0.14977616160650672, "loss": 0.0, "num_input_tokens_seen": 11370336, "step": 20020 }, { "epoch": 351.3185840707965, "grad_norm": 2.0559735247616118e-08, "learning_rate": 0.14971725682861076, "loss": 0.0, "num_input_tokens_seen": 11373200, "step": 20025 }, { "epoch": 351.4070796460177, "grad_norm": 1.4595137542983139e-08, "learning_rate": 0.14965835209431738, "loss": 0.0, "num_input_tokens_seen": 11375888, "step": 20030 }, { "epoch": 351.49557522123894, "grad_norm": 2.2873990701555158e-08, "learning_rate": 0.14959944741271036, "loss": 0.0, "num_input_tokens_seen": 11379152, "step": 20035 }, { "epoch": 351.5840707964602, "grad_norm": 3.106718438061762e-08, "learning_rate": 0.14954054279287363, "loss": 0.0, "num_input_tokens_seen": 11382288, "step": 20040 }, { "epoch": 351.6725663716814, "grad_norm": 2.885827754539605e-08, "learning_rate": 0.14948163824389094, "loss": 0.0, "num_input_tokens_seen": 11385344, "step": 20045 }, { "epoch": 351.76106194690266, "grad_norm": 1.5551080423392705e-08, "learning_rate": 0.14942273377484613, "loss": 0.0, "num_input_tokens_seen": 11387920, "step": 20050 }, { "epoch": 351.8495575221239, "grad_norm": 1.496023749325559e-08, "learning_rate": 0.1493638293948231, "loss": 0.0, "num_input_tokens_seen": 11390768, "step": 20055 }, { "epoch": 351.9380530973451, "grad_norm": 2.177559288440989e-08, "learning_rate": 0.14930492511290547, "loss": 0.0, "num_input_tokens_seen": 11393536, "step": 20060 }, { "epoch": 352.01769911504425, "grad_norm": 3.477894949810434e-08, "learning_rate": 0.14924602093817715, "loss": 0.0, "num_input_tokens_seen": 11396296, "step": 20065 }, { "epoch": 352.1061946902655, "grad_norm": 1.0943250750017341e-08, "learning_rate": 0.14918711687972194, "loss": 0.0, "num_input_tokens_seen": 11398824, "step": 20070 }, { "epoch": 352.1946902654867, "grad_norm": 1.57301709435842e-08, "learning_rate": 0.14912821294662346, "loss": 0.0, "num_input_tokens_seen": 11401512, "step": 20075 }, { "epoch": 352.283185840708, "grad_norm": 3.671521042747372e-08, "learning_rate": 0.14906930914796554, "loss": 0.0, "num_input_tokens_seen": 11404296, "step": 20080 }, { "epoch": 352.37168141592923, "grad_norm": 9.178738658022212e-09, "learning_rate": 0.14901040549283182, "loss": 0.0, "num_input_tokens_seen": 11407624, "step": 20085 }, { "epoch": 352.46017699115043, "grad_norm": 2.327243997513051e-08, "learning_rate": 0.148951501990306, "loss": 0.0, "num_input_tokens_seen": 11410984, "step": 20090 }, { "epoch": 352.5486725663717, "grad_norm": 2.5867796793477282e-08, "learning_rate": 0.14889259864947177, "loss": 0.0, "num_input_tokens_seen": 11413432, "step": 20095 }, { "epoch": 352.6371681415929, "grad_norm": 2.2985474856795918e-08, "learning_rate": 0.14883369547941272, "loss": 0.0, "num_input_tokens_seen": 11416440, "step": 20100 }, { "epoch": 352.72566371681415, "grad_norm": 1.8896523457101466e-08, "learning_rate": 0.14877479248921247, "loss": 0.0, "num_input_tokens_seen": 11419496, "step": 20105 }, { "epoch": 352.8141592920354, "grad_norm": 2.7942634872601957e-08, "learning_rate": 0.14871588968795468, "loss": 0.0, "num_input_tokens_seen": 11422424, "step": 20110 }, { "epoch": 352.9026548672566, "grad_norm": 1.1993463999715459e-08, "learning_rate": 0.1486569870847228, "loss": 0.0, "num_input_tokens_seen": 11425432, "step": 20115 }, { "epoch": 352.9911504424779, "grad_norm": 2.5993870167440036e-08, "learning_rate": 0.1485980846886004, "loss": 0.0, "num_input_tokens_seen": 11427976, "step": 20120 }, { "epoch": 353.070796460177, "grad_norm": 3.382660906936508e-08, "learning_rate": 0.14853918250867096, "loss": 0.0, "num_input_tokens_seen": 11430200, "step": 20125 }, { "epoch": 353.1592920353982, "grad_norm": 1.622153078528754e-08, "learning_rate": 0.1484802805540179, "loss": 0.0, "num_input_tokens_seen": 11432936, "step": 20130 }, { "epoch": 353.24778761061947, "grad_norm": 2.3705801766027434e-08, "learning_rate": 0.14842137883372472, "loss": 0.0, "num_input_tokens_seen": 11435432, "step": 20135 }, { "epoch": 353.3362831858407, "grad_norm": 1.5101049299914848e-08, "learning_rate": 0.14836247735687474, "loss": 0.0, "num_input_tokens_seen": 11438088, "step": 20140 }, { "epoch": 353.42477876106193, "grad_norm": 1.514597336438328e-08, "learning_rate": 0.14830357613255132, "loss": 0.0, "num_input_tokens_seen": 11441128, "step": 20145 }, { "epoch": 353.5132743362832, "grad_norm": 1.516309566795826e-08, "learning_rate": 0.1482446751698378, "loss": 0.0, "num_input_tokens_seen": 11444248, "step": 20150 }, { "epoch": 353.60176991150445, "grad_norm": 2.0525597221876524e-08, "learning_rate": 0.14818577447781744, "loss": 0.0, "num_input_tokens_seen": 11447304, "step": 20155 }, { "epoch": 353.69026548672565, "grad_norm": 1.3730550918467088e-08, "learning_rate": 0.14812687406557346, "loss": 0.0, "num_input_tokens_seen": 11450680, "step": 20160 }, { "epoch": 353.7787610619469, "grad_norm": 1.5615817972047807e-08, "learning_rate": 0.14806797394218899, "loss": 0.0, "num_input_tokens_seen": 11453080, "step": 20165 }, { "epoch": 353.86725663716817, "grad_norm": 1.3216395089443722e-08, "learning_rate": 0.1480090741167472, "loss": 0.0, "num_input_tokens_seen": 11456312, "step": 20170 }, { "epoch": 353.95575221238937, "grad_norm": 1.9768100045780557e-08, "learning_rate": 0.1479501745983313, "loss": 0.0, "num_input_tokens_seen": 11459368, "step": 20175 }, { "epoch": 354.0353982300885, "grad_norm": 1.852753150899389e-08, "learning_rate": 0.14789127539602415, "loss": 0.0, "num_input_tokens_seen": 11461496, "step": 20180 }, { "epoch": 354.12389380530976, "grad_norm": 1.1314675418816478e-08, "learning_rate": 0.14783237651890885, "loss": 0.0, "num_input_tokens_seen": 11464104, "step": 20185 }, { "epoch": 354.21238938053096, "grad_norm": 3.445992646788909e-08, "learning_rate": 0.14777347797606838, "loss": 0.0, "num_input_tokens_seen": 11466936, "step": 20190 }, { "epoch": 354.3008849557522, "grad_norm": 1.269147809068727e-08, "learning_rate": 0.14771457977658553, "loss": 0.0, "num_input_tokens_seen": 11469336, "step": 20195 }, { "epoch": 354.3893805309734, "grad_norm": 2.4480131699533558e-08, "learning_rate": 0.14765568192954326, "loss": 0.0, "num_input_tokens_seen": 11471832, "step": 20200 }, { "epoch": 354.3893805309734, "eval_loss": 0.6238388419151306, "eval_runtime": 0.93, "eval_samples_per_second": 26.883, "eval_steps_per_second": 13.979, "num_input_tokens_seen": 11471832, "step": 20200 }, { "epoch": 354.4778761061947, "grad_norm": 2.1439962694103087e-08, "learning_rate": 0.14759678444402421, "loss": 0.0, "num_input_tokens_seen": 11475144, "step": 20205 }, { "epoch": 354.56637168141594, "grad_norm": 2.163777956809554e-08, "learning_rate": 0.14753788732911122, "loss": 0.0, "num_input_tokens_seen": 11478120, "step": 20210 }, { "epoch": 354.65486725663715, "grad_norm": 9.657230570780939e-09, "learning_rate": 0.147478990593887, "loss": 0.0, "num_input_tokens_seen": 11481352, "step": 20215 }, { "epoch": 354.7433628318584, "grad_norm": 1.3311927560266668e-08, "learning_rate": 0.14742009424743405, "loss": 0.0, "num_input_tokens_seen": 11484456, "step": 20220 }, { "epoch": 354.83185840707966, "grad_norm": 1.3496157080794546e-08, "learning_rate": 0.14736119829883504, "loss": 0.0, "num_input_tokens_seen": 11487720, "step": 20225 }, { "epoch": 354.92035398230087, "grad_norm": 2.0098564590398382e-08, "learning_rate": 0.14730230275717243, "loss": 0.0, "num_input_tokens_seen": 11490536, "step": 20230 }, { "epoch": 355.0, "grad_norm": 9.99363347631288e-09, "learning_rate": 0.14724340763152854, "loss": 0.0, "num_input_tokens_seen": 11492832, "step": 20235 }, { "epoch": 355.08849557522126, "grad_norm": 9.158651614882274e-09, "learning_rate": 0.14718451293098594, "loss": 0.0, "num_input_tokens_seen": 11496256, "step": 20240 }, { "epoch": 355.17699115044246, "grad_norm": 2.4637675011263127e-08, "learning_rate": 0.14712561866462676, "loss": 0.0, "num_input_tokens_seen": 11499040, "step": 20245 }, { "epoch": 355.2654867256637, "grad_norm": 1.3754963390510966e-08, "learning_rate": 0.1470667248415333, "loss": 0.0, "num_input_tokens_seen": 11501456, "step": 20250 }, { "epoch": 355.353982300885, "grad_norm": 2.2510526775931794e-08, "learning_rate": 0.1470078314707878, "loss": 0.0, "num_input_tokens_seen": 11504528, "step": 20255 }, { "epoch": 355.4424778761062, "grad_norm": 7.898822040885989e-09, "learning_rate": 0.14694893856147223, "loss": 0.0, "num_input_tokens_seen": 11507200, "step": 20260 }, { "epoch": 355.53097345132744, "grad_norm": 1.7335350932512483e-08, "learning_rate": 0.14689004612266868, "loss": 0.0, "num_input_tokens_seen": 11510096, "step": 20265 }, { "epoch": 355.6194690265487, "grad_norm": 3.0791362348736584e-08, "learning_rate": 0.14683115416345913, "loss": 0.0, "num_input_tokens_seen": 11513056, "step": 20270 }, { "epoch": 355.7079646017699, "grad_norm": 9.169610848402954e-09, "learning_rate": 0.1467722626929254, "loss": 0.0, "num_input_tokens_seen": 11515824, "step": 20275 }, { "epoch": 355.79646017699116, "grad_norm": 1.8513620858584545e-08, "learning_rate": 0.14671337172014937, "loss": 0.0, "num_input_tokens_seen": 11519072, "step": 20280 }, { "epoch": 355.88495575221236, "grad_norm": 1.9270988360631236e-08, "learning_rate": 0.14665448125421265, "loss": 0.0, "num_input_tokens_seen": 11521392, "step": 20285 }, { "epoch": 355.9734513274336, "grad_norm": 3.6038255046833e-08, "learning_rate": 0.146595591304197, "loss": 0.0, "num_input_tokens_seen": 11524080, "step": 20290 }, { "epoch": 356.05309734513276, "grad_norm": 1.063880716856147e-08, "learning_rate": 0.14653670187918397, "loss": 0.0, "num_input_tokens_seen": 11527064, "step": 20295 }, { "epoch": 356.14159292035396, "grad_norm": 1.5941923336981745e-08, "learning_rate": 0.14647781298825502, "loss": 0.0, "num_input_tokens_seen": 11529688, "step": 20300 }, { "epoch": 356.2300884955752, "grad_norm": 1.2657390691117598e-08, "learning_rate": 0.14641892464049153, "loss": 0.0, "num_input_tokens_seen": 11532456, "step": 20305 }, { "epoch": 356.3185840707965, "grad_norm": 2.55356660261441e-08, "learning_rate": 0.14636003684497495, "loss": 0.0, "num_input_tokens_seen": 11535272, "step": 20310 }, { "epoch": 356.4070796460177, "grad_norm": 1.1450010717339865e-08, "learning_rate": 0.14630114961078636, "loss": 0.0, "num_input_tokens_seen": 11537848, "step": 20315 }, { "epoch": 356.49557522123894, "grad_norm": 2.804111076670779e-08, "learning_rate": 0.14624226294700704, "loss": 0.0, "num_input_tokens_seen": 11541256, "step": 20320 }, { "epoch": 356.5840707964602, "grad_norm": 2.2149244216507213e-08, "learning_rate": 0.14618337686271793, "loss": 0.0, "num_input_tokens_seen": 11544008, "step": 20325 }, { "epoch": 356.6725663716814, "grad_norm": 3.9670840834560295e-08, "learning_rate": 0.1461244913670001, "loss": 0.0, "num_input_tokens_seen": 11546904, "step": 20330 }, { "epoch": 356.76106194690266, "grad_norm": 2.5760337862834604e-08, "learning_rate": 0.1460656064689344, "loss": 0.0, "num_input_tokens_seen": 11550104, "step": 20335 }, { "epoch": 356.8495575221239, "grad_norm": 2.5297724803863275e-08, "learning_rate": 0.14600672217760163, "loss": 0.0, "num_input_tokens_seen": 11553176, "step": 20340 }, { "epoch": 356.9380530973451, "grad_norm": 2.0139134804253445e-08, "learning_rate": 0.14594783850208248, "loss": 0.0, "num_input_tokens_seen": 11556024, "step": 20345 }, { "epoch": 357.01769911504425, "grad_norm": 1.9212611945818026e-08, "learning_rate": 0.14588895545145758, "loss": 0.0, "num_input_tokens_seen": 11558240, "step": 20350 }, { "epoch": 357.1061946902655, "grad_norm": 2.1487098322836573e-08, "learning_rate": 0.14583007303480738, "loss": 0.0, "num_input_tokens_seen": 11561456, "step": 20355 }, { "epoch": 357.1946902654867, "grad_norm": 3.394022485281312e-08, "learning_rate": 0.14577119126121235, "loss": 0.0, "num_input_tokens_seen": 11563968, "step": 20360 }, { "epoch": 357.283185840708, "grad_norm": 1.5895770033580447e-08, "learning_rate": 0.14571231013975272, "loss": 0.0, "num_input_tokens_seen": 11566320, "step": 20365 }, { "epoch": 357.37168141592923, "grad_norm": 1.2906404833756824e-08, "learning_rate": 0.1456534296795088, "loss": 0.0, "num_input_tokens_seen": 11569376, "step": 20370 }, { "epoch": 357.46017699115043, "grad_norm": 1.385275183451995e-08, "learning_rate": 0.14559454988956066, "loss": 0.0, "num_input_tokens_seen": 11572320, "step": 20375 }, { "epoch": 357.5486725663717, "grad_norm": 1.1197165861176472e-08, "learning_rate": 0.1455356707789882, "loss": 0.0, "num_input_tokens_seen": 11575168, "step": 20380 }, { "epoch": 357.6371681415929, "grad_norm": 2.6624070059710903e-08, "learning_rate": 0.14547679235687147, "loss": 0.0, "num_input_tokens_seen": 11577792, "step": 20385 }, { "epoch": 357.72566371681415, "grad_norm": 2.4388182140455683e-08, "learning_rate": 0.14541791463229023, "loss": 0.0, "num_input_tokens_seen": 11580224, "step": 20390 }, { "epoch": 357.8141592920354, "grad_norm": 1.6642278666267885e-08, "learning_rate": 0.14535903761432406, "loss": 0.0, "num_input_tokens_seen": 11583024, "step": 20395 }, { "epoch": 357.9026548672566, "grad_norm": 1.5433137434683886e-08, "learning_rate": 0.1453001613120527, "loss": 0.0, "num_input_tokens_seen": 11586368, "step": 20400 }, { "epoch": 357.9026548672566, "eval_loss": 0.6087567210197449, "eval_runtime": 0.9178, "eval_samples_per_second": 27.239, "eval_steps_per_second": 14.164, "num_input_tokens_seen": 11586368, "step": 20400 }, { "epoch": 357.9911504424779, "grad_norm": 1.1285513856762464e-08, "learning_rate": 0.14524128573455547, "loss": 0.0, "num_input_tokens_seen": 11589504, "step": 20405 }, { "epoch": 358.070796460177, "grad_norm": 7.447683803007976e-09, "learning_rate": 0.14518241089091177, "loss": 0.0, "num_input_tokens_seen": 11592016, "step": 20410 }, { "epoch": 358.1592920353982, "grad_norm": 2.1202788857976884e-08, "learning_rate": 0.1451235367902009, "loss": 0.0, "num_input_tokens_seen": 11595216, "step": 20415 }, { "epoch": 358.24778761061947, "grad_norm": 1.1243178832387457e-08, "learning_rate": 0.1450646634415019, "loss": 0.0, "num_input_tokens_seen": 11597664, "step": 20420 }, { "epoch": 358.3362831858407, "grad_norm": 7.382387146037672e-09, "learning_rate": 0.1450057908538938, "loss": 0.0, "num_input_tokens_seen": 11600880, "step": 20425 }, { "epoch": 358.42477876106193, "grad_norm": 3.167939510717588e-08, "learning_rate": 0.14494691903645557, "loss": 0.0, "num_input_tokens_seen": 11603632, "step": 20430 }, { "epoch": 358.5132743362832, "grad_norm": 8.832552467197274e-09, "learning_rate": 0.14488804799826588, "loss": 0.0, "num_input_tokens_seen": 11606288, "step": 20435 }, { "epoch": 358.60176991150445, "grad_norm": 1.685761930048102e-08, "learning_rate": 0.14482917774840348, "loss": 0.0, "num_input_tokens_seen": 11609392, "step": 20440 }, { "epoch": 358.69026548672565, "grad_norm": 2.694374323652937e-08, "learning_rate": 0.14477030829594684, "loss": 0.0, "num_input_tokens_seen": 11612576, "step": 20445 }, { "epoch": 358.7787610619469, "grad_norm": 2.065279325336178e-08, "learning_rate": 0.14471143964997432, "loss": 0.0, "num_input_tokens_seen": 11615040, "step": 20450 }, { "epoch": 358.86725663716817, "grad_norm": 7.151824465978507e-09, "learning_rate": 0.14465257181956434, "loss": 0.0, "num_input_tokens_seen": 11618288, "step": 20455 }, { "epoch": 358.95575221238937, "grad_norm": 2.2720939796272432e-08, "learning_rate": 0.1445937048137949, "loss": 0.0, "num_input_tokens_seen": 11621008, "step": 20460 }, { "epoch": 359.0353982300885, "grad_norm": 1.068551824801034e-08, "learning_rate": 0.14453483864174416, "loss": 0.0, "num_input_tokens_seen": 11623472, "step": 20465 }, { "epoch": 359.12389380530976, "grad_norm": 1.9408155083056045e-08, "learning_rate": 0.14447597331249, "loss": 0.0, "num_input_tokens_seen": 11626176, "step": 20470 }, { "epoch": 359.21238938053096, "grad_norm": 1.6891400278495894e-08, "learning_rate": 0.1444171088351102, "loss": 0.0, "num_input_tokens_seen": 11629216, "step": 20475 }, { "epoch": 359.3008849557522, "grad_norm": 2.1678845385508794e-08, "learning_rate": 0.14435824521868235, "loss": 0.0, "num_input_tokens_seen": 11632352, "step": 20480 }, { "epoch": 359.3893805309734, "grad_norm": 1.0348856882558266e-08, "learning_rate": 0.14429938247228397, "loss": 0.0, "num_input_tokens_seen": 11635024, "step": 20485 }, { "epoch": 359.4778761061947, "grad_norm": 9.239399467730891e-09, "learning_rate": 0.14424052060499243, "loss": 0.0, "num_input_tokens_seen": 11637376, "step": 20490 }, { "epoch": 359.56637168141594, "grad_norm": 3.583798502404534e-08, "learning_rate": 0.14418165962588506, "loss": 0.0, "num_input_tokens_seen": 11640704, "step": 20495 }, { "epoch": 359.65486725663715, "grad_norm": 1.3518898889230968e-08, "learning_rate": 0.1441227995440388, "loss": 0.0, "num_input_tokens_seen": 11643760, "step": 20500 }, { "epoch": 359.7433628318584, "grad_norm": 2.2768530172356805e-08, "learning_rate": 0.14406394036853082, "loss": 0.0, "num_input_tokens_seen": 11646656, "step": 20505 }, { "epoch": 359.83185840707966, "grad_norm": 1.662072790509228e-08, "learning_rate": 0.14400508210843774, "loss": 0.0, "num_input_tokens_seen": 11649552, "step": 20510 }, { "epoch": 359.92035398230087, "grad_norm": 2.6292163113339484e-08, "learning_rate": 0.1439462247728364, "loss": 0.0, "num_input_tokens_seen": 11652464, "step": 20515 }, { "epoch": 360.0, "grad_norm": 9.84210579701994e-09, "learning_rate": 0.14388736837080326, "loss": 0.0, "num_input_tokens_seen": 11655312, "step": 20520 }, { "epoch": 360.08849557522126, "grad_norm": 1.759813628154916e-08, "learning_rate": 0.14382851291141469, "loss": 0.0, "num_input_tokens_seen": 11657760, "step": 20525 }, { "epoch": 360.17699115044246, "grad_norm": 9.746605300620104e-09, "learning_rate": 0.14376965840374697, "loss": 0.0, "num_input_tokens_seen": 11660656, "step": 20530 }, { "epoch": 360.2654867256637, "grad_norm": 6.28666407820333e-09, "learning_rate": 0.14371080485687632, "loss": 0.0, "num_input_tokens_seen": 11663344, "step": 20535 }, { "epoch": 360.353982300885, "grad_norm": 2.008805921605017e-08, "learning_rate": 0.1436519522798785, "loss": 0.0, "num_input_tokens_seen": 11666432, "step": 20540 }, { "epoch": 360.4424778761062, "grad_norm": 9.438396730843124e-09, "learning_rate": 0.14359310068182948, "loss": 0.0, "num_input_tokens_seen": 11669008, "step": 20545 }, { "epoch": 360.53097345132744, "grad_norm": 1.4786361468566156e-08, "learning_rate": 0.14353425007180484, "loss": 0.0, "num_input_tokens_seen": 11672096, "step": 20550 }, { "epoch": 360.6194690265487, "grad_norm": 2.2518527487136453e-08, "learning_rate": 0.14347540045888005, "loss": 0.0, "num_input_tokens_seen": 11675552, "step": 20555 }, { "epoch": 360.7079646017699, "grad_norm": 1.297989982163017e-08, "learning_rate": 0.14341655185213056, "loss": 0.0, "num_input_tokens_seen": 11678272, "step": 20560 }, { "epoch": 360.79646017699116, "grad_norm": 1.2962787288017807e-08, "learning_rate": 0.14335770426063144, "loss": 0.0, "num_input_tokens_seen": 11681472, "step": 20565 }, { "epoch": 360.88495575221236, "grad_norm": 2.027835854789828e-08, "learning_rate": 0.1432988576934578, "loss": 0.0, "num_input_tokens_seen": 11684272, "step": 20570 }, { "epoch": 360.9734513274336, "grad_norm": 3.5156059396967976e-08, "learning_rate": 0.14324001215968457, "loss": 0.0, "num_input_tokens_seen": 11687008, "step": 20575 }, { "epoch": 361.05309734513276, "grad_norm": 2.0488860386080887e-08, "learning_rate": 0.14318116766838637, "loss": 0.0, "num_input_tokens_seen": 11688944, "step": 20580 }, { "epoch": 361.14159292035396, "grad_norm": 8.345012680877062e-09, "learning_rate": 0.14312232422863788, "loss": 0.0, "num_input_tokens_seen": 11692240, "step": 20585 }, { "epoch": 361.2300884955752, "grad_norm": 2.1546897599478143e-08, "learning_rate": 0.14306348184951334, "loss": 0.0, "num_input_tokens_seen": 11694816, "step": 20590 }, { "epoch": 361.3185840707965, "grad_norm": 1.5939853881263844e-08, "learning_rate": 0.1430046405400871, "loss": 0.0, "num_input_tokens_seen": 11697648, "step": 20595 }, { "epoch": 361.4070796460177, "grad_norm": 3.3308676705701146e-08, "learning_rate": 0.14294580030943324, "loss": 0.0, "num_input_tokens_seen": 11700176, "step": 20600 }, { "epoch": 361.4070796460177, "eval_loss": 0.6196402311325073, "eval_runtime": 0.9291, "eval_samples_per_second": 26.907, "eval_steps_per_second": 13.992, "num_input_tokens_seen": 11700176, "step": 20600 }, { "epoch": 361.49557522123894, "grad_norm": 1.69791132265118e-08, "learning_rate": 0.14288696116662553, "loss": 0.0, "num_input_tokens_seen": 11703296, "step": 20605 }, { "epoch": 361.5840707964602, "grad_norm": 1.6609629227559708e-08, "learning_rate": 0.1428281231207378, "loss": 0.0, "num_input_tokens_seen": 11705904, "step": 20610 }, { "epoch": 361.6725663716814, "grad_norm": 1.3674884336012383e-08, "learning_rate": 0.1427692861808437, "loss": 0.0, "num_input_tokens_seen": 11708496, "step": 20615 }, { "epoch": 361.76106194690266, "grad_norm": 1.4731148745283917e-08, "learning_rate": 0.1427104503560165, "loss": 0.0, "num_input_tokens_seen": 11711728, "step": 20620 }, { "epoch": 361.8495575221239, "grad_norm": 2.2985393144381305e-08, "learning_rate": 0.14265161565532947, "loss": 0.0, "num_input_tokens_seen": 11714848, "step": 20625 }, { "epoch": 361.9380530973451, "grad_norm": 1.4958857263991376e-08, "learning_rate": 0.14259278208785564, "loss": 0.0, "num_input_tokens_seen": 11718352, "step": 20630 }, { "epoch": 362.01769911504425, "grad_norm": 7.226466092191686e-09, "learning_rate": 0.14253394966266789, "loss": 0.0, "num_input_tokens_seen": 11720688, "step": 20635 }, { "epoch": 362.1061946902655, "grad_norm": 3.1422491275634457e-08, "learning_rate": 0.14247511838883894, "loss": 0.0, "num_input_tokens_seen": 11723040, "step": 20640 }, { "epoch": 362.1946902654867, "grad_norm": 5.127037461249984e-09, "learning_rate": 0.14241628827544126, "loss": 0.0, "num_input_tokens_seen": 11725872, "step": 20645 }, { "epoch": 362.283185840708, "grad_norm": 2.8290012110687712e-08, "learning_rate": 0.14235745933154723, "loss": 0.0, "num_input_tokens_seen": 11728960, "step": 20650 }, { "epoch": 362.37168141592923, "grad_norm": 2.0837447323174274e-08, "learning_rate": 0.14229863156622907, "loss": 0.0, "num_input_tokens_seen": 11732144, "step": 20655 }, { "epoch": 362.46017699115043, "grad_norm": 1.6062312369058418e-08, "learning_rate": 0.14223980498855868, "loss": 0.0, "num_input_tokens_seen": 11735424, "step": 20660 }, { "epoch": 362.5486725663717, "grad_norm": 8.104629856120482e-09, "learning_rate": 0.14218097960760792, "loss": 0.0, "num_input_tokens_seen": 11738144, "step": 20665 }, { "epoch": 362.6371681415929, "grad_norm": 1.0961080043614402e-08, "learning_rate": 0.1421221554324483, "loss": 0.0, "num_input_tokens_seen": 11740528, "step": 20670 }, { "epoch": 362.72566371681415, "grad_norm": 1.638603563947072e-08, "learning_rate": 0.1420633324721513, "loss": 0.0, "num_input_tokens_seen": 11743344, "step": 20675 }, { "epoch": 362.8141592920354, "grad_norm": 4.821150412226416e-08, "learning_rate": 0.14200451073578824, "loss": 0.0, "num_input_tokens_seen": 11746624, "step": 20680 }, { "epoch": 362.9026548672566, "grad_norm": 8.894887493227088e-09, "learning_rate": 0.14194569023243003, "loss": 0.0, "num_input_tokens_seen": 11749920, "step": 20685 }, { "epoch": 362.9911504424779, "grad_norm": 6.4860552484447e-09, "learning_rate": 0.14188687097114766, "loss": 0.0, "num_input_tokens_seen": 11752432, "step": 20690 }, { "epoch": 363.070796460177, "grad_norm": 1.8422387171312948e-08, "learning_rate": 0.14182805296101172, "loss": 0.0, "num_input_tokens_seen": 11754384, "step": 20695 }, { "epoch": 363.1592920353982, "grad_norm": 1.2956578032685684e-08, "learning_rate": 0.14176923621109272, "loss": 0.0, "num_input_tokens_seen": 11756912, "step": 20700 }, { "epoch": 363.24778761061947, "grad_norm": 1.1690784340601112e-08, "learning_rate": 0.14171042073046097, "loss": 0.0, "num_input_tokens_seen": 11760048, "step": 20705 }, { "epoch": 363.3362831858407, "grad_norm": 1.245918124226364e-08, "learning_rate": 0.14165160652818642, "loss": 0.0, "num_input_tokens_seen": 11762960, "step": 20710 }, { "epoch": 363.42477876106193, "grad_norm": 1.4610498588751852e-08, "learning_rate": 0.14159279361333907, "loss": 0.0, "num_input_tokens_seen": 11766320, "step": 20715 }, { "epoch": 363.5132743362832, "grad_norm": 9.162421044095481e-09, "learning_rate": 0.14153398199498868, "loss": 0.0, "num_input_tokens_seen": 11769264, "step": 20720 }, { "epoch": 363.60176991150445, "grad_norm": 1.8742936092053242e-08, "learning_rate": 0.14147517168220458, "loss": 0.0, "num_input_tokens_seen": 11771776, "step": 20725 }, { "epoch": 363.69026548672565, "grad_norm": 1.258982518237417e-08, "learning_rate": 0.14141636268405616, "loss": 0.0, "num_input_tokens_seen": 11774432, "step": 20730 }, { "epoch": 363.7787610619469, "grad_norm": 1.524202986047385e-08, "learning_rate": 0.14135755500961253, "loss": 0.0, "num_input_tokens_seen": 11778000, "step": 20735 }, { "epoch": 363.86725663716817, "grad_norm": 1.4824502514443338e-08, "learning_rate": 0.14129874866794245, "loss": 0.0, "num_input_tokens_seen": 11781072, "step": 20740 }, { "epoch": 363.95575221238937, "grad_norm": 1.343240718654215e-08, "learning_rate": 0.14123994366811476, "loss": 0.0, "num_input_tokens_seen": 11783888, "step": 20745 }, { "epoch": 364.0353982300885, "grad_norm": 1.400528581996241e-08, "learning_rate": 0.14118114001919774, "loss": 0.0, "num_input_tokens_seen": 11786048, "step": 20750 }, { "epoch": 364.12389380530976, "grad_norm": 1.634225910152054e-08, "learning_rate": 0.14112233773025978, "loss": 0.0, "num_input_tokens_seen": 11788784, "step": 20755 }, { "epoch": 364.21238938053096, "grad_norm": 1.6255725654445996e-08, "learning_rate": 0.14106353681036896, "loss": 0.0, "num_input_tokens_seen": 11791712, "step": 20760 }, { "epoch": 364.3008849557522, "grad_norm": 2.4176024737698754e-08, "learning_rate": 0.14100473726859303, "loss": 0.0, "num_input_tokens_seen": 11794320, "step": 20765 }, { "epoch": 364.3893805309734, "grad_norm": 1.9456436461950943e-08, "learning_rate": 0.14094593911399964, "loss": 0.0, "num_input_tokens_seen": 11796768, "step": 20770 }, { "epoch": 364.4778761061947, "grad_norm": 1.141694117023917e-08, "learning_rate": 0.14088714235565625, "loss": 0.0, "num_input_tokens_seen": 11799904, "step": 20775 }, { "epoch": 364.56637168141594, "grad_norm": 2.160816059415538e-08, "learning_rate": 0.14082834700263, "loss": 0.0, "num_input_tokens_seen": 11802240, "step": 20780 }, { "epoch": 364.65486725663715, "grad_norm": 1.620752243525203e-08, "learning_rate": 0.14076955306398795, "loss": 0.0, "num_input_tokens_seen": 11805440, "step": 20785 }, { "epoch": 364.7433628318584, "grad_norm": 2.049148584148952e-08, "learning_rate": 0.14071076054879675, "loss": 0.0, "num_input_tokens_seen": 11808352, "step": 20790 }, { "epoch": 364.83185840707966, "grad_norm": 2.029076107135097e-08, "learning_rate": 0.14065196946612302, "loss": 0.0, "num_input_tokens_seen": 11811280, "step": 20795 }, { "epoch": 364.92035398230087, "grad_norm": 1.4978921214492402e-08, "learning_rate": 0.1405931798250331, "loss": 0.0, "num_input_tokens_seen": 11814304, "step": 20800 }, { "epoch": 364.92035398230087, "eval_loss": 0.6082650423049927, "eval_runtime": 0.9357, "eval_samples_per_second": 26.718, "eval_steps_per_second": 13.893, "num_input_tokens_seen": 11814304, "step": 20800 }, { "epoch": 365.0, "grad_norm": 3.75432840371559e-08, "learning_rate": 0.14053439163459308, "loss": 0.0, "num_input_tokens_seen": 11817184, "step": 20805 }, { "epoch": 365.08849557522126, "grad_norm": 1.164159524336128e-08, "learning_rate": 0.14047560490386876, "loss": 0.0, "num_input_tokens_seen": 11819888, "step": 20810 }, { "epoch": 365.17699115044246, "grad_norm": 1.2403885030209949e-08, "learning_rate": 0.14041681964192593, "loss": 0.0, "num_input_tokens_seen": 11822624, "step": 20815 }, { "epoch": 365.2654867256637, "grad_norm": 9.751704332927602e-09, "learning_rate": 0.14035803585782988, "loss": 0.0, "num_input_tokens_seen": 11825344, "step": 20820 }, { "epoch": 365.353982300885, "grad_norm": 2.8926244510785182e-08, "learning_rate": 0.14029925356064593, "loss": 0.0, "num_input_tokens_seen": 11828384, "step": 20825 }, { "epoch": 365.4424778761062, "grad_norm": 1.834217222551615e-08, "learning_rate": 0.1402404727594389, "loss": 0.0, "num_input_tokens_seen": 11831248, "step": 20830 }, { "epoch": 365.53097345132744, "grad_norm": 1.491800638575569e-08, "learning_rate": 0.1401816934632737, "loss": 0.0, "num_input_tokens_seen": 11834224, "step": 20835 }, { "epoch": 365.6194690265487, "grad_norm": 2.5070377773772634e-08, "learning_rate": 0.1401229156812147, "loss": 0.0, "num_input_tokens_seen": 11837504, "step": 20840 }, { "epoch": 365.7079646017699, "grad_norm": 7.69395036570586e-09, "learning_rate": 0.14006413942232626, "loss": 0.0, "num_input_tokens_seen": 11840288, "step": 20845 }, { "epoch": 365.79646017699116, "grad_norm": 1.3463584913608884e-08, "learning_rate": 0.14000536469567235, "loss": 0.0, "num_input_tokens_seen": 11842832, "step": 20850 }, { "epoch": 365.88495575221236, "grad_norm": 2.059219283978564e-08, "learning_rate": 0.13994659151031685, "loss": 0.0, "num_input_tokens_seen": 11846192, "step": 20855 }, { "epoch": 365.9734513274336, "grad_norm": 1.409799210705387e-08, "learning_rate": 0.13988781987532323, "loss": 0.0, "num_input_tokens_seen": 11849232, "step": 20860 }, { "epoch": 366.05309734513276, "grad_norm": 1.047312192525851e-08, "learning_rate": 0.1398290497997549, "loss": 0.0, "num_input_tokens_seen": 11851592, "step": 20865 }, { "epoch": 366.14159292035396, "grad_norm": 1.5370671846426376e-08, "learning_rate": 0.13977028129267488, "loss": 0.0, "num_input_tokens_seen": 11854664, "step": 20870 }, { "epoch": 366.2300884955752, "grad_norm": 1.2457318732117528e-08, "learning_rate": 0.13971151436314605, "loss": 0.0, "num_input_tokens_seen": 11857544, "step": 20875 }, { "epoch": 366.3185840707965, "grad_norm": 1.4188492158950794e-08, "learning_rate": 0.13965274902023103, "loss": 0.0, "num_input_tokens_seen": 11860536, "step": 20880 }, { "epoch": 366.4070796460177, "grad_norm": 2.4096118877992012e-08, "learning_rate": 0.13959398527299208, "loss": 0.0, "num_input_tokens_seen": 11863272, "step": 20885 }, { "epoch": 366.49557522123894, "grad_norm": 2.1936724436955046e-08, "learning_rate": 0.13953522313049138, "loss": 0.0, "num_input_tokens_seen": 11865944, "step": 20890 }, { "epoch": 366.5840707964602, "grad_norm": 2.3663115911176646e-08, "learning_rate": 0.13947646260179083, "loss": 0.0, "num_input_tokens_seen": 11868776, "step": 20895 }, { "epoch": 366.6725663716814, "grad_norm": 1.8187884975873203e-08, "learning_rate": 0.13941770369595194, "loss": 0.0, "num_input_tokens_seen": 11871816, "step": 20900 }, { "epoch": 366.76106194690266, "grad_norm": 2.0509236975385647e-08, "learning_rate": 0.1393589464220362, "loss": 0.0, "num_input_tokens_seen": 11874520, "step": 20905 }, { "epoch": 366.8495575221239, "grad_norm": 3.256969449694225e-08, "learning_rate": 0.13930019078910455, "loss": 0.0, "num_input_tokens_seen": 11877288, "step": 20910 }, { "epoch": 366.9380530973451, "grad_norm": 1.2754302503026338e-08, "learning_rate": 0.139241436806218, "loss": 0.0, "num_input_tokens_seen": 11880328, "step": 20915 }, { "epoch": 367.01769911504425, "grad_norm": 3.205655119131734e-08, "learning_rate": 0.13918268448243712, "loss": 0.0, "num_input_tokens_seen": 11882952, "step": 20920 }, { "epoch": 367.1061946902655, "grad_norm": 3.270785242648344e-08, "learning_rate": 0.13912393382682217, "loss": 0.0, "num_input_tokens_seen": 11885848, "step": 20925 }, { "epoch": 367.1946902654867, "grad_norm": 8.971103859778395e-09, "learning_rate": 0.1390651848484333, "loss": 0.0, "num_input_tokens_seen": 11888424, "step": 20930 }, { "epoch": 367.283185840708, "grad_norm": 2.079596228554692e-08, "learning_rate": 0.1390064375563304, "loss": 0.0, "num_input_tokens_seen": 11891224, "step": 20935 }, { "epoch": 367.37168141592923, "grad_norm": 1.870449750640546e-08, "learning_rate": 0.13894769195957293, "loss": 0.0, "num_input_tokens_seen": 11894264, "step": 20940 }, { "epoch": 367.46017699115043, "grad_norm": 1.5185953827767662e-08, "learning_rate": 0.13888894806722032, "loss": 0.0, "num_input_tokens_seen": 11896984, "step": 20945 }, { "epoch": 367.5486725663717, "grad_norm": 2.0985217119573463e-08, "learning_rate": 0.1388302058883315, "loss": 0.0, "num_input_tokens_seen": 11900184, "step": 20950 }, { "epoch": 367.6371681415929, "grad_norm": 2.3187835651583555e-08, "learning_rate": 0.13877146543196528, "loss": 0.0, "num_input_tokens_seen": 11903464, "step": 20955 }, { "epoch": 367.72566371681415, "grad_norm": 1.342171707108264e-08, "learning_rate": 0.13871272670718027, "loss": 0.0, "num_input_tokens_seen": 11906056, "step": 20960 }, { "epoch": 367.8141592920354, "grad_norm": 1.3380082819480776e-08, "learning_rate": 0.13865398972303455, "loss": 0.0, "num_input_tokens_seen": 11908888, "step": 20965 }, { "epoch": 367.9026548672566, "grad_norm": 1.1497662377735196e-08, "learning_rate": 0.13859525448858623, "loss": 0.0, "num_input_tokens_seen": 11911704, "step": 20970 }, { "epoch": 367.9911504424779, "grad_norm": 2.6607247960441782e-08, "learning_rate": 0.13853652101289304, "loss": 0.0, "num_input_tokens_seen": 11914248, "step": 20975 }, { "epoch": 368.070796460177, "grad_norm": 2.7784947675968397e-08, "learning_rate": 0.13847778930501234, "loss": 0.0, "num_input_tokens_seen": 11916344, "step": 20980 }, { "epoch": 368.1592920353982, "grad_norm": 1.4356341893062563e-08, "learning_rate": 0.1384190593740013, "loss": 0.0, "num_input_tokens_seen": 11919304, "step": 20985 }, { "epoch": 368.24778761061947, "grad_norm": 2.1583788978318807e-08, "learning_rate": 0.13836033122891686, "loss": 0.0, "num_input_tokens_seen": 11922376, "step": 20990 }, { "epoch": 368.3362831858407, "grad_norm": 4.6563082278794354e-09, "learning_rate": 0.1383016048788156, "loss": 0.0, "num_input_tokens_seen": 11924888, "step": 20995 }, { "epoch": 368.42477876106193, "grad_norm": 2.4134251930263417e-08, "learning_rate": 0.13824288033275392, "loss": 0.0, "num_input_tokens_seen": 11927464, "step": 21000 }, { "epoch": 368.42477876106193, "eval_loss": 0.6165170669555664, "eval_runtime": 0.9408, "eval_samples_per_second": 26.574, "eval_steps_per_second": 13.819, "num_input_tokens_seen": 11927464, "step": 21000 }, { "epoch": 368.5132743362832, "grad_norm": 1.5906845618474108e-08, "learning_rate": 0.1381841575997878, "loss": 0.0, "num_input_tokens_seen": 11930888, "step": 21005 }, { "epoch": 368.60176991150445, "grad_norm": 1.0400287742129422e-08, "learning_rate": 0.13812543668897306, "loss": 0.0, "num_input_tokens_seen": 11933976, "step": 21010 }, { "epoch": 368.69026548672565, "grad_norm": 7.253219358460683e-09, "learning_rate": 0.13806671760936526, "loss": 0.0, "num_input_tokens_seen": 11936936, "step": 21015 }, { "epoch": 368.7787610619469, "grad_norm": 2.7450271389284353e-08, "learning_rate": 0.13800800037001956, "loss": 0.0, "num_input_tokens_seen": 11939688, "step": 21020 }, { "epoch": 368.86725663716817, "grad_norm": 1.5099494987680373e-08, "learning_rate": 0.13794928497999087, "loss": 0.0, "num_input_tokens_seen": 11942616, "step": 21025 }, { "epoch": 368.95575221238937, "grad_norm": 1.4149569516064275e-08, "learning_rate": 0.1378905714483339, "loss": 0.0, "num_input_tokens_seen": 11945304, "step": 21030 }, { "epoch": 369.0353982300885, "grad_norm": 1.2661311110662155e-08, "learning_rate": 0.13783185978410295, "loss": 0.0, "num_input_tokens_seen": 11947520, "step": 21035 }, { "epoch": 369.12389380530976, "grad_norm": 1.2589754128100594e-08, "learning_rate": 0.13777314999635218, "loss": 0.0, "num_input_tokens_seen": 11951280, "step": 21040 }, { "epoch": 369.21238938053096, "grad_norm": 8.283661756536276e-09, "learning_rate": 0.1377144420941353, "loss": 0.0, "num_input_tokens_seen": 11954384, "step": 21045 }, { "epoch": 369.3008849557522, "grad_norm": 1.2869382892688463e-08, "learning_rate": 0.13765573608650586, "loss": 0.0, "num_input_tokens_seen": 11956880, "step": 21050 }, { "epoch": 369.3893805309734, "grad_norm": 2.7905651123205644e-08, "learning_rate": 0.13759703198251702, "loss": 0.0, "num_input_tokens_seen": 11959744, "step": 21055 }, { "epoch": 369.4778761061947, "grad_norm": 1.786570535955434e-08, "learning_rate": 0.13753832979122174, "loss": 0.0, "num_input_tokens_seen": 11962656, "step": 21060 }, { "epoch": 369.56637168141594, "grad_norm": 1.0682538409412246e-08, "learning_rate": 0.13747962952167264, "loss": 0.0, "num_input_tokens_seen": 11965648, "step": 21065 }, { "epoch": 369.65486725663715, "grad_norm": 2.3083101652332516e-08, "learning_rate": 0.13742093118292192, "loss": 0.0, "num_input_tokens_seen": 11968752, "step": 21070 }, { "epoch": 369.7433628318584, "grad_norm": 1.9371547921309684e-08, "learning_rate": 0.13736223478402174, "loss": 0.0, "num_input_tokens_seen": 11971440, "step": 21075 }, { "epoch": 369.83185840707966, "grad_norm": 2.1973098895955445e-08, "learning_rate": 0.1373035403340238, "loss": 0.0, "num_input_tokens_seen": 11974064, "step": 21080 }, { "epoch": 369.92035398230087, "grad_norm": 2.394252440751643e-08, "learning_rate": 0.13724484784197943, "loss": 0.0, "num_input_tokens_seen": 11976816, "step": 21085 }, { "epoch": 370.0, "grad_norm": 4.17807655139768e-09, "learning_rate": 0.13718615731693987, "loss": 0.0, "num_input_tokens_seen": 11978976, "step": 21090 }, { "epoch": 370.08849557522126, "grad_norm": 2.0835162928278805e-08, "learning_rate": 0.13712746876795587, "loss": 0.0, "num_input_tokens_seen": 11981376, "step": 21095 }, { "epoch": 370.17699115044246, "grad_norm": 2.188935432911876e-08, "learning_rate": 0.13706878220407792, "loss": 0.0, "num_input_tokens_seen": 11984576, "step": 21100 }, { "epoch": 370.2654867256637, "grad_norm": 1.5311197643086416e-08, "learning_rate": 0.13701009763435631, "loss": 0.0, "num_input_tokens_seen": 11987328, "step": 21105 }, { "epoch": 370.353982300885, "grad_norm": 1.0007565443004296e-08, "learning_rate": 0.13695141506784084, "loss": 0.0, "num_input_tokens_seen": 11990720, "step": 21110 }, { "epoch": 370.4424778761062, "grad_norm": 1.3415443866904297e-08, "learning_rate": 0.13689273451358114, "loss": 0.0, "num_input_tokens_seen": 11993440, "step": 21115 }, { "epoch": 370.53097345132744, "grad_norm": 2.2467157023697837e-08, "learning_rate": 0.13683405598062653, "loss": 0.0, "num_input_tokens_seen": 11996112, "step": 21120 }, { "epoch": 370.6194690265487, "grad_norm": 1.4291983596592672e-08, "learning_rate": 0.1367753794780259, "loss": 0.0, "num_input_tokens_seen": 11998688, "step": 21125 }, { "epoch": 370.7079646017699, "grad_norm": 2.5027432570823294e-08, "learning_rate": 0.13671670501482802, "loss": 0.0, "num_input_tokens_seen": 12001520, "step": 21130 }, { "epoch": 370.79646017699116, "grad_norm": 6.299257560016258e-09, "learning_rate": 0.1366580326000811, "loss": 0.0, "num_input_tokens_seen": 12004768, "step": 21135 }, { "epoch": 370.88495575221236, "grad_norm": 1.87700077702857e-08, "learning_rate": 0.1365993622428332, "loss": 0.0, "num_input_tokens_seen": 12007552, "step": 21140 }, { "epoch": 370.9734513274336, "grad_norm": 1.1979889080748762e-08, "learning_rate": 0.13654069395213211, "loss": 0.0, "num_input_tokens_seen": 12010352, "step": 21145 }, { "epoch": 371.05309734513276, "grad_norm": 2.019115896700896e-08, "learning_rate": 0.13648202773702509, "loss": 0.0, "num_input_tokens_seen": 12012648, "step": 21150 }, { "epoch": 371.14159292035396, "grad_norm": 1.9877182566574447e-08, "learning_rate": 0.13642336360655927, "loss": 0.0, "num_input_tokens_seen": 12016232, "step": 21155 }, { "epoch": 371.2300884955752, "grad_norm": 1.6089570564759015e-08, "learning_rate": 0.13636470156978145, "loss": 0.0, "num_input_tokens_seen": 12018872, "step": 21160 }, { "epoch": 371.3185840707965, "grad_norm": 1.3394274134270745e-08, "learning_rate": 0.13630604163573798, "loss": 0.0, "num_input_tokens_seen": 12021144, "step": 21165 }, { "epoch": 371.4070796460177, "grad_norm": 8.188746569715022e-09, "learning_rate": 0.13624738381347495, "loss": 0.0, "num_input_tokens_seen": 12023752, "step": 21170 }, { "epoch": 371.49557522123894, "grad_norm": 2.343839078378096e-08, "learning_rate": 0.1361887281120382, "loss": 0.0, "num_input_tokens_seen": 12026648, "step": 21175 }, { "epoch": 371.5840707964602, "grad_norm": 1.5552259924334066e-08, "learning_rate": 0.13613007454047307, "loss": 0.0, "num_input_tokens_seen": 12029368, "step": 21180 }, { "epoch": 371.6725663716814, "grad_norm": 2.2851432746051614e-08, "learning_rate": 0.13607142310782486, "loss": 0.0, "num_input_tokens_seen": 12032120, "step": 21185 }, { "epoch": 371.76106194690266, "grad_norm": 1.3518519637045756e-08, "learning_rate": 0.13601277382313814, "loss": 0.0, "num_input_tokens_seen": 12035000, "step": 21190 }, { "epoch": 371.8495575221239, "grad_norm": 2.3419456596229793e-08, "learning_rate": 0.1359541266954575, "loss": 0.0, "num_input_tokens_seen": 12038024, "step": 21195 }, { "epoch": 371.9380530973451, "grad_norm": 2.00007761463894e-08, "learning_rate": 0.13589548173382707, "loss": 0.0, "num_input_tokens_seen": 12041416, "step": 21200 }, { "epoch": 371.9380530973451, "eval_loss": 0.638322114944458, "eval_runtime": 0.9182, "eval_samples_per_second": 27.226, "eval_steps_per_second": 14.158, "num_input_tokens_seen": 12041416, "step": 21200 }, { "epoch": 372.01769911504425, "grad_norm": 2.171546142903935e-08, "learning_rate": 0.1358368389472906, "loss": 0.0, "num_input_tokens_seen": 12044088, "step": 21205 }, { "epoch": 372.1061946902655, "grad_norm": 1.4542909987369512e-08, "learning_rate": 0.13577819834489155, "loss": 0.0, "num_input_tokens_seen": 12046792, "step": 21210 }, { "epoch": 372.1946902654867, "grad_norm": 9.069131223782279e-09, "learning_rate": 0.135719559935673, "loss": 0.0, "num_input_tokens_seen": 12049480, "step": 21215 }, { "epoch": 372.283185840708, "grad_norm": 2.242401997420984e-08, "learning_rate": 0.13566092372867775, "loss": 0.0, "num_input_tokens_seen": 12052552, "step": 21220 }, { "epoch": 372.37168141592923, "grad_norm": 1.5746298487329113e-08, "learning_rate": 0.13560228973294833, "loss": 0.0, "num_input_tokens_seen": 12055480, "step": 21225 }, { "epoch": 372.46017699115043, "grad_norm": 1.7373873006931717e-08, "learning_rate": 0.13554365795752668, "loss": 0.0, "num_input_tokens_seen": 12058232, "step": 21230 }, { "epoch": 372.5486725663717, "grad_norm": 1.7579480982021778e-08, "learning_rate": 0.1354850284114547, "loss": 0.0, "num_input_tokens_seen": 12060808, "step": 21235 }, { "epoch": 372.6371681415929, "grad_norm": 2.5899092648273836e-08, "learning_rate": 0.13542640110377374, "loss": 0.0, "num_input_tokens_seen": 12063896, "step": 21240 }, { "epoch": 372.72566371681415, "grad_norm": 5.975762995547029e-09, "learning_rate": 0.13536777604352487, "loss": 0.0, "num_input_tokens_seen": 12066968, "step": 21245 }, { "epoch": 372.8141592920354, "grad_norm": 1.8985764071999256e-08, "learning_rate": 0.13530915323974887, "loss": 0.0, "num_input_tokens_seen": 12070200, "step": 21250 }, { "epoch": 372.9026548672566, "grad_norm": 2.3709528562676496e-08, "learning_rate": 0.13525053270148596, "loss": 0.0, "num_input_tokens_seen": 12072728, "step": 21255 }, { "epoch": 372.9911504424779, "grad_norm": 2.2514997866096564e-08, "learning_rate": 0.13519191443777628, "loss": 0.0, "num_input_tokens_seen": 12075480, "step": 21260 }, { "epoch": 373.070796460177, "grad_norm": 1.7400649809928836e-08, "learning_rate": 0.13513329845765953, "loss": 0.0, "num_input_tokens_seen": 12077736, "step": 21265 }, { "epoch": 373.1592920353982, "grad_norm": 5.443795814130681e-08, "learning_rate": 0.13507468477017495, "loss": 0.0, "num_input_tokens_seen": 12080472, "step": 21270 }, { "epoch": 373.24778761061947, "grad_norm": 1.7439512944861235e-08, "learning_rate": 0.13501607338436153, "loss": 0.0, "num_input_tokens_seen": 12083224, "step": 21275 }, { "epoch": 373.3362831858407, "grad_norm": 3.0176924070701716e-08, "learning_rate": 0.13495746430925798, "loss": 0.0, "num_input_tokens_seen": 12086216, "step": 21280 }, { "epoch": 373.42477876106193, "grad_norm": 2.4948381138756304e-08, "learning_rate": 0.13489885755390238, "loss": 0.0, "num_input_tokens_seen": 12088696, "step": 21285 }, { "epoch": 373.5132743362832, "grad_norm": 1.145363626164908e-08, "learning_rate": 0.13484025312733275, "loss": 0.0, "num_input_tokens_seen": 12091384, "step": 21290 }, { "epoch": 373.60176991150445, "grad_norm": 1.3110120988812923e-08, "learning_rate": 0.13478165103858658, "loss": 0.0, "num_input_tokens_seen": 12094376, "step": 21295 }, { "epoch": 373.69026548672565, "grad_norm": 1.1916478470652692e-08, "learning_rate": 0.13472305129670106, "loss": 0.0, "num_input_tokens_seen": 12097176, "step": 21300 }, { "epoch": 373.7787610619469, "grad_norm": 1.7910361194140023e-08, "learning_rate": 0.13466445391071305, "loss": 0.0, "num_input_tokens_seen": 12100488, "step": 21305 }, { "epoch": 373.86725663716817, "grad_norm": 1.3258038222829782e-08, "learning_rate": 0.13460585888965895, "loss": 0.0, "num_input_tokens_seen": 12103288, "step": 21310 }, { "epoch": 373.95575221238937, "grad_norm": 8.428695963402788e-09, "learning_rate": 0.13454726624257482, "loss": 0.0, "num_input_tokens_seen": 12106168, "step": 21315 }, { "epoch": 374.0353982300885, "grad_norm": 1.939895888369847e-08, "learning_rate": 0.1344886759784965, "loss": 0.0, "num_input_tokens_seen": 12108248, "step": 21320 }, { "epoch": 374.12389380530976, "grad_norm": 1.2268477789234566e-08, "learning_rate": 0.13443008810645923, "loss": 0.0, "num_input_tokens_seen": 12111720, "step": 21325 }, { "epoch": 374.21238938053096, "grad_norm": 1.4299653017246783e-08, "learning_rate": 0.13437150263549807, "loss": 0.0, "num_input_tokens_seen": 12115000, "step": 21330 }, { "epoch": 374.3008849557522, "grad_norm": 2.141700505831068e-08, "learning_rate": 0.13431291957464755, "loss": 0.0, "num_input_tokens_seen": 12117720, "step": 21335 }, { "epoch": 374.3893805309734, "grad_norm": 9.425385805172937e-09, "learning_rate": 0.13425433893294197, "loss": 0.0, "num_input_tokens_seen": 12120152, "step": 21340 }, { "epoch": 374.4778761061947, "grad_norm": 8.487467617612765e-09, "learning_rate": 0.13419576071941525, "loss": 0.0, "num_input_tokens_seen": 12122616, "step": 21345 }, { "epoch": 374.56637168141594, "grad_norm": 1.8586508332418816e-08, "learning_rate": 0.1341371849431008, "loss": 0.0, "num_input_tokens_seen": 12125672, "step": 21350 }, { "epoch": 374.65486725663715, "grad_norm": 2.0426911717663643e-08, "learning_rate": 0.13407861161303178, "loss": 0.0, "num_input_tokens_seen": 12128456, "step": 21355 }, { "epoch": 374.7433628318584, "grad_norm": 1.650841596756436e-08, "learning_rate": 0.13402004073824098, "loss": 0.0, "num_input_tokens_seen": 12131288, "step": 21360 }, { "epoch": 374.83185840707966, "grad_norm": 9.593998484547228e-09, "learning_rate": 0.13396147232776062, "loss": 0.0, "num_input_tokens_seen": 12133960, "step": 21365 }, { "epoch": 374.92035398230087, "grad_norm": 2.178178171163836e-08, "learning_rate": 0.13390290639062288, "loss": 0.0, "num_input_tokens_seen": 12136904, "step": 21370 }, { "epoch": 375.0, "grad_norm": 5.793886259652936e-09, "learning_rate": 0.13384434293585917, "loss": 0.0, "num_input_tokens_seen": 12139256, "step": 21375 }, { "epoch": 375.08849557522126, "grad_norm": 1.2354564482564001e-08, "learning_rate": 0.13378578197250088, "loss": 0.0, "num_input_tokens_seen": 12142568, "step": 21380 }, { "epoch": 375.17699115044246, "grad_norm": 2.7288256987390014e-08, "learning_rate": 0.13372722350957872, "loss": 0.0, "num_input_tokens_seen": 12145368, "step": 21385 }, { "epoch": 375.2654867256637, "grad_norm": 6.562236087859219e-09, "learning_rate": 0.13366866755612322, "loss": 0.0, "num_input_tokens_seen": 12148312, "step": 21390 }, { "epoch": 375.353982300885, "grad_norm": 1.0230097657881743e-08, "learning_rate": 0.13361011412116436, "loss": 0.0, "num_input_tokens_seen": 12150776, "step": 21395 }, { "epoch": 375.4424778761062, "grad_norm": 1.4556003513632731e-08, "learning_rate": 0.13355156321373196, "loss": 0.0, "num_input_tokens_seen": 12153176, "step": 21400 }, { "epoch": 375.4424778761062, "eval_loss": 0.6363808512687683, "eval_runtime": 0.9402, "eval_samples_per_second": 26.59, "eval_steps_per_second": 13.827, "num_input_tokens_seen": 12153176, "step": 21400 }, { "epoch": 375.53097345132744, "grad_norm": 1.7307209887462705e-08, "learning_rate": 0.13349301484285514, "loss": 0.0, "num_input_tokens_seen": 12155576, "step": 21405 }, { "epoch": 375.6194690265487, "grad_norm": 2.8373651872470873e-08, "learning_rate": 0.13343446901756295, "loss": 0.0, "num_input_tokens_seen": 12158952, "step": 21410 }, { "epoch": 375.7079646017699, "grad_norm": 1.715056896500755e-08, "learning_rate": 0.13337592574688376, "loss": 0.0, "num_input_tokens_seen": 12162232, "step": 21415 }, { "epoch": 375.79646017699116, "grad_norm": 1.413116113013757e-08, "learning_rate": 0.13331738503984572, "loss": 0.0, "num_input_tokens_seen": 12165160, "step": 21420 }, { "epoch": 375.88495575221236, "grad_norm": 1.3669526843784752e-08, "learning_rate": 0.1332588469054766, "loss": 0.0, "num_input_tokens_seen": 12167704, "step": 21425 }, { "epoch": 375.9734513274336, "grad_norm": 1.7933281526438805e-08, "learning_rate": 0.1332003113528036, "loss": 0.0, "num_input_tokens_seen": 12170808, "step": 21430 }, { "epoch": 376.05309734513276, "grad_norm": 1.0285207352467296e-08, "learning_rate": 0.13314177839085373, "loss": 0.0, "num_input_tokens_seen": 12173112, "step": 21435 }, { "epoch": 376.14159292035396, "grad_norm": 1.235726365678147e-08, "learning_rate": 0.13308324802865354, "loss": 0.0, "num_input_tokens_seen": 12175672, "step": 21440 }, { "epoch": 376.2300884955752, "grad_norm": 1.0793367088979267e-08, "learning_rate": 0.13302472027522905, "loss": 0.0, "num_input_tokens_seen": 12178584, "step": 21445 }, { "epoch": 376.3185840707965, "grad_norm": 2.4013241173292954e-08, "learning_rate": 0.13296619513960606, "loss": 0.0, "num_input_tokens_seen": 12181048, "step": 21450 }, { "epoch": 376.4070796460177, "grad_norm": 1.1807909316985388e-08, "learning_rate": 0.1329076726308098, "loss": 0.0, "num_input_tokens_seen": 12183640, "step": 21455 }, { "epoch": 376.49557522123894, "grad_norm": 1.4625924471545204e-08, "learning_rate": 0.13284915275786519, "loss": 0.0, "num_input_tokens_seen": 12187160, "step": 21460 }, { "epoch": 376.5840707964602, "grad_norm": 1.1211493067264655e-08, "learning_rate": 0.1327906355297968, "loss": 0.0, "num_input_tokens_seen": 12190152, "step": 21465 }, { "epoch": 376.6725663716814, "grad_norm": 2.3921648661939798e-08, "learning_rate": 0.13273212095562867, "loss": 0.0, "num_input_tokens_seen": 12193528, "step": 21470 }, { "epoch": 376.76106194690266, "grad_norm": 2.7123686408003778e-08, "learning_rate": 0.13267360904438444, "loss": 0.0, "num_input_tokens_seen": 12196360, "step": 21475 }, { "epoch": 376.8495575221239, "grad_norm": 2.3246665037390812e-08, "learning_rate": 0.1326150998050875, "loss": 0.0, "num_input_tokens_seen": 12199016, "step": 21480 }, { "epoch": 376.9380530973451, "grad_norm": 1.3751527916383566e-08, "learning_rate": 0.1325565932467606, "loss": 0.0, "num_input_tokens_seen": 12202232, "step": 21485 }, { "epoch": 377.01769911504425, "grad_norm": 1.3110107666136628e-08, "learning_rate": 0.13249808937842628, "loss": 0.0, "num_input_tokens_seen": 12204616, "step": 21490 }, { "epoch": 377.1061946902655, "grad_norm": 1.4684764515493498e-08, "learning_rate": 0.1324395882091065, "loss": 0.0, "num_input_tokens_seen": 12207736, "step": 21495 }, { "epoch": 377.1946902654867, "grad_norm": 1.403149596512776e-08, "learning_rate": 0.13238108974782284, "loss": 0.0, "num_input_tokens_seen": 12210952, "step": 21500 }, { "epoch": 377.283185840708, "grad_norm": 1.7238258820384544e-08, "learning_rate": 0.13232259400359664, "loss": 0.0, "num_input_tokens_seen": 12214168, "step": 21505 }, { "epoch": 377.37168141592923, "grad_norm": 1.4966405004201988e-08, "learning_rate": 0.13226410098544852, "loss": 0.0, "num_input_tokens_seen": 12216920, "step": 21510 }, { "epoch": 377.46017699115043, "grad_norm": 1.6609872588446706e-08, "learning_rate": 0.13220561070239892, "loss": 0.0, "num_input_tokens_seen": 12219864, "step": 21515 }, { "epoch": 377.5486725663717, "grad_norm": 8.190641942462662e-09, "learning_rate": 0.13214712316346783, "loss": 0.0, "num_input_tokens_seen": 12223000, "step": 21520 }, { "epoch": 377.6371681415929, "grad_norm": 3.6088383836840876e-08, "learning_rate": 0.13208863837767465, "loss": 0.0, "num_input_tokens_seen": 12225368, "step": 21525 }, { "epoch": 377.72566371681415, "grad_norm": 3.074510956935228e-08, "learning_rate": 0.13203015635403856, "loss": 0.0, "num_input_tokens_seen": 12228056, "step": 21530 }, { "epoch": 377.8141592920354, "grad_norm": 1.28132358057087e-08, "learning_rate": 0.13197167710157817, "loss": 0.0, "num_input_tokens_seen": 12230952, "step": 21535 }, { "epoch": 377.9026548672566, "grad_norm": 2.937786724999114e-08, "learning_rate": 0.13191320062931167, "loss": 0.0, "num_input_tokens_seen": 12233944, "step": 21540 }, { "epoch": 377.9911504424779, "grad_norm": 1.872537680469577e-08, "learning_rate": 0.13185472694625702, "loss": 0.0, "num_input_tokens_seen": 12236552, "step": 21545 }, { "epoch": 378.070796460177, "grad_norm": 1.3543206556221321e-08, "learning_rate": 0.13179625606143142, "loss": 0.0, "num_input_tokens_seen": 12238784, "step": 21550 }, { "epoch": 378.1592920353982, "grad_norm": 2.1033050856544833e-08, "learning_rate": 0.13173778798385188, "loss": 0.0, "num_input_tokens_seen": 12241248, "step": 21555 }, { "epoch": 378.24778761061947, "grad_norm": 7.65906005284478e-09, "learning_rate": 0.13167932272253505, "loss": 0.0, "num_input_tokens_seen": 12244688, "step": 21560 }, { "epoch": 378.3362831858407, "grad_norm": 2.2610937122635733e-08, "learning_rate": 0.1316208602864968, "loss": 0.0, "num_input_tokens_seen": 12247472, "step": 21565 }, { "epoch": 378.42477876106193, "grad_norm": 1.4434825779119365e-08, "learning_rate": 0.13156240068475292, "loss": 0.0, "num_input_tokens_seen": 12250176, "step": 21570 }, { "epoch": 378.5132743362832, "grad_norm": 2.5421051930152316e-08, "learning_rate": 0.1315039439263185, "loss": 0.0, "num_input_tokens_seen": 12253104, "step": 21575 }, { "epoch": 378.60176991150445, "grad_norm": 2.615980676523577e-08, "learning_rate": 0.13144549002020833, "loss": 0.0, "num_input_tokens_seen": 12256016, "step": 21580 }, { "epoch": 378.69026548672565, "grad_norm": 4.778605600108676e-08, "learning_rate": 0.13138703897543688, "loss": 0.0, "num_input_tokens_seen": 12259088, "step": 21585 }, { "epoch": 378.7787610619469, "grad_norm": 1.6752553122501013e-08, "learning_rate": 0.1313285908010178, "loss": 0.0, "num_input_tokens_seen": 12261824, "step": 21590 }, { "epoch": 378.86725663716817, "grad_norm": 2.9572781556908012e-08, "learning_rate": 0.13127014550596475, "loss": 0.0, "num_input_tokens_seen": 12265008, "step": 21595 }, { "epoch": 378.95575221238937, "grad_norm": 1.7024936127540968e-08, "learning_rate": 0.1312117030992906, "loss": 0.0, "num_input_tokens_seen": 12267984, "step": 21600 }, { "epoch": 378.95575221238937, "eval_loss": 0.6360908150672913, "eval_runtime": 0.9394, "eval_samples_per_second": 26.613, "eval_steps_per_second": 13.839, "num_input_tokens_seen": 12267984, "step": 21600 }, { "epoch": 379.0353982300885, "grad_norm": 2.4077191795868202e-08, "learning_rate": 0.13115326359000795, "loss": 0.0, "num_input_tokens_seen": 12270560, "step": 21605 }, { "epoch": 379.12389380530976, "grad_norm": 2.1546009421058443e-08, "learning_rate": 0.13109482698712896, "loss": 0.0, "num_input_tokens_seen": 12273808, "step": 21610 }, { "epoch": 379.21238938053096, "grad_norm": 2.1392050797430784e-08, "learning_rate": 0.1310363932996651, "loss": 0.0, "num_input_tokens_seen": 12276736, "step": 21615 }, { "epoch": 379.3008849557522, "grad_norm": 8.689410080364723e-09, "learning_rate": 0.13097796253662775, "loss": 0.0, "num_input_tokens_seen": 12279600, "step": 21620 }, { "epoch": 379.3893805309734, "grad_norm": 3.103157908412868e-08, "learning_rate": 0.1309195347070277, "loss": 0.0, "num_input_tokens_seen": 12282304, "step": 21625 }, { "epoch": 379.4778761061947, "grad_norm": 2.063122650497462e-08, "learning_rate": 0.13086110981987506, "loss": 0.0, "num_input_tokens_seen": 12285040, "step": 21630 }, { "epoch": 379.56637168141594, "grad_norm": 1.059758059085425e-08, "learning_rate": 0.13080268788417987, "loss": 0.0, "num_input_tokens_seen": 12287648, "step": 21635 }, { "epoch": 379.65486725663715, "grad_norm": 1.220715528660321e-08, "learning_rate": 0.1307442689089515, "loss": 0.0, "num_input_tokens_seen": 12290288, "step": 21640 }, { "epoch": 379.7433628318584, "grad_norm": 2.5657818980562297e-08, "learning_rate": 0.13068585290319873, "loss": 0.0, "num_input_tokens_seen": 12293376, "step": 21645 }, { "epoch": 379.83185840707966, "grad_norm": 4.2601719485446665e-08, "learning_rate": 0.13062743987593026, "loss": 0.0, "num_input_tokens_seen": 12295840, "step": 21650 }, { "epoch": 379.92035398230087, "grad_norm": 1.530250592907123e-08, "learning_rate": 0.13056902983615395, "loss": 0.0, "num_input_tokens_seen": 12298528, "step": 21655 }, { "epoch": 380.0, "grad_norm": 5.174288730813714e-08, "learning_rate": 0.13051062279287742, "loss": 0.0, "num_input_tokens_seen": 12301360, "step": 21660 }, { "epoch": 380.08849557522126, "grad_norm": 1.237156688205232e-08, "learning_rate": 0.13045221875510782, "loss": 0.0, "num_input_tokens_seen": 12304800, "step": 21665 }, { "epoch": 380.17699115044246, "grad_norm": 4.118275498399271e-08, "learning_rate": 0.13039381773185174, "loss": 0.0, "num_input_tokens_seen": 12307664, "step": 21670 }, { "epoch": 380.2654867256637, "grad_norm": 2.3269141280479744e-08, "learning_rate": 0.1303354197321153, "loss": 0.0, "num_input_tokens_seen": 12310624, "step": 21675 }, { "epoch": 380.353982300885, "grad_norm": 6.980287015068143e-09, "learning_rate": 0.13027702476490433, "loss": 0.0, "num_input_tokens_seen": 12313376, "step": 21680 }, { "epoch": 380.4424778761062, "grad_norm": 1.6939646130254005e-08, "learning_rate": 0.1302186328392239, "loss": 0.0, "num_input_tokens_seen": 12316112, "step": 21685 }, { "epoch": 380.53097345132744, "grad_norm": 2.1661577420672984e-08, "learning_rate": 0.130160243964079, "loss": 0.0, "num_input_tokens_seen": 12318848, "step": 21690 }, { "epoch": 380.6194690265487, "grad_norm": 1.3422196687429278e-08, "learning_rate": 0.13010185814847372, "loss": 0.0, "num_input_tokens_seen": 12321536, "step": 21695 }, { "epoch": 380.7079646017699, "grad_norm": 4.966408173601167e-09, "learning_rate": 0.13004347540141192, "loss": 0.0, "num_input_tokens_seen": 12324336, "step": 21700 }, { "epoch": 380.79646017699116, "grad_norm": 5.923156720655243e-08, "learning_rate": 0.12998509573189712, "loss": 0.0, "num_input_tokens_seen": 12327280, "step": 21705 }, { "epoch": 380.88495575221236, "grad_norm": 1.3902946349730883e-08, "learning_rate": 0.12992671914893203, "loss": 0.0, "num_input_tokens_seen": 12330336, "step": 21710 }, { "epoch": 380.9734513274336, "grad_norm": 1.6472800012934385e-08, "learning_rate": 0.12986834566151909, "loss": 0.0, "num_input_tokens_seen": 12333232, "step": 21715 }, { "epoch": 381.05309734513276, "grad_norm": 8.530534501005604e-09, "learning_rate": 0.12980997527866028, "loss": 0.0, "num_input_tokens_seen": 12335352, "step": 21720 }, { "epoch": 381.14159292035396, "grad_norm": 2.3939721316423856e-08, "learning_rate": 0.12975160800935692, "loss": 0.0, "num_input_tokens_seen": 12338376, "step": 21725 }, { "epoch": 381.2300884955752, "grad_norm": 3.075766130677948e-08, "learning_rate": 0.12969324386261016, "loss": 0.0, "num_input_tokens_seen": 12341416, "step": 21730 }, { "epoch": 381.3185840707965, "grad_norm": 1.764943213800052e-08, "learning_rate": 0.12963488284742034, "loss": 0.0, "num_input_tokens_seen": 12344568, "step": 21735 }, { "epoch": 381.4070796460177, "grad_norm": 2.8743913915718622e-08, "learning_rate": 0.12957652497278752, "loss": 0.0, "num_input_tokens_seen": 12347592, "step": 21740 }, { "epoch": 381.49557522123894, "grad_norm": 1.731848264796554e-08, "learning_rate": 0.12951817024771117, "loss": 0.0, "num_input_tokens_seen": 12350808, "step": 21745 }, { "epoch": 381.5840707964602, "grad_norm": 1.4398705339147e-08, "learning_rate": 0.12945981868119041, "loss": 0.0, "num_input_tokens_seen": 12353304, "step": 21750 }, { "epoch": 381.6725663716814, "grad_norm": 1.8289671999127677e-08, "learning_rate": 0.12940147028222376, "loss": 0.0, "num_input_tokens_seen": 12356184, "step": 21755 }, { "epoch": 381.76106194690266, "grad_norm": 1.3997897951867344e-08, "learning_rate": 0.12934312505980916, "loss": 0.0, "num_input_tokens_seen": 12359080, "step": 21760 }, { "epoch": 381.8495575221239, "grad_norm": 2.4316523905554277e-08, "learning_rate": 0.1292847830229443, "loss": 0.0, "num_input_tokens_seen": 12361832, "step": 21765 }, { "epoch": 381.9380530973451, "grad_norm": 1.1131783494988667e-08, "learning_rate": 0.12922644418062626, "loss": 0.0, "num_input_tokens_seen": 12364424, "step": 21770 }, { "epoch": 382.01769911504425, "grad_norm": 4.7455838370069614e-08, "learning_rate": 0.1291681085418515, "loss": 0.0, "num_input_tokens_seen": 12366928, "step": 21775 }, { "epoch": 382.1061946902655, "grad_norm": 2.7103070010525698e-08, "learning_rate": 0.12910977611561628, "loss": 0.0, "num_input_tokens_seen": 12369616, "step": 21780 }, { "epoch": 382.1946902654867, "grad_norm": 1.4873815068483509e-08, "learning_rate": 0.1290514469109161, "loss": 0.0, "num_input_tokens_seen": 12372720, "step": 21785 }, { "epoch": 382.283185840708, "grad_norm": 1.6079168219107487e-08, "learning_rate": 0.128993120936746, "loss": 0.0, "num_input_tokens_seen": 12375616, "step": 21790 }, { "epoch": 382.37168141592923, "grad_norm": 7.752157138440907e-09, "learning_rate": 0.12893479820210071, "loss": 0.0, "num_input_tokens_seen": 12378528, "step": 21795 }, { "epoch": 382.46017699115043, "grad_norm": 5.334298780468316e-09, "learning_rate": 0.1288764787159742, "loss": 0.0, "num_input_tokens_seen": 12381424, "step": 21800 }, { "epoch": 382.46017699115043, "eval_loss": 0.6419970393180847, "eval_runtime": 0.9242, "eval_samples_per_second": 27.051, "eval_steps_per_second": 14.066, "num_input_tokens_seen": 12381424, "step": 21800 }, { "epoch": 382.5486725663717, "grad_norm": 1.9997443700958684e-08, "learning_rate": 0.1288181624873601, "loss": 0.0, "num_input_tokens_seen": 12384224, "step": 21805 }, { "epoch": 382.6371681415929, "grad_norm": 1.7583657196951208e-08, "learning_rate": 0.12875984952525163, "loss": 0.0, "num_input_tokens_seen": 12387072, "step": 21810 }, { "epoch": 382.72566371681415, "grad_norm": 6.896881288298573e-09, "learning_rate": 0.12870153983864122, "loss": 0.0, "num_input_tokens_seen": 12389856, "step": 21815 }, { "epoch": 382.8141592920354, "grad_norm": 1.2451110364963824e-08, "learning_rate": 0.12864323343652104, "loss": 0.0, "num_input_tokens_seen": 12392464, "step": 21820 }, { "epoch": 382.9026548672566, "grad_norm": 2.6352012127972557e-08, "learning_rate": 0.12858493032788268, "loss": 0.0, "num_input_tokens_seen": 12395520, "step": 21825 }, { "epoch": 382.9911504424779, "grad_norm": 2.041293889476492e-08, "learning_rate": 0.12852663052171714, "loss": 0.0, "num_input_tokens_seen": 12398432, "step": 21830 }, { "epoch": 383.070796460177, "grad_norm": 6.577066447022162e-09, "learning_rate": 0.12846833402701507, "loss": 0.0, "num_input_tokens_seen": 12400704, "step": 21835 }, { "epoch": 383.1592920353982, "grad_norm": 4.9643680277711155e-09, "learning_rate": 0.12841004085276642, "loss": 0.0, "num_input_tokens_seen": 12403776, "step": 21840 }, { "epoch": 383.24778761061947, "grad_norm": 9.945113177423082e-09, "learning_rate": 0.12835175100796076, "loss": 0.0, "num_input_tokens_seen": 12406576, "step": 21845 }, { "epoch": 383.3362831858407, "grad_norm": 1.7421944775719567e-08, "learning_rate": 0.12829346450158724, "loss": 0.0, "num_input_tokens_seen": 12409088, "step": 21850 }, { "epoch": 383.42477876106193, "grad_norm": 1.527706494641734e-08, "learning_rate": 0.12823518134263423, "loss": 0.0, "num_input_tokens_seen": 12411952, "step": 21855 }, { "epoch": 383.5132743362832, "grad_norm": 2.5963837302356296e-08, "learning_rate": 0.12817690154008973, "loss": 0.0, "num_input_tokens_seen": 12415440, "step": 21860 }, { "epoch": 383.60176991150445, "grad_norm": 1.0052848331554287e-08, "learning_rate": 0.12811862510294134, "loss": 0.0, "num_input_tokens_seen": 12418272, "step": 21865 }, { "epoch": 383.69026548672565, "grad_norm": 2.0349252949358743e-08, "learning_rate": 0.12806035204017585, "loss": 0.0, "num_input_tokens_seen": 12421120, "step": 21870 }, { "epoch": 383.7787610619469, "grad_norm": 2.2320968184885714e-08, "learning_rate": 0.12800208236077987, "loss": 0.0, "num_input_tokens_seen": 12424272, "step": 21875 }, { "epoch": 383.86725663716817, "grad_norm": 1.3297536405332266e-08, "learning_rate": 0.12794381607373917, "loss": 0.0, "num_input_tokens_seen": 12426848, "step": 21880 }, { "epoch": 383.95575221238937, "grad_norm": 6.117480300105171e-09, "learning_rate": 0.12788555318803924, "loss": 0.0, "num_input_tokens_seen": 12429424, "step": 21885 }, { "epoch": 384.0353982300885, "grad_norm": 1.9726492439531285e-08, "learning_rate": 0.1278272937126649, "loss": 0.0, "num_input_tokens_seen": 12431960, "step": 21890 }, { "epoch": 384.12389380530976, "grad_norm": 1.440385766215968e-08, "learning_rate": 0.1277690376566005, "loss": 0.0, "num_input_tokens_seen": 12434840, "step": 21895 }, { "epoch": 384.21238938053096, "grad_norm": 1.9955331609367022e-08, "learning_rate": 0.12771078502882985, "loss": 0.0, "num_input_tokens_seen": 12437944, "step": 21900 }, { "epoch": 384.3008849557522, "grad_norm": 1.2626526491033019e-08, "learning_rate": 0.12765253583833633, "loss": 0.0, "num_input_tokens_seen": 12440568, "step": 21905 }, { "epoch": 384.3893805309734, "grad_norm": 2.2999259385869664e-08, "learning_rate": 0.12759429009410256, "loss": 0.0, "num_input_tokens_seen": 12443912, "step": 21910 }, { "epoch": 384.4778761061947, "grad_norm": 1.8385636124662597e-08, "learning_rate": 0.12753604780511085, "loss": 0.0, "num_input_tokens_seen": 12447032, "step": 21915 }, { "epoch": 384.56637168141594, "grad_norm": 1.3263515619144073e-08, "learning_rate": 0.12747780898034283, "loss": 0.0, "num_input_tokens_seen": 12449544, "step": 21920 }, { "epoch": 384.65486725663715, "grad_norm": 2.5865070085728803e-08, "learning_rate": 0.12741957362877973, "loss": 0.0, "num_input_tokens_seen": 12452504, "step": 21925 }, { "epoch": 384.7433628318584, "grad_norm": 1.4113616053634814e-08, "learning_rate": 0.12736134175940214, "loss": 0.0, "num_input_tokens_seen": 12455560, "step": 21930 }, { "epoch": 384.83185840707966, "grad_norm": 1.2776934177338717e-08, "learning_rate": 0.12730311338119016, "loss": 0.0, "num_input_tokens_seen": 12458104, "step": 21935 }, { "epoch": 384.92035398230087, "grad_norm": 1.2596658827135343e-08, "learning_rate": 0.12724488850312327, "loss": 0.0, "num_input_tokens_seen": 12460680, "step": 21940 }, { "epoch": 385.0, "grad_norm": 1.0522375859522981e-08, "learning_rate": 0.1271866671341806, "loss": 0.0, "num_input_tokens_seen": 12462904, "step": 21945 }, { "epoch": 385.08849557522126, "grad_norm": 1.3311026059170672e-08, "learning_rate": 0.12712844928334047, "loss": 0.0, "num_input_tokens_seen": 12466184, "step": 21950 }, { "epoch": 385.17699115044246, "grad_norm": 1.5029657518539352e-08, "learning_rate": 0.12707023495958095, "loss": 0.0, "num_input_tokens_seen": 12468536, "step": 21955 }, { "epoch": 385.2654867256637, "grad_norm": 7.962212222878406e-09, "learning_rate": 0.12701202417187932, "loss": 0.0, "num_input_tokens_seen": 12471016, "step": 21960 }, { "epoch": 385.353982300885, "grad_norm": 1.0392604998799015e-08, "learning_rate": 0.12695381692921243, "loss": 0.0, "num_input_tokens_seen": 12474040, "step": 21965 }, { "epoch": 385.4424778761062, "grad_norm": 1.0190799315523691e-08, "learning_rate": 0.12689561324055665, "loss": 0.0, "num_input_tokens_seen": 12476904, "step": 21970 }, { "epoch": 385.53097345132744, "grad_norm": 1.842513341898666e-08, "learning_rate": 0.12683741311488758, "loss": 0.0, "num_input_tokens_seen": 12479768, "step": 21975 }, { "epoch": 385.6194690265487, "grad_norm": 2.778828545046963e-08, "learning_rate": 0.1267792165611805, "loss": 0.0, "num_input_tokens_seen": 12482552, "step": 21980 }, { "epoch": 385.7079646017699, "grad_norm": 2.5782131984897205e-08, "learning_rate": 0.1267210235884101, "loss": 0.0, "num_input_tokens_seen": 12485864, "step": 21985 }, { "epoch": 385.79646017699116, "grad_norm": 1.5259576713333445e-08, "learning_rate": 0.12666283420555033, "loss": 0.0, "num_input_tokens_seen": 12488568, "step": 21990 }, { "epoch": 385.88495575221236, "grad_norm": 1.3593719927484926e-08, "learning_rate": 0.12660464842157487, "loss": 0.0, "num_input_tokens_seen": 12491368, "step": 21995 }, { "epoch": 385.9734513274336, "grad_norm": 3.690900740593861e-08, "learning_rate": 0.1265464662454566, "loss": 0.0, "num_input_tokens_seen": 12494280, "step": 22000 }, { "epoch": 385.9734513274336, "eval_loss": 0.6455292105674744, "eval_runtime": 0.9384, "eval_samples_per_second": 26.64, "eval_steps_per_second": 13.853, "num_input_tokens_seen": 12494280, "step": 22000 }, { "epoch": 386.05309734513276, "grad_norm": 2.3234814960915173e-08, "learning_rate": 0.12648828768616793, "loss": 0.0, "num_input_tokens_seen": 12496928, "step": 22005 }, { "epoch": 386.14159292035396, "grad_norm": 1.5138942544012934e-08, "learning_rate": 0.12643011275268085, "loss": 0.0, "num_input_tokens_seen": 12499904, "step": 22010 }, { "epoch": 386.2300884955752, "grad_norm": 1.3861062519993084e-08, "learning_rate": 0.1263719414539665, "loss": 0.0, "num_input_tokens_seen": 12502752, "step": 22015 }, { "epoch": 386.3185840707965, "grad_norm": 1.421829942671593e-08, "learning_rate": 0.1263137737989957, "loss": 0.0, "num_input_tokens_seen": 12505552, "step": 22020 }, { "epoch": 386.4070796460177, "grad_norm": 4.025379851668731e-08, "learning_rate": 0.1262556097967387, "loss": 0.0, "num_input_tokens_seen": 12508000, "step": 22025 }, { "epoch": 386.49557522123894, "grad_norm": 1.812128225253673e-08, "learning_rate": 0.126197449456165, "loss": 0.0, "num_input_tokens_seen": 12510784, "step": 22030 }, { "epoch": 386.5840707964602, "grad_norm": 7.0404904128906765e-09, "learning_rate": 0.12613929278624378, "loss": 0.0, "num_input_tokens_seen": 12513920, "step": 22035 }, { "epoch": 386.6725663716814, "grad_norm": 1.991753961760878e-08, "learning_rate": 0.12608113979594343, "loss": 0.0, "num_input_tokens_seen": 12516448, "step": 22040 }, { "epoch": 386.76106194690266, "grad_norm": 2.2610192829120024e-08, "learning_rate": 0.1260229904942319, "loss": 0.0, "num_input_tokens_seen": 12519440, "step": 22045 }, { "epoch": 386.8495575221239, "grad_norm": 8.463902467781281e-09, "learning_rate": 0.12596484489007662, "loss": 0.0, "num_input_tokens_seen": 12522544, "step": 22050 }, { "epoch": 386.9380530973451, "grad_norm": 7.386858236202443e-09, "learning_rate": 0.1259067029924442, "loss": 0.0, "num_input_tokens_seen": 12525408, "step": 22055 }, { "epoch": 387.01769911504425, "grad_norm": 2.43399735921912e-08, "learning_rate": 0.12584856481030096, "loss": 0.0, "num_input_tokens_seen": 12527936, "step": 22060 }, { "epoch": 387.1061946902655, "grad_norm": 1.6605758546006655e-08, "learning_rate": 0.12579043035261261, "loss": 0.0, "num_input_tokens_seen": 12530976, "step": 22065 }, { "epoch": 387.1946902654867, "grad_norm": 2.0735178907216323e-08, "learning_rate": 0.1257322996283441, "loss": 0.0, "num_input_tokens_seen": 12533952, "step": 22070 }, { "epoch": 387.283185840708, "grad_norm": 1.9213350910263216e-08, "learning_rate": 0.12567417264645994, "loss": 0.0, "num_input_tokens_seen": 12536656, "step": 22075 }, { "epoch": 387.37168141592923, "grad_norm": 2.1179825893113957e-08, "learning_rate": 0.12561604941592408, "loss": 0.0, "num_input_tokens_seen": 12539104, "step": 22080 }, { "epoch": 387.46017699115043, "grad_norm": 1.5930703867184093e-08, "learning_rate": 0.12555792994569978, "loss": 0.0, "num_input_tokens_seen": 12541920, "step": 22085 }, { "epoch": 387.5486725663717, "grad_norm": 9.965130054467863e-09, "learning_rate": 0.1254998142447499, "loss": 0.0, "num_input_tokens_seen": 12545024, "step": 22090 }, { "epoch": 387.6371681415929, "grad_norm": 1.900011525890477e-08, "learning_rate": 0.1254417023220365, "loss": 0.0, "num_input_tokens_seen": 12547920, "step": 22095 }, { "epoch": 387.72566371681415, "grad_norm": 9.225914254784584e-09, "learning_rate": 0.12538359418652126, "loss": 0.0, "num_input_tokens_seen": 12550848, "step": 22100 }, { "epoch": 387.8141592920354, "grad_norm": 9.254468302799523e-09, "learning_rate": 0.12532548984716513, "loss": 0.0, "num_input_tokens_seen": 12553952, "step": 22105 }, { "epoch": 387.9026548672566, "grad_norm": 2.4100847539898496e-08, "learning_rate": 0.12526738931292855, "loss": 0.0, "num_input_tokens_seen": 12556880, "step": 22110 }, { "epoch": 387.9911504424779, "grad_norm": 1.742125732562272e-08, "learning_rate": 0.1252092925927714, "loss": 0.0, "num_input_tokens_seen": 12559952, "step": 22115 }, { "epoch": 388.070796460177, "grad_norm": 1.3089803019283863e-08, "learning_rate": 0.12515119969565278, "loss": 0.0, "num_input_tokens_seen": 12562832, "step": 22120 }, { "epoch": 388.1592920353982, "grad_norm": 1.2578012409392159e-08, "learning_rate": 0.12509311063053144, "loss": 0.0, "num_input_tokens_seen": 12566016, "step": 22125 }, { "epoch": 388.24778761061947, "grad_norm": 2.5503510414637276e-08, "learning_rate": 0.1250350254063655, "loss": 0.0, "num_input_tokens_seen": 12568544, "step": 22130 }, { "epoch": 388.3362831858407, "grad_norm": 6.92286583614532e-09, "learning_rate": 0.1249769440321123, "loss": 0.0, "num_input_tokens_seen": 12571184, "step": 22135 }, { "epoch": 388.42477876106193, "grad_norm": 1.763817536470924e-08, "learning_rate": 0.12491886651672884, "loss": 0.0, "num_input_tokens_seen": 12573904, "step": 22140 }, { "epoch": 388.5132743362832, "grad_norm": 1.7815311892377395e-08, "learning_rate": 0.12486079286917139, "loss": 0.0, "num_input_tokens_seen": 12576800, "step": 22145 }, { "epoch": 388.60176991150445, "grad_norm": 1.657811310451507e-08, "learning_rate": 0.12480272309839553, "loss": 0.0, "num_input_tokens_seen": 12579664, "step": 22150 }, { "epoch": 388.69026548672565, "grad_norm": 1.2017911998896125e-08, "learning_rate": 0.12474465721335648, "loss": 0.0, "num_input_tokens_seen": 12582288, "step": 22155 }, { "epoch": 388.7787610619469, "grad_norm": 2.3396788506602206e-08, "learning_rate": 0.12468659522300861, "loss": 0.0, "num_input_tokens_seen": 12585184, "step": 22160 }, { "epoch": 388.86725663716817, "grad_norm": 1.4254811553371383e-08, "learning_rate": 0.12462853713630584, "loss": 0.0, "num_input_tokens_seen": 12587952, "step": 22165 }, { "epoch": 388.95575221238937, "grad_norm": 7.74776331979865e-09, "learning_rate": 0.12457048296220156, "loss": 0.0, "num_input_tokens_seen": 12590960, "step": 22170 }, { "epoch": 389.0353982300885, "grad_norm": 1.2084876210849416e-08, "learning_rate": 0.12451243270964832, "loss": 0.0, "num_input_tokens_seen": 12593288, "step": 22175 }, { "epoch": 389.12389380530976, "grad_norm": 1.1579198933020507e-08, "learning_rate": 0.12445438638759827, "loss": 0.0, "num_input_tokens_seen": 12596200, "step": 22180 }, { "epoch": 389.21238938053096, "grad_norm": 1.934990478957843e-08, "learning_rate": 0.1243963440050029, "loss": 0.0, "num_input_tokens_seen": 12599256, "step": 22185 }, { "epoch": 389.3008849557522, "grad_norm": 1.4611671872444276e-08, "learning_rate": 0.12433830557081298, "loss": 0.0, "num_input_tokens_seen": 12601800, "step": 22190 }, { "epoch": 389.3893805309734, "grad_norm": 2.3437731755393543e-08, "learning_rate": 0.12428027109397889, "loss": 0.0, "num_input_tokens_seen": 12604984, "step": 22195 }, { "epoch": 389.4778761061947, "grad_norm": 8.249762650791581e-09, "learning_rate": 0.12422224058345015, "loss": 0.0, "num_input_tokens_seen": 12608008, "step": 22200 }, { "epoch": 389.4778761061947, "eval_loss": 0.6497951149940491, "eval_runtime": 0.9367, "eval_samples_per_second": 26.689, "eval_steps_per_second": 13.878, "num_input_tokens_seen": 12608008, "step": 22200 }, { "epoch": 389.56637168141594, "grad_norm": 2.2578566571951342e-08, "learning_rate": 0.12416421404817583, "loss": 0.0, "num_input_tokens_seen": 12610984, "step": 22205 }, { "epoch": 389.65486725663715, "grad_norm": 1.8115187572220748e-08, "learning_rate": 0.12410619149710447, "loss": 0.0, "num_input_tokens_seen": 12613400, "step": 22210 }, { "epoch": 389.7433628318584, "grad_norm": 1.897621437763064e-08, "learning_rate": 0.12404817293918374, "loss": 0.0, "num_input_tokens_seen": 12615992, "step": 22215 }, { "epoch": 389.83185840707966, "grad_norm": 2.4747663474045112e-08, "learning_rate": 0.12399015838336086, "loss": 0.0, "num_input_tokens_seen": 12619112, "step": 22220 }, { "epoch": 389.92035398230087, "grad_norm": 1.6079084730336035e-08, "learning_rate": 0.12393214783858246, "loss": 0.0, "num_input_tokens_seen": 12622136, "step": 22225 }, { "epoch": 390.0, "grad_norm": 2.5466855291256252e-08, "learning_rate": 0.1238741413137944, "loss": 0.0, "num_input_tokens_seen": 12624536, "step": 22230 }, { "epoch": 390.08849557522126, "grad_norm": 1.863835663584723e-08, "learning_rate": 0.12381613881794212, "loss": 0.0, "num_input_tokens_seen": 12627944, "step": 22235 }, { "epoch": 390.17699115044246, "grad_norm": 1.3968273648856666e-08, "learning_rate": 0.12375814035997022, "loss": 0.0, "num_input_tokens_seen": 12631048, "step": 22240 }, { "epoch": 390.2654867256637, "grad_norm": 1.1178486580831759e-08, "learning_rate": 0.12370014594882285, "loss": 0.0, "num_input_tokens_seen": 12633560, "step": 22245 }, { "epoch": 390.353982300885, "grad_norm": 1.7459278467413242e-08, "learning_rate": 0.12364215559344356, "loss": 0.0, "num_input_tokens_seen": 12636600, "step": 22250 }, { "epoch": 390.4424778761062, "grad_norm": 1.5260058106036922e-08, "learning_rate": 0.12358416930277506, "loss": 0.0, "num_input_tokens_seen": 12639320, "step": 22255 }, { "epoch": 390.53097345132744, "grad_norm": 1.3597761139294562e-08, "learning_rate": 0.1235261870857596, "loss": 0.0, "num_input_tokens_seen": 12642328, "step": 22260 }, { "epoch": 390.6194690265487, "grad_norm": 2.0572903380866592e-08, "learning_rate": 0.12346820895133884, "loss": 0.0, "num_input_tokens_seen": 12644936, "step": 22265 }, { "epoch": 390.7079646017699, "grad_norm": 7.003775781555532e-09, "learning_rate": 0.12341023490845361, "loss": 0.0, "num_input_tokens_seen": 12647432, "step": 22270 }, { "epoch": 390.79646017699116, "grad_norm": 2.5497255862205748e-08, "learning_rate": 0.12335226496604437, "loss": 0.0, "num_input_tokens_seen": 12650072, "step": 22275 }, { "epoch": 390.88495575221236, "grad_norm": 2.455562153613755e-08, "learning_rate": 0.12329429913305069, "loss": 0.0, "num_input_tokens_seen": 12652728, "step": 22280 }, { "epoch": 390.9734513274336, "grad_norm": 9.907348719195852e-09, "learning_rate": 0.12323633741841171, "loss": 0.0, "num_input_tokens_seen": 12656328, "step": 22285 }, { "epoch": 391.05309734513276, "grad_norm": 5.421924686999091e-09, "learning_rate": 0.12317837983106583, "loss": 0.0, "num_input_tokens_seen": 12658760, "step": 22290 }, { "epoch": 391.14159292035396, "grad_norm": 2.3390636982867363e-08, "learning_rate": 0.12312042637995087, "loss": 0.0, "num_input_tokens_seen": 12661448, "step": 22295 }, { "epoch": 391.2300884955752, "grad_norm": 2.296941126189722e-08, "learning_rate": 0.12306247707400389, "loss": 0.0, "num_input_tokens_seen": 12664920, "step": 22300 }, { "epoch": 391.3185840707965, "grad_norm": 1.3966984901969681e-08, "learning_rate": 0.12300453192216154, "loss": 0.0, "num_input_tokens_seen": 12667864, "step": 22305 }, { "epoch": 391.4070796460177, "grad_norm": 2.3648436098255843e-08, "learning_rate": 0.12294659093335956, "loss": 0.0, "num_input_tokens_seen": 12671160, "step": 22310 }, { "epoch": 391.49557522123894, "grad_norm": 7.777452459833967e-09, "learning_rate": 0.12288865411653327, "loss": 0.0, "num_input_tokens_seen": 12673672, "step": 22315 }, { "epoch": 391.5840707964602, "grad_norm": 2.966413070737417e-08, "learning_rate": 0.12283072148061717, "loss": 0.0, "num_input_tokens_seen": 12676552, "step": 22320 }, { "epoch": 391.6725663716814, "grad_norm": 1.828832729700025e-08, "learning_rate": 0.12277279303454529, "loss": 0.0, "num_input_tokens_seen": 12679160, "step": 22325 }, { "epoch": 391.76106194690266, "grad_norm": 3.6674300929462333e-08, "learning_rate": 0.12271486878725091, "loss": 0.0, "num_input_tokens_seen": 12681640, "step": 22330 }, { "epoch": 391.8495575221239, "grad_norm": 1.7966572229966005e-08, "learning_rate": 0.12265694874766658, "loss": 0.0, "num_input_tokens_seen": 12684328, "step": 22335 }, { "epoch": 391.9380530973451, "grad_norm": 2.835227874697921e-08, "learning_rate": 0.12259903292472435, "loss": 0.0, "num_input_tokens_seen": 12687224, "step": 22340 }, { "epoch": 392.01769911504425, "grad_norm": 1.3514561025829153e-08, "learning_rate": 0.12254112132735567, "loss": 0.0, "num_input_tokens_seen": 12689680, "step": 22345 }, { "epoch": 392.1061946902655, "grad_norm": 1.0831961105850496e-08, "learning_rate": 0.12248321396449108, "loss": 0.0, "num_input_tokens_seen": 12692400, "step": 22350 }, { "epoch": 392.1946902654867, "grad_norm": 1.1851597037093597e-08, "learning_rate": 0.12242531084506075, "loss": 0.0, "num_input_tokens_seen": 12695712, "step": 22355 }, { "epoch": 392.283185840708, "grad_norm": 1.562199969384892e-08, "learning_rate": 0.122367411977994, "loss": 0.0, "num_input_tokens_seen": 12698448, "step": 22360 }, { "epoch": 392.37168141592923, "grad_norm": 1.570048446808414e-08, "learning_rate": 0.12230951737221954, "loss": 0.0, "num_input_tokens_seen": 12701216, "step": 22365 }, { "epoch": 392.46017699115043, "grad_norm": 1.9046989763182864e-08, "learning_rate": 0.12225162703666555, "loss": 0.0, "num_input_tokens_seen": 12703984, "step": 22370 }, { "epoch": 392.5486725663717, "grad_norm": 9.888730723162098e-09, "learning_rate": 0.1221937409802593, "loss": 0.0, "num_input_tokens_seen": 12707440, "step": 22375 }, { "epoch": 392.6371681415929, "grad_norm": 1.0944146922042819e-08, "learning_rate": 0.12213585921192768, "loss": 0.0, "num_input_tokens_seen": 12710336, "step": 22380 }, { "epoch": 392.72566371681415, "grad_norm": 6.787971074118104e-09, "learning_rate": 0.1220779817405967, "loss": 0.0, "num_input_tokens_seen": 12713024, "step": 22385 }, { "epoch": 392.8141592920354, "grad_norm": 1.3164699552703496e-08, "learning_rate": 0.12202010857519181, "loss": 0.0, "num_input_tokens_seen": 12715952, "step": 22390 }, { "epoch": 392.9026548672566, "grad_norm": 5.5493267758777165e-09, "learning_rate": 0.12196223972463785, "loss": 0.0, "num_input_tokens_seen": 12718672, "step": 22395 }, { "epoch": 392.9911504424779, "grad_norm": 1.971590002369794e-08, "learning_rate": 0.12190437519785885, "loss": 0.0, "num_input_tokens_seen": 12721456, "step": 22400 }, { "epoch": 392.9911504424779, "eval_loss": 0.6347877979278564, "eval_runtime": 0.9435, "eval_samples_per_second": 26.497, "eval_steps_per_second": 13.778, "num_input_tokens_seen": 12721456, "step": 22400 }, { "epoch": 393.070796460177, "grad_norm": 6.805064067805233e-09, "learning_rate": 0.12184651500377823, "loss": 0.0, "num_input_tokens_seen": 12724032, "step": 22405 }, { "epoch": 393.1592920353982, "grad_norm": 1.1738399585681236e-08, "learning_rate": 0.12178865915131885, "loss": 0.0, "num_input_tokens_seen": 12727008, "step": 22410 }, { "epoch": 393.24778761061947, "grad_norm": 2.0762097818760594e-08, "learning_rate": 0.1217308076494027, "loss": 0.0, "num_input_tokens_seen": 12730048, "step": 22415 }, { "epoch": 393.3362831858407, "grad_norm": 3.953227079023236e-08, "learning_rate": 0.12167296050695134, "loss": 0.0, "num_input_tokens_seen": 12732960, "step": 22420 }, { "epoch": 393.42477876106193, "grad_norm": 5.054475504806533e-09, "learning_rate": 0.12161511773288536, "loss": 0.0, "num_input_tokens_seen": 12735632, "step": 22425 }, { "epoch": 393.5132743362832, "grad_norm": 1.0298231600813779e-08, "learning_rate": 0.121557279336125, "loss": 0.0, "num_input_tokens_seen": 12738240, "step": 22430 }, { "epoch": 393.60176991150445, "grad_norm": 2.23158593826156e-08, "learning_rate": 0.12149944532558957, "loss": 0.0, "num_input_tokens_seen": 12740544, "step": 22435 }, { "epoch": 393.69026548672565, "grad_norm": 1.9754546443095933e-08, "learning_rate": 0.12144161571019785, "loss": 0.0, "num_input_tokens_seen": 12743872, "step": 22440 }, { "epoch": 393.7787610619469, "grad_norm": 1.5650918783194356e-08, "learning_rate": 0.12138379049886781, "loss": 0.0, "num_input_tokens_seen": 12746496, "step": 22445 }, { "epoch": 393.86725663716817, "grad_norm": 2.7703826788183505e-08, "learning_rate": 0.12132596970051697, "loss": 0.0, "num_input_tokens_seen": 12749568, "step": 22450 }, { "epoch": 393.95575221238937, "grad_norm": 2.0582669790769614e-08, "learning_rate": 0.12126815332406189, "loss": 0.0, "num_input_tokens_seen": 12753008, "step": 22455 }, { "epoch": 394.0353982300885, "grad_norm": 1.161786933323583e-08, "learning_rate": 0.12121034137841868, "loss": 0.0, "num_input_tokens_seen": 12755224, "step": 22460 }, { "epoch": 394.12389380530976, "grad_norm": 2.8027114851170154e-08, "learning_rate": 0.12115253387250258, "loss": 0.0, "num_input_tokens_seen": 12757896, "step": 22465 }, { "epoch": 394.21238938053096, "grad_norm": 2.590366676713529e-08, "learning_rate": 0.12109473081522831, "loss": 0.0, "num_input_tokens_seen": 12760840, "step": 22470 }, { "epoch": 394.3008849557522, "grad_norm": 1.5732085856257072e-08, "learning_rate": 0.12103693221550982, "loss": 0.0, "num_input_tokens_seen": 12763656, "step": 22475 }, { "epoch": 394.3893805309734, "grad_norm": 2.304725477131342e-08, "learning_rate": 0.12097913808226027, "loss": 0.0, "num_input_tokens_seen": 12766712, "step": 22480 }, { "epoch": 394.4778761061947, "grad_norm": 3.9600998036348756e-08, "learning_rate": 0.12092134842439234, "loss": 0.0, "num_input_tokens_seen": 12769368, "step": 22485 }, { "epoch": 394.56637168141594, "grad_norm": 1.4522219871082598e-08, "learning_rate": 0.12086356325081798, "loss": 0.0, "num_input_tokens_seen": 12772568, "step": 22490 }, { "epoch": 394.65486725663715, "grad_norm": 1.1444687864070602e-08, "learning_rate": 0.12080578257044824, "loss": 0.0, "num_input_tokens_seen": 12775304, "step": 22495 }, { "epoch": 394.7433628318584, "grad_norm": 1.0336536071520186e-08, "learning_rate": 0.12074800639219378, "loss": 0.0, "num_input_tokens_seen": 12778184, "step": 22500 }, { "epoch": 394.83185840707966, "grad_norm": 2.841681379095462e-08, "learning_rate": 0.12069023472496428, "loss": 0.0, "num_input_tokens_seen": 12781112, "step": 22505 }, { "epoch": 394.92035398230087, "grad_norm": 1.45842928844786e-08, "learning_rate": 0.12063246757766893, "loss": 0.0, "num_input_tokens_seen": 12783848, "step": 22510 }, { "epoch": 395.0, "grad_norm": 3.797373082647937e-08, "learning_rate": 0.12057470495921618, "loss": 0.0, "num_input_tokens_seen": 12786624, "step": 22515 }, { "epoch": 395.08849557522126, "grad_norm": 1.4754335531108609e-08, "learning_rate": 0.12051694687851364, "loss": 0.0, "num_input_tokens_seen": 12789248, "step": 22520 }, { "epoch": 395.17699115044246, "grad_norm": 2.7719547546212198e-08, "learning_rate": 0.12045919334446839, "loss": 0.0, "num_input_tokens_seen": 12792016, "step": 22525 }, { "epoch": 395.2654867256637, "grad_norm": 2.43986786330197e-08, "learning_rate": 0.12040144436598683, "loss": 0.0, "num_input_tokens_seen": 12795568, "step": 22530 }, { "epoch": 395.353982300885, "grad_norm": 7.483545338970998e-09, "learning_rate": 0.12034369995197444, "loss": 0.0, "num_input_tokens_seen": 12798624, "step": 22535 }, { "epoch": 395.4424778761062, "grad_norm": 2.376306440510234e-08, "learning_rate": 0.12028596011133627, "loss": 0.0, "num_input_tokens_seen": 12801184, "step": 22540 }, { "epoch": 395.53097345132744, "grad_norm": 9.38770039482506e-09, "learning_rate": 0.12022822485297643, "loss": 0.0, "num_input_tokens_seen": 12803872, "step": 22545 }, { "epoch": 395.6194690265487, "grad_norm": 9.240691767331555e-09, "learning_rate": 0.12017049418579843, "loss": 0.0, "num_input_tokens_seen": 12806544, "step": 22550 }, { "epoch": 395.7079646017699, "grad_norm": 2.2414560874040035e-08, "learning_rate": 0.12011276811870514, "loss": 0.0, "num_input_tokens_seen": 12809376, "step": 22555 }, { "epoch": 395.79646017699116, "grad_norm": 1.5335395175952726e-08, "learning_rate": 0.12005504666059852, "loss": 0.0, "num_input_tokens_seen": 12812368, "step": 22560 }, { "epoch": 395.88495575221236, "grad_norm": 5.622640131264234e-09, "learning_rate": 0.11999732982038003, "loss": 0.0, "num_input_tokens_seen": 12815424, "step": 22565 }, { "epoch": 395.9734513274336, "grad_norm": 9.676749179732269e-09, "learning_rate": 0.11993961760695038, "loss": 0.0, "num_input_tokens_seen": 12818208, "step": 22570 }, { "epoch": 396.05309734513276, "grad_norm": 1.4318051633210871e-08, "learning_rate": 0.11988191002920942, "loss": 0.0, "num_input_tokens_seen": 12820568, "step": 22575 }, { "epoch": 396.14159292035396, "grad_norm": 1.4125025593614282e-08, "learning_rate": 0.11982420709605641, "loss": 0.0, "num_input_tokens_seen": 12823480, "step": 22580 }, { "epoch": 396.2300884955752, "grad_norm": 1.4863754671523566e-08, "learning_rate": 0.11976650881638991, "loss": 0.0, "num_input_tokens_seen": 12826072, "step": 22585 }, { "epoch": 396.3185840707965, "grad_norm": 2.248228447854217e-08, "learning_rate": 0.11970881519910764, "loss": 0.0, "num_input_tokens_seen": 12829112, "step": 22590 }, { "epoch": 396.4070796460177, "grad_norm": 1.6384424483817384e-08, "learning_rate": 0.1196511262531068, "loss": 0.0, "num_input_tokens_seen": 12831832, "step": 22595 }, { "epoch": 396.49557522123894, "grad_norm": 1.7098988891461886e-08, "learning_rate": 0.11959344198728361, "loss": 0.0, "num_input_tokens_seen": 12835240, "step": 22600 }, { "epoch": 396.49557522123894, "eval_loss": 0.6525802612304688, "eval_runtime": 0.9417, "eval_samples_per_second": 26.546, "eval_steps_per_second": 13.804, "num_input_tokens_seen": 12835240, "step": 22600 }, { "epoch": 396.5840707964602, "grad_norm": 1.3225540662631374e-08, "learning_rate": 0.11953576241053378, "loss": 0.0, "num_input_tokens_seen": 12838904, "step": 22605 }, { "epoch": 396.6725663716814, "grad_norm": 7.3576611470116404e-09, "learning_rate": 0.11947808753175228, "loss": 0.0, "num_input_tokens_seen": 12841800, "step": 22610 }, { "epoch": 396.76106194690266, "grad_norm": 1.7066506430296613e-08, "learning_rate": 0.1194204173598332, "loss": 0.0, "num_input_tokens_seen": 12844472, "step": 22615 }, { "epoch": 396.8495575221239, "grad_norm": 1.5326394375847485e-08, "learning_rate": 0.11936275190367007, "loss": 0.0, "num_input_tokens_seen": 12847128, "step": 22620 }, { "epoch": 396.9380530973451, "grad_norm": 2.2233178853525715e-08, "learning_rate": 0.11930509117215563, "loss": 0.0, "num_input_tokens_seen": 12849640, "step": 22625 }, { "epoch": 397.01769911504425, "grad_norm": 1.512944436399266e-08, "learning_rate": 0.11924743517418179, "loss": 0.0, "num_input_tokens_seen": 12851704, "step": 22630 }, { "epoch": 397.1061946902655, "grad_norm": 1.7388154915920495e-08, "learning_rate": 0.11918978391864, "loss": 0.0, "num_input_tokens_seen": 12854392, "step": 22635 }, { "epoch": 397.1946902654867, "grad_norm": 1.7031128507483118e-08, "learning_rate": 0.11913213741442065, "loss": 0.0, "num_input_tokens_seen": 12857016, "step": 22640 }, { "epoch": 397.283185840708, "grad_norm": 2.5244951018521533e-08, "learning_rate": 0.11907449567041364, "loss": 0.0, "num_input_tokens_seen": 12859896, "step": 22645 }, { "epoch": 397.37168141592923, "grad_norm": 1.9413663565615025e-08, "learning_rate": 0.11901685869550803, "loss": 0.0, "num_input_tokens_seen": 12862744, "step": 22650 }, { "epoch": 397.46017699115043, "grad_norm": 8.936064332942806e-09, "learning_rate": 0.1189592264985922, "loss": 0.0, "num_input_tokens_seen": 12865464, "step": 22655 }, { "epoch": 397.5486725663717, "grad_norm": 1.9158358455229063e-08, "learning_rate": 0.11890159908855373, "loss": 0.0, "num_input_tokens_seen": 12868120, "step": 22660 }, { "epoch": 397.6371681415929, "grad_norm": 1.7200580515464026e-08, "learning_rate": 0.11884397647427941, "loss": 0.0, "num_input_tokens_seen": 12871112, "step": 22665 }, { "epoch": 397.72566371681415, "grad_norm": 2.1743174372090834e-08, "learning_rate": 0.11878635866465546, "loss": 0.0, "num_input_tokens_seen": 12874008, "step": 22670 }, { "epoch": 397.8141592920354, "grad_norm": 8.967786513380815e-09, "learning_rate": 0.11872874566856734, "loss": 0.0, "num_input_tokens_seen": 12877112, "step": 22675 }, { "epoch": 397.9026548672566, "grad_norm": 3.6348314580436636e-08, "learning_rate": 0.11867113749489955, "loss": 0.0, "num_input_tokens_seen": 12879944, "step": 22680 }, { "epoch": 397.9911504424779, "grad_norm": 1.7296160947921635e-08, "learning_rate": 0.11861353415253607, "loss": 0.0, "num_input_tokens_seen": 12883672, "step": 22685 }, { "epoch": 398.070796460177, "grad_norm": 2.3365389623108967e-08, "learning_rate": 0.11855593565036011, "loss": 0.0, "num_input_tokens_seen": 12886360, "step": 22690 }, { "epoch": 398.1592920353982, "grad_norm": 2.6991326507186386e-08, "learning_rate": 0.11849834199725394, "loss": 0.0, "num_input_tokens_seen": 12888904, "step": 22695 }, { "epoch": 398.24778761061947, "grad_norm": 1.8704033877270376e-08, "learning_rate": 0.1184407532020994, "loss": 0.0, "num_input_tokens_seen": 12891864, "step": 22700 }, { "epoch": 398.3362831858407, "grad_norm": 3.5521580343811365e-08, "learning_rate": 0.11838316927377723, "loss": 0.0, "num_input_tokens_seen": 12894888, "step": 22705 }, { "epoch": 398.42477876106193, "grad_norm": 1.0431282504441697e-08, "learning_rate": 0.11832559022116766, "loss": 0.0, "num_input_tokens_seen": 12897768, "step": 22710 }, { "epoch": 398.5132743362832, "grad_norm": 1.550418105011886e-08, "learning_rate": 0.11826801605315022, "loss": 0.0, "num_input_tokens_seen": 12900392, "step": 22715 }, { "epoch": 398.60176991150445, "grad_norm": 1.648567149459268e-08, "learning_rate": 0.1182104467786034, "loss": 0.0, "num_input_tokens_seen": 12903304, "step": 22720 }, { "epoch": 398.69026548672565, "grad_norm": 1.7989917111549403e-08, "learning_rate": 0.1181528824064052, "loss": 0.0, "num_input_tokens_seen": 12906360, "step": 22725 }, { "epoch": 398.7787610619469, "grad_norm": 5.307334305371114e-08, "learning_rate": 0.11809532294543279, "loss": 0.0, "num_input_tokens_seen": 12909448, "step": 22730 }, { "epoch": 398.86725663716817, "grad_norm": 6.676551311812773e-09, "learning_rate": 0.11803776840456245, "loss": 0.0, "num_input_tokens_seen": 12912376, "step": 22735 }, { "epoch": 398.95575221238937, "grad_norm": 1.3535728093927446e-08, "learning_rate": 0.11798021879266997, "loss": 0.0, "num_input_tokens_seen": 12914872, "step": 22740 }, { "epoch": 399.0353982300885, "grad_norm": 1.3234355833446898e-08, "learning_rate": 0.11792267411863006, "loss": 0.0, "num_input_tokens_seen": 12917160, "step": 22745 }, { "epoch": 399.12389380530976, "grad_norm": 1.7656676121191595e-08, "learning_rate": 0.1178651343913169, "loss": 0.0, "num_input_tokens_seen": 12919800, "step": 22750 }, { "epoch": 399.21238938053096, "grad_norm": 1.9406472873129132e-08, "learning_rate": 0.11780759961960392, "loss": 0.0, "num_input_tokens_seen": 12922920, "step": 22755 }, { "epoch": 399.3008849557522, "grad_norm": 2.1896052970760138e-08, "learning_rate": 0.1177500698123636, "loss": 0.0, "num_input_tokens_seen": 12925656, "step": 22760 }, { "epoch": 399.3893805309734, "grad_norm": 1.781878644635526e-08, "learning_rate": 0.11769254497846778, "loss": 0.0, "num_input_tokens_seen": 12928568, "step": 22765 }, { "epoch": 399.4778761061947, "grad_norm": 2.1404279237913215e-08, "learning_rate": 0.11763502512678758, "loss": 0.0, "num_input_tokens_seen": 12931720, "step": 22770 }, { "epoch": 399.56637168141594, "grad_norm": 2.0152281621221846e-08, "learning_rate": 0.11757751026619315, "loss": 0.0, "num_input_tokens_seen": 12934264, "step": 22775 }, { "epoch": 399.65486725663715, "grad_norm": 1.4029194694842317e-08, "learning_rate": 0.11752000040555416, "loss": 0.0, "num_input_tokens_seen": 12937848, "step": 22780 }, { "epoch": 399.7433628318584, "grad_norm": 2.1692054374966574e-08, "learning_rate": 0.11746249555373921, "loss": 0.0, "num_input_tokens_seen": 12940632, "step": 22785 }, { "epoch": 399.83185840707966, "grad_norm": 1.3162543055500464e-08, "learning_rate": 0.11740499571961638, "loss": 0.0, "num_input_tokens_seen": 12943512, "step": 22790 }, { "epoch": 399.92035398230087, "grad_norm": 7.184137285065617e-09, "learning_rate": 0.11734750091205279, "loss": 0.0, "num_input_tokens_seen": 12946456, "step": 22795 }, { "epoch": 400.0, "grad_norm": 7.554487702066126e-09, "learning_rate": 0.11729001113991493, "loss": 0.0, "num_input_tokens_seen": 12948416, "step": 22800 }, { "epoch": 400.0, "eval_loss": 0.6555616855621338, "eval_runtime": 0.9104, "eval_samples_per_second": 27.461, "eval_steps_per_second": 14.28, "num_input_tokens_seen": 12948416, "step": 22800 }, { "epoch": 400.08849557522126, "grad_norm": 3.993587327499881e-08, "learning_rate": 0.11723252641206837, "loss": 0.0, "num_input_tokens_seen": 12951568, "step": 22805 }, { "epoch": 400.17699115044246, "grad_norm": 5.0706567833458394e-08, "learning_rate": 0.11717504673737808, "loss": 0.0, "num_input_tokens_seen": 12954160, "step": 22810 }, { "epoch": 400.2654867256637, "grad_norm": 1.3663618680936906e-08, "learning_rate": 0.11711757212470802, "loss": 0.0, "num_input_tokens_seen": 12957136, "step": 22815 }, { "epoch": 400.353982300885, "grad_norm": 9.915761545187252e-09, "learning_rate": 0.11706010258292165, "loss": 0.0, "num_input_tokens_seen": 12959712, "step": 22820 }, { "epoch": 400.4424778761062, "grad_norm": 6.230533866613541e-09, "learning_rate": 0.11700263812088131, "loss": 0.0, "num_input_tokens_seen": 12962416, "step": 22825 }, { "epoch": 400.53097345132744, "grad_norm": 6.045437483948035e-09, "learning_rate": 0.11694517874744892, "loss": 0.0, "num_input_tokens_seen": 12965456, "step": 22830 }, { "epoch": 400.6194690265487, "grad_norm": 1.4797997494042647e-08, "learning_rate": 0.11688772447148532, "loss": 0.0, "num_input_tokens_seen": 12968480, "step": 22835 }, { "epoch": 400.7079646017699, "grad_norm": 3.485905608613393e-08, "learning_rate": 0.11683027530185074, "loss": 0.0, "num_input_tokens_seen": 12971376, "step": 22840 }, { "epoch": 400.79646017699116, "grad_norm": 1.717506847853656e-08, "learning_rate": 0.11677283124740451, "loss": 0.0, "num_input_tokens_seen": 12974288, "step": 22845 }, { "epoch": 400.88495575221236, "grad_norm": 2.5411662107899247e-08, "learning_rate": 0.11671539231700531, "loss": 0.0, "num_input_tokens_seen": 12976928, "step": 22850 }, { "epoch": 400.9734513274336, "grad_norm": 1.417283357341148e-08, "learning_rate": 0.11665795851951084, "loss": 0.0, "num_input_tokens_seen": 12979712, "step": 22855 }, { "epoch": 401.05309734513276, "grad_norm": 1.56341446455599e-08, "learning_rate": 0.11660052986377825, "loss": 0.0, "num_input_tokens_seen": 12982208, "step": 22860 }, { "epoch": 401.14159292035396, "grad_norm": 1.665713256215895e-08, "learning_rate": 0.1165431063586636, "loss": 0.0, "num_input_tokens_seen": 12985184, "step": 22865 }, { "epoch": 401.2300884955752, "grad_norm": 1.4885169541400955e-08, "learning_rate": 0.11648568801302245, "loss": 0.0, "num_input_tokens_seen": 12987984, "step": 22870 }, { "epoch": 401.3185840707965, "grad_norm": 2.28700116622349e-08, "learning_rate": 0.11642827483570937, "loss": 0.0, "num_input_tokens_seen": 12990400, "step": 22875 }, { "epoch": 401.4070796460177, "grad_norm": 1.6769041266684326e-08, "learning_rate": 0.11637086683557815, "loss": 0.0, "num_input_tokens_seen": 12993296, "step": 22880 }, { "epoch": 401.49557522123894, "grad_norm": 1.4107182977340926e-08, "learning_rate": 0.11631346402148188, "loss": 0.0, "num_input_tokens_seen": 12996224, "step": 22885 }, { "epoch": 401.5840707964602, "grad_norm": 3.0523256810965904e-08, "learning_rate": 0.11625606640227285, "loss": 0.0, "num_input_tokens_seen": 12999248, "step": 22890 }, { "epoch": 401.6725663716814, "grad_norm": 1.4714084173306219e-08, "learning_rate": 0.11619867398680238, "loss": 0.0, "num_input_tokens_seen": 13002672, "step": 22895 }, { "epoch": 401.76106194690266, "grad_norm": 1.1894740303830531e-08, "learning_rate": 0.11614128678392119, "loss": 0.0, "num_input_tokens_seen": 13005984, "step": 22900 }, { "epoch": 401.8495575221239, "grad_norm": 6.3458962529239216e-09, "learning_rate": 0.11608390480247906, "loss": 0.0, "num_input_tokens_seen": 13008480, "step": 22905 }, { "epoch": 401.9380530973451, "grad_norm": 1.9208753698762848e-08, "learning_rate": 0.11602652805132499, "loss": 0.0, "num_input_tokens_seen": 13011088, "step": 22910 }, { "epoch": 402.01769911504425, "grad_norm": 7.036338622867788e-09, "learning_rate": 0.11596915653930731, "loss": 0.0, "num_input_tokens_seen": 13013504, "step": 22915 }, { "epoch": 402.1061946902655, "grad_norm": 1.71735869969325e-08, "learning_rate": 0.11591179027527328, "loss": 0.0, "num_input_tokens_seen": 13016416, "step": 22920 }, { "epoch": 402.1946902654867, "grad_norm": 5.721998874719247e-09, "learning_rate": 0.11585442926806956, "loss": 0.0, "num_input_tokens_seen": 13018992, "step": 22925 }, { "epoch": 402.283185840708, "grad_norm": 2.736266502267881e-08, "learning_rate": 0.11579707352654202, "loss": 0.0, "num_input_tokens_seen": 13022208, "step": 22930 }, { "epoch": 402.37168141592923, "grad_norm": 1.415112471647717e-08, "learning_rate": 0.11573972305953548, "loss": 0.0, "num_input_tokens_seen": 13024912, "step": 22935 }, { "epoch": 402.46017699115043, "grad_norm": 1.6397095237152826e-08, "learning_rate": 0.11568237787589426, "loss": 0.0, "num_input_tokens_seen": 13027872, "step": 22940 }, { "epoch": 402.5486725663717, "grad_norm": 4.694610922229003e-09, "learning_rate": 0.11562503798446161, "loss": 0.0, "num_input_tokens_seen": 13030640, "step": 22945 }, { "epoch": 402.6371681415929, "grad_norm": 1.8762506215352914e-08, "learning_rate": 0.11556770339408005, "loss": 0.0, "num_input_tokens_seen": 13033904, "step": 22950 }, { "epoch": 402.72566371681415, "grad_norm": 9.63910373741328e-09, "learning_rate": 0.1155103741135914, "loss": 0.0, "num_input_tokens_seen": 13036784, "step": 22955 }, { "epoch": 402.8141592920354, "grad_norm": 2.2597976823135468e-08, "learning_rate": 0.1154530501518364, "loss": 0.0, "num_input_tokens_seen": 13039744, "step": 22960 }, { "epoch": 402.9026548672566, "grad_norm": 1.3377373875300691e-08, "learning_rate": 0.11539573151765523, "loss": 0.0, "num_input_tokens_seen": 13042384, "step": 22965 }, { "epoch": 402.9911504424779, "grad_norm": 2.3121671688386414e-08, "learning_rate": 0.11533841821988719, "loss": 0.0, "num_input_tokens_seen": 13045216, "step": 22970 }, { "epoch": 403.070796460177, "grad_norm": 1.1252958564966775e-08, "learning_rate": 0.11528111026737059, "loss": 0.0, "num_input_tokens_seen": 13047600, "step": 22975 }, { "epoch": 403.1592920353982, "grad_norm": 6.688443132674138e-09, "learning_rate": 0.11522380766894312, "loss": 0.0, "num_input_tokens_seen": 13049888, "step": 22980 }, { "epoch": 403.24778761061947, "grad_norm": 8.139422469355395e-09, "learning_rate": 0.11516651043344152, "loss": 0.0, "num_input_tokens_seen": 13052912, "step": 22985 }, { "epoch": 403.3362831858407, "grad_norm": 9.687149749026958e-09, "learning_rate": 0.11510921856970172, "loss": 0.0, "num_input_tokens_seen": 13055584, "step": 22990 }, { "epoch": 403.42477876106193, "grad_norm": 1.5801285613292748e-08, "learning_rate": 0.11505193208655895, "loss": 0.0, "num_input_tokens_seen": 13058736, "step": 22995 }, { "epoch": 403.5132743362832, "grad_norm": 8.92944651553762e-09, "learning_rate": 0.11499465099284738, "loss": 0.0, "num_input_tokens_seen": 13061472, "step": 23000 }, { "epoch": 403.5132743362832, "eval_loss": 0.6734206676483154, "eval_runtime": 0.9379, "eval_samples_per_second": 26.656, "eval_steps_per_second": 13.861, "num_input_tokens_seen": 13061472, "step": 23000 }, { "epoch": 403.60176991150445, "grad_norm": 1.771767443869976e-08, "learning_rate": 0.1149373752974006, "loss": 0.0, "num_input_tokens_seen": 13064528, "step": 23005 }, { "epoch": 403.69026548672565, "grad_norm": 1.0293218721812991e-08, "learning_rate": 0.11488010500905109, "loss": 0.0, "num_input_tokens_seen": 13067712, "step": 23010 }, { "epoch": 403.7787610619469, "grad_norm": 1.4144703186502738e-08, "learning_rate": 0.11482284013663077, "loss": 0.0, "num_input_tokens_seen": 13070528, "step": 23015 }, { "epoch": 403.86725663716817, "grad_norm": 1.1841510882959483e-08, "learning_rate": 0.11476558068897061, "loss": 0.0, "num_input_tokens_seen": 13073328, "step": 23020 }, { "epoch": 403.95575221238937, "grad_norm": 3.120117852972726e-08, "learning_rate": 0.11470832667490061, "loss": 0.0, "num_input_tokens_seen": 13076688, "step": 23025 }, { "epoch": 404.0353982300885, "grad_norm": 6.324176293759365e-09, "learning_rate": 0.11465107810325013, "loss": 0.0, "num_input_tokens_seen": 13079304, "step": 23030 }, { "epoch": 404.12389380530976, "grad_norm": 1.684606232288388e-08, "learning_rate": 0.11459383498284771, "loss": 0.0, "num_input_tokens_seen": 13082024, "step": 23035 }, { "epoch": 404.21238938053096, "grad_norm": 1.900956725364722e-08, "learning_rate": 0.11453659732252082, "loss": 0.0, "num_input_tokens_seen": 13084680, "step": 23040 }, { "epoch": 404.3008849557522, "grad_norm": 7.702327664560471e-09, "learning_rate": 0.11447936513109633, "loss": 0.0, "num_input_tokens_seen": 13087848, "step": 23045 }, { "epoch": 404.3893805309734, "grad_norm": 2.7196165319764987e-08, "learning_rate": 0.11442213841740011, "loss": 0.0, "num_input_tokens_seen": 13090840, "step": 23050 }, { "epoch": 404.4778761061947, "grad_norm": 1.5479223236525286e-08, "learning_rate": 0.1143649171902572, "loss": 0.0, "num_input_tokens_seen": 13093688, "step": 23055 }, { "epoch": 404.56637168141594, "grad_norm": 6.005317576551761e-09, "learning_rate": 0.11430770145849194, "loss": 0.0, "num_input_tokens_seen": 13096616, "step": 23060 }, { "epoch": 404.65486725663715, "grad_norm": 1.1879356165422905e-08, "learning_rate": 0.11425049123092756, "loss": 0.0, "num_input_tokens_seen": 13099816, "step": 23065 }, { "epoch": 404.7433628318584, "grad_norm": 1.3877059501510303e-08, "learning_rate": 0.11419328651638674, "loss": 0.0, "num_input_tokens_seen": 13102680, "step": 23070 }, { "epoch": 404.83185840707966, "grad_norm": 2.6002698660931856e-08, "learning_rate": 0.11413608732369115, "loss": 0.0, "num_input_tokens_seen": 13105320, "step": 23075 }, { "epoch": 404.92035398230087, "grad_norm": 2.0933553557256346e-08, "learning_rate": 0.11407889366166153, "loss": 0.0, "num_input_tokens_seen": 13107912, "step": 23080 }, { "epoch": 405.0, "grad_norm": 1.389931103545905e-08, "learning_rate": 0.11402170553911797, "loss": 0.0, "num_input_tokens_seen": 13110120, "step": 23085 }, { "epoch": 405.08849557522126, "grad_norm": 2.5882272325361555e-08, "learning_rate": 0.11396452296487955, "loss": 0.0, "num_input_tokens_seen": 13112888, "step": 23090 }, { "epoch": 405.17699115044246, "grad_norm": 2.8278396513314874e-08, "learning_rate": 0.11390734594776449, "loss": 0.0, "num_input_tokens_seen": 13115752, "step": 23095 }, { "epoch": 405.2654867256637, "grad_norm": 2.7061229701530465e-08, "learning_rate": 0.11385017449659031, "loss": 0.0, "num_input_tokens_seen": 13118424, "step": 23100 }, { "epoch": 405.353982300885, "grad_norm": 1.2547345384916753e-08, "learning_rate": 0.11379300862017344, "loss": 0.0, "num_input_tokens_seen": 13121432, "step": 23105 }, { "epoch": 405.4424778761062, "grad_norm": 3.2688351581100505e-08, "learning_rate": 0.11373584832732966, "loss": 0.0, "num_input_tokens_seen": 13124024, "step": 23110 }, { "epoch": 405.53097345132744, "grad_norm": 1.5208270198741047e-08, "learning_rate": 0.11367869362687386, "loss": 0.0, "num_input_tokens_seen": 13127096, "step": 23115 }, { "epoch": 405.6194690265487, "grad_norm": 1.1899675023130385e-08, "learning_rate": 0.11362154452761988, "loss": 0.0, "num_input_tokens_seen": 13130136, "step": 23120 }, { "epoch": 405.7079646017699, "grad_norm": 1.6653078915851438e-08, "learning_rate": 0.11356440103838095, "loss": 0.0, "num_input_tokens_seen": 13133576, "step": 23125 }, { "epoch": 405.79646017699116, "grad_norm": 3.01733571461682e-08, "learning_rate": 0.11350726316796922, "loss": 0.0, "num_input_tokens_seen": 13136472, "step": 23130 }, { "epoch": 405.88495575221236, "grad_norm": 1.4410870718961633e-08, "learning_rate": 0.11345013092519607, "loss": 0.0, "num_input_tokens_seen": 13139368, "step": 23135 }, { "epoch": 405.9734513274336, "grad_norm": 1.582699304947255e-08, "learning_rate": 0.11339300431887213, "loss": 0.0, "num_input_tokens_seen": 13142104, "step": 23140 }, { "epoch": 406.05309734513276, "grad_norm": 2.502267903992106e-08, "learning_rate": 0.11333588335780687, "loss": 0.0, "num_input_tokens_seen": 13144904, "step": 23145 }, { "epoch": 406.14159292035396, "grad_norm": 1.275447480963976e-08, "learning_rate": 0.11327876805080916, "loss": 0.0, "num_input_tokens_seen": 13148008, "step": 23150 }, { "epoch": 406.2300884955752, "grad_norm": 1.990478359914505e-08, "learning_rate": 0.11322165840668696, "loss": 0.0, "num_input_tokens_seen": 13151032, "step": 23155 }, { "epoch": 406.3185840707965, "grad_norm": 3.045416008262691e-08, "learning_rate": 0.11316455443424717, "loss": 0.0, "num_input_tokens_seen": 13154152, "step": 23160 }, { "epoch": 406.4070796460177, "grad_norm": 1.721425135770005e-08, "learning_rate": 0.11310745614229603, "loss": 0.0, "num_input_tokens_seen": 13156744, "step": 23165 }, { "epoch": 406.49557522123894, "grad_norm": 2.7009129155430855e-08, "learning_rate": 0.1130503635396387, "loss": 0.0, "num_input_tokens_seen": 13159512, "step": 23170 }, { "epoch": 406.5840707964602, "grad_norm": 1.8396399070752523e-08, "learning_rate": 0.11299327663507966, "loss": 0.0, "num_input_tokens_seen": 13162248, "step": 23175 }, { "epoch": 406.6725663716814, "grad_norm": 7.2525634386977345e-09, "learning_rate": 0.11293619543742246, "loss": 0.0, "num_input_tokens_seen": 13165464, "step": 23180 }, { "epoch": 406.76106194690266, "grad_norm": 1.1231595209437728e-08, "learning_rate": 0.11287911995546965, "loss": 0.0, "num_input_tokens_seen": 13167848, "step": 23185 }, { "epoch": 406.8495575221239, "grad_norm": 2.610593341501044e-08, "learning_rate": 0.11282205019802308, "loss": 0.0, "num_input_tokens_seen": 13170648, "step": 23190 }, { "epoch": 406.9380530973451, "grad_norm": 1.7198015456187932e-08, "learning_rate": 0.11276498617388354, "loss": 0.0, "num_input_tokens_seen": 13173528, "step": 23195 }, { "epoch": 407.01769911504425, "grad_norm": 2.9755598873748568e-08, "learning_rate": 0.11270792789185109, "loss": 0.0, "num_input_tokens_seen": 13175888, "step": 23200 }, { "epoch": 407.01769911504425, "eval_loss": 0.6624284982681274, "eval_runtime": 0.9383, "eval_samples_per_second": 26.645, "eval_steps_per_second": 13.855, "num_input_tokens_seen": 13175888, "step": 23200 }, { "epoch": 407.1061946902655, "grad_norm": 5.904866640094042e-08, "learning_rate": 0.11265087536072482, "loss": 0.0, "num_input_tokens_seen": 13178864, "step": 23205 }, { "epoch": 407.1946902654867, "grad_norm": 1.0544271233925429e-08, "learning_rate": 0.11259382858930288, "loss": 0.0, "num_input_tokens_seen": 13181568, "step": 23210 }, { "epoch": 407.283185840708, "grad_norm": 3.1689801005541085e-08, "learning_rate": 0.11253678758638262, "loss": 0.0, "num_input_tokens_seen": 13184592, "step": 23215 }, { "epoch": 407.37168141592923, "grad_norm": 4.387031182773171e-09, "learning_rate": 0.11247975236076059, "loss": 0.0, "num_input_tokens_seen": 13187648, "step": 23220 }, { "epoch": 407.46017699115043, "grad_norm": 6.863122070654981e-09, "learning_rate": 0.11242272292123218, "loss": 0.0, "num_input_tokens_seen": 13190576, "step": 23225 }, { "epoch": 407.5486725663717, "grad_norm": 9.499886211017383e-09, "learning_rate": 0.11236569927659217, "loss": 0.0, "num_input_tokens_seen": 13193664, "step": 23230 }, { "epoch": 407.6371681415929, "grad_norm": 1.5419667320770714e-08, "learning_rate": 0.11230868143563429, "loss": 0.0, "num_input_tokens_seen": 13196592, "step": 23235 }, { "epoch": 407.72566371681415, "grad_norm": 1.3003311316595045e-08, "learning_rate": 0.11225166940715131, "loss": 0.0, "num_input_tokens_seen": 13199264, "step": 23240 }, { "epoch": 407.8141592920354, "grad_norm": 7.246398592286596e-09, "learning_rate": 0.11219466319993537, "loss": 0.0, "num_input_tokens_seen": 13202096, "step": 23245 }, { "epoch": 407.9026548672566, "grad_norm": 1.9832606668046537e-08, "learning_rate": 0.11213766282277739, "loss": 0.0, "num_input_tokens_seen": 13205088, "step": 23250 }, { "epoch": 407.9911504424779, "grad_norm": 4.228149563800798e-08, "learning_rate": 0.11208066828446761, "loss": 0.0, "num_input_tokens_seen": 13207872, "step": 23255 }, { "epoch": 408.070796460177, "grad_norm": 1.3120314612535822e-08, "learning_rate": 0.11202367959379537, "loss": 0.0, "num_input_tokens_seen": 13210088, "step": 23260 }, { "epoch": 408.1592920353982, "grad_norm": 1.497315160747803e-08, "learning_rate": 0.11196669675954894, "loss": 0.0, "num_input_tokens_seen": 13213192, "step": 23265 }, { "epoch": 408.24778761061947, "grad_norm": 1.1162573976264412e-08, "learning_rate": 0.1119097197905158, "loss": 0.0, "num_input_tokens_seen": 13215784, "step": 23270 }, { "epoch": 408.3362831858407, "grad_norm": 1.4044536200685798e-08, "learning_rate": 0.11185274869548259, "loss": 0.0, "num_input_tokens_seen": 13219048, "step": 23275 }, { "epoch": 408.42477876106193, "grad_norm": 1.5450698498398197e-08, "learning_rate": 0.11179578348323486, "loss": 0.0, "num_input_tokens_seen": 13221960, "step": 23280 }, { "epoch": 408.5132743362832, "grad_norm": 2.975899882073918e-08, "learning_rate": 0.1117388241625575, "loss": 0.0, "num_input_tokens_seen": 13224824, "step": 23285 }, { "epoch": 408.60176991150445, "grad_norm": 1.9304582821177974e-08, "learning_rate": 0.11168187074223421, "loss": 0.0, "num_input_tokens_seen": 13227688, "step": 23290 }, { "epoch": 408.69026548672565, "grad_norm": 9.234964792881328e-09, "learning_rate": 0.11162492323104796, "loss": 0.0, "num_input_tokens_seen": 13230552, "step": 23295 }, { "epoch": 408.7787610619469, "grad_norm": 8.634789772088425e-09, "learning_rate": 0.11156798163778091, "loss": 0.0, "num_input_tokens_seen": 13233128, "step": 23300 }, { "epoch": 408.86725663716817, "grad_norm": 1.545158134774738e-08, "learning_rate": 0.11151104597121399, "loss": 0.0, "num_input_tokens_seen": 13236200, "step": 23305 }, { "epoch": 408.95575221238937, "grad_norm": 1.3841445323237167e-08, "learning_rate": 0.11145411624012742, "loss": 0.0, "num_input_tokens_seen": 13239048, "step": 23310 }, { "epoch": 409.0353982300885, "grad_norm": 4.715678869615658e-08, "learning_rate": 0.11139719245330063, "loss": 0.0, "num_input_tokens_seen": 13241352, "step": 23315 }, { "epoch": 409.12389380530976, "grad_norm": 1.3138969023884783e-08, "learning_rate": 0.11134027461951179, "loss": 0.0, "num_input_tokens_seen": 13244296, "step": 23320 }, { "epoch": 409.21238938053096, "grad_norm": 1.742924915504318e-08, "learning_rate": 0.11128336274753849, "loss": 0.0, "num_input_tokens_seen": 13247288, "step": 23325 }, { "epoch": 409.3008849557522, "grad_norm": 1.8698862902510882e-08, "learning_rate": 0.11122645684615715, "loss": 0.0, "num_input_tokens_seen": 13250040, "step": 23330 }, { "epoch": 409.3893805309734, "grad_norm": 5.7857607593803095e-09, "learning_rate": 0.11116955692414345, "loss": 0.0, "num_input_tokens_seen": 13252664, "step": 23335 }, { "epoch": 409.4778761061947, "grad_norm": 1.854470177420353e-08, "learning_rate": 0.11111266299027203, "loss": 0.0, "num_input_tokens_seen": 13255512, "step": 23340 }, { "epoch": 409.56637168141594, "grad_norm": 3.695391015412497e-08, "learning_rate": 0.11105577505331668, "loss": 0.0, "num_input_tokens_seen": 13258952, "step": 23345 }, { "epoch": 409.65486725663715, "grad_norm": 1.4777440604518688e-08, "learning_rate": 0.11099889312205018, "loss": 0.0, "num_input_tokens_seen": 13262264, "step": 23350 }, { "epoch": 409.7433628318584, "grad_norm": 1.2590103182219536e-08, "learning_rate": 0.11094201720524455, "loss": 0.0, "num_input_tokens_seen": 13264920, "step": 23355 }, { "epoch": 409.83185840707966, "grad_norm": 2.3628235368278183e-08, "learning_rate": 0.11088514731167064, "loss": 0.0, "num_input_tokens_seen": 13267336, "step": 23360 }, { "epoch": 409.92035398230087, "grad_norm": 1.117708947617757e-08, "learning_rate": 0.11082828345009862, "loss": 0.0, "num_input_tokens_seen": 13270216, "step": 23365 }, { "epoch": 410.0, "grad_norm": 1.4269455839155398e-08, "learning_rate": 0.11077142562929748, "loss": 0.0, "num_input_tokens_seen": 13272776, "step": 23370 }, { "epoch": 410.08849557522126, "grad_norm": 1.4635761047543383e-08, "learning_rate": 0.11071457385803554, "loss": 0.0, "num_input_tokens_seen": 13275736, "step": 23375 }, { "epoch": 410.17699115044246, "grad_norm": 3.40902666096099e-08, "learning_rate": 0.11065772814508001, "loss": 0.0, "num_input_tokens_seen": 13278712, "step": 23380 }, { "epoch": 410.2654867256637, "grad_norm": 2.284498812343827e-08, "learning_rate": 0.11060088849919715, "loss": 0.0, "num_input_tokens_seen": 13281368, "step": 23385 }, { "epoch": 410.353982300885, "grad_norm": 1.7419941045204723e-08, "learning_rate": 0.11054405492915244, "loss": 0.0, "num_input_tokens_seen": 13284088, "step": 23390 }, { "epoch": 410.4424778761062, "grad_norm": 6.968247312499898e-09, "learning_rate": 0.11048722744371031, "loss": 0.0, "num_input_tokens_seen": 13287048, "step": 23395 }, { "epoch": 410.53097345132744, "grad_norm": 3.521601854572509e-08, "learning_rate": 0.1104304060516342, "loss": 0.0, "num_input_tokens_seen": 13289752, "step": 23400 }, { "epoch": 410.53097345132744, "eval_loss": 0.6691233515739441, "eval_runtime": 0.9403, "eval_samples_per_second": 26.587, "eval_steps_per_second": 13.825, "num_input_tokens_seen": 13289752, "step": 23400 }, { "epoch": 410.6194690265487, "grad_norm": 1.3993168401782441e-08, "learning_rate": 0.11037359076168682, "loss": 0.0, "num_input_tokens_seen": 13292584, "step": 23405 }, { "epoch": 410.7079646017699, "grad_norm": 1.5348335935527757e-08, "learning_rate": 0.11031678158262966, "loss": 0.0, "num_input_tokens_seen": 13295704, "step": 23410 }, { "epoch": 410.79646017699116, "grad_norm": 2.4803979314924618e-08, "learning_rate": 0.11025997852322349, "loss": 0.0, "num_input_tokens_seen": 13298600, "step": 23415 }, { "epoch": 410.88495575221236, "grad_norm": 1.5970096356454633e-08, "learning_rate": 0.11020318159222807, "loss": 0.0, "num_input_tokens_seen": 13301912, "step": 23420 }, { "epoch": 410.9734513274336, "grad_norm": 8.863681344450924e-09, "learning_rate": 0.1101463907984021, "loss": 0.0, "num_input_tokens_seen": 13304472, "step": 23425 }, { "epoch": 411.05309734513276, "grad_norm": 8.989633926148599e-09, "learning_rate": 0.11008960615050352, "loss": 0.0, "num_input_tokens_seen": 13307080, "step": 23430 }, { "epoch": 411.14159292035396, "grad_norm": 1.6773583411122672e-08, "learning_rate": 0.11003282765728925, "loss": 0.0, "num_input_tokens_seen": 13310152, "step": 23435 }, { "epoch": 411.2300884955752, "grad_norm": 1.326870702200722e-08, "learning_rate": 0.10997605532751518, "loss": 0.0, "num_input_tokens_seen": 13312840, "step": 23440 }, { "epoch": 411.3185840707965, "grad_norm": 1.2515298131177133e-08, "learning_rate": 0.1099192891699364, "loss": 0.0, "num_input_tokens_seen": 13316168, "step": 23445 }, { "epoch": 411.4070796460177, "grad_norm": 2.2525105336512752e-08, "learning_rate": 0.10986252919330687, "loss": 0.0, "num_input_tokens_seen": 13319304, "step": 23450 }, { "epoch": 411.49557522123894, "grad_norm": 9.780164234030053e-09, "learning_rate": 0.10980577540637973, "loss": 0.0, "num_input_tokens_seen": 13321912, "step": 23455 }, { "epoch": 411.5840707964602, "grad_norm": 1.3979986945855671e-08, "learning_rate": 0.10974902781790719, "loss": 0.0, "num_input_tokens_seen": 13324232, "step": 23460 }, { "epoch": 411.6725663716814, "grad_norm": 2.0850441373454487e-08, "learning_rate": 0.10969228643664032, "loss": 0.0, "num_input_tokens_seen": 13327016, "step": 23465 }, { "epoch": 411.76106194690266, "grad_norm": 2.302249413332902e-08, "learning_rate": 0.10963555127132942, "loss": 0.0, "num_input_tokens_seen": 13329784, "step": 23470 }, { "epoch": 411.8495575221239, "grad_norm": 2.3937388959893724e-08, "learning_rate": 0.10957882233072382, "loss": 0.0, "num_input_tokens_seen": 13332616, "step": 23475 }, { "epoch": 411.9380530973451, "grad_norm": 2.9374922050351415e-08, "learning_rate": 0.10952209962357176, "loss": 0.0, "num_input_tokens_seen": 13335736, "step": 23480 }, { "epoch": 412.01769911504425, "grad_norm": 2.4371290763269826e-08, "learning_rate": 0.10946538315862062, "loss": 0.0, "num_input_tokens_seen": 13338224, "step": 23485 }, { "epoch": 412.1061946902655, "grad_norm": 1.6097486010835382e-08, "learning_rate": 0.10940867294461679, "loss": 0.0, "num_input_tokens_seen": 13341264, "step": 23490 }, { "epoch": 412.1946902654867, "grad_norm": 1.0998035371301285e-08, "learning_rate": 0.10935196899030565, "loss": 0.0, "num_input_tokens_seen": 13344048, "step": 23495 }, { "epoch": 412.283185840708, "grad_norm": 5.5595510417560945e-09, "learning_rate": 0.10929527130443177, "loss": 0.0, "num_input_tokens_seen": 13346416, "step": 23500 }, { "epoch": 412.37168141592923, "grad_norm": 2.810078392201376e-08, "learning_rate": 0.1092385798957385, "loss": 0.0, "num_input_tokens_seen": 13349984, "step": 23505 }, { "epoch": 412.46017699115043, "grad_norm": 1.6443516770436872e-08, "learning_rate": 0.10918189477296848, "loss": 0.0, "num_input_tokens_seen": 13352736, "step": 23510 }, { "epoch": 412.5486725663717, "grad_norm": 1.9833244380151882e-08, "learning_rate": 0.1091252159448633, "loss": 0.0, "num_input_tokens_seen": 13355136, "step": 23515 }, { "epoch": 412.6371681415929, "grad_norm": 1.0777743142398322e-08, "learning_rate": 0.10906854342016345, "loss": 0.0, "num_input_tokens_seen": 13358032, "step": 23520 }, { "epoch": 412.72566371681415, "grad_norm": 1.5804111797024234e-08, "learning_rate": 0.10901187720760858, "loss": 0.0, "num_input_tokens_seen": 13360528, "step": 23525 }, { "epoch": 412.8141592920354, "grad_norm": 2.0875198458725208e-08, "learning_rate": 0.10895521731593734, "loss": 0.0, "num_input_tokens_seen": 13363904, "step": 23530 }, { "epoch": 412.9026548672566, "grad_norm": 2.4561208178397465e-08, "learning_rate": 0.10889856375388733, "loss": 0.0, "num_input_tokens_seen": 13366576, "step": 23535 }, { "epoch": 412.9911504424779, "grad_norm": 1.8343975227708142e-08, "learning_rate": 0.1088419165301954, "loss": 0.0, "num_input_tokens_seen": 13369584, "step": 23540 }, { "epoch": 413.070796460177, "grad_norm": 2.4901494199980334e-08, "learning_rate": 0.1087852756535971, "loss": 0.0, "num_input_tokens_seen": 13371928, "step": 23545 }, { "epoch": 413.1592920353982, "grad_norm": 1.4291774874664043e-08, "learning_rate": 0.10872864113282725, "loss": 0.0, "num_input_tokens_seen": 13374888, "step": 23550 }, { "epoch": 413.24778761061947, "grad_norm": 1.9592768296661234e-08, "learning_rate": 0.10867201297661958, "loss": 0.0, "num_input_tokens_seen": 13377848, "step": 23555 }, { "epoch": 413.3362831858407, "grad_norm": 1.2940991389598366e-08, "learning_rate": 0.10861539119370689, "loss": 0.0, "num_input_tokens_seen": 13380408, "step": 23560 }, { "epoch": 413.42477876106193, "grad_norm": 1.4583505070220326e-08, "learning_rate": 0.10855877579282096, "loss": 0.0, "num_input_tokens_seen": 13383128, "step": 23565 }, { "epoch": 413.5132743362832, "grad_norm": 1.986278341803427e-08, "learning_rate": 0.10850216678269252, "loss": 0.0, "num_input_tokens_seen": 13386104, "step": 23570 }, { "epoch": 413.60176991150445, "grad_norm": 2.2927629572677688e-08, "learning_rate": 0.10844556417205146, "loss": 0.0, "num_input_tokens_seen": 13389144, "step": 23575 }, { "epoch": 413.69026548672565, "grad_norm": 5.493663479683164e-08, "learning_rate": 0.10838896796962669, "loss": 0.0, "num_input_tokens_seen": 13392312, "step": 23580 }, { "epoch": 413.7787610619469, "grad_norm": 2.254039443982947e-08, "learning_rate": 0.1083323781841459, "loss": 0.0, "num_input_tokens_seen": 13395096, "step": 23585 }, { "epoch": 413.86725663716817, "grad_norm": 1.772010094214238e-08, "learning_rate": 0.10827579482433607, "loss": 0.0, "num_input_tokens_seen": 13398008, "step": 23590 }, { "epoch": 413.95575221238937, "grad_norm": 8.396543904609644e-09, "learning_rate": 0.10821921789892304, "loss": 0.0, "num_input_tokens_seen": 13401112, "step": 23595 }, { "epoch": 414.0353982300885, "grad_norm": 8.434754228403563e-09, "learning_rate": 0.10816264741663158, "loss": 0.0, "num_input_tokens_seen": 13403848, "step": 23600 }, { "epoch": 414.0353982300885, "eval_loss": 0.6630703806877136, "eval_runtime": 0.9183, "eval_samples_per_second": 27.224, "eval_steps_per_second": 14.156, "num_input_tokens_seen": 13403848, "step": 23600 }, { "epoch": 414.12389380530976, "grad_norm": 6.935042762279409e-09, "learning_rate": 0.10810608338618573, "loss": 0.0, "num_input_tokens_seen": 13406680, "step": 23605 }, { "epoch": 414.21238938053096, "grad_norm": 2.136934185159589e-08, "learning_rate": 0.10804952581630821, "loss": 0.0, "num_input_tokens_seen": 13409160, "step": 23610 }, { "epoch": 414.3008849557522, "grad_norm": 1.868936294613377e-08, "learning_rate": 0.10799297471572102, "loss": 0.0, "num_input_tokens_seen": 13411752, "step": 23615 }, { "epoch": 414.3893805309734, "grad_norm": 9.654744559384199e-09, "learning_rate": 0.10793643009314507, "loss": 0.0, "num_input_tokens_seen": 13413896, "step": 23620 }, { "epoch": 414.4778761061947, "grad_norm": 2.977247604007971e-08, "learning_rate": 0.10787989195730015, "loss": 0.0, "num_input_tokens_seen": 13417048, "step": 23625 }, { "epoch": 414.56637168141594, "grad_norm": 1.2168228202824594e-08, "learning_rate": 0.10782336031690525, "loss": 0.0, "num_input_tokens_seen": 13419896, "step": 23630 }, { "epoch": 414.65486725663715, "grad_norm": 1.496701962366842e-08, "learning_rate": 0.10776683518067821, "loss": 0.0, "num_input_tokens_seen": 13422920, "step": 23635 }, { "epoch": 414.7433628318584, "grad_norm": 2.9012804603212317e-08, "learning_rate": 0.10771031655733587, "loss": 0.0, "num_input_tokens_seen": 13426424, "step": 23640 }, { "epoch": 414.83185840707966, "grad_norm": 1.098116442221908e-08, "learning_rate": 0.10765380445559422, "loss": 0.0, "num_input_tokens_seen": 13429304, "step": 23645 }, { "epoch": 414.92035398230087, "grad_norm": 2.2911457620011788e-08, "learning_rate": 0.10759729888416801, "loss": 0.0, "num_input_tokens_seen": 13432536, "step": 23650 }, { "epoch": 415.0, "grad_norm": 1.0395956095976544e-08, "learning_rate": 0.10754079985177119, "loss": 0.0, "num_input_tokens_seen": 13435104, "step": 23655 }, { "epoch": 415.08849557522126, "grad_norm": 2.1259085158931157e-08, "learning_rate": 0.10748430736711667, "loss": 0.0, "num_input_tokens_seen": 13437776, "step": 23660 }, { "epoch": 415.17699115044246, "grad_norm": 1.7276256869536155e-08, "learning_rate": 0.10742782143891623, "loss": 0.0, "num_input_tokens_seen": 13440528, "step": 23665 }, { "epoch": 415.2654867256637, "grad_norm": 1.6972878214005505e-08, "learning_rate": 0.10737134207588069, "loss": 0.0, "num_input_tokens_seen": 13442944, "step": 23670 }, { "epoch": 415.353982300885, "grad_norm": 1.6328147722788344e-08, "learning_rate": 0.10731486928671992, "loss": 0.0, "num_input_tokens_seen": 13445856, "step": 23675 }, { "epoch": 415.4424778761062, "grad_norm": 2.218165917611259e-08, "learning_rate": 0.10725840308014269, "loss": 0.0, "num_input_tokens_seen": 13449424, "step": 23680 }, { "epoch": 415.53097345132744, "grad_norm": 1.5456746993436354e-08, "learning_rate": 0.10720194346485688, "loss": 0.0, "num_input_tokens_seen": 13452608, "step": 23685 }, { "epoch": 415.6194690265487, "grad_norm": 1.153248341267954e-08, "learning_rate": 0.10714549044956918, "loss": 0.0, "num_input_tokens_seen": 13455776, "step": 23690 }, { "epoch": 415.7079646017699, "grad_norm": 1.8261339107539243e-08, "learning_rate": 0.10708904404298542, "loss": 0.0, "num_input_tokens_seen": 13459024, "step": 23695 }, { "epoch": 415.79646017699116, "grad_norm": 2.5647835855124868e-08, "learning_rate": 0.1070326042538103, "loss": 0.0, "num_input_tokens_seen": 13461776, "step": 23700 }, { "epoch": 415.88495575221236, "grad_norm": 5.042371498120701e-08, "learning_rate": 0.10697617109074758, "loss": 0.0, "num_input_tokens_seen": 13464416, "step": 23705 }, { "epoch": 415.9734513274336, "grad_norm": 2.424854095295359e-08, "learning_rate": 0.10691974456249999, "loss": 0.0, "num_input_tokens_seen": 13467072, "step": 23710 }, { "epoch": 416.05309734513276, "grad_norm": 2.609938754005725e-08, "learning_rate": 0.10686332467776909, "loss": 0.0, "num_input_tokens_seen": 13469936, "step": 23715 }, { "epoch": 416.14159292035396, "grad_norm": 1.153541528964297e-08, "learning_rate": 0.10680691144525563, "loss": 0.0, "num_input_tokens_seen": 13472528, "step": 23720 }, { "epoch": 416.2300884955752, "grad_norm": 8.795050909782276e-09, "learning_rate": 0.10675050487365928, "loss": 0.0, "num_input_tokens_seen": 13475264, "step": 23725 }, { "epoch": 416.3185840707965, "grad_norm": 1.6890737697394798e-08, "learning_rate": 0.10669410497167851, "loss": 0.0, "num_input_tokens_seen": 13478288, "step": 23730 }, { "epoch": 416.4070796460177, "grad_norm": 1.765055657187986e-08, "learning_rate": 0.10663771174801102, "loss": 0.0, "num_input_tokens_seen": 13481312, "step": 23735 }, { "epoch": 416.49557522123894, "grad_norm": 1.7016301256944644e-08, "learning_rate": 0.10658132521135329, "loss": 0.0, "num_input_tokens_seen": 13484352, "step": 23740 }, { "epoch": 416.5840707964602, "grad_norm": 1.950981776133176e-08, "learning_rate": 0.10652494537040084, "loss": 0.0, "num_input_tokens_seen": 13487712, "step": 23745 }, { "epoch": 416.6725663716814, "grad_norm": 1.8957804215347096e-08, "learning_rate": 0.1064685722338482, "loss": 0.0, "num_input_tokens_seen": 13490576, "step": 23750 }, { "epoch": 416.76106194690266, "grad_norm": 1.077756817124964e-08, "learning_rate": 0.10641220581038871, "loss": 0.0, "num_input_tokens_seen": 13493104, "step": 23755 }, { "epoch": 416.8495575221239, "grad_norm": 1.410130678891619e-08, "learning_rate": 0.10635584610871483, "loss": 0.0, "num_input_tokens_seen": 13495792, "step": 23760 }, { "epoch": 416.9380530973451, "grad_norm": 1.108687541773179e-08, "learning_rate": 0.10629949313751803, "loss": 0.0, "num_input_tokens_seen": 13498688, "step": 23765 }, { "epoch": 417.01769911504425, "grad_norm": 9.97520910317462e-09, "learning_rate": 0.10624314690548849, "loss": 0.0, "num_input_tokens_seen": 13500624, "step": 23770 }, { "epoch": 417.1061946902655, "grad_norm": 2.4093974815286856e-08, "learning_rate": 0.1061868074213156, "loss": 0.0, "num_input_tokens_seen": 13503808, "step": 23775 }, { "epoch": 417.1946902654867, "grad_norm": 1.6165726535177782e-08, "learning_rate": 0.10613047469368765, "loss": 0.0, "num_input_tokens_seen": 13507136, "step": 23780 }, { "epoch": 417.283185840708, "grad_norm": 2.140992627630567e-08, "learning_rate": 0.10607414873129171, "loss": 0.0, "num_input_tokens_seen": 13509728, "step": 23785 }, { "epoch": 417.37168141592923, "grad_norm": 1.31434827466137e-08, "learning_rate": 0.10601782954281413, "loss": 0.0, "num_input_tokens_seen": 13512400, "step": 23790 }, { "epoch": 417.46017699115043, "grad_norm": 1.7027115717382912e-08, "learning_rate": 0.1059615171369399, "loss": 0.0, "num_input_tokens_seen": 13515904, "step": 23795 }, { "epoch": 417.5486725663717, "grad_norm": 1.3262433817828878e-08, "learning_rate": 0.10590521152235312, "loss": 0.0, "num_input_tokens_seen": 13518496, "step": 23800 }, { "epoch": 417.5486725663717, "eval_loss": 0.6664594411849976, "eval_runtime": 0.9322, "eval_samples_per_second": 26.818, "eval_steps_per_second": 13.945, "num_input_tokens_seen": 13518496, "step": 23800 }, { "epoch": 417.6371681415929, "grad_norm": 2.3354372657991007e-08, "learning_rate": 0.1058489127077369, "loss": 0.0, "num_input_tokens_seen": 13521296, "step": 23805 }, { "epoch": 417.72566371681415, "grad_norm": 8.658715522358307e-09, "learning_rate": 0.1057926207017732, "loss": 0.0, "num_input_tokens_seen": 13523728, "step": 23810 }, { "epoch": 417.8141592920354, "grad_norm": 1.5148568621725644e-08, "learning_rate": 0.10573633551314285, "loss": 0.0, "num_input_tokens_seen": 13526624, "step": 23815 }, { "epoch": 417.9026548672566, "grad_norm": 9.920798405005371e-09, "learning_rate": 0.1056800571505259, "loss": 0.0, "num_input_tokens_seen": 13529888, "step": 23820 }, { "epoch": 417.9911504424779, "grad_norm": 1.0793336002734577e-08, "learning_rate": 0.10562378562260105, "loss": 0.0, "num_input_tokens_seen": 13532272, "step": 23825 }, { "epoch": 418.070796460177, "grad_norm": 1.7681996311580406e-08, "learning_rate": 0.10556752093804615, "loss": 0.0, "num_input_tokens_seen": 13535136, "step": 23830 }, { "epoch": 418.1592920353982, "grad_norm": 1.22331327290226e-08, "learning_rate": 0.10551126310553786, "loss": 0.0, "num_input_tokens_seen": 13538032, "step": 23835 }, { "epoch": 418.24778761061947, "grad_norm": 9.0353635684437e-09, "learning_rate": 0.10545501213375187, "loss": 0.0, "num_input_tokens_seen": 13540816, "step": 23840 }, { "epoch": 418.3362831858407, "grad_norm": 1.5939530584319073e-08, "learning_rate": 0.10539876803136287, "loss": 0.0, "num_input_tokens_seen": 13543776, "step": 23845 }, { "epoch": 418.42477876106193, "grad_norm": 1.3554539712856695e-08, "learning_rate": 0.10534253080704428, "loss": 0.0, "num_input_tokens_seen": 13546464, "step": 23850 }, { "epoch": 418.5132743362832, "grad_norm": 1.1078339134940052e-08, "learning_rate": 0.10528630046946862, "loss": 0.0, "num_input_tokens_seen": 13549136, "step": 23855 }, { "epoch": 418.60176991150445, "grad_norm": 9.127957056875857e-09, "learning_rate": 0.1052300770273074, "loss": 0.0, "num_input_tokens_seen": 13551552, "step": 23860 }, { "epoch": 418.69026548672565, "grad_norm": 1.996546927784948e-08, "learning_rate": 0.10517386048923086, "loss": 0.0, "num_input_tokens_seen": 13554240, "step": 23865 }, { "epoch": 418.7787610619469, "grad_norm": 2.7619559972436036e-08, "learning_rate": 0.10511765086390841, "loss": 0.0, "num_input_tokens_seen": 13557056, "step": 23870 }, { "epoch": 418.86725663716817, "grad_norm": 9.347899343481458e-09, "learning_rate": 0.10506144816000816, "loss": 0.0, "num_input_tokens_seen": 13560880, "step": 23875 }, { "epoch": 418.95575221238937, "grad_norm": 1.8250934985530876e-08, "learning_rate": 0.10500525238619736, "loss": 0.0, "num_input_tokens_seen": 13563792, "step": 23880 }, { "epoch": 419.0353982300885, "grad_norm": 2.1587649001730824e-08, "learning_rate": 0.10494906355114209, "loss": 0.0, "num_input_tokens_seen": 13566536, "step": 23885 }, { "epoch": 419.12389380530976, "grad_norm": 7.017772585271587e-09, "learning_rate": 0.10489288166350737, "loss": 0.0, "num_input_tokens_seen": 13569256, "step": 23890 }, { "epoch": 419.21238938053096, "grad_norm": 1.6259392054962518e-08, "learning_rate": 0.10483670673195711, "loss": 0.0, "num_input_tokens_seen": 13572216, "step": 23895 }, { "epoch": 419.3008849557522, "grad_norm": 1.9554819985501126e-08, "learning_rate": 0.10478053876515431, "loss": 0.0, "num_input_tokens_seen": 13574984, "step": 23900 }, { "epoch": 419.3893805309734, "grad_norm": 1.6344701592174715e-08, "learning_rate": 0.10472437777176061, "loss": 0.0, "num_input_tokens_seen": 13578040, "step": 23905 }, { "epoch": 419.4778761061947, "grad_norm": 2.0534988820486433e-08, "learning_rate": 0.1046682237604369, "loss": 0.0, "num_input_tokens_seen": 13581080, "step": 23910 }, { "epoch": 419.56637168141594, "grad_norm": 1.0028012198404213e-08, "learning_rate": 0.1046120767398427, "loss": 0.0, "num_input_tokens_seen": 13584104, "step": 23915 }, { "epoch": 419.65486725663715, "grad_norm": 1.1960072932026833e-08, "learning_rate": 0.10455593671863667, "loss": 0.0, "num_input_tokens_seen": 13586840, "step": 23920 }, { "epoch": 419.7433628318584, "grad_norm": 7.756448816564898e-09, "learning_rate": 0.1044998037054763, "loss": 0.0, "num_input_tokens_seen": 13589448, "step": 23925 }, { "epoch": 419.83185840707966, "grad_norm": 1.4699782724392207e-08, "learning_rate": 0.10444367770901794, "loss": 0.0, "num_input_tokens_seen": 13592264, "step": 23930 }, { "epoch": 419.92035398230087, "grad_norm": 7.233229837311228e-08, "learning_rate": 0.10438755873791698, "loss": 0.0, "num_input_tokens_seen": 13595080, "step": 23935 }, { "epoch": 420.0, "grad_norm": 1.72510112861346e-08, "learning_rate": 0.10433144680082775, "loss": 0.0, "num_input_tokens_seen": 13597592, "step": 23940 }, { "epoch": 420.08849557522126, "grad_norm": 1.908260038874232e-08, "learning_rate": 0.10427534190640322, "loss": 0.0, "num_input_tokens_seen": 13600248, "step": 23945 }, { "epoch": 420.17699115044246, "grad_norm": 1.2434132834471257e-08, "learning_rate": 0.10421924406329568, "loss": 0.0, "num_input_tokens_seen": 13603144, "step": 23950 }, { "epoch": 420.2654867256637, "grad_norm": 2.1936871874572716e-08, "learning_rate": 0.10416315328015598, "loss": 0.0, "num_input_tokens_seen": 13606296, "step": 23955 }, { "epoch": 420.353982300885, "grad_norm": 1.3908443285970407e-08, "learning_rate": 0.10410706956563402, "loss": 0.0, "num_input_tokens_seen": 13609592, "step": 23960 }, { "epoch": 420.4424778761062, "grad_norm": 9.548700496964102e-09, "learning_rate": 0.10405099292837874, "loss": 0.0, "num_input_tokens_seen": 13612024, "step": 23965 }, { "epoch": 420.53097345132744, "grad_norm": 1.9694622821475605e-08, "learning_rate": 0.10399492337703771, "loss": 0.0, "num_input_tokens_seen": 13615128, "step": 23970 }, { "epoch": 420.6194690265487, "grad_norm": 1.770651181232097e-08, "learning_rate": 0.10393886092025764, "loss": 0.0, "num_input_tokens_seen": 13618024, "step": 23975 }, { "epoch": 420.7079646017699, "grad_norm": 1.7677342256661177e-08, "learning_rate": 0.10388280556668412, "loss": 0.0, "num_input_tokens_seen": 13621128, "step": 23980 }, { "epoch": 420.79646017699116, "grad_norm": 2.0323303928648784e-08, "learning_rate": 0.10382675732496145, "loss": 0.0, "num_input_tokens_seen": 13624168, "step": 23985 }, { "epoch": 420.88495575221236, "grad_norm": 1.8389787470596275e-08, "learning_rate": 0.10377071620373311, "loss": 0.0, "num_input_tokens_seen": 13626616, "step": 23990 }, { "epoch": 420.9734513274336, "grad_norm": 2.4333775883178532e-08, "learning_rate": 0.10371468221164128, "loss": 0.0, "num_input_tokens_seen": 13629272, "step": 23995 }, { "epoch": 421.05309734513276, "grad_norm": 1.5674833875323202e-08, "learning_rate": 0.10365865535732706, "loss": 0.0, "num_input_tokens_seen": 13631704, "step": 24000 }, { "epoch": 421.05309734513276, "eval_loss": 0.6828293204307556, "eval_runtime": 0.9413, "eval_samples_per_second": 26.56, "eval_steps_per_second": 13.811, "num_input_tokens_seen": 13631704, "step": 24000 }, { "epoch": 421.14159292035396, "grad_norm": 2.2386631215454145e-08, "learning_rate": 0.10360263564943062, "loss": 0.0, "num_input_tokens_seen": 13634808, "step": 24005 }, { "epoch": 421.2300884955752, "grad_norm": 3.4005953608584605e-08, "learning_rate": 0.10354662309659075, "loss": 0.0, "num_input_tokens_seen": 13637448, "step": 24010 }, { "epoch": 421.3185840707965, "grad_norm": 1.2718325947957965e-08, "learning_rate": 0.10349061770744537, "loss": 0.0, "num_input_tokens_seen": 13640520, "step": 24015 }, { "epoch": 421.4070796460177, "grad_norm": 2.510599017568893e-08, "learning_rate": 0.10343461949063128, "loss": 0.0, "num_input_tokens_seen": 13643064, "step": 24020 }, { "epoch": 421.49557522123894, "grad_norm": 4.1802589834105675e-08, "learning_rate": 0.103378628454784, "loss": 0.0, "num_input_tokens_seen": 13646296, "step": 24025 }, { "epoch": 421.5840707964602, "grad_norm": 2.321601222377012e-08, "learning_rate": 0.10332264460853811, "loss": 0.0, "num_input_tokens_seen": 13649128, "step": 24030 }, { "epoch": 421.6725663716814, "grad_norm": 2.8648244665419043e-08, "learning_rate": 0.10326666796052701, "loss": 0.0, "num_input_tokens_seen": 13651688, "step": 24035 }, { "epoch": 421.76106194690266, "grad_norm": 4.0814377655351564e-08, "learning_rate": 0.10321069851938296, "loss": 0.0, "num_input_tokens_seen": 13654744, "step": 24040 }, { "epoch": 421.8495575221239, "grad_norm": 2.784245545228714e-08, "learning_rate": 0.10315473629373724, "loss": 0.0, "num_input_tokens_seen": 13657544, "step": 24045 }, { "epoch": 421.9380530973451, "grad_norm": 9.91120518989419e-09, "learning_rate": 0.10309878129221982, "loss": 0.0, "num_input_tokens_seen": 13660504, "step": 24050 }, { "epoch": 422.01769911504425, "grad_norm": 2.085777595084437e-08, "learning_rate": 0.10304283352345973, "loss": 0.0, "num_input_tokens_seen": 13662744, "step": 24055 }, { "epoch": 422.1061946902655, "grad_norm": 2.5363235067743517e-08, "learning_rate": 0.10298689299608486, "loss": 0.0, "num_input_tokens_seen": 13665576, "step": 24060 }, { "epoch": 422.1946902654867, "grad_norm": 2.9616124663789378e-08, "learning_rate": 0.10293095971872188, "loss": 0.0, "num_input_tokens_seen": 13668056, "step": 24065 }, { "epoch": 422.283185840708, "grad_norm": 2.2449189174267303e-08, "learning_rate": 0.10287503369999645, "loss": 0.0, "num_input_tokens_seen": 13670872, "step": 24070 }, { "epoch": 422.37168141592923, "grad_norm": 8.507095472509718e-09, "learning_rate": 0.10281911494853295, "loss": 0.0, "num_input_tokens_seen": 13674632, "step": 24075 }, { "epoch": 422.46017699115043, "grad_norm": 7.160232406988598e-09, "learning_rate": 0.10276320347295485, "loss": 0.0, "num_input_tokens_seen": 13677272, "step": 24080 }, { "epoch": 422.5486725663717, "grad_norm": 2.52450487181477e-08, "learning_rate": 0.10270729928188446, "loss": 0.0, "num_input_tokens_seen": 13680264, "step": 24085 }, { "epoch": 422.6371681415929, "grad_norm": 1.808686711513019e-08, "learning_rate": 0.10265140238394276, "loss": 0.0, "num_input_tokens_seen": 13683432, "step": 24090 }, { "epoch": 422.72566371681415, "grad_norm": 5.898430632811369e-09, "learning_rate": 0.10259551278774988, "loss": 0.0, "num_input_tokens_seen": 13686168, "step": 24095 }, { "epoch": 422.8141592920354, "grad_norm": 1.644991698412923e-08, "learning_rate": 0.10253963050192462, "loss": 0.0, "num_input_tokens_seen": 13688984, "step": 24100 }, { "epoch": 422.9026548672566, "grad_norm": 1.63677302822407e-08, "learning_rate": 0.10248375553508478, "loss": 0.0, "num_input_tokens_seen": 13691880, "step": 24105 }, { "epoch": 422.9911504424779, "grad_norm": 8.617729641002825e-09, "learning_rate": 0.102427887895847, "loss": 0.0, "num_input_tokens_seen": 13694440, "step": 24110 }, { "epoch": 423.070796460177, "grad_norm": 9.974002068702248e-09, "learning_rate": 0.10237202759282668, "loss": 0.0, "num_input_tokens_seen": 13696576, "step": 24115 }, { "epoch": 423.1592920353982, "grad_norm": 2.6756598714428037e-08, "learning_rate": 0.10231617463463821, "loss": 0.0, "num_input_tokens_seen": 13699392, "step": 24120 }, { "epoch": 423.24778761061947, "grad_norm": 1.6031043159614455e-08, "learning_rate": 0.10226032902989492, "loss": 0.0, "num_input_tokens_seen": 13702160, "step": 24125 }, { "epoch": 423.3362831858407, "grad_norm": 1.575125452291104e-08, "learning_rate": 0.10220449078720877, "loss": 0.0, "num_input_tokens_seen": 13705200, "step": 24130 }, { "epoch": 423.42477876106193, "grad_norm": 1.8440307059108818e-08, "learning_rate": 0.1021486599151908, "loss": 0.0, "num_input_tokens_seen": 13707872, "step": 24135 }, { "epoch": 423.5132743362832, "grad_norm": 2.0640598563659296e-08, "learning_rate": 0.10209283642245084, "loss": 0.0, "num_input_tokens_seen": 13710896, "step": 24140 }, { "epoch": 423.60176991150445, "grad_norm": 1.3931455100646417e-08, "learning_rate": 0.10203702031759748, "loss": 0.0, "num_input_tokens_seen": 13713632, "step": 24145 }, { "epoch": 423.69026548672565, "grad_norm": 2.8616893743560468e-08, "learning_rate": 0.1019812116092384, "loss": 0.0, "num_input_tokens_seen": 13716848, "step": 24150 }, { "epoch": 423.7787610619469, "grad_norm": 1.528298909647674e-08, "learning_rate": 0.10192541030597986, "loss": 0.0, "num_input_tokens_seen": 13719632, "step": 24155 }, { "epoch": 423.86725663716817, "grad_norm": 1.8383865096893715e-08, "learning_rate": 0.1018696164164272, "loss": 0.0, "num_input_tokens_seen": 13722576, "step": 24160 }, { "epoch": 423.95575221238937, "grad_norm": 8.85377460235759e-09, "learning_rate": 0.10181382994918459, "loss": 0.0, "num_input_tokens_seen": 13725440, "step": 24165 }, { "epoch": 424.0353982300885, "grad_norm": 8.7520906077998e-09, "learning_rate": 0.10175805091285492, "loss": 0.0, "num_input_tokens_seen": 13728208, "step": 24170 }, { "epoch": 424.12389380530976, "grad_norm": 1.6841095629160918e-08, "learning_rate": 0.10170227931603999, "loss": 0.0, "num_input_tokens_seen": 13731456, "step": 24175 }, { "epoch": 424.21238938053096, "grad_norm": 7.774830557139012e-09, "learning_rate": 0.10164651516734062, "loss": 0.0, "num_input_tokens_seen": 13734144, "step": 24180 }, { "epoch": 424.3008849557522, "grad_norm": 1.4597838493557447e-08, "learning_rate": 0.1015907584753562, "loss": 0.0, "num_input_tokens_seen": 13736816, "step": 24185 }, { "epoch": 424.3893805309734, "grad_norm": 8.156400888026383e-09, "learning_rate": 0.10153500924868523, "loss": 0.0, "num_input_tokens_seen": 13739264, "step": 24190 }, { "epoch": 424.4778761061947, "grad_norm": 1.9641605675246865e-08, "learning_rate": 0.10147926749592483, "loss": 0.0, "num_input_tokens_seen": 13742176, "step": 24195 }, { "epoch": 424.56637168141594, "grad_norm": 2.3396735215897024e-08, "learning_rate": 0.10142353322567112, "loss": 0.0, "num_input_tokens_seen": 13745200, "step": 24200 }, { "epoch": 424.56637168141594, "eval_loss": 0.6715885400772095, "eval_runtime": 0.9357, "eval_samples_per_second": 26.717, "eval_steps_per_second": 13.893, "num_input_tokens_seen": 13745200, "step": 24200 }, { "epoch": 424.65486725663715, "grad_norm": 2.9654023236957983e-08, "learning_rate": 0.1013678064465191, "loss": 0.0, "num_input_tokens_seen": 13748144, "step": 24205 }, { "epoch": 424.7433628318584, "grad_norm": 1.979486263792296e-08, "learning_rate": 0.10131208716706244, "loss": 0.0, "num_input_tokens_seen": 13751504, "step": 24210 }, { "epoch": 424.83185840707966, "grad_norm": 2.0616676366103093e-08, "learning_rate": 0.10125637539589379, "loss": 0.0, "num_input_tokens_seen": 13754336, "step": 24215 }, { "epoch": 424.92035398230087, "grad_norm": 2.2947043376575493e-08, "learning_rate": 0.10120067114160464, "loss": 0.0, "num_input_tokens_seen": 13757184, "step": 24220 }, { "epoch": 425.0, "grad_norm": 5.1930246769416044e-08, "learning_rate": 0.10114497441278517, "loss": 0.0, "num_input_tokens_seen": 13759352, "step": 24225 }, { "epoch": 425.08849557522126, "grad_norm": 1.0561343799508904e-08, "learning_rate": 0.10108928521802468, "loss": 0.0, "num_input_tokens_seen": 13762296, "step": 24230 }, { "epoch": 425.17699115044246, "grad_norm": 1.9751928093114657e-08, "learning_rate": 0.101033603565911, "loss": 0.0, "num_input_tokens_seen": 13765320, "step": 24235 }, { "epoch": 425.2654867256637, "grad_norm": 2.025234913105578e-08, "learning_rate": 0.10097792946503102, "loss": 0.0, "num_input_tokens_seen": 13768184, "step": 24240 }, { "epoch": 425.353982300885, "grad_norm": 1.0818581586136133e-08, "learning_rate": 0.10092226292397039, "loss": 0.0, "num_input_tokens_seen": 13770888, "step": 24245 }, { "epoch": 425.4424778761062, "grad_norm": 1.6381303424850557e-08, "learning_rate": 0.10086660395131354, "loss": 0.0, "num_input_tokens_seen": 13773432, "step": 24250 }, { "epoch": 425.53097345132744, "grad_norm": 9.701216718838168e-09, "learning_rate": 0.10081095255564385, "loss": 0.0, "num_input_tokens_seen": 13776440, "step": 24255 }, { "epoch": 425.6194690265487, "grad_norm": 1.1961954093919758e-08, "learning_rate": 0.10075530874554335, "loss": 0.0, "num_input_tokens_seen": 13779320, "step": 24260 }, { "epoch": 425.7079646017699, "grad_norm": 1.575720531832303e-08, "learning_rate": 0.10069967252959311, "loss": 0.0, "num_input_tokens_seen": 13782168, "step": 24265 }, { "epoch": 425.79646017699116, "grad_norm": 3.116558033866568e-08, "learning_rate": 0.10064404391637297, "loss": 0.0, "num_input_tokens_seen": 13785288, "step": 24270 }, { "epoch": 425.88495575221236, "grad_norm": 2.3190386500004934e-08, "learning_rate": 0.10058842291446145, "loss": 0.0, "num_input_tokens_seen": 13788056, "step": 24275 }, { "epoch": 425.9734513274336, "grad_norm": 1.1791805754057805e-08, "learning_rate": 0.10053280953243608, "loss": 0.0, "num_input_tokens_seen": 13790920, "step": 24280 }, { "epoch": 426.05309734513276, "grad_norm": 9.672472600641413e-09, "learning_rate": 0.10047720377887315, "loss": 0.0, "num_input_tokens_seen": 13793464, "step": 24285 }, { "epoch": 426.14159292035396, "grad_norm": 1.589108045152443e-08, "learning_rate": 0.10042160566234767, "loss": 0.0, "num_input_tokens_seen": 13796424, "step": 24290 }, { "epoch": 426.2300884955752, "grad_norm": 2.7987011819163854e-08, "learning_rate": 0.10036601519143372, "loss": 0.0, "num_input_tokens_seen": 13799064, "step": 24295 }, { "epoch": 426.3185840707965, "grad_norm": 9.196863715033032e-09, "learning_rate": 0.1003104323747039, "loss": 0.0, "num_input_tokens_seen": 13802408, "step": 24300 }, { "epoch": 426.4070796460177, "grad_norm": 1.524469439573295e-08, "learning_rate": 0.10025485722072984, "loss": 0.0, "num_input_tokens_seen": 13804920, "step": 24305 }, { "epoch": 426.49557522123894, "grad_norm": 1.164058893721176e-08, "learning_rate": 0.10019928973808201, "loss": 0.0, "num_input_tokens_seen": 13808008, "step": 24310 }, { "epoch": 426.5840707964602, "grad_norm": 1.3659242625863044e-08, "learning_rate": 0.10014372993532945, "loss": 0.0, "num_input_tokens_seen": 13811160, "step": 24315 }, { "epoch": 426.6725663716814, "grad_norm": 9.726346839045164e-09, "learning_rate": 0.1000881778210403, "loss": 0.0, "num_input_tokens_seen": 13814472, "step": 24320 }, { "epoch": 426.76106194690266, "grad_norm": 1.7420003217694102e-08, "learning_rate": 0.10003263340378142, "loss": 0.0, "num_input_tokens_seen": 13817528, "step": 24325 }, { "epoch": 426.8495575221239, "grad_norm": 1.7506126326338745e-08, "learning_rate": 0.09997709669211834, "loss": 0.0, "num_input_tokens_seen": 13819960, "step": 24330 }, { "epoch": 426.9380530973451, "grad_norm": 1.804729699017571e-08, "learning_rate": 0.0999215676946156, "loss": 0.0, "num_input_tokens_seen": 13822712, "step": 24335 }, { "epoch": 427.01769911504425, "grad_norm": 1.9975336940092348e-08, "learning_rate": 0.0998660464198364, "loss": 0.0, "num_input_tokens_seen": 13825240, "step": 24340 }, { "epoch": 427.1061946902655, "grad_norm": 9.838304393383623e-09, "learning_rate": 0.09981053287634288, "loss": 0.0, "num_input_tokens_seen": 13827688, "step": 24345 }, { "epoch": 427.1946902654867, "grad_norm": 2.9348646179983007e-08, "learning_rate": 0.09975502707269596, "loss": 0.0, "num_input_tokens_seen": 13830840, "step": 24350 }, { "epoch": 427.283185840708, "grad_norm": 2.9994154715495824e-08, "learning_rate": 0.09969952901745524, "loss": 0.0, "num_input_tokens_seen": 13834264, "step": 24355 }, { "epoch": 427.37168141592923, "grad_norm": 1.6671299007953166e-08, "learning_rate": 0.09964403871917925, "loss": 0.0, "num_input_tokens_seen": 13837336, "step": 24360 }, { "epoch": 427.46017699115043, "grad_norm": 1.6614343678611476e-08, "learning_rate": 0.09958855618642536, "loss": 0.0, "num_input_tokens_seen": 13840168, "step": 24365 }, { "epoch": 427.5486725663717, "grad_norm": 5.596457963719104e-09, "learning_rate": 0.09953308142774955, "loss": 0.0, "num_input_tokens_seen": 13842760, "step": 24370 }, { "epoch": 427.6371681415929, "grad_norm": 1.4678498416742514e-08, "learning_rate": 0.09947761445170686, "loss": 0.0, "num_input_tokens_seen": 13845592, "step": 24375 }, { "epoch": 427.72566371681415, "grad_norm": 8.726074973708364e-09, "learning_rate": 0.09942215526685086, "loss": 0.0, "num_input_tokens_seen": 13848568, "step": 24380 }, { "epoch": 427.8141592920354, "grad_norm": 1.5947787090908605e-08, "learning_rate": 0.09936670388173414, "loss": 0.0, "num_input_tokens_seen": 13851464, "step": 24385 }, { "epoch": 427.9026548672566, "grad_norm": 5.098774291667496e-09, "learning_rate": 0.09931126030490799, "loss": 0.0, "num_input_tokens_seen": 13854744, "step": 24390 }, { "epoch": 427.9911504424779, "grad_norm": 6.868448032548713e-09, "learning_rate": 0.0992558245449225, "loss": 0.0, "num_input_tokens_seen": 13857256, "step": 24395 }, { "epoch": 428.070796460177, "grad_norm": 1.3744265281445678e-08, "learning_rate": 0.09920039661032651, "loss": 0.0, "num_input_tokens_seen": 13859752, "step": 24400 }, { "epoch": 428.070796460177, "eval_loss": 0.6813579201698303, "eval_runtime": 0.9258, "eval_samples_per_second": 27.004, "eval_steps_per_second": 14.042, "num_input_tokens_seen": 13859752, "step": 24400 }, { "epoch": 428.1592920353982, "grad_norm": 2.3601018028784893e-08, "learning_rate": 0.09914497650966782, "loss": 0.0, "num_input_tokens_seen": 13863672, "step": 24405 }, { "epoch": 428.24778761061947, "grad_norm": 1.54743862168516e-08, "learning_rate": 0.09908956425149276, "loss": 0.0, "num_input_tokens_seen": 13866632, "step": 24410 }, { "epoch": 428.3362831858407, "grad_norm": 2.54750087691491e-08, "learning_rate": 0.09903415984434677, "loss": 0.0, "num_input_tokens_seen": 13869592, "step": 24415 }, { "epoch": 428.42477876106193, "grad_norm": 2.4714006841008995e-08, "learning_rate": 0.09897876329677373, "loss": 0.0, "num_input_tokens_seen": 13872472, "step": 24420 }, { "epoch": 428.5132743362832, "grad_norm": 2.1826659590828967e-08, "learning_rate": 0.09892337461731658, "loss": 0.0, "num_input_tokens_seen": 13875032, "step": 24425 }, { "epoch": 428.60176991150445, "grad_norm": 1.160288309876023e-08, "learning_rate": 0.09886799381451693, "loss": 0.0, "num_input_tokens_seen": 13877624, "step": 24430 }, { "epoch": 428.69026548672565, "grad_norm": 5.234942257459352e-09, "learning_rate": 0.09881262089691521, "loss": 0.0, "num_input_tokens_seen": 13880056, "step": 24435 }, { "epoch": 428.7787610619469, "grad_norm": 1.8167332527241342e-08, "learning_rate": 0.09875725587305059, "loss": 0.0, "num_input_tokens_seen": 13883192, "step": 24440 }, { "epoch": 428.86725663716817, "grad_norm": 1.7706662802652318e-08, "learning_rate": 0.09870189875146111, "loss": 0.0, "num_input_tokens_seen": 13885720, "step": 24445 }, { "epoch": 428.95575221238937, "grad_norm": 7.57153539865385e-09, "learning_rate": 0.09864654954068346, "loss": 0.0, "num_input_tokens_seen": 13888776, "step": 24450 }, { "epoch": 429.0353982300885, "grad_norm": 4.4220303863085064e-08, "learning_rate": 0.09859120824925326, "loss": 0.0, "num_input_tokens_seen": 13891224, "step": 24455 }, { "epoch": 429.12389380530976, "grad_norm": 1.4896378353057571e-08, "learning_rate": 0.09853587488570474, "loss": 0.0, "num_input_tokens_seen": 13894136, "step": 24460 }, { "epoch": 429.21238938053096, "grad_norm": 1.0092270130712677e-08, "learning_rate": 0.09848054945857107, "loss": 0.0, "num_input_tokens_seen": 13896744, "step": 24465 }, { "epoch": 429.3008849557522, "grad_norm": 1.0971993091857257e-08, "learning_rate": 0.09842523197638416, "loss": 0.0, "num_input_tokens_seen": 13899512, "step": 24470 }, { "epoch": 429.3893805309734, "grad_norm": 1.292303153377361e-08, "learning_rate": 0.09836992244767452, "loss": 0.0, "num_input_tokens_seen": 13902616, "step": 24475 }, { "epoch": 429.4778761061947, "grad_norm": 1.2649548075671646e-08, "learning_rate": 0.09831462088097168, "loss": 0.0, "num_input_tokens_seen": 13905256, "step": 24480 }, { "epoch": 429.56637168141594, "grad_norm": 1.4461504882490317e-08, "learning_rate": 0.09825932728480385, "loss": 0.0, "num_input_tokens_seen": 13908536, "step": 24485 }, { "epoch": 429.65486725663715, "grad_norm": 1.2747034539017932e-08, "learning_rate": 0.09820404166769794, "loss": 0.0, "num_input_tokens_seen": 13911384, "step": 24490 }, { "epoch": 429.7433628318584, "grad_norm": 2.8783643912788648e-08, "learning_rate": 0.09814876403817978, "loss": 0.0, "num_input_tokens_seen": 13914520, "step": 24495 }, { "epoch": 429.83185840707966, "grad_norm": 2.8067038471135675e-08, "learning_rate": 0.09809349440477376, "loss": 0.0, "num_input_tokens_seen": 13917448, "step": 24500 }, { "epoch": 429.92035398230087, "grad_norm": 1.9494818204179865e-08, "learning_rate": 0.09803823277600317, "loss": 0.0, "num_input_tokens_seen": 13920136, "step": 24505 }, { "epoch": 430.0, "grad_norm": 2.657998088295699e-08, "learning_rate": 0.09798297916039014, "loss": 0.0, "num_input_tokens_seen": 13922320, "step": 24510 }, { "epoch": 430.08849557522126, "grad_norm": 1.0516676418603765e-08, "learning_rate": 0.09792773356645534, "loss": 0.0, "num_input_tokens_seen": 13925120, "step": 24515 }, { "epoch": 430.17699115044246, "grad_norm": 1.2195171095186197e-08, "learning_rate": 0.09787249600271843, "loss": 0.0, "num_input_tokens_seen": 13928400, "step": 24520 }, { "epoch": 430.2654867256637, "grad_norm": 8.019510389090101e-09, "learning_rate": 0.09781726647769776, "loss": 0.0, "num_input_tokens_seen": 13930816, "step": 24525 }, { "epoch": 430.353982300885, "grad_norm": 6.884379732952084e-09, "learning_rate": 0.0977620449999103, "loss": 0.0, "num_input_tokens_seen": 13933712, "step": 24530 }, { "epoch": 430.4424778761062, "grad_norm": 4.826856692119463e-09, "learning_rate": 0.09770683157787204, "loss": 0.0, "num_input_tokens_seen": 13936576, "step": 24535 }, { "epoch": 430.53097345132744, "grad_norm": 1.5148563292655126e-08, "learning_rate": 0.09765162622009745, "loss": 0.0, "num_input_tokens_seen": 13939824, "step": 24540 }, { "epoch": 430.6194690265487, "grad_norm": 2.2606663208080136e-08, "learning_rate": 0.09759642893509995, "loss": 0.0, "num_input_tokens_seen": 13942752, "step": 24545 }, { "epoch": 430.7079646017699, "grad_norm": 2.077787009113763e-08, "learning_rate": 0.09754123973139169, "loss": 0.0, "num_input_tokens_seen": 13945504, "step": 24550 }, { "epoch": 430.79646017699116, "grad_norm": 1.6844516892433603e-08, "learning_rate": 0.09748605861748345, "loss": 0.0, "num_input_tokens_seen": 13948240, "step": 24555 }, { "epoch": 430.88495575221236, "grad_norm": 1.5362864758117212e-08, "learning_rate": 0.0974308856018849, "loss": 0.0, "num_input_tokens_seen": 13951040, "step": 24560 }, { "epoch": 430.9734513274336, "grad_norm": 7.404143076428227e-09, "learning_rate": 0.09737572069310449, "loss": 0.0, "num_input_tokens_seen": 13953808, "step": 24565 }, { "epoch": 431.05309734513276, "grad_norm": 1.840582442014238e-08, "learning_rate": 0.09732056389964922, "loss": 0.0, "num_input_tokens_seen": 13956216, "step": 24570 }, { "epoch": 431.14159292035396, "grad_norm": 1.0124649563181265e-08, "learning_rate": 0.097265415230025, "loss": 0.0, "num_input_tokens_seen": 13958872, "step": 24575 }, { "epoch": 431.2300884955752, "grad_norm": 1.3716680236086631e-08, "learning_rate": 0.09721027469273648, "loss": 0.0, "num_input_tokens_seen": 13961848, "step": 24580 }, { "epoch": 431.3185840707965, "grad_norm": 1.2224049328324327e-08, "learning_rate": 0.09715514229628695, "loss": 0.0, "num_input_tokens_seen": 13964632, "step": 24585 }, { "epoch": 431.4070796460177, "grad_norm": 1.6347108555692103e-08, "learning_rate": 0.09710001804917864, "loss": 0.0, "num_input_tokens_seen": 13967048, "step": 24590 }, { "epoch": 431.49557522123894, "grad_norm": 1.797405069225988e-08, "learning_rate": 0.09704490195991226, "loss": 0.0, "num_input_tokens_seen": 13969480, "step": 24595 }, { "epoch": 431.5840707964602, "grad_norm": 1.2947486638381633e-08, "learning_rate": 0.09698979403698753, "loss": 0.0, "num_input_tokens_seen": 13972648, "step": 24600 }, { "epoch": 431.5840707964602, "eval_loss": 0.7037013173103333, "eval_runtime": 0.9251, "eval_samples_per_second": 27.025, "eval_steps_per_second": 14.053, "num_input_tokens_seen": 13972648, "step": 24600 }, { "epoch": 431.6725663716814, "grad_norm": 1.2729199916350353e-08, "learning_rate": 0.0969346942889027, "loss": 0.0, "num_input_tokens_seen": 13975832, "step": 24605 }, { "epoch": 431.76106194690266, "grad_norm": 2.8263142937134944e-08, "learning_rate": 0.09687960272415487, "loss": 0.0, "num_input_tokens_seen": 13979112, "step": 24610 }, { "epoch": 431.8495575221239, "grad_norm": 2.130036058645146e-08, "learning_rate": 0.0968245193512399, "loss": 0.0, "num_input_tokens_seen": 13981960, "step": 24615 }, { "epoch": 431.9380530973451, "grad_norm": 3.26237987735567e-08, "learning_rate": 0.09676944417865221, "loss": 0.0, "num_input_tokens_seen": 13984536, "step": 24620 }, { "epoch": 432.01769911504425, "grad_norm": 1.6969975646929925e-08, "learning_rate": 0.09671437721488517, "loss": 0.0, "num_input_tokens_seen": 13986872, "step": 24625 }, { "epoch": 432.1061946902655, "grad_norm": 1.1209075445606231e-08, "learning_rate": 0.09665931846843086, "loss": 0.0, "num_input_tokens_seen": 13989352, "step": 24630 }, { "epoch": 432.1946902654867, "grad_norm": 2.604737581179961e-08, "learning_rate": 0.0966042679477799, "loss": 0.0, "num_input_tokens_seen": 13992280, "step": 24635 }, { "epoch": 432.283185840708, "grad_norm": 9.309838233662049e-09, "learning_rate": 0.09654922566142186, "loss": 0.0, "num_input_tokens_seen": 13995256, "step": 24640 }, { "epoch": 432.37168141592923, "grad_norm": 2.3717360519981412e-08, "learning_rate": 0.09649419161784498, "loss": 0.0, "num_input_tokens_seen": 13997992, "step": 24645 }, { "epoch": 432.46017699115043, "grad_norm": 2.1024375129741202e-08, "learning_rate": 0.09643916582553606, "loss": 0.0, "num_input_tokens_seen": 14000792, "step": 24650 }, { "epoch": 432.5486725663717, "grad_norm": 5.286521798097965e-08, "learning_rate": 0.09638414829298093, "loss": 0.0, "num_input_tokens_seen": 14003352, "step": 24655 }, { "epoch": 432.6371681415929, "grad_norm": 1.8994064987509773e-08, "learning_rate": 0.09632913902866386, "loss": 0.0, "num_input_tokens_seen": 14007016, "step": 24660 }, { "epoch": 432.72566371681415, "grad_norm": 1.5377258577586872e-08, "learning_rate": 0.096274138041068, "loss": 0.0, "num_input_tokens_seen": 14009928, "step": 24665 }, { "epoch": 432.8141592920354, "grad_norm": 1.2973567109497708e-08, "learning_rate": 0.09621914533867527, "loss": 0.0, "num_input_tokens_seen": 14012792, "step": 24670 }, { "epoch": 432.9026548672566, "grad_norm": 1.608490229898507e-08, "learning_rate": 0.09616416092996616, "loss": 0.0, "num_input_tokens_seen": 14015400, "step": 24675 }, { "epoch": 432.9911504424779, "grad_norm": 2.2943297040001198e-08, "learning_rate": 0.09610918482342, "loss": 0.0, "num_input_tokens_seen": 14018920, "step": 24680 }, { "epoch": 433.070796460177, "grad_norm": 6.555514797668138e-09, "learning_rate": 0.09605421702751478, "loss": 0.0, "num_input_tokens_seen": 14021720, "step": 24685 }, { "epoch": 433.1592920353982, "grad_norm": 1.609571320670966e-08, "learning_rate": 0.09599925755072718, "loss": 0.0, "num_input_tokens_seen": 14025016, "step": 24690 }, { "epoch": 433.24778761061947, "grad_norm": 2.2465760807222068e-08, "learning_rate": 0.09594430640153273, "loss": 0.0, "num_input_tokens_seen": 14028056, "step": 24695 }, { "epoch": 433.3362831858407, "grad_norm": 1.955738326842038e-08, "learning_rate": 0.09588936358840547, "loss": 0.0, "num_input_tokens_seen": 14030888, "step": 24700 }, { "epoch": 433.42477876106193, "grad_norm": 9.37789934596367e-09, "learning_rate": 0.09583442911981836, "loss": 0.0, "num_input_tokens_seen": 14033480, "step": 24705 }, { "epoch": 433.5132743362832, "grad_norm": 1.684567862980657e-08, "learning_rate": 0.09577950300424302, "loss": 0.0, "num_input_tokens_seen": 14036648, "step": 24710 }, { "epoch": 433.60176991150445, "grad_norm": 2.5509919510113832e-08, "learning_rate": 0.09572458525014967, "loss": 0.0, "num_input_tokens_seen": 14039096, "step": 24715 }, { "epoch": 433.69026548672565, "grad_norm": 2.5883581500352193e-08, "learning_rate": 0.0956696758660073, "loss": 0.0, "num_input_tokens_seen": 14042024, "step": 24720 }, { "epoch": 433.7787610619469, "grad_norm": 2.49113707440074e-08, "learning_rate": 0.09561477486028373, "loss": 0.0, "num_input_tokens_seen": 14045032, "step": 24725 }, { "epoch": 433.86725663716817, "grad_norm": 4.982338541736908e-09, "learning_rate": 0.09555988224144528, "loss": 0.0, "num_input_tokens_seen": 14047384, "step": 24730 }, { "epoch": 433.95575221238937, "grad_norm": 1.3694565481614518e-08, "learning_rate": 0.09550499801795717, "loss": 0.0, "num_input_tokens_seen": 14050136, "step": 24735 }, { "epoch": 434.0353982300885, "grad_norm": 1.4087083499703112e-08, "learning_rate": 0.09545012219828314, "loss": 0.0, "num_input_tokens_seen": 14052824, "step": 24740 }, { "epoch": 434.12389380530976, "grad_norm": 1.2442194829986875e-08, "learning_rate": 0.09539525479088577, "loss": 0.0, "num_input_tokens_seen": 14055816, "step": 24745 }, { "epoch": 434.21238938053096, "grad_norm": 2.2323366266618905e-08, "learning_rate": 0.0953403958042264, "loss": 0.0, "num_input_tokens_seen": 14058712, "step": 24750 }, { "epoch": 434.3008849557522, "grad_norm": 1.1496656959764096e-08, "learning_rate": 0.09528554524676484, "loss": 0.0, "num_input_tokens_seen": 14061176, "step": 24755 }, { "epoch": 434.3893805309734, "grad_norm": 1.7933215801235747e-08, "learning_rate": 0.09523070312695978, "loss": 0.0, "num_input_tokens_seen": 14064056, "step": 24760 }, { "epoch": 434.4778761061947, "grad_norm": 2.0176425863382974e-08, "learning_rate": 0.09517586945326863, "loss": 0.0, "num_input_tokens_seen": 14067096, "step": 24765 }, { "epoch": 434.56637168141594, "grad_norm": 6.98669344600944e-09, "learning_rate": 0.0951210442341473, "loss": 0.0, "num_input_tokens_seen": 14070264, "step": 24770 }, { "epoch": 434.65486725663715, "grad_norm": 2.0521886412439017e-08, "learning_rate": 0.09506622747805066, "loss": 0.0, "num_input_tokens_seen": 14072728, "step": 24775 }, { "epoch": 434.7433628318584, "grad_norm": 1.3090804884541285e-08, "learning_rate": 0.09501141919343203, "loss": 0.0, "num_input_tokens_seen": 14075464, "step": 24780 }, { "epoch": 434.83185840707966, "grad_norm": 1.0282419360407857e-08, "learning_rate": 0.09495661938874361, "loss": 0.0, "num_input_tokens_seen": 14078776, "step": 24785 }, { "epoch": 434.92035398230087, "grad_norm": 2.406540922095246e-08, "learning_rate": 0.0949018280724362, "loss": 0.0, "num_input_tokens_seen": 14081576, "step": 24790 }, { "epoch": 435.0, "grad_norm": 3.7556864285193114e-09, "learning_rate": 0.09484704525295934, "loss": 0.0, "num_input_tokens_seen": 14083896, "step": 24795 }, { "epoch": 435.08849557522126, "grad_norm": 1.189509646337683e-08, "learning_rate": 0.09479227093876112, "loss": 0.0, "num_input_tokens_seen": 14086360, "step": 24800 }, { "epoch": 435.08849557522126, "eval_loss": 0.7048231363296509, "eval_runtime": 0.9385, "eval_samples_per_second": 26.639, "eval_steps_per_second": 13.852, "num_input_tokens_seen": 14086360, "step": 24800 }, { "epoch": 435.17699115044246, "grad_norm": 1.5620324589349366e-08, "learning_rate": 0.0947375051382886, "loss": 0.0, "num_input_tokens_seen": 14089400, "step": 24805 }, { "epoch": 435.2654867256637, "grad_norm": 4.3884018197104524e-08, "learning_rate": 0.09468274785998718, "loss": 0.0, "num_input_tokens_seen": 14092600, "step": 24810 }, { "epoch": 435.353982300885, "grad_norm": 1.5795084351566402e-08, "learning_rate": 0.09462799911230127, "loss": 0.0, "num_input_tokens_seen": 14095240, "step": 24815 }, { "epoch": 435.4424778761062, "grad_norm": 1.5666474340036984e-08, "learning_rate": 0.0945732589036737, "loss": 0.0, "num_input_tokens_seen": 14098200, "step": 24820 }, { "epoch": 435.53097345132744, "grad_norm": 8.971531961776691e-09, "learning_rate": 0.09451852724254614, "loss": 0.0, "num_input_tokens_seen": 14101480, "step": 24825 }, { "epoch": 435.6194690265487, "grad_norm": 7.4403372352094266e-09, "learning_rate": 0.09446380413735894, "loss": 0.0, "num_input_tokens_seen": 14103960, "step": 24830 }, { "epoch": 435.7079646017699, "grad_norm": 1.036785945984775e-08, "learning_rate": 0.09440908959655099, "loss": 0.0, "num_input_tokens_seen": 14107048, "step": 24835 }, { "epoch": 435.79646017699116, "grad_norm": 1.9933104056235607e-08, "learning_rate": 0.09435438362856004, "loss": 0.0, "num_input_tokens_seen": 14110008, "step": 24840 }, { "epoch": 435.88495575221236, "grad_norm": 8.240979454399167e-09, "learning_rate": 0.0942996862418225, "loss": 0.0, "num_input_tokens_seen": 14113000, "step": 24845 }, { "epoch": 435.9734513274336, "grad_norm": 1.6430469429451477e-08, "learning_rate": 0.09424499744477322, "loss": 0.0, "num_input_tokens_seen": 14115752, "step": 24850 }, { "epoch": 436.05309734513276, "grad_norm": 1.2888797584764689e-08, "learning_rate": 0.09419031724584608, "loss": 0.0, "num_input_tokens_seen": 14118128, "step": 24855 }, { "epoch": 436.14159292035396, "grad_norm": 3.079478361200927e-08, "learning_rate": 0.09413564565347331, "loss": 0.0, "num_input_tokens_seen": 14121072, "step": 24860 }, { "epoch": 436.2300884955752, "grad_norm": 1.0627303481669514e-08, "learning_rate": 0.094080982676086, "loss": 0.0, "num_input_tokens_seen": 14124304, "step": 24865 }, { "epoch": 436.3185840707965, "grad_norm": 2.1888443058060147e-08, "learning_rate": 0.09402632832211395, "loss": 0.0, "num_input_tokens_seen": 14127056, "step": 24870 }, { "epoch": 436.4070796460177, "grad_norm": 1.6213340003901067e-08, "learning_rate": 0.09397168259998541, "loss": 0.0, "num_input_tokens_seen": 14129776, "step": 24875 }, { "epoch": 436.49557522123894, "grad_norm": 2.2197815141566934e-08, "learning_rate": 0.09391704551812759, "loss": 0.0, "num_input_tokens_seen": 14132400, "step": 24880 }, { "epoch": 436.5840707964602, "grad_norm": 1.6259660284845268e-08, "learning_rate": 0.09386241708496605, "loss": 0.0, "num_input_tokens_seen": 14135280, "step": 24885 }, { "epoch": 436.6725663716814, "grad_norm": 3.5416672261590065e-08, "learning_rate": 0.09380779730892527, "loss": 0.0, "num_input_tokens_seen": 14137776, "step": 24890 }, { "epoch": 436.76106194690266, "grad_norm": 8.90942697395758e-09, "learning_rate": 0.09375318619842836, "loss": 0.0, "num_input_tokens_seen": 14140768, "step": 24895 }, { "epoch": 436.8495575221239, "grad_norm": 1.7470728863600016e-08, "learning_rate": 0.09369858376189696, "loss": 0.0, "num_input_tokens_seen": 14143744, "step": 24900 }, { "epoch": 436.9380530973451, "grad_norm": 9.069882622725345e-09, "learning_rate": 0.09364399000775143, "loss": 0.0, "num_input_tokens_seen": 14146448, "step": 24905 }, { "epoch": 437.01769911504425, "grad_norm": 1.2190141340795435e-08, "learning_rate": 0.09358940494441093, "loss": 0.0, "num_input_tokens_seen": 14149152, "step": 24910 }, { "epoch": 437.1061946902655, "grad_norm": 7.442679805791386e-09, "learning_rate": 0.09353482858029301, "loss": 0.0, "num_input_tokens_seen": 14151952, "step": 24915 }, { "epoch": 437.1946902654867, "grad_norm": 1.4810519921582e-08, "learning_rate": 0.09348026092381419, "loss": 0.0, "num_input_tokens_seen": 14154784, "step": 24920 }, { "epoch": 437.283185840708, "grad_norm": 1.9193564071429137e-08, "learning_rate": 0.09342570198338931, "loss": 0.0, "num_input_tokens_seen": 14157872, "step": 24925 }, { "epoch": 437.37168141592923, "grad_norm": 1.7272729024853106e-08, "learning_rate": 0.0933711517674322, "loss": 0.0, "num_input_tokens_seen": 14161040, "step": 24930 }, { "epoch": 437.46017699115043, "grad_norm": 1.0443019782258034e-08, "learning_rate": 0.09331661028435513, "loss": 0.0, "num_input_tokens_seen": 14163872, "step": 24935 }, { "epoch": 437.5486725663717, "grad_norm": 2.437110246944485e-08, "learning_rate": 0.09326207754256909, "loss": 0.0, "num_input_tokens_seen": 14167152, "step": 24940 }, { "epoch": 437.6371681415929, "grad_norm": 1.1533236587979445e-08, "learning_rate": 0.09320755355048366, "loss": 0.0, "num_input_tokens_seen": 14169888, "step": 24945 }, { "epoch": 437.72566371681415, "grad_norm": 2.420979683392943e-08, "learning_rate": 0.09315303831650722, "loss": 0.0, "num_input_tokens_seen": 14172992, "step": 24950 }, { "epoch": 437.8141592920354, "grad_norm": 2.9012666047378843e-08, "learning_rate": 0.09309853184904661, "loss": 0.0, "num_input_tokens_seen": 14175568, "step": 24955 }, { "epoch": 437.9026548672566, "grad_norm": 8.08491140702472e-09, "learning_rate": 0.09304403415650753, "loss": 0.0, "num_input_tokens_seen": 14178144, "step": 24960 }, { "epoch": 437.9911504424779, "grad_norm": 3.081795085790873e-08, "learning_rate": 0.09298954524729405, "loss": 0.0, "num_input_tokens_seen": 14181104, "step": 24965 }, { "epoch": 438.070796460177, "grad_norm": 9.074105911111019e-09, "learning_rate": 0.09293506512980916, "loss": 0.0, "num_input_tokens_seen": 14183192, "step": 24970 }, { "epoch": 438.1592920353982, "grad_norm": 9.462334915610882e-09, "learning_rate": 0.0928805938124544, "loss": 0.0, "num_input_tokens_seen": 14185976, "step": 24975 }, { "epoch": 438.24778761061947, "grad_norm": 1.4073005871750865e-08, "learning_rate": 0.09282613130362982, "loss": 0.0, "num_input_tokens_seen": 14189144, "step": 24980 }, { "epoch": 438.3362831858407, "grad_norm": 2.7817570469323982e-08, "learning_rate": 0.09277167761173427, "loss": 0.0, "num_input_tokens_seen": 14192472, "step": 24985 }, { "epoch": 438.42477876106193, "grad_norm": 1.8608405483178103e-08, "learning_rate": 0.0927172327451653, "loss": 0.0, "num_input_tokens_seen": 14195560, "step": 24990 }, { "epoch": 438.5132743362832, "grad_norm": 1.631699042548007e-08, "learning_rate": 0.09266279671231882, "loss": 0.0, "num_input_tokens_seen": 14198760, "step": 24995 }, { "epoch": 438.60176991150445, "grad_norm": 1.515447500821665e-08, "learning_rate": 0.09260836952158967, "loss": 0.0, "num_input_tokens_seen": 14201656, "step": 25000 }, { "epoch": 438.60176991150445, "eval_loss": 0.7057361602783203, "eval_runtime": 0.943, "eval_samples_per_second": 26.512, "eval_steps_per_second": 13.786, "num_input_tokens_seen": 14201656, "step": 25000 }, { "epoch": 438.69026548672565, "grad_norm": 3.51023281552898e-08, "learning_rate": 0.09255395118137114, "loss": 0.0, "num_input_tokens_seen": 14204424, "step": 25005 }, { "epoch": 438.7787610619469, "grad_norm": 1.38833442520081e-08, "learning_rate": 0.09249954170005527, "loss": 0.0, "num_input_tokens_seen": 14207144, "step": 25010 }, { "epoch": 438.86725663716817, "grad_norm": 1.6037652983413864e-08, "learning_rate": 0.0924451410860327, "loss": 0.0, "num_input_tokens_seen": 14209656, "step": 25015 }, { "epoch": 438.95575221238937, "grad_norm": 1.7365231030908035e-08, "learning_rate": 0.09239074934769258, "loss": 0.0, "num_input_tokens_seen": 14212424, "step": 25020 }, { "epoch": 439.0353982300885, "grad_norm": 1.9683875862597233e-08, "learning_rate": 0.09233636649342288, "loss": 0.0, "num_input_tokens_seen": 14214888, "step": 25025 }, { "epoch": 439.12389380530976, "grad_norm": 4.296921574109547e-08, "learning_rate": 0.09228199253161017, "loss": 0.0, "num_input_tokens_seen": 14217768, "step": 25030 }, { "epoch": 439.21238938053096, "grad_norm": 2.348705230303949e-08, "learning_rate": 0.09222762747063949, "loss": 0.0, "num_input_tokens_seen": 14220328, "step": 25035 }, { "epoch": 439.3008849557522, "grad_norm": 1.1002473598864526e-08, "learning_rate": 0.09217327131889473, "loss": 0.0, "num_input_tokens_seen": 14222840, "step": 25040 }, { "epoch": 439.3893805309734, "grad_norm": 1.623202194878104e-08, "learning_rate": 0.09211892408475818, "loss": 0.0, "num_input_tokens_seen": 14225352, "step": 25045 }, { "epoch": 439.4778761061947, "grad_norm": 7.425308812258891e-09, "learning_rate": 0.09206458577661089, "loss": 0.0, "num_input_tokens_seen": 14228072, "step": 25050 }, { "epoch": 439.56637168141594, "grad_norm": 1.1506207542311131e-08, "learning_rate": 0.09201025640283263, "loss": 0.0, "num_input_tokens_seen": 14231080, "step": 25055 }, { "epoch": 439.65486725663715, "grad_norm": 1.501340030074516e-08, "learning_rate": 0.09195593597180148, "loss": 0.0, "num_input_tokens_seen": 14234200, "step": 25060 }, { "epoch": 439.7433628318584, "grad_norm": 1.4956277993860567e-08, "learning_rate": 0.09190162449189444, "loss": 0.0, "num_input_tokens_seen": 14237048, "step": 25065 }, { "epoch": 439.83185840707966, "grad_norm": 9.613093432392361e-09, "learning_rate": 0.09184732197148705, "loss": 0.0, "num_input_tokens_seen": 14239992, "step": 25070 }, { "epoch": 439.92035398230087, "grad_norm": 1.9428084030437276e-08, "learning_rate": 0.09179302841895343, "loss": 0.0, "num_input_tokens_seen": 14243480, "step": 25075 }, { "epoch": 440.0, "grad_norm": 2.007786648050569e-08, "learning_rate": 0.09173874384266625, "loss": 0.0, "num_input_tokens_seen": 14246096, "step": 25080 }, { "epoch": 440.08849557522126, "grad_norm": 1.276848404785369e-08, "learning_rate": 0.09168446825099695, "loss": 0.0, "num_input_tokens_seen": 14249216, "step": 25085 }, { "epoch": 440.17699115044246, "grad_norm": 1.1526397614147754e-08, "learning_rate": 0.09163020165231545, "loss": 0.0, "num_input_tokens_seen": 14252000, "step": 25090 }, { "epoch": 440.2654867256637, "grad_norm": 2.593488979130143e-08, "learning_rate": 0.09157594405499044, "loss": 0.0, "num_input_tokens_seen": 14255216, "step": 25095 }, { "epoch": 440.353982300885, "grad_norm": 3.653280344906307e-08, "learning_rate": 0.09152169546738899, "loss": 0.0, "num_input_tokens_seen": 14257904, "step": 25100 }, { "epoch": 440.4424778761062, "grad_norm": 2.470744497884425e-08, "learning_rate": 0.09146745589787698, "loss": 0.0, "num_input_tokens_seen": 14260496, "step": 25105 }, { "epoch": 440.53097345132744, "grad_norm": 1.726898801734933e-08, "learning_rate": 0.09141322535481891, "loss": 0.0, "num_input_tokens_seen": 14263680, "step": 25110 }, { "epoch": 440.6194690265487, "grad_norm": 3.3338444893615815e-08, "learning_rate": 0.0913590038465777, "loss": 0.0, "num_input_tokens_seen": 14266272, "step": 25115 }, { "epoch": 440.7079646017699, "grad_norm": 6.6506156137791095e-09, "learning_rate": 0.09130479138151505, "loss": 0.0, "num_input_tokens_seen": 14268784, "step": 25120 }, { "epoch": 440.79646017699116, "grad_norm": 7.137592294981232e-09, "learning_rate": 0.09125058796799114, "loss": 0.0, "num_input_tokens_seen": 14271952, "step": 25125 }, { "epoch": 440.88495575221236, "grad_norm": 2.135628029975578e-08, "learning_rate": 0.09119639361436485, "loss": 0.0, "num_input_tokens_seen": 14274608, "step": 25130 }, { "epoch": 440.9734513274336, "grad_norm": 6.746412317681916e-09, "learning_rate": 0.09114220832899368, "loss": 0.0, "num_input_tokens_seen": 14277520, "step": 25135 }, { "epoch": 441.05309734513276, "grad_norm": 1.4916572865786293e-08, "learning_rate": 0.0910880321202336, "loss": 0.0, "num_input_tokens_seen": 14280120, "step": 25140 }, { "epoch": 441.14159292035396, "grad_norm": 1.1636162255967974e-08, "learning_rate": 0.09103386499643933, "loss": 0.0, "num_input_tokens_seen": 14283176, "step": 25145 }, { "epoch": 441.2300884955752, "grad_norm": 1.0038572639814447e-08, "learning_rate": 0.09097970696596407, "loss": 0.0, "num_input_tokens_seen": 14285800, "step": 25150 }, { "epoch": 441.3185840707965, "grad_norm": 8.630494541250755e-09, "learning_rate": 0.09092555803715971, "loss": 0.0, "num_input_tokens_seen": 14288680, "step": 25155 }, { "epoch": 441.4070796460177, "grad_norm": 7.586899997136243e-09, "learning_rate": 0.0908714182183767, "loss": 0.0, "num_input_tokens_seen": 14291416, "step": 25160 }, { "epoch": 441.49557522123894, "grad_norm": 2.645566965497892e-08, "learning_rate": 0.090817287517964, "loss": 0.0, "num_input_tokens_seen": 14294424, "step": 25165 }, { "epoch": 441.5840707964602, "grad_norm": 6.3991913989980276e-09, "learning_rate": 0.09076316594426931, "loss": 0.0, "num_input_tokens_seen": 14297048, "step": 25170 }, { "epoch": 441.6725663716814, "grad_norm": 8.215237379260998e-09, "learning_rate": 0.09070905350563888, "loss": 0.0, "num_input_tokens_seen": 14299672, "step": 25175 }, { "epoch": 441.76106194690266, "grad_norm": 2.2645725294978547e-08, "learning_rate": 0.09065495021041745, "loss": 0.0, "num_input_tokens_seen": 14302568, "step": 25180 }, { "epoch": 441.8495575221239, "grad_norm": 2.10178647819248e-08, "learning_rate": 0.09060085606694851, "loss": 0.0, "num_input_tokens_seen": 14305448, "step": 25185 }, { "epoch": 441.9380530973451, "grad_norm": 2.1103296887758916e-08, "learning_rate": 0.09054677108357405, "loss": 0.0, "num_input_tokens_seen": 14308680, "step": 25190 }, { "epoch": 442.01769911504425, "grad_norm": 1.5178985179886695e-08, "learning_rate": 0.09049269526863457, "loss": 0.0, "num_input_tokens_seen": 14311808, "step": 25195 }, { "epoch": 442.1061946902655, "grad_norm": 6.334731850188291e-09, "learning_rate": 0.09043862863046935, "loss": 0.0, "num_input_tokens_seen": 14314736, "step": 25200 }, { "epoch": 442.1061946902655, "eval_loss": 0.723112940788269, "eval_runtime": 0.9394, "eval_samples_per_second": 26.614, "eval_steps_per_second": 13.839, "num_input_tokens_seen": 14314736, "step": 25200 }, { "epoch": 442.1946902654867, "grad_norm": 1.5193407421065785e-08, "learning_rate": 0.09038457117741602, "loss": 0.0, "num_input_tokens_seen": 14317232, "step": 25205 }, { "epoch": 442.283185840708, "grad_norm": 3.3318954706373916e-08, "learning_rate": 0.09033052291781099, "loss": 0.0, "num_input_tokens_seen": 14319824, "step": 25210 }, { "epoch": 442.37168141592923, "grad_norm": 2.2404291755151462e-08, "learning_rate": 0.09027648385998926, "loss": 0.0, "num_input_tokens_seen": 14323552, "step": 25215 }, { "epoch": 442.46017699115043, "grad_norm": 2.2368542573758532e-08, "learning_rate": 0.09022245401228417, "loss": 0.0, "num_input_tokens_seen": 14326240, "step": 25220 }, { "epoch": 442.5486725663717, "grad_norm": 2.388278375065056e-08, "learning_rate": 0.09016843338302792, "loss": 0.0, "num_input_tokens_seen": 14329824, "step": 25225 }, { "epoch": 442.6371681415929, "grad_norm": 1.9167471165815186e-08, "learning_rate": 0.09011442198055115, "loss": 0.0, "num_input_tokens_seen": 14332720, "step": 25230 }, { "epoch": 442.72566371681415, "grad_norm": 8.309196886102654e-09, "learning_rate": 0.09006041981318305, "loss": 0.0, "num_input_tokens_seen": 14335168, "step": 25235 }, { "epoch": 442.8141592920354, "grad_norm": 6.149508013919558e-09, "learning_rate": 0.09000642688925149, "loss": 0.0, "num_input_tokens_seen": 14337936, "step": 25240 }, { "epoch": 442.9026548672566, "grad_norm": 1.3960371525456594e-08, "learning_rate": 0.0899524432170828, "loss": 0.0, "num_input_tokens_seen": 14340288, "step": 25245 }, { "epoch": 442.9911504424779, "grad_norm": 1.8322767303402543e-08, "learning_rate": 0.08989846880500196, "loss": 0.0, "num_input_tokens_seen": 14343200, "step": 25250 }, { "epoch": 443.070796460177, "grad_norm": 1.4297336647928205e-08, "learning_rate": 0.08984450366133256, "loss": 0.0, "num_input_tokens_seen": 14345424, "step": 25255 }, { "epoch": 443.1592920353982, "grad_norm": 2.311042557323617e-08, "learning_rate": 0.08979054779439664, "loss": 0.0, "num_input_tokens_seen": 14348080, "step": 25260 }, { "epoch": 443.24778761061947, "grad_norm": 1.4854398600050445e-08, "learning_rate": 0.08973660121251485, "loss": 0.0, "num_input_tokens_seen": 14351088, "step": 25265 }, { "epoch": 443.3362831858407, "grad_norm": 3.098662659795082e-08, "learning_rate": 0.08968266392400655, "loss": 0.0, "num_input_tokens_seen": 14354304, "step": 25270 }, { "epoch": 443.42477876106193, "grad_norm": 1.7175400657265527e-08, "learning_rate": 0.0896287359371894, "loss": 0.0, "num_input_tokens_seen": 14356976, "step": 25275 }, { "epoch": 443.5132743362832, "grad_norm": 2.475711369243072e-08, "learning_rate": 0.08957481726037989, "loss": 0.0, "num_input_tokens_seen": 14359952, "step": 25280 }, { "epoch": 443.60176991150445, "grad_norm": 3.724558794715449e-08, "learning_rate": 0.08952090790189286, "loss": 0.0, "num_input_tokens_seen": 14362992, "step": 25285 }, { "epoch": 443.69026548672565, "grad_norm": 1.7159591081394865e-08, "learning_rate": 0.08946700787004187, "loss": 0.0, "num_input_tokens_seen": 14365696, "step": 25290 }, { "epoch": 443.7787610619469, "grad_norm": 3.156666394943386e-08, "learning_rate": 0.08941311717313899, "loss": 0.0, "num_input_tokens_seen": 14368800, "step": 25295 }, { "epoch": 443.86725663716817, "grad_norm": 6.053795242877413e-09, "learning_rate": 0.08935923581949483, "loss": 0.0, "num_input_tokens_seen": 14371440, "step": 25300 }, { "epoch": 443.95575221238937, "grad_norm": 4.214627224996548e-08, "learning_rate": 0.0893053638174185, "loss": 0.0, "num_input_tokens_seen": 14374960, "step": 25305 }, { "epoch": 444.0353982300885, "grad_norm": 1.394225890294365e-08, "learning_rate": 0.0892515011752179, "loss": 0.0, "num_input_tokens_seen": 14377168, "step": 25310 }, { "epoch": 444.12389380530976, "grad_norm": 2.4382996954841474e-08, "learning_rate": 0.08919764790119918, "loss": 0.0, "num_input_tokens_seen": 14380288, "step": 25315 }, { "epoch": 444.21238938053096, "grad_norm": 1.027482721127626e-08, "learning_rate": 0.08914380400366727, "loss": 0.0, "num_input_tokens_seen": 14382640, "step": 25320 }, { "epoch": 444.3008849557522, "grad_norm": 1.8817347680055718e-08, "learning_rate": 0.08908996949092551, "loss": 0.0, "num_input_tokens_seen": 14385440, "step": 25325 }, { "epoch": 444.3893805309734, "grad_norm": 1.8012066504979884e-08, "learning_rate": 0.08903614437127592, "loss": 0.0, "num_input_tokens_seen": 14388304, "step": 25330 }, { "epoch": 444.4778761061947, "grad_norm": 3.427703276770444e-08, "learning_rate": 0.088982328653019, "loss": 0.0, "num_input_tokens_seen": 14391376, "step": 25335 }, { "epoch": 444.56637168141594, "grad_norm": 7.205828467249376e-08, "learning_rate": 0.0889285223444538, "loss": 0.0, "num_input_tokens_seen": 14394336, "step": 25340 }, { "epoch": 444.65486725663715, "grad_norm": 2.50008191926554e-08, "learning_rate": 0.08887472545387787, "loss": 0.0, "num_input_tokens_seen": 14396880, "step": 25345 }, { "epoch": 444.7433628318584, "grad_norm": 1.9071165979767102e-08, "learning_rate": 0.08882093798958751, "loss": 0.0, "num_input_tokens_seen": 14399520, "step": 25350 }, { "epoch": 444.83185840707966, "grad_norm": 3.08715826236039e-08, "learning_rate": 0.08876715995987726, "loss": 0.0, "num_input_tokens_seen": 14402304, "step": 25355 }, { "epoch": 444.92035398230087, "grad_norm": 1.5056425439752275e-08, "learning_rate": 0.08871339137304052, "loss": 0.0, "num_input_tokens_seen": 14405232, "step": 25360 }, { "epoch": 445.0, "grad_norm": 1.1761803975218754e-08, "learning_rate": 0.0886596322373689, "loss": 0.0, "num_input_tokens_seen": 14407480, "step": 25365 }, { "epoch": 445.08849557522126, "grad_norm": 8.249334548793286e-09, "learning_rate": 0.08860588256115293, "loss": 0.0, "num_input_tokens_seen": 14410616, "step": 25370 }, { "epoch": 445.17699115044246, "grad_norm": 1.917528358319487e-08, "learning_rate": 0.0885521423526814, "loss": 0.0, "num_input_tokens_seen": 14413176, "step": 25375 }, { "epoch": 445.2654867256637, "grad_norm": 8.394606787476278e-09, "learning_rate": 0.08849841162024165, "loss": 0.0, "num_input_tokens_seen": 14416200, "step": 25380 }, { "epoch": 445.353982300885, "grad_norm": 3.043290419668665e-08, "learning_rate": 0.08844469037211973, "loss": 0.0, "num_input_tokens_seen": 14419464, "step": 25385 }, { "epoch": 445.4424778761062, "grad_norm": 1.053585663157719e-08, "learning_rate": 0.08839097861660014, "loss": 0.0, "num_input_tokens_seen": 14421992, "step": 25390 }, { "epoch": 445.53097345132744, "grad_norm": 1.0072643163994144e-08, "learning_rate": 0.08833727636196585, "loss": 0.0, "num_input_tokens_seen": 14425256, "step": 25395 }, { "epoch": 445.6194690265487, "grad_norm": 1.0567135610983769e-08, "learning_rate": 0.08828358361649848, "loss": 0.0, "num_input_tokens_seen": 14428104, "step": 25400 }, { "epoch": 445.6194690265487, "eval_loss": 0.7107198238372803, "eval_runtime": 0.9415, "eval_samples_per_second": 26.553, "eval_steps_per_second": 13.808, "num_input_tokens_seen": 14428104, "step": 25400 }, { "epoch": 445.7079646017699, "grad_norm": 2.0073770201634034e-08, "learning_rate": 0.08822990038847807, "loss": 0.0, "num_input_tokens_seen": 14430552, "step": 25405 }, { "epoch": 445.79646017699116, "grad_norm": 1.3487524874733481e-08, "learning_rate": 0.08817622668618325, "loss": 0.0, "num_input_tokens_seen": 14433528, "step": 25410 }, { "epoch": 445.88495575221236, "grad_norm": 2.2903414276242984e-08, "learning_rate": 0.08812256251789125, "loss": 0.0, "num_input_tokens_seen": 14436056, "step": 25415 }, { "epoch": 445.9734513274336, "grad_norm": 2.7575371319699116e-08, "learning_rate": 0.08806890789187766, "loss": 0.0, "num_input_tokens_seen": 14439160, "step": 25420 }, { "epoch": 446.05309734513276, "grad_norm": 2.2124336140905143e-08, "learning_rate": 0.08801526281641672, "loss": 0.0, "num_input_tokens_seen": 14441688, "step": 25425 }, { "epoch": 446.14159292035396, "grad_norm": 2.027275414206997e-08, "learning_rate": 0.0879616272997813, "loss": 0.0, "num_input_tokens_seen": 14444552, "step": 25430 }, { "epoch": 446.2300884955752, "grad_norm": 3.1768315977842576e-08, "learning_rate": 0.08790800135024247, "loss": 0.0, "num_input_tokens_seen": 14447160, "step": 25435 }, { "epoch": 446.3185840707965, "grad_norm": 8.499662307315248e-09, "learning_rate": 0.08785438497607023, "loss": 0.0, "num_input_tokens_seen": 14450104, "step": 25440 }, { "epoch": 446.4070796460177, "grad_norm": 2.39212880615014e-08, "learning_rate": 0.08780077818553277, "loss": 0.0, "num_input_tokens_seen": 14453128, "step": 25445 }, { "epoch": 446.49557522123894, "grad_norm": 2.442332025509586e-08, "learning_rate": 0.0877471809868969, "loss": 0.0, "num_input_tokens_seen": 14456008, "step": 25450 }, { "epoch": 446.5840707964602, "grad_norm": 1.4147370386297098e-08, "learning_rate": 0.08769359338842811, "loss": 0.0, "num_input_tokens_seen": 14459208, "step": 25455 }, { "epoch": 446.6725663716814, "grad_norm": 2.328372339377438e-08, "learning_rate": 0.08764001539839016, "loss": 0.0, "num_input_tokens_seen": 14461800, "step": 25460 }, { "epoch": 446.76106194690266, "grad_norm": 1.9413610274909843e-08, "learning_rate": 0.08758644702504548, "loss": 0.0, "num_input_tokens_seen": 14464744, "step": 25465 }, { "epoch": 446.8495575221239, "grad_norm": 4.3745405520212444e-08, "learning_rate": 0.0875328882766551, "loss": 0.0, "num_input_tokens_seen": 14467672, "step": 25470 }, { "epoch": 446.9380530973451, "grad_norm": 1.1377708553084176e-08, "learning_rate": 0.08747933916147828, "loss": 0.0, "num_input_tokens_seen": 14470456, "step": 25475 }, { "epoch": 447.01769911504425, "grad_norm": 6.901486493404718e-09, "learning_rate": 0.0874257996877731, "loss": 0.0, "num_input_tokens_seen": 14472848, "step": 25480 }, { "epoch": 447.1061946902655, "grad_norm": 1.051503151217048e-08, "learning_rate": 0.08737226986379593, "loss": 0.0, "num_input_tokens_seen": 14475520, "step": 25485 }, { "epoch": 447.1946902654867, "grad_norm": 3.569459039454159e-08, "learning_rate": 0.08731874969780173, "loss": 0.0, "num_input_tokens_seen": 14478144, "step": 25490 }, { "epoch": 447.283185840708, "grad_norm": 1.2714255426260479e-08, "learning_rate": 0.08726523919804412, "loss": 0.0, "num_input_tokens_seen": 14481280, "step": 25495 }, { "epoch": 447.37168141592923, "grad_norm": 1.4650731294807429e-08, "learning_rate": 0.08721173837277492, "loss": 0.0, "num_input_tokens_seen": 14484608, "step": 25500 }, { "epoch": 447.46017699115043, "grad_norm": 2.014829902918791e-08, "learning_rate": 0.08715824723024479, "loss": 0.0, "num_input_tokens_seen": 14487248, "step": 25505 }, { "epoch": 447.5486725663717, "grad_norm": 2.5937216818761044e-08, "learning_rate": 0.08710476577870258, "loss": 0.0, "num_input_tokens_seen": 14489776, "step": 25510 }, { "epoch": 447.6371681415929, "grad_norm": 1.6054478635396663e-08, "learning_rate": 0.08705129402639587, "loss": 0.0, "num_input_tokens_seen": 14492768, "step": 25515 }, { "epoch": 447.72566371681415, "grad_norm": 7.921170386282483e-09, "learning_rate": 0.08699783198157078, "loss": 0.0, "num_input_tokens_seen": 14495664, "step": 25520 }, { "epoch": 447.8141592920354, "grad_norm": 2.781213126468174e-08, "learning_rate": 0.08694437965247163, "loss": 0.0, "num_input_tokens_seen": 14498544, "step": 25525 }, { "epoch": 447.9026548672566, "grad_norm": 1.509962643808649e-08, "learning_rate": 0.08689093704734165, "loss": 0.0, "num_input_tokens_seen": 14501520, "step": 25530 }, { "epoch": 447.9911504424779, "grad_norm": 3.7440436528868304e-08, "learning_rate": 0.08683750417442222, "loss": 0.0, "num_input_tokens_seen": 14504656, "step": 25535 }, { "epoch": 448.070796460177, "grad_norm": 1.6950535197679528e-08, "learning_rate": 0.08678408104195334, "loss": 0.0, "num_input_tokens_seen": 14506960, "step": 25540 }, { "epoch": 448.1592920353982, "grad_norm": 1.8642110077848884e-08, "learning_rate": 0.08673066765817365, "loss": 0.0, "num_input_tokens_seen": 14510720, "step": 25545 }, { "epoch": 448.24778761061947, "grad_norm": 1.9420292929339666e-08, "learning_rate": 0.08667726403132005, "loss": 0.0, "num_input_tokens_seen": 14513552, "step": 25550 }, { "epoch": 448.3362831858407, "grad_norm": 2.0546952583799794e-08, "learning_rate": 0.0866238701696281, "loss": 0.0, "num_input_tokens_seen": 14516592, "step": 25555 }, { "epoch": 448.42477876106193, "grad_norm": 3.2289456441958464e-08, "learning_rate": 0.08657048608133185, "loss": 0.0, "num_input_tokens_seen": 14519216, "step": 25560 }, { "epoch": 448.5132743362832, "grad_norm": 2.1484304113528196e-08, "learning_rate": 0.08651711177466369, "loss": 0.0, "num_input_tokens_seen": 14522608, "step": 25565 }, { "epoch": 448.60176991150445, "grad_norm": 2.0192853611433748e-08, "learning_rate": 0.08646374725785466, "loss": 0.0, "num_input_tokens_seen": 14524928, "step": 25570 }, { "epoch": 448.69026548672565, "grad_norm": 4.986773305404313e-08, "learning_rate": 0.08641039253913434, "loss": 0.0, "num_input_tokens_seen": 14527616, "step": 25575 }, { "epoch": 448.7787610619469, "grad_norm": 1.8009783886441255e-08, "learning_rate": 0.08635704762673052, "loss": 0.0, "num_input_tokens_seen": 14530688, "step": 25580 }, { "epoch": 448.86725663716817, "grad_norm": 1.97724165929003e-08, "learning_rate": 0.08630371252886981, "loss": 0.0, "num_input_tokens_seen": 14533152, "step": 25585 }, { "epoch": 448.95575221238937, "grad_norm": 7.6031962947809e-09, "learning_rate": 0.08625038725377704, "loss": 0.0, "num_input_tokens_seen": 14535984, "step": 25590 }, { "epoch": 449.0353982300885, "grad_norm": 6.799034668603099e-09, "learning_rate": 0.08619707180967566, "loss": 0.0, "num_input_tokens_seen": 14537984, "step": 25595 }, { "epoch": 449.12389380530976, "grad_norm": 1.5876528536296064e-08, "learning_rate": 0.08614376620478768, "loss": 0.0, "num_input_tokens_seen": 14541136, "step": 25600 }, { "epoch": 449.12389380530976, "eval_loss": 0.7242066264152527, "eval_runtime": 0.9504, "eval_samples_per_second": 26.305, "eval_steps_per_second": 13.679, "num_input_tokens_seen": 14541136, "step": 25600 }, { "epoch": 449.21238938053096, "grad_norm": 3.781121904467e-08, "learning_rate": 0.08609047044733344, "loss": 0.0, "num_input_tokens_seen": 14543632, "step": 25605 }, { "epoch": 449.3008849557522, "grad_norm": 2.187057468461262e-08, "learning_rate": 0.08603718454553168, "loss": 0.0, "num_input_tokens_seen": 14546160, "step": 25610 }, { "epoch": 449.3893805309734, "grad_norm": 6.018976872468329e-09, "learning_rate": 0.08598390850759997, "loss": 0.0, "num_input_tokens_seen": 14548832, "step": 25615 }, { "epoch": 449.4778761061947, "grad_norm": 9.47996436906351e-09, "learning_rate": 0.08593064234175397, "loss": 0.0, "num_input_tokens_seen": 14551792, "step": 25620 }, { "epoch": 449.56637168141594, "grad_norm": 3.0490348024159175e-08, "learning_rate": 0.08587738605620815, "loss": 0.0, "num_input_tokens_seen": 14554608, "step": 25625 }, { "epoch": 449.65486725663715, "grad_norm": 2.614340210982391e-08, "learning_rate": 0.08582413965917512, "loss": 0.0, "num_input_tokens_seen": 14558080, "step": 25630 }, { "epoch": 449.7433628318584, "grad_norm": 4.157394073445175e-08, "learning_rate": 0.08577090315886628, "loss": 0.0, "num_input_tokens_seen": 14561024, "step": 25635 }, { "epoch": 449.83185840707966, "grad_norm": 3.051622954330924e-08, "learning_rate": 0.08571767656349136, "loss": 0.0, "num_input_tokens_seen": 14564240, "step": 25640 }, { "epoch": 449.92035398230087, "grad_norm": 6.09460641953774e-08, "learning_rate": 0.08566445988125847, "loss": 0.0, "num_input_tokens_seen": 14567040, "step": 25645 }, { "epoch": 450.0, "grad_norm": 2.030329682156662e-08, "learning_rate": 0.08561125312037436, "loss": 0.0, "num_input_tokens_seen": 14569608, "step": 25650 }, { "epoch": 450.08849557522126, "grad_norm": 1.5409142406497267e-08, "learning_rate": 0.08555805628904424, "loss": 0.0, "num_input_tokens_seen": 14572744, "step": 25655 }, { "epoch": 450.17699115044246, "grad_norm": 1.2041556196606962e-08, "learning_rate": 0.08550486939547161, "loss": 0.0, "num_input_tokens_seen": 14575960, "step": 25660 }, { "epoch": 450.2654867256637, "grad_norm": 2.0493065022719748e-08, "learning_rate": 0.08545169244785869, "loss": 0.0, "num_input_tokens_seen": 14578824, "step": 25665 }, { "epoch": 450.353982300885, "grad_norm": 9.6888017608876e-09, "learning_rate": 0.08539852545440589, "loss": 0.0, "num_input_tokens_seen": 14581752, "step": 25670 }, { "epoch": 450.4424778761062, "grad_norm": 1.802425586561185e-08, "learning_rate": 0.08534536842331235, "loss": 0.0, "num_input_tokens_seen": 14584520, "step": 25675 }, { "epoch": 450.53097345132744, "grad_norm": 1.569453900174267e-08, "learning_rate": 0.08529222136277545, "loss": 0.0, "num_input_tokens_seen": 14587320, "step": 25680 }, { "epoch": 450.6194690265487, "grad_norm": 1.657067016935798e-08, "learning_rate": 0.08523908428099125, "loss": 0.0, "num_input_tokens_seen": 14590216, "step": 25685 }, { "epoch": 450.7079646017699, "grad_norm": 2.4220101479954792e-08, "learning_rate": 0.08518595718615402, "loss": 0.0, "num_input_tokens_seen": 14592824, "step": 25690 }, { "epoch": 450.79646017699116, "grad_norm": 8.054455769013202e-09, "learning_rate": 0.08513284008645675, "loss": 0.0, "num_input_tokens_seen": 14595464, "step": 25695 }, { "epoch": 450.88495575221236, "grad_norm": 1.712121999730698e-08, "learning_rate": 0.08507973299009065, "loss": 0.0, "num_input_tokens_seen": 14598680, "step": 25700 }, { "epoch": 450.9734513274336, "grad_norm": 9.727110672486106e-09, "learning_rate": 0.08502663590524563, "loss": 0.0, "num_input_tokens_seen": 14601352, "step": 25705 }, { "epoch": 451.05309734513276, "grad_norm": 2.2093328055916572e-08, "learning_rate": 0.08497354884010981, "loss": 0.0, "num_input_tokens_seen": 14603616, "step": 25710 }, { "epoch": 451.14159292035396, "grad_norm": 1.7201818636181088e-08, "learning_rate": 0.0849204718028699, "loss": 0.0, "num_input_tokens_seen": 14606192, "step": 25715 }, { "epoch": 451.2300884955752, "grad_norm": 2.0157310487434188e-08, "learning_rate": 0.08486740480171118, "loss": 0.0, "num_input_tokens_seen": 14608576, "step": 25720 }, { "epoch": 451.3185840707965, "grad_norm": 1.3712981861147e-08, "learning_rate": 0.08481434784481706, "loss": 0.0, "num_input_tokens_seen": 14611856, "step": 25725 }, { "epoch": 451.4070796460177, "grad_norm": 2.4859916791797332e-08, "learning_rate": 0.08476130094036968, "loss": 0.0, "num_input_tokens_seen": 14614592, "step": 25730 }, { "epoch": 451.49557522123894, "grad_norm": 2.6413951914605605e-08, "learning_rate": 0.08470826409654961, "loss": 0.0, "num_input_tokens_seen": 14618064, "step": 25735 }, { "epoch": 451.5840707964602, "grad_norm": 1.3077826821472627e-08, "learning_rate": 0.08465523732153564, "loss": 0.0, "num_input_tokens_seen": 14620832, "step": 25740 }, { "epoch": 451.6725663716814, "grad_norm": 3.175239626784787e-08, "learning_rate": 0.08460222062350532, "loss": 0.0, "num_input_tokens_seen": 14623856, "step": 25745 }, { "epoch": 451.76106194690266, "grad_norm": 1.0286514751101095e-08, "learning_rate": 0.08454921401063442, "loss": 0.0, "num_input_tokens_seen": 14626592, "step": 25750 }, { "epoch": 451.8495575221239, "grad_norm": 2.5587311824892822e-08, "learning_rate": 0.08449621749109716, "loss": 0.0, "num_input_tokens_seen": 14629616, "step": 25755 }, { "epoch": 451.9380530973451, "grad_norm": 2.4857811808942643e-08, "learning_rate": 0.08444323107306641, "loss": 0.0, "num_input_tokens_seen": 14632288, "step": 25760 }, { "epoch": 452.01769911504425, "grad_norm": 1.9037411647104818e-08, "learning_rate": 0.0843902547647132, "loss": 0.0, "num_input_tokens_seen": 14635040, "step": 25765 }, { "epoch": 452.1061946902655, "grad_norm": 2.481990613034668e-08, "learning_rate": 0.0843372885742072, "loss": 0.0, "num_input_tokens_seen": 14638080, "step": 25770 }, { "epoch": 452.1946902654867, "grad_norm": 2.7649472045254697e-08, "learning_rate": 0.08428433250971652, "loss": 0.0, "num_input_tokens_seen": 14640768, "step": 25775 }, { "epoch": 452.283185840708, "grad_norm": 3.090655198434433e-08, "learning_rate": 0.08423138657940757, "loss": 0.0, "num_input_tokens_seen": 14643808, "step": 25780 }, { "epoch": 452.37168141592923, "grad_norm": 2.8886210756695618e-08, "learning_rate": 0.08417845079144536, "loss": 0.0, "num_input_tokens_seen": 14646704, "step": 25785 }, { "epoch": 452.46017699115043, "grad_norm": 1.8269835422302094e-08, "learning_rate": 0.08412552515399314, "loss": 0.0, "num_input_tokens_seen": 14649808, "step": 25790 }, { "epoch": 452.5486725663717, "grad_norm": 7.819893177440917e-09, "learning_rate": 0.08407260967521278, "loss": 0.0, "num_input_tokens_seen": 14652400, "step": 25795 }, { "epoch": 452.6371681415929, "grad_norm": 3.363176048765126e-08, "learning_rate": 0.08401970436326454, "loss": 0.0, "num_input_tokens_seen": 14655696, "step": 25800 }, { "epoch": 452.6371681415929, "eval_loss": 0.7248790264129639, "eval_runtime": 0.9145, "eval_samples_per_second": 27.338, "eval_steps_per_second": 14.216, "num_input_tokens_seen": 14655696, "step": 25800 }, { "epoch": 452.72566371681415, "grad_norm": 6.869238067253036e-09, "learning_rate": 0.08396680922630702, "loss": 0.0, "num_input_tokens_seen": 14658528, "step": 25805 }, { "epoch": 452.8141592920354, "grad_norm": 1.1808471533925058e-08, "learning_rate": 0.08391392427249732, "loss": 0.0, "num_input_tokens_seen": 14661792, "step": 25810 }, { "epoch": 452.9026548672566, "grad_norm": 7.543462743342388e-09, "learning_rate": 0.08386104950999107, "loss": 0.0, "num_input_tokens_seen": 14664272, "step": 25815 }, { "epoch": 452.9911504424779, "grad_norm": 1.2344845146117223e-08, "learning_rate": 0.0838081849469421, "loss": 0.0, "num_input_tokens_seen": 14666800, "step": 25820 }, { "epoch": 453.070796460177, "grad_norm": 2.0235297881754377e-08, "learning_rate": 0.08375533059150281, "loss": 0.0, "num_input_tokens_seen": 14669096, "step": 25825 }, { "epoch": 453.1592920353982, "grad_norm": 1.372174995850628e-08, "learning_rate": 0.08370248645182406, "loss": 0.0, "num_input_tokens_seen": 14672008, "step": 25830 }, { "epoch": 453.24778761061947, "grad_norm": 1.8422300129827818e-08, "learning_rate": 0.083649652536055, "loss": 0.0, "num_input_tokens_seen": 14674920, "step": 25835 }, { "epoch": 453.3362831858407, "grad_norm": 7.225873677185746e-09, "learning_rate": 0.08359682885234339, "loss": 0.0, "num_input_tokens_seen": 14678232, "step": 25840 }, { "epoch": 453.42477876106193, "grad_norm": 1.55314907601678e-08, "learning_rate": 0.08354401540883516, "loss": 0.0, "num_input_tokens_seen": 14680744, "step": 25845 }, { "epoch": 453.5132743362832, "grad_norm": 2.195238124613752e-08, "learning_rate": 0.0834912122136749, "loss": 0.0, "num_input_tokens_seen": 14683560, "step": 25850 }, { "epoch": 453.60176991150445, "grad_norm": 8.774865278837751e-09, "learning_rate": 0.0834384192750056, "loss": 0.0, "num_input_tokens_seen": 14686808, "step": 25855 }, { "epoch": 453.69026548672565, "grad_norm": 2.3718182973198054e-08, "learning_rate": 0.08338563660096844, "loss": 0.0, "num_input_tokens_seen": 14689640, "step": 25860 }, { "epoch": 453.7787610619469, "grad_norm": 2.1569453778624847e-08, "learning_rate": 0.08333286419970329, "loss": 0.0, "num_input_tokens_seen": 14692248, "step": 25865 }, { "epoch": 453.86725663716817, "grad_norm": 1.2287381778719464e-08, "learning_rate": 0.08328010207934824, "loss": 0.0, "num_input_tokens_seen": 14695032, "step": 25870 }, { "epoch": 453.95575221238937, "grad_norm": 1.7634054216841832e-08, "learning_rate": 0.08322735024803989, "loss": 0.0, "num_input_tokens_seen": 14697960, "step": 25875 }, { "epoch": 454.0353982300885, "grad_norm": 2.3953031558221483e-08, "learning_rate": 0.08317460871391331, "loss": 0.0, "num_input_tokens_seen": 14700632, "step": 25880 }, { "epoch": 454.12389380530976, "grad_norm": 9.206029716324338e-09, "learning_rate": 0.08312187748510179, "loss": 0.0, "num_input_tokens_seen": 14703112, "step": 25885 }, { "epoch": 454.21238938053096, "grad_norm": 9.430844549740414e-09, "learning_rate": 0.08306915656973726, "loss": 0.0, "num_input_tokens_seen": 14705736, "step": 25890 }, { "epoch": 454.3008849557522, "grad_norm": 1.4949970150723857e-08, "learning_rate": 0.08301644597594988, "loss": 0.0, "num_input_tokens_seen": 14708888, "step": 25895 }, { "epoch": 454.3893805309734, "grad_norm": 2.4575541601734585e-08, "learning_rate": 0.08296374571186826, "loss": 0.0, "num_input_tokens_seen": 14711496, "step": 25900 }, { "epoch": 454.4778761061947, "grad_norm": 2.778141094950115e-08, "learning_rate": 0.08291105578561955, "loss": 0.0, "num_input_tokens_seen": 14713992, "step": 25905 }, { "epoch": 454.56637168141594, "grad_norm": 2.2121483311821066e-08, "learning_rate": 0.08285837620532904, "loss": 0.0, "num_input_tokens_seen": 14716824, "step": 25910 }, { "epoch": 454.65486725663715, "grad_norm": 2.3367876522684128e-08, "learning_rate": 0.0828057069791207, "loss": 0.0, "num_input_tokens_seen": 14719784, "step": 25915 }, { "epoch": 454.7433628318584, "grad_norm": 3.2769953861588874e-08, "learning_rate": 0.0827530481151168, "loss": 0.0, "num_input_tokens_seen": 14722456, "step": 25920 }, { "epoch": 454.83185840707966, "grad_norm": 2.4188160807625536e-08, "learning_rate": 0.08270039962143792, "loss": 0.0, "num_input_tokens_seen": 14725944, "step": 25925 }, { "epoch": 454.92035398230087, "grad_norm": 1.6900735033686942e-08, "learning_rate": 0.08264776150620314, "loss": 0.0, "num_input_tokens_seen": 14728984, "step": 25930 }, { "epoch": 455.0, "grad_norm": 5.6147001714634825e-08, "learning_rate": 0.08259513377753, "loss": 0.0, "num_input_tokens_seen": 14731520, "step": 25935 }, { "epoch": 455.08849557522126, "grad_norm": 6.20616269486618e-09, "learning_rate": 0.08254251644353423, "loss": 0.0, "num_input_tokens_seen": 14733952, "step": 25940 }, { "epoch": 455.17699115044246, "grad_norm": 9.417130186761824e-09, "learning_rate": 0.08248990951233022, "loss": 0.0, "num_input_tokens_seen": 14737200, "step": 25945 }, { "epoch": 455.2654867256637, "grad_norm": 3.5309636103875164e-08, "learning_rate": 0.08243731299203048, "loss": 0.0, "num_input_tokens_seen": 14740208, "step": 25950 }, { "epoch": 455.353982300885, "grad_norm": 1.8698736781175285e-08, "learning_rate": 0.08238472689074612, "loss": 0.0, "num_input_tokens_seen": 14742944, "step": 25955 }, { "epoch": 455.4424778761062, "grad_norm": 2.1812951445099316e-08, "learning_rate": 0.08233215121658666, "loss": 0.0, "num_input_tokens_seen": 14745824, "step": 25960 }, { "epoch": 455.53097345132744, "grad_norm": 2.241643848321928e-08, "learning_rate": 0.08227958597765982, "loss": 0.0, "num_input_tokens_seen": 14748576, "step": 25965 }, { "epoch": 455.6194690265487, "grad_norm": 2.4458254088699505e-08, "learning_rate": 0.08222703118207181, "loss": 0.0, "num_input_tokens_seen": 14750992, "step": 25970 }, { "epoch": 455.7079646017699, "grad_norm": 4.6724650815122004e-08, "learning_rate": 0.08217448683792734, "loss": 0.0, "num_input_tokens_seen": 14753936, "step": 25975 }, { "epoch": 455.79646017699116, "grad_norm": 6.966250687412412e-09, "learning_rate": 0.08212195295332926, "loss": 0.0, "num_input_tokens_seen": 14756976, "step": 25980 }, { "epoch": 455.88495575221236, "grad_norm": 2.328061654566227e-08, "learning_rate": 0.08206942953637915, "loss": 0.0, "num_input_tokens_seen": 14760288, "step": 25985 }, { "epoch": 455.9734513274336, "grad_norm": 1.4692023597717707e-08, "learning_rate": 0.08201691659517658, "loss": 0.0, "num_input_tokens_seen": 14763120, "step": 25990 }, { "epoch": 456.05309734513276, "grad_norm": 1.766782986578619e-08, "learning_rate": 0.08196441413781981, "loss": 0.0, "num_input_tokens_seen": 14765480, "step": 25995 }, { "epoch": 456.14159292035396, "grad_norm": 3.0788282145977064e-08, "learning_rate": 0.08191192217240544, "loss": 0.0, "num_input_tokens_seen": 14768168, "step": 26000 }, { "epoch": 456.14159292035396, "eval_loss": 0.7227019667625427, "eval_runtime": 0.9419, "eval_samples_per_second": 26.543, "eval_steps_per_second": 13.803, "num_input_tokens_seen": 14768168, "step": 26000 }, { "epoch": 456.2300884955752, "grad_norm": 1.551413220113318e-08, "learning_rate": 0.08185944070702823, "loss": 0.0, "num_input_tokens_seen": 14770904, "step": 26005 }, { "epoch": 456.3185840707965, "grad_norm": 6.904313565314624e-09, "learning_rate": 0.08180696974978159, "loss": 0.0, "num_input_tokens_seen": 14774024, "step": 26010 }, { "epoch": 456.4070796460177, "grad_norm": 2.792873310397681e-08, "learning_rate": 0.08175450930875724, "loss": 0.0, "num_input_tokens_seen": 14777208, "step": 26015 }, { "epoch": 456.49557522123894, "grad_norm": 1.9269453588321994e-08, "learning_rate": 0.08170205939204513, "loss": 0.0, "num_input_tokens_seen": 14779928, "step": 26020 }, { "epoch": 456.5840707964602, "grad_norm": 2.111347008337816e-08, "learning_rate": 0.08164962000773379, "loss": 0.0, "num_input_tokens_seen": 14783064, "step": 26025 }, { "epoch": 456.6725663716814, "grad_norm": 1.8315503780286235e-08, "learning_rate": 0.08159719116390995, "loss": 0.0, "num_input_tokens_seen": 14786072, "step": 26030 }, { "epoch": 456.76106194690266, "grad_norm": 9.732796790729026e-09, "learning_rate": 0.08154477286865887, "loss": 0.0, "num_input_tokens_seen": 14788584, "step": 26035 }, { "epoch": 456.8495575221239, "grad_norm": 1.4343155108065275e-08, "learning_rate": 0.08149236513006404, "loss": 0.0, "num_input_tokens_seen": 14791480, "step": 26040 }, { "epoch": 456.9380530973451, "grad_norm": 2.4700414158473905e-08, "learning_rate": 0.08143996795620746, "loss": 0.0, "num_input_tokens_seen": 14794696, "step": 26045 }, { "epoch": 457.01769911504425, "grad_norm": 1.5974714884237073e-08, "learning_rate": 0.08138758135516938, "loss": 0.0, "num_input_tokens_seen": 14796992, "step": 26050 }, { "epoch": 457.1061946902655, "grad_norm": 1.850927766611221e-08, "learning_rate": 0.08133520533502851, "loss": 0.0, "num_input_tokens_seen": 14800080, "step": 26055 }, { "epoch": 457.1946902654867, "grad_norm": 2.138373567106555e-08, "learning_rate": 0.08128283990386184, "loss": 0.0, "num_input_tokens_seen": 14802624, "step": 26060 }, { "epoch": 457.283185840708, "grad_norm": 2.281599620346242e-08, "learning_rate": 0.08123048506974488, "loss": 0.0, "num_input_tokens_seen": 14805408, "step": 26065 }, { "epoch": 457.37168141592923, "grad_norm": 1.7417775666217494e-08, "learning_rate": 0.08117814084075124, "loss": 0.0, "num_input_tokens_seen": 14808416, "step": 26070 }, { "epoch": 457.46017699115043, "grad_norm": 1.2906270718815449e-08, "learning_rate": 0.08112580722495318, "loss": 0.0, "num_input_tokens_seen": 14810992, "step": 26075 }, { "epoch": 457.5486725663717, "grad_norm": 1.875088351255272e-08, "learning_rate": 0.08107348423042122, "loss": 0.0, "num_input_tokens_seen": 14813776, "step": 26080 }, { "epoch": 457.6371681415929, "grad_norm": 2.4430056200230865e-08, "learning_rate": 0.08102117186522413, "loss": 0.0, "num_input_tokens_seen": 14817152, "step": 26085 }, { "epoch": 457.72566371681415, "grad_norm": 2.7215504516675537e-08, "learning_rate": 0.08096887013742916, "loss": 0.0, "num_input_tokens_seen": 14819984, "step": 26090 }, { "epoch": 457.8141592920354, "grad_norm": 1.548033345954991e-08, "learning_rate": 0.08091657905510198, "loss": 0.0, "num_input_tokens_seen": 14823344, "step": 26095 }, { "epoch": 457.9026548672566, "grad_norm": 1.9701548836792426e-08, "learning_rate": 0.08086429862630642, "loss": 0.0, "num_input_tokens_seen": 14826016, "step": 26100 }, { "epoch": 457.9911504424779, "grad_norm": 1.562801088539345e-08, "learning_rate": 0.08081202885910488, "loss": 0.0, "num_input_tokens_seen": 14828992, "step": 26105 }, { "epoch": 458.070796460177, "grad_norm": 1.2045366482027475e-08, "learning_rate": 0.08075976976155795, "loss": 0.0, "num_input_tokens_seen": 14831296, "step": 26110 }, { "epoch": 458.1592920353982, "grad_norm": 3.930703229571009e-08, "learning_rate": 0.08070752134172461, "loss": 0.0, "num_input_tokens_seen": 14834464, "step": 26115 }, { "epoch": 458.24778761061947, "grad_norm": 3.178202589992907e-08, "learning_rate": 0.08065528360766229, "loss": 0.0, "num_input_tokens_seen": 14837632, "step": 26120 }, { "epoch": 458.3362831858407, "grad_norm": 1.0115079440708996e-08, "learning_rate": 0.08060305656742664, "loss": 0.0, "num_input_tokens_seen": 14840336, "step": 26125 }, { "epoch": 458.42477876106193, "grad_norm": 1.1643413344586406e-08, "learning_rate": 0.08055084022907182, "loss": 0.0, "num_input_tokens_seen": 14842496, "step": 26130 }, { "epoch": 458.5132743362832, "grad_norm": 1.5300811284646443e-08, "learning_rate": 0.08049863460065014, "loss": 0.0, "num_input_tokens_seen": 14845360, "step": 26135 }, { "epoch": 458.60176991150445, "grad_norm": 1.4275751247794233e-08, "learning_rate": 0.0804464396902124, "loss": 0.0, "num_input_tokens_seen": 14848080, "step": 26140 }, { "epoch": 458.69026548672565, "grad_norm": 1.2652974668014849e-08, "learning_rate": 0.08039425550580777, "loss": 0.0, "num_input_tokens_seen": 14850864, "step": 26145 }, { "epoch": 458.7787610619469, "grad_norm": 1.4648738222433622e-08, "learning_rate": 0.08034208205548363, "loss": 0.0, "num_input_tokens_seen": 14854176, "step": 26150 }, { "epoch": 458.86725663716817, "grad_norm": 1.172292751761006e-08, "learning_rate": 0.08028991934728581, "loss": 0.0, "num_input_tokens_seen": 14857696, "step": 26155 }, { "epoch": 458.95575221238937, "grad_norm": 2.5387524971165476e-08, "learning_rate": 0.0802377673892585, "loss": 0.0, "num_input_tokens_seen": 14860256, "step": 26160 }, { "epoch": 459.0353982300885, "grad_norm": 1.1503896502063071e-08, "learning_rate": 0.0801856261894441, "loss": 0.0, "num_input_tokens_seen": 14862496, "step": 26165 }, { "epoch": 459.12389380530976, "grad_norm": 1.0334208155882152e-08, "learning_rate": 0.08013349575588354, "loss": 0.0, "num_input_tokens_seen": 14865568, "step": 26170 }, { "epoch": 459.21238938053096, "grad_norm": 9.992651150980691e-09, "learning_rate": 0.08008137609661586, "loss": 0.0, "num_input_tokens_seen": 14868448, "step": 26175 }, { "epoch": 459.3008849557522, "grad_norm": 5.888314902335878e-08, "learning_rate": 0.08002926721967872, "loss": 0.0, "num_input_tokens_seen": 14871296, "step": 26180 }, { "epoch": 459.3893805309734, "grad_norm": 6.226792947927606e-08, "learning_rate": 0.07997716913310782, "loss": 0.0, "num_input_tokens_seen": 14873776, "step": 26185 }, { "epoch": 459.4778761061947, "grad_norm": 1.9160124153927427e-08, "learning_rate": 0.07992508184493745, "loss": 0.0, "num_input_tokens_seen": 14876544, "step": 26190 }, { "epoch": 459.56637168141594, "grad_norm": 1.3366991957752816e-08, "learning_rate": 0.07987300536320001, "loss": 0.0, "num_input_tokens_seen": 14879248, "step": 26195 }, { "epoch": 459.65486725663715, "grad_norm": 2.0903412334405402e-08, "learning_rate": 0.07982093969592649, "loss": 0.0, "num_input_tokens_seen": 14882048, "step": 26200 }, { "epoch": 459.65486725663715, "eval_loss": 0.7370618581771851, "eval_runtime": 0.9384, "eval_samples_per_second": 26.642, "eval_steps_per_second": 13.854, "num_input_tokens_seen": 14882048, "step": 26200 }, { "epoch": 459.7433628318584, "grad_norm": 1.832665397216715e-08, "learning_rate": 0.07976888485114592, "loss": 0.0, "num_input_tokens_seen": 14885712, "step": 26205 }, { "epoch": 459.83185840707966, "grad_norm": 2.630437556661036e-08, "learning_rate": 0.07971684083688595, "loss": 0.0, "num_input_tokens_seen": 14888896, "step": 26210 }, { "epoch": 459.92035398230087, "grad_norm": 1.0203955902454709e-08, "learning_rate": 0.0796648076611723, "loss": 0.0, "num_input_tokens_seen": 14891792, "step": 26215 }, { "epoch": 460.0, "grad_norm": 3.76410191904597e-09, "learning_rate": 0.07961278533202922, "loss": 0.0, "num_input_tokens_seen": 14893968, "step": 26220 }, { "epoch": 460.08849557522126, "grad_norm": 2.9959412728430834e-08, "learning_rate": 0.07956077385747919, "loss": 0.0, "num_input_tokens_seen": 14897008, "step": 26225 }, { "epoch": 460.17699115044246, "grad_norm": 1.8504596965840392e-08, "learning_rate": 0.079508773245543, "loss": 0.0, "num_input_tokens_seen": 14899680, "step": 26230 }, { "epoch": 460.2654867256637, "grad_norm": 1.532199611631313e-08, "learning_rate": 0.07945678350423982, "loss": 0.0, "num_input_tokens_seen": 14902800, "step": 26235 }, { "epoch": 460.353982300885, "grad_norm": 2.5165359573975365e-08, "learning_rate": 0.07940480464158717, "loss": 0.0, "num_input_tokens_seen": 14905440, "step": 26240 }, { "epoch": 460.4424778761062, "grad_norm": 3.718771068861315e-08, "learning_rate": 0.07935283666560076, "loss": 0.0, "num_input_tokens_seen": 14908336, "step": 26245 }, { "epoch": 460.53097345132744, "grad_norm": 3.408083770750636e-08, "learning_rate": 0.07930087958429478, "loss": 0.0, "num_input_tokens_seen": 14910976, "step": 26250 }, { "epoch": 460.6194690265487, "grad_norm": 3.698157158282811e-08, "learning_rate": 0.07924893340568159, "loss": 0.0, "num_input_tokens_seen": 14913952, "step": 26255 }, { "epoch": 460.7079646017699, "grad_norm": 7.631489218340448e-09, "learning_rate": 0.07919699813777205, "loss": 0.0, "num_input_tokens_seen": 14917056, "step": 26260 }, { "epoch": 460.79646017699116, "grad_norm": 2.438881274713367e-08, "learning_rate": 0.07914507378857515, "loss": 0.0, "num_input_tokens_seen": 14919856, "step": 26265 }, { "epoch": 460.88495575221236, "grad_norm": 1.0818485662866806e-08, "learning_rate": 0.07909316036609822, "loss": 0.0, "num_input_tokens_seen": 14922096, "step": 26270 }, { "epoch": 460.9734513274336, "grad_norm": 2.102343543697316e-08, "learning_rate": 0.07904125787834704, "loss": 0.0, "num_input_tokens_seen": 14925632, "step": 26275 }, { "epoch": 461.05309734513276, "grad_norm": 2.0087298935322906e-08, "learning_rate": 0.07898936633332569, "loss": 0.0, "num_input_tokens_seen": 14927848, "step": 26280 }, { "epoch": 461.14159292035396, "grad_norm": 1.7843444055642976e-08, "learning_rate": 0.07893748573903635, "loss": 0.0, "num_input_tokens_seen": 14930584, "step": 26285 }, { "epoch": 461.2300884955752, "grad_norm": 1.0353384816141897e-08, "learning_rate": 0.0788856161034798, "loss": 0.0, "num_input_tokens_seen": 14933784, "step": 26290 }, { "epoch": 461.3185840707965, "grad_norm": 2.6429315624909577e-08, "learning_rate": 0.07883375743465487, "loss": 0.0, "num_input_tokens_seen": 14936216, "step": 26295 }, { "epoch": 461.4070796460177, "grad_norm": 1.3660075737220723e-08, "learning_rate": 0.07878190974055888, "loss": 0.0, "num_input_tokens_seen": 14939304, "step": 26300 }, { "epoch": 461.49557522123894, "grad_norm": 2.382215491536499e-08, "learning_rate": 0.07873007302918746, "loss": 0.0, "num_input_tokens_seen": 14942392, "step": 26305 }, { "epoch": 461.5840707964602, "grad_norm": 1.3382996932875812e-08, "learning_rate": 0.07867824730853433, "loss": 0.0, "num_input_tokens_seen": 14945288, "step": 26310 }, { "epoch": 461.6725663716814, "grad_norm": 1.7320793688213598e-08, "learning_rate": 0.07862643258659176, "loss": 0.0, "num_input_tokens_seen": 14948376, "step": 26315 }, { "epoch": 461.76106194690266, "grad_norm": 1.594868948018302e-08, "learning_rate": 0.07857462887135026, "loss": 0.0, "num_input_tokens_seen": 14951320, "step": 26320 }, { "epoch": 461.8495575221239, "grad_norm": 1.8506549182006893e-08, "learning_rate": 0.0785228361707986, "loss": 0.0, "num_input_tokens_seen": 14954360, "step": 26325 }, { "epoch": 461.9380530973451, "grad_norm": 7.034090199198317e-09, "learning_rate": 0.07847105449292378, "loss": 0.0, "num_input_tokens_seen": 14956952, "step": 26330 }, { "epoch": 462.01769911504425, "grad_norm": 2.4126149966718913e-08, "learning_rate": 0.0784192838457113, "loss": 0.0, "num_input_tokens_seen": 14959064, "step": 26335 }, { "epoch": 462.1061946902655, "grad_norm": 1.0865077726407435e-08, "learning_rate": 0.07836752423714473, "loss": 0.0, "num_input_tokens_seen": 14961480, "step": 26340 }, { "epoch": 462.1946902654867, "grad_norm": 1.7342996372349262e-08, "learning_rate": 0.07831577567520616, "loss": 0.0, "num_input_tokens_seen": 14964344, "step": 26345 }, { "epoch": 462.283185840708, "grad_norm": 1.8700065496091156e-08, "learning_rate": 0.07826403816787579, "loss": 0.0, "num_input_tokens_seen": 14966744, "step": 26350 }, { "epoch": 462.37168141592923, "grad_norm": 1.0280844620069729e-08, "learning_rate": 0.0782123117231322, "loss": 0.0, "num_input_tokens_seen": 14970328, "step": 26355 }, { "epoch": 462.46017699115043, "grad_norm": 1.3751360050662242e-08, "learning_rate": 0.07816059634895237, "loss": 0.0, "num_input_tokens_seen": 14973160, "step": 26360 }, { "epoch": 462.5486725663717, "grad_norm": 2.022829548309346e-08, "learning_rate": 0.0781088920533113, "loss": 0.0, "num_input_tokens_seen": 14976136, "step": 26365 }, { "epoch": 462.6371681415929, "grad_norm": 1.3748574723138063e-08, "learning_rate": 0.07805719884418257, "loss": 0.0, "num_input_tokens_seen": 14978712, "step": 26370 }, { "epoch": 462.72566371681415, "grad_norm": 2.3566180118450575e-08, "learning_rate": 0.07800551672953779, "loss": 0.0, "num_input_tokens_seen": 14981528, "step": 26375 }, { "epoch": 462.8141592920354, "grad_norm": 1.177709840760599e-08, "learning_rate": 0.07795384571734709, "loss": 0.0, "num_input_tokens_seen": 14984488, "step": 26380 }, { "epoch": 462.9026548672566, "grad_norm": 1.5148618359717148e-08, "learning_rate": 0.07790218581557883, "loss": 0.0, "num_input_tokens_seen": 14987208, "step": 26385 }, { "epoch": 462.9911504424779, "grad_norm": 1.6355846454985112e-08, "learning_rate": 0.07785053703219949, "loss": 0.0, "num_input_tokens_seen": 14990504, "step": 26390 }, { "epoch": 463.070796460177, "grad_norm": 9.710634074622249e-09, "learning_rate": 0.07779889937517409, "loss": 0.0, "num_input_tokens_seen": 14992888, "step": 26395 }, { "epoch": 463.1592920353982, "grad_norm": 4.3124899207214185e-08, "learning_rate": 0.0777472728524657, "loss": 0.0, "num_input_tokens_seen": 14996008, "step": 26400 }, { "epoch": 463.1592920353982, "eval_loss": 0.755020022392273, "eval_runtime": 0.9399, "eval_samples_per_second": 26.599, "eval_steps_per_second": 13.831, "num_input_tokens_seen": 14996008, "step": 26400 }, { "epoch": 463.24778761061947, "grad_norm": 1.828844631290849e-08, "learning_rate": 0.07769565747203584, "loss": 0.0, "num_input_tokens_seen": 14998904, "step": 26405 }, { "epoch": 463.3362831858407, "grad_norm": 3.2458103760291124e-08, "learning_rate": 0.07764405324184427, "loss": 0.0, "num_input_tokens_seen": 15001768, "step": 26410 }, { "epoch": 463.42477876106193, "grad_norm": 1.8316868022338895e-08, "learning_rate": 0.07759246016984889, "loss": 0.0, "num_input_tokens_seen": 15004696, "step": 26415 }, { "epoch": 463.5132743362832, "grad_norm": 1.9511675830585773e-08, "learning_rate": 0.07754087826400609, "loss": 0.0, "num_input_tokens_seen": 15007400, "step": 26420 }, { "epoch": 463.60176991150445, "grad_norm": 2.8701963472599346e-08, "learning_rate": 0.0774893075322705, "loss": 0.0, "num_input_tokens_seen": 15010040, "step": 26425 }, { "epoch": 463.69026548672565, "grad_norm": 2.4164316769770267e-08, "learning_rate": 0.07743774798259484, "loss": 0.0, "num_input_tokens_seen": 15012792, "step": 26430 }, { "epoch": 463.7787610619469, "grad_norm": 9.449704130304326e-09, "learning_rate": 0.07738619962293032, "loss": 0.0, "num_input_tokens_seen": 15015944, "step": 26435 }, { "epoch": 463.86725663716817, "grad_norm": 1.4060736575061128e-08, "learning_rate": 0.0773346624612264, "loss": 0.0, "num_input_tokens_seen": 15018952, "step": 26440 }, { "epoch": 463.95575221238937, "grad_norm": 1.0691037388710356e-08, "learning_rate": 0.07728313650543066, "loss": 0.0, "num_input_tokens_seen": 15021608, "step": 26445 }, { "epoch": 464.0353982300885, "grad_norm": 1.4473712894869095e-08, "learning_rate": 0.07723162176348913, "loss": 0.0, "num_input_tokens_seen": 15024248, "step": 26450 }, { "epoch": 464.12389380530976, "grad_norm": 2.1448563813919463e-08, "learning_rate": 0.07718011824334593, "loss": 0.0, "num_input_tokens_seen": 15027448, "step": 26455 }, { "epoch": 464.21238938053096, "grad_norm": 2.0578115211833392e-08, "learning_rate": 0.07712862595294363, "loss": 0.0, "num_input_tokens_seen": 15030568, "step": 26460 }, { "epoch": 464.3008849557522, "grad_norm": 1.1305594682653464e-08, "learning_rate": 0.07707714490022301, "loss": 0.0, "num_input_tokens_seen": 15032920, "step": 26465 }, { "epoch": 464.3893805309734, "grad_norm": 2.9708688842333686e-08, "learning_rate": 0.07702567509312298, "loss": 0.0, "num_input_tokens_seen": 15036120, "step": 26470 }, { "epoch": 464.4778761061947, "grad_norm": 2.9727674544233196e-08, "learning_rate": 0.07697421653958098, "loss": 0.0, "num_input_tokens_seen": 15039080, "step": 26475 }, { "epoch": 464.56637168141594, "grad_norm": 3.5697777178711476e-08, "learning_rate": 0.07692276924753247, "loss": 0.0, "num_input_tokens_seen": 15041496, "step": 26480 }, { "epoch": 464.65486725663715, "grad_norm": 3.0138807005641866e-08, "learning_rate": 0.07687133322491124, "loss": 0.0, "num_input_tokens_seen": 15044024, "step": 26485 }, { "epoch": 464.7433628318584, "grad_norm": 1.884195377499509e-08, "learning_rate": 0.07681990847964948, "loss": 0.0, "num_input_tokens_seen": 15046536, "step": 26490 }, { "epoch": 464.83185840707966, "grad_norm": 4.120063579193811e-08, "learning_rate": 0.0767684950196774, "loss": 0.0, "num_input_tokens_seen": 15050024, "step": 26495 }, { "epoch": 464.92035398230087, "grad_norm": 3.2930216775639565e-08, "learning_rate": 0.0767170928529237, "loss": 0.0, "num_input_tokens_seen": 15053384, "step": 26500 }, { "epoch": 465.0, "grad_norm": 8.251271665926652e-09, "learning_rate": 0.07666570198731526, "loss": 0.0, "num_input_tokens_seen": 15055576, "step": 26505 }, { "epoch": 465.08849557522126, "grad_norm": 2.2037793812046402e-08, "learning_rate": 0.07661432243077708, "loss": 0.0, "num_input_tokens_seen": 15058232, "step": 26510 }, { "epoch": 465.17699115044246, "grad_norm": 2.0737275008286815e-08, "learning_rate": 0.0765629541912326, "loss": 0.0, "num_input_tokens_seen": 15061512, "step": 26515 }, { "epoch": 465.2654867256637, "grad_norm": 2.7345794961775027e-08, "learning_rate": 0.07651159727660352, "loss": 0.0, "num_input_tokens_seen": 15064536, "step": 26520 }, { "epoch": 465.353982300885, "grad_norm": 1.7652560302394704e-08, "learning_rate": 0.07646025169480959, "loss": 0.0, "num_input_tokens_seen": 15067416, "step": 26525 }, { "epoch": 465.4424778761062, "grad_norm": 2.7957186787830324e-08, "learning_rate": 0.07640891745376908, "loss": 0.0, "num_input_tokens_seen": 15070200, "step": 26530 }, { "epoch": 465.53097345132744, "grad_norm": 2.381276686946876e-08, "learning_rate": 0.07635759456139822, "loss": 0.0, "num_input_tokens_seen": 15072808, "step": 26535 }, { "epoch": 465.6194690265487, "grad_norm": 5.04851875859913e-08, "learning_rate": 0.0763062830256118, "loss": 0.0, "num_input_tokens_seen": 15075800, "step": 26540 }, { "epoch": 465.7079646017699, "grad_norm": 2.7456808382453346e-08, "learning_rate": 0.07625498285432258, "loss": 0.0, "num_input_tokens_seen": 15078232, "step": 26545 }, { "epoch": 465.79646017699116, "grad_norm": 1.2724507669759078e-08, "learning_rate": 0.07620369405544176, "loss": 0.0, "num_input_tokens_seen": 15081096, "step": 26550 }, { "epoch": 465.88495575221236, "grad_norm": 6.733528401525746e-09, "learning_rate": 0.07615241663687868, "loss": 0.0, "num_input_tokens_seen": 15084616, "step": 26555 }, { "epoch": 465.9734513274336, "grad_norm": 3.3804905541501284e-08, "learning_rate": 0.07610115060654106, "loss": 0.0, "num_input_tokens_seen": 15087160, "step": 26560 }, { "epoch": 466.05309734513276, "grad_norm": 6.3425731333666135e-09, "learning_rate": 0.07604989597233458, "loss": 0.0, "num_input_tokens_seen": 15089240, "step": 26565 }, { "epoch": 466.14159292035396, "grad_norm": 1.1826374546330953e-08, "learning_rate": 0.07599865274216352, "loss": 0.0, "num_input_tokens_seen": 15092200, "step": 26570 }, { "epoch": 466.2300884955752, "grad_norm": 2.0637283881796975e-08, "learning_rate": 0.07594742092393013, "loss": 0.0, "num_input_tokens_seen": 15094808, "step": 26575 }, { "epoch": 466.3185840707965, "grad_norm": 2.0089132135581167e-08, "learning_rate": 0.07589620052553503, "loss": 0.0, "num_input_tokens_seen": 15097560, "step": 26580 }, { "epoch": 466.4070796460177, "grad_norm": 2.7731783092121987e-08, "learning_rate": 0.0758449915548771, "loss": 0.0, "num_input_tokens_seen": 15100824, "step": 26585 }, { "epoch": 466.49557522123894, "grad_norm": 3.522254132803937e-08, "learning_rate": 0.07579379401985332, "loss": 0.0, "num_input_tokens_seen": 15103368, "step": 26590 }, { "epoch": 466.5840707964602, "grad_norm": 8.521155336893571e-09, "learning_rate": 0.07574260792835905, "loss": 0.0, "num_input_tokens_seen": 15106504, "step": 26595 }, { "epoch": 466.6725663716814, "grad_norm": 1.8836969317703733e-08, "learning_rate": 0.07569143328828784, "loss": 0.0, "num_input_tokens_seen": 15109352, "step": 26600 }, { "epoch": 466.6725663716814, "eval_loss": 0.7279479503631592, "eval_runtime": 0.9243, "eval_samples_per_second": 27.046, "eval_steps_per_second": 14.064, "num_input_tokens_seen": 15109352, "step": 26600 }, { "epoch": 466.76106194690266, "grad_norm": 2.5071402731668968e-08, "learning_rate": 0.0756402701075314, "loss": 0.0, "num_input_tokens_seen": 15112392, "step": 26605 }, { "epoch": 466.8495575221239, "grad_norm": 2.4407542653648306e-08, "learning_rate": 0.07558911839397982, "loss": 0.0, "num_input_tokens_seen": 15115688, "step": 26610 }, { "epoch": 466.9380530973451, "grad_norm": 1.8362836584628894e-08, "learning_rate": 0.07553797815552123, "loss": 0.0, "num_input_tokens_seen": 15118104, "step": 26615 }, { "epoch": 467.01769911504425, "grad_norm": 1.4815571880433254e-08, "learning_rate": 0.07548684940004222, "loss": 0.0, "num_input_tokens_seen": 15120496, "step": 26620 }, { "epoch": 467.1061946902655, "grad_norm": 3.0804685025032086e-08, "learning_rate": 0.07543573213542744, "loss": 0.0, "num_input_tokens_seen": 15123328, "step": 26625 }, { "epoch": 467.1946902654867, "grad_norm": 1.3218560468430951e-08, "learning_rate": 0.0753846263695597, "loss": 0.0, "num_input_tokens_seen": 15125856, "step": 26630 }, { "epoch": 467.283185840708, "grad_norm": 1.424525208904015e-08, "learning_rate": 0.07533353211032029, "loss": 0.0, "num_input_tokens_seen": 15128512, "step": 26635 }, { "epoch": 467.37168141592923, "grad_norm": 2.451504244049829e-08, "learning_rate": 0.07528244936558857, "loss": 0.0, "num_input_tokens_seen": 15131520, "step": 26640 }, { "epoch": 467.46017699115043, "grad_norm": 2.2337729888022295e-08, "learning_rate": 0.07523137814324206, "loss": 0.0, "num_input_tokens_seen": 15134128, "step": 26645 }, { "epoch": 467.5486725663717, "grad_norm": 1.8173603066884425e-08, "learning_rate": 0.07518031845115672, "loss": 0.0, "num_input_tokens_seen": 15136816, "step": 26650 }, { "epoch": 467.6371681415929, "grad_norm": 1.2570815499657328e-08, "learning_rate": 0.07512927029720647, "loss": 0.0, "num_input_tokens_seen": 15140256, "step": 26655 }, { "epoch": 467.72566371681415, "grad_norm": 2.6296046229390413e-08, "learning_rate": 0.0750782336892636, "loss": 0.0, "num_input_tokens_seen": 15143248, "step": 26660 }, { "epoch": 467.8141592920354, "grad_norm": 2.4679909671476707e-08, "learning_rate": 0.0750272086351987, "loss": 0.0, "num_input_tokens_seen": 15146240, "step": 26665 }, { "epoch": 467.9026548672566, "grad_norm": 2.5128377600935892e-08, "learning_rate": 0.07497619514288031, "loss": 0.0, "num_input_tokens_seen": 15149520, "step": 26670 }, { "epoch": 467.9911504424779, "grad_norm": 2.746814153908872e-08, "learning_rate": 0.07492519322017545, "loss": 0.0, "num_input_tokens_seen": 15152272, "step": 26675 }, { "epoch": 468.070796460177, "grad_norm": 2.348684446928928e-08, "learning_rate": 0.0748742028749493, "loss": 0.0, "num_input_tokens_seen": 15155144, "step": 26680 }, { "epoch": 468.1592920353982, "grad_norm": 1.4589614849569443e-08, "learning_rate": 0.0748232241150651, "loss": 0.0, "num_input_tokens_seen": 15158280, "step": 26685 }, { "epoch": 468.24778761061947, "grad_norm": 2.7930594725944502e-08, "learning_rate": 0.07477225694838453, "loss": 0.0, "num_input_tokens_seen": 15161736, "step": 26690 }, { "epoch": 468.3362831858407, "grad_norm": 1.2642954239083792e-08, "learning_rate": 0.07472130138276731, "loss": 0.0, "num_input_tokens_seen": 15164072, "step": 26695 }, { "epoch": 468.42477876106193, "grad_norm": 1.5674876507887348e-08, "learning_rate": 0.07467035742607138, "loss": 0.0, "num_input_tokens_seen": 15167192, "step": 26700 }, { "epoch": 468.5132743362832, "grad_norm": 1.1083265860634128e-08, "learning_rate": 0.07461942508615303, "loss": 0.0, "num_input_tokens_seen": 15170264, "step": 26705 }, { "epoch": 468.60176991150445, "grad_norm": 1.2504946411695528e-08, "learning_rate": 0.07456850437086657, "loss": 0.0, "num_input_tokens_seen": 15173768, "step": 26710 }, { "epoch": 468.69026548672565, "grad_norm": 4.4026275958231054e-08, "learning_rate": 0.07451759528806468, "loss": 0.0, "num_input_tokens_seen": 15176584, "step": 26715 }, { "epoch": 468.7787610619469, "grad_norm": 2.2286126721837718e-08, "learning_rate": 0.0744666978455982, "loss": 0.0, "num_input_tokens_seen": 15179032, "step": 26720 }, { "epoch": 468.86725663716817, "grad_norm": 1.0850369491777201e-08, "learning_rate": 0.07441581205131609, "loss": 0.0, "num_input_tokens_seen": 15181624, "step": 26725 }, { "epoch": 468.95575221238937, "grad_norm": 6.022408882699892e-08, "learning_rate": 0.07436493791306566, "loss": 0.0, "num_input_tokens_seen": 15184088, "step": 26730 }, { "epoch": 469.0353982300885, "grad_norm": 2.8226319059854177e-08, "learning_rate": 0.07431407543869223, "loss": 0.0, "num_input_tokens_seen": 15186016, "step": 26735 }, { "epoch": 469.12389380530976, "grad_norm": 9.274656598279307e-09, "learning_rate": 0.0742632246360395, "loss": 0.0, "num_input_tokens_seen": 15188768, "step": 26740 }, { "epoch": 469.21238938053096, "grad_norm": 9.977712878139755e-09, "learning_rate": 0.07421238551294934, "loss": 0.0, "num_input_tokens_seen": 15191872, "step": 26745 }, { "epoch": 469.3008849557522, "grad_norm": 1.5472718217779402e-08, "learning_rate": 0.07416155807726171, "loss": 0.0, "num_input_tokens_seen": 15194592, "step": 26750 }, { "epoch": 469.3893805309734, "grad_norm": 2.7133379987276385e-08, "learning_rate": 0.07411074233681492, "loss": 0.0, "num_input_tokens_seen": 15197680, "step": 26755 }, { "epoch": 469.4778761061947, "grad_norm": 1.0527031690799049e-08, "learning_rate": 0.07405993829944528, "loss": 0.0, "num_input_tokens_seen": 15200560, "step": 26760 }, { "epoch": 469.56637168141594, "grad_norm": 2.1121369542242974e-08, "learning_rate": 0.07400914597298755, "loss": 0.0, "num_input_tokens_seen": 15203216, "step": 26765 }, { "epoch": 469.65486725663715, "grad_norm": 1.1805872723869015e-08, "learning_rate": 0.07395836536527445, "loss": 0.0, "num_input_tokens_seen": 15205920, "step": 26770 }, { "epoch": 469.7433628318584, "grad_norm": 2.970399926027767e-08, "learning_rate": 0.07390759648413696, "loss": 0.0, "num_input_tokens_seen": 15208880, "step": 26775 }, { "epoch": 469.83185840707966, "grad_norm": 2.852095093430762e-08, "learning_rate": 0.07385683933740435, "loss": 0.0, "num_input_tokens_seen": 15211968, "step": 26780 }, { "epoch": 469.92035398230087, "grad_norm": 2.3176815133751916e-08, "learning_rate": 0.07380609393290402, "loss": 0.0, "num_input_tokens_seen": 15215120, "step": 26785 }, { "epoch": 470.0, "grad_norm": 2.9119251010456537e-08, "learning_rate": 0.07375536027846147, "loss": 0.0, "num_input_tokens_seen": 15217640, "step": 26790 }, { "epoch": 470.08849557522126, "grad_norm": 3.951613436470325e-08, "learning_rate": 0.07370463838190057, "loss": 0.0, "num_input_tokens_seen": 15220568, "step": 26795 }, { "epoch": 470.17699115044246, "grad_norm": 1.4186101182644961e-08, "learning_rate": 0.07365392825104317, "loss": 0.0, "num_input_tokens_seen": 15223592, "step": 26800 }, { "epoch": 470.17699115044246, "eval_loss": 0.738929033279419, "eval_runtime": 0.9374, "eval_samples_per_second": 26.67, "eval_steps_per_second": 13.869, "num_input_tokens_seen": 15223592, "step": 26800 }, { "epoch": 470.2654867256637, "grad_norm": 8.684468255637512e-09, "learning_rate": 0.07360322989370945, "loss": 0.0, "num_input_tokens_seen": 15226888, "step": 26805 }, { "epoch": 470.353982300885, "grad_norm": 2.8926530504236325e-08, "learning_rate": 0.07355254331771781, "loss": 0.0, "num_input_tokens_seen": 15229496, "step": 26810 }, { "epoch": 470.4424778761062, "grad_norm": 1.4983911000854278e-08, "learning_rate": 0.07350186853088461, "loss": 0.0, "num_input_tokens_seen": 15232088, "step": 26815 }, { "epoch": 470.53097345132744, "grad_norm": 2.0816749213281582e-08, "learning_rate": 0.07345120554102462, "loss": 0.0, "num_input_tokens_seen": 15235576, "step": 26820 }, { "epoch": 470.6194690265487, "grad_norm": 1.5178038381691294e-08, "learning_rate": 0.07340055435595079, "loss": 0.0, "num_input_tokens_seen": 15238120, "step": 26825 }, { "epoch": 470.7079646017699, "grad_norm": 3.084651822859996e-08, "learning_rate": 0.07334991498347401, "loss": 0.0, "num_input_tokens_seen": 15241112, "step": 26830 }, { "epoch": 470.79646017699116, "grad_norm": 1.7046366096451493e-08, "learning_rate": 0.07329928743140365, "loss": 0.0, "num_input_tokens_seen": 15243688, "step": 26835 }, { "epoch": 470.88495575221236, "grad_norm": 2.3278268201920582e-08, "learning_rate": 0.07324867170754705, "loss": 0.0, "num_input_tokens_seen": 15246424, "step": 26840 }, { "epoch": 470.9734513274336, "grad_norm": 3.077843047094575e-08, "learning_rate": 0.07319806781970974, "loss": 0.0, "num_input_tokens_seen": 15249320, "step": 26845 }, { "epoch": 471.05309734513276, "grad_norm": 2.55559182704701e-08, "learning_rate": 0.07314747577569555, "loss": 0.0, "num_input_tokens_seen": 15251360, "step": 26850 }, { "epoch": 471.14159292035396, "grad_norm": 4.134213327233738e-08, "learning_rate": 0.07309689558330636, "loss": 0.0, "num_input_tokens_seen": 15254256, "step": 26855 }, { "epoch": 471.2300884955752, "grad_norm": 6.970477528511765e-09, "learning_rate": 0.0730463272503423, "loss": 0.0, "num_input_tokens_seen": 15257424, "step": 26860 }, { "epoch": 471.3185840707965, "grad_norm": 9.620774399365928e-09, "learning_rate": 0.07299577078460168, "loss": 0.0, "num_input_tokens_seen": 15260464, "step": 26865 }, { "epoch": 471.4070796460177, "grad_norm": 2.5922693325242108e-08, "learning_rate": 0.07294522619388083, "loss": 0.0, "num_input_tokens_seen": 15263712, "step": 26870 }, { "epoch": 471.49557522123894, "grad_norm": 8.512454741094189e-09, "learning_rate": 0.07289469348597452, "loss": 0.0, "num_input_tokens_seen": 15266256, "step": 26875 }, { "epoch": 471.5840707964602, "grad_norm": 1.3382488894819744e-08, "learning_rate": 0.07284417266867535, "loss": 0.0, "num_input_tokens_seen": 15269312, "step": 26880 }, { "epoch": 471.6725663716814, "grad_norm": 9.108938492374818e-09, "learning_rate": 0.07279366374977439, "loss": 0.0, "num_input_tokens_seen": 15272032, "step": 26885 }, { "epoch": 471.76106194690266, "grad_norm": 7.0129599905044415e-09, "learning_rate": 0.07274316673706074, "loss": 0.0, "num_input_tokens_seen": 15274912, "step": 26890 }, { "epoch": 471.8495575221239, "grad_norm": 3.0220579816386817e-08, "learning_rate": 0.07269268163832161, "loss": 0.0, "num_input_tokens_seen": 15277952, "step": 26895 }, { "epoch": 471.9380530973451, "grad_norm": 2.4196570080903257e-08, "learning_rate": 0.07264220846134248, "loss": 0.0, "num_input_tokens_seen": 15280512, "step": 26900 }, { "epoch": 472.01769911504425, "grad_norm": 1.8187554573501075e-08, "learning_rate": 0.07259174721390699, "loss": 0.0, "num_input_tokens_seen": 15282872, "step": 26905 }, { "epoch": 472.1061946902655, "grad_norm": 1.2322418641019794e-08, "learning_rate": 0.07254129790379686, "loss": 0.0, "num_input_tokens_seen": 15286456, "step": 26910 }, { "epoch": 472.1946902654867, "grad_norm": 1.468863786158181e-08, "learning_rate": 0.072490860538792, "loss": 0.0, "num_input_tokens_seen": 15289368, "step": 26915 }, { "epoch": 472.283185840708, "grad_norm": 1.8187281014547807e-08, "learning_rate": 0.07244043512667042, "loss": 0.0, "num_input_tokens_seen": 15292488, "step": 26920 }, { "epoch": 472.37168141592923, "grad_norm": 1.2780495772801714e-08, "learning_rate": 0.07239002167520843, "loss": 0.0, "num_input_tokens_seen": 15294952, "step": 26925 }, { "epoch": 472.46017699115043, "grad_norm": 2.922030439833634e-08, "learning_rate": 0.07233962019218045, "loss": 0.0, "num_input_tokens_seen": 15297928, "step": 26930 }, { "epoch": 472.5486725663717, "grad_norm": 5.622412313499581e-09, "learning_rate": 0.07228923068535892, "loss": 0.0, "num_input_tokens_seen": 15300760, "step": 26935 }, { "epoch": 472.6371681415929, "grad_norm": 3.978725970910091e-08, "learning_rate": 0.0722388531625146, "loss": 0.0, "num_input_tokens_seen": 15303640, "step": 26940 }, { "epoch": 472.72566371681415, "grad_norm": 1.4614865762041518e-08, "learning_rate": 0.07218848763141639, "loss": 0.0, "num_input_tokens_seen": 15306424, "step": 26945 }, { "epoch": 472.8141592920354, "grad_norm": 3.219632560558239e-08, "learning_rate": 0.07213813409983118, "loss": 0.0, "num_input_tokens_seen": 15309560, "step": 26950 }, { "epoch": 472.9026548672566, "grad_norm": 1.4026952932510994e-08, "learning_rate": 0.0720877925755242, "loss": 0.0, "num_input_tokens_seen": 15312440, "step": 26955 }, { "epoch": 472.9911504424779, "grad_norm": 3.1585955184709746e-08, "learning_rate": 0.07203746306625866, "loss": 0.0, "num_input_tokens_seen": 15314872, "step": 26960 }, { "epoch": 473.070796460177, "grad_norm": 2.6663244057090196e-08, "learning_rate": 0.07198714557979606, "loss": 0.0, "num_input_tokens_seen": 15317736, "step": 26965 }, { "epoch": 473.1592920353982, "grad_norm": 2.5832925132363016e-08, "learning_rate": 0.07193684012389602, "loss": 0.0, "num_input_tokens_seen": 15320888, "step": 26970 }, { "epoch": 473.24778761061947, "grad_norm": 1.977051944379582e-08, "learning_rate": 0.07188654670631621, "loss": 0.0, "num_input_tokens_seen": 15323944, "step": 26975 }, { "epoch": 473.3362831858407, "grad_norm": 1.0030976937969172e-08, "learning_rate": 0.07183626533481258, "loss": 0.0, "num_input_tokens_seen": 15327048, "step": 26980 }, { "epoch": 473.42477876106193, "grad_norm": 8.7425551242859e-09, "learning_rate": 0.07178599601713909, "loss": 0.0, "num_input_tokens_seen": 15329816, "step": 26985 }, { "epoch": 473.5132743362832, "grad_norm": 2.6188081037048505e-08, "learning_rate": 0.07173573876104786, "loss": 0.0, "num_input_tokens_seen": 15332728, "step": 26990 }, { "epoch": 473.60176991150445, "grad_norm": 1.183982867303257e-08, "learning_rate": 0.0716854935742893, "loss": 0.0, "num_input_tokens_seen": 15335192, "step": 26995 }, { "epoch": 473.69026548672565, "grad_norm": 1.4243016543957765e-08, "learning_rate": 0.07163526046461174, "loss": 0.0, "num_input_tokens_seen": 15338072, "step": 27000 }, { "epoch": 473.69026548672565, "eval_loss": 0.7722395658493042, "eval_runtime": 0.9415, "eval_samples_per_second": 26.552, "eval_steps_per_second": 13.807, "num_input_tokens_seen": 15338072, "step": 27000 }, { "epoch": 473.7787610619469, "grad_norm": 1.4597269171190419e-08, "learning_rate": 0.07158503943976181, "loss": 0.0, "num_input_tokens_seen": 15341128, "step": 27005 }, { "epoch": 473.86725663716817, "grad_norm": 2.5061135389137235e-08, "learning_rate": 0.07153483050748427, "loss": 0.0, "num_input_tokens_seen": 15343592, "step": 27010 }, { "epoch": 473.95575221238937, "grad_norm": 1.848863995235206e-08, "learning_rate": 0.07148463367552188, "loss": 0.0, "num_input_tokens_seen": 15346248, "step": 27015 }, { "epoch": 474.0353982300885, "grad_norm": 2.8274918406623328e-08, "learning_rate": 0.07143444895161565, "loss": 0.0, "num_input_tokens_seen": 15348456, "step": 27020 }, { "epoch": 474.12389380530976, "grad_norm": 3.677578419569727e-08, "learning_rate": 0.07138427634350476, "loss": 0.0, "num_input_tokens_seen": 15351736, "step": 27025 }, { "epoch": 474.21238938053096, "grad_norm": 1.264080129459444e-08, "learning_rate": 0.07133411585892636, "loss": 0.0, "num_input_tokens_seen": 15354824, "step": 27030 }, { "epoch": 474.3008849557522, "grad_norm": 3.4439036511457743e-08, "learning_rate": 0.07128396750561593, "loss": 0.0, "num_input_tokens_seen": 15357480, "step": 27035 }, { "epoch": 474.3893805309734, "grad_norm": 3.204685938840157e-08, "learning_rate": 0.07123383129130685, "loss": 0.0, "num_input_tokens_seen": 15360664, "step": 27040 }, { "epoch": 474.4778761061947, "grad_norm": 2.138198951229242e-08, "learning_rate": 0.07118370722373084, "loss": 0.0, "num_input_tokens_seen": 15363368, "step": 27045 }, { "epoch": 474.56637168141594, "grad_norm": 1.7911188976427184e-08, "learning_rate": 0.07113359531061769, "loss": 0.0, "num_input_tokens_seen": 15366024, "step": 27050 }, { "epoch": 474.65486725663715, "grad_norm": 4.1598248401442106e-08, "learning_rate": 0.07108349555969525, "loss": 0.0, "num_input_tokens_seen": 15368904, "step": 27055 }, { "epoch": 474.7433628318584, "grad_norm": 2.206866156484466e-08, "learning_rate": 0.07103340797868944, "loss": 0.0, "num_input_tokens_seen": 15371720, "step": 27060 }, { "epoch": 474.83185840707966, "grad_norm": 2.886568140070267e-08, "learning_rate": 0.07098333257532453, "loss": 0.0, "num_input_tokens_seen": 15375016, "step": 27065 }, { "epoch": 474.92035398230087, "grad_norm": 4.514050289117222e-08, "learning_rate": 0.07093326935732269, "loss": 0.0, "num_input_tokens_seen": 15377832, "step": 27070 }, { "epoch": 475.0, "grad_norm": 6.404533792192524e-08, "learning_rate": 0.0708832183324044, "loss": 0.0, "num_input_tokens_seen": 15379912, "step": 27075 }, { "epoch": 475.08849557522126, "grad_norm": 1.5245694484633532e-08, "learning_rate": 0.07083317950828799, "loss": 0.0, "num_input_tokens_seen": 15382568, "step": 27080 }, { "epoch": 475.17699115044246, "grad_norm": 1.146172134980361e-08, "learning_rate": 0.0707831528926902, "loss": 0.0, "num_input_tokens_seen": 15385384, "step": 27085 }, { "epoch": 475.2654867256637, "grad_norm": 5.0678274021720426e-08, "learning_rate": 0.07073313849332578, "loss": 0.0, "num_input_tokens_seen": 15388488, "step": 27090 }, { "epoch": 475.353982300885, "grad_norm": 2.6282231502250397e-08, "learning_rate": 0.07068313631790749, "loss": 0.0, "num_input_tokens_seen": 15391576, "step": 27095 }, { "epoch": 475.4424778761062, "grad_norm": 1.5744435089004583e-08, "learning_rate": 0.07063314637414632, "loss": 0.0, "num_input_tokens_seen": 15394008, "step": 27100 }, { "epoch": 475.53097345132744, "grad_norm": 1.731838850105305e-08, "learning_rate": 0.07058316866975144, "loss": 0.0, "num_input_tokens_seen": 15396680, "step": 27105 }, { "epoch": 475.6194690265487, "grad_norm": 2.116708408550494e-08, "learning_rate": 0.0705332032124299, "loss": 0.0, "num_input_tokens_seen": 15399592, "step": 27110 }, { "epoch": 475.7079646017699, "grad_norm": 1.059663201630201e-08, "learning_rate": 0.0704832500098871, "loss": 0.0, "num_input_tokens_seen": 15402920, "step": 27115 }, { "epoch": 475.79646017699116, "grad_norm": 2.0187647109537465e-08, "learning_rate": 0.07043330906982641, "loss": 0.0, "num_input_tokens_seen": 15405896, "step": 27120 }, { "epoch": 475.88495575221236, "grad_norm": 9.376956988660368e-09, "learning_rate": 0.07038338039994936, "loss": 0.0, "num_input_tokens_seen": 15408936, "step": 27125 }, { "epoch": 475.9734513274336, "grad_norm": 2.42500473035534e-08, "learning_rate": 0.07033346400795562, "loss": 0.0, "num_input_tokens_seen": 15411768, "step": 27130 }, { "epoch": 476.05309734513276, "grad_norm": 1.4219494026690427e-08, "learning_rate": 0.07028355990154282, "loss": 0.0, "num_input_tokens_seen": 15414264, "step": 27135 }, { "epoch": 476.14159292035396, "grad_norm": 1.4152766070196776e-08, "learning_rate": 0.07023366808840685, "loss": 0.0, "num_input_tokens_seen": 15417240, "step": 27140 }, { "epoch": 476.2300884955752, "grad_norm": 2.3683616845460165e-08, "learning_rate": 0.07018378857624172, "loss": 0.0, "num_input_tokens_seen": 15420360, "step": 27145 }, { "epoch": 476.3185840707965, "grad_norm": 3.933042691528499e-08, "learning_rate": 0.0701339213727394, "loss": 0.0, "num_input_tokens_seen": 15423784, "step": 27150 }, { "epoch": 476.4070796460177, "grad_norm": 2.159858603079101e-08, "learning_rate": 0.07008406648559008, "loss": 0.0, "num_input_tokens_seen": 15426936, "step": 27155 }, { "epoch": 476.49557522123894, "grad_norm": 4.904070394218252e-08, "learning_rate": 0.07003422392248196, "loss": 0.0, "num_input_tokens_seen": 15429864, "step": 27160 }, { "epoch": 476.5840707964602, "grad_norm": 3.336716503099524e-08, "learning_rate": 0.06998439369110142, "loss": 0.0, "num_input_tokens_seen": 15432792, "step": 27165 }, { "epoch": 476.6725663716814, "grad_norm": 1.5013210230563345e-08, "learning_rate": 0.06993457579913295, "loss": 0.0, "num_input_tokens_seen": 15436024, "step": 27170 }, { "epoch": 476.76106194690266, "grad_norm": 2.735541571041722e-08, "learning_rate": 0.06988477025425903, "loss": 0.0, "num_input_tokens_seen": 15438648, "step": 27175 }, { "epoch": 476.8495575221239, "grad_norm": 2.179154989789822e-08, "learning_rate": 0.06983497706416032, "loss": 0.0, "num_input_tokens_seen": 15441224, "step": 27180 }, { "epoch": 476.9380530973451, "grad_norm": 2.5198746200771893e-08, "learning_rate": 0.0697851962365156, "loss": 0.0, "num_input_tokens_seen": 15443704, "step": 27185 }, { "epoch": 477.01769911504425, "grad_norm": 3.6157786098556244e-08, "learning_rate": 0.06973542777900163, "loss": 0.0, "num_input_tokens_seen": 15445760, "step": 27190 }, { "epoch": 477.1061946902655, "grad_norm": 1.7848517330776303e-08, "learning_rate": 0.06968567169929342, "loss": 0.0, "num_input_tokens_seen": 15448704, "step": 27195 }, { "epoch": 477.1946902654867, "grad_norm": 1.987315023654901e-08, "learning_rate": 0.06963592800506392, "loss": 0.0, "num_input_tokens_seen": 15451312, "step": 27200 }, { "epoch": 477.1946902654867, "eval_loss": 0.754585862159729, "eval_runtime": 0.9422, "eval_samples_per_second": 26.535, "eval_steps_per_second": 13.798, "num_input_tokens_seen": 15451312, "step": 27200 }, { "epoch": 477.283185840708, "grad_norm": 1.98118659255897e-08, "learning_rate": 0.06958619670398417, "loss": 0.0, "num_input_tokens_seen": 15454432, "step": 27205 }, { "epoch": 477.37168141592923, "grad_norm": 3.153532546207316e-08, "learning_rate": 0.0695364778037235, "loss": 0.0, "num_input_tokens_seen": 15456992, "step": 27210 }, { "epoch": 477.46017699115043, "grad_norm": 1.1336310556941953e-08, "learning_rate": 0.06948677131194907, "loss": 0.0, "num_input_tokens_seen": 15460000, "step": 27215 }, { "epoch": 477.5486725663717, "grad_norm": 4.671350595231161e-08, "learning_rate": 0.06943707723632629, "loss": 0.0, "num_input_tokens_seen": 15462736, "step": 27220 }, { "epoch": 477.6371681415929, "grad_norm": 3.82123666042844e-08, "learning_rate": 0.06938739558451867, "loss": 0.0, "num_input_tokens_seen": 15465216, "step": 27225 }, { "epoch": 477.72566371681415, "grad_norm": 2.7914021316632898e-08, "learning_rate": 0.06933772636418763, "loss": 0.0, "num_input_tokens_seen": 15468384, "step": 27230 }, { "epoch": 477.8141592920354, "grad_norm": 1.2741340427169234e-08, "learning_rate": 0.06928806958299293, "loss": 0.0, "num_input_tokens_seen": 15471680, "step": 27235 }, { "epoch": 477.9026548672566, "grad_norm": 2.0837330083622874e-08, "learning_rate": 0.06923842524859211, "loss": 0.0, "num_input_tokens_seen": 15474256, "step": 27240 }, { "epoch": 477.9911504424779, "grad_norm": 1.810075467290062e-08, "learning_rate": 0.06918879336864105, "loss": 0.0, "num_input_tokens_seen": 15477216, "step": 27245 }, { "epoch": 478.070796460177, "grad_norm": 2.6881158632363622e-08, "learning_rate": 0.06913917395079362, "loss": 0.0, "num_input_tokens_seen": 15479632, "step": 27250 }, { "epoch": 478.1592920353982, "grad_norm": 1.3797913922530824e-08, "learning_rate": 0.0690895670027017, "loss": 0.0, "num_input_tokens_seen": 15482416, "step": 27255 }, { "epoch": 478.24778761061947, "grad_norm": 2.6186812718265173e-08, "learning_rate": 0.06903997253201531, "loss": 0.0, "num_input_tokens_seen": 15485280, "step": 27260 }, { "epoch": 478.3362831858407, "grad_norm": 2.8028910747934788e-08, "learning_rate": 0.06899039054638263, "loss": 0.0, "num_input_tokens_seen": 15487776, "step": 27265 }, { "epoch": 478.42477876106193, "grad_norm": 1.4396940528627056e-08, "learning_rate": 0.06894082105344976, "loss": 0.0, "num_input_tokens_seen": 15490944, "step": 27270 }, { "epoch": 478.5132743362832, "grad_norm": 1.340068678246098e-08, "learning_rate": 0.06889126406086087, "loss": 0.0, "num_input_tokens_seen": 15493584, "step": 27275 }, { "epoch": 478.60176991150445, "grad_norm": 2.012478361734793e-08, "learning_rate": 0.0688417195762584, "loss": 0.0, "num_input_tokens_seen": 15496672, "step": 27280 }, { "epoch": 478.69026548672565, "grad_norm": 7.284918002170571e-09, "learning_rate": 0.06879218760728262, "loss": 0.0, "num_input_tokens_seen": 15499664, "step": 27285 }, { "epoch": 478.7787610619469, "grad_norm": 6.207403036029291e-09, "learning_rate": 0.06874266816157207, "loss": 0.0, "num_input_tokens_seen": 15502736, "step": 27290 }, { "epoch": 478.86725663716817, "grad_norm": 1.0570317066083135e-08, "learning_rate": 0.06869316124676321, "loss": 0.0, "num_input_tokens_seen": 15505280, "step": 27295 }, { "epoch": 478.95575221238937, "grad_norm": 2.7189454243625732e-08, "learning_rate": 0.06864366687049062, "loss": 0.0, "num_input_tokens_seen": 15508256, "step": 27300 }, { "epoch": 479.0353982300885, "grad_norm": 4.533600872491661e-08, "learning_rate": 0.06859418504038704, "loss": 0.0, "num_input_tokens_seen": 15510952, "step": 27305 }, { "epoch": 479.12389380530976, "grad_norm": 1.766427359939371e-08, "learning_rate": 0.06854471576408311, "loss": 0.0, "num_input_tokens_seen": 15513672, "step": 27310 }, { "epoch": 479.21238938053096, "grad_norm": 2.8404043561636172e-08, "learning_rate": 0.06849525904920767, "loss": 0.0, "num_input_tokens_seen": 15516376, "step": 27315 }, { "epoch": 479.3008849557522, "grad_norm": 1.4365200584620652e-08, "learning_rate": 0.06844581490338748, "loss": 0.0, "num_input_tokens_seen": 15519176, "step": 27320 }, { "epoch": 479.3893805309734, "grad_norm": 3.500647238752208e-08, "learning_rate": 0.06839638333424752, "loss": 0.0, "num_input_tokens_seen": 15522648, "step": 27325 }, { "epoch": 479.4778761061947, "grad_norm": 1.5155226407159716e-08, "learning_rate": 0.06834696434941082, "loss": 0.0, "num_input_tokens_seen": 15525672, "step": 27330 }, { "epoch": 479.56637168141594, "grad_norm": 1.918274605827719e-08, "learning_rate": 0.06829755795649824, "loss": 0.0, "num_input_tokens_seen": 15528648, "step": 27335 }, { "epoch": 479.65486725663715, "grad_norm": 6.46174953544687e-08, "learning_rate": 0.06824816416312904, "loss": 0.0, "num_input_tokens_seen": 15531416, "step": 27340 }, { "epoch": 479.7433628318584, "grad_norm": 2.2823300582786032e-08, "learning_rate": 0.06819878297692027, "loss": 0.0, "num_input_tokens_seen": 15533992, "step": 27345 }, { "epoch": 479.83185840707966, "grad_norm": 1.2284036898790873e-08, "learning_rate": 0.0681494144054871, "loss": 0.0, "num_input_tokens_seen": 15536600, "step": 27350 }, { "epoch": 479.92035398230087, "grad_norm": 1.5650218898599633e-08, "learning_rate": 0.06810005845644286, "loss": 0.0, "num_input_tokens_seen": 15539992, "step": 27355 }, { "epoch": 480.0, "grad_norm": 2.9691902270201354e-09, "learning_rate": 0.06805071513739878, "loss": 0.0, "num_input_tokens_seen": 15542312, "step": 27360 }, { "epoch": 480.08849557522126, "grad_norm": 1.4223238586907883e-08, "learning_rate": 0.06800138445596428, "loss": 0.0, "num_input_tokens_seen": 15545032, "step": 27365 }, { "epoch": 480.17699115044246, "grad_norm": 4.109620377334977e-08, "learning_rate": 0.06795206641974678, "loss": 0.0, "num_input_tokens_seen": 15548616, "step": 27370 }, { "epoch": 480.2654867256637, "grad_norm": 1.5817493093095436e-08, "learning_rate": 0.06790276103635169, "loss": 0.0, "num_input_tokens_seen": 15551624, "step": 27375 }, { "epoch": 480.353982300885, "grad_norm": 2.0078244844512483e-08, "learning_rate": 0.0678534683133826, "loss": 0.0, "num_input_tokens_seen": 15555016, "step": 27380 }, { "epoch": 480.4424778761062, "grad_norm": 4.0415947921701445e-08, "learning_rate": 0.06780418825844095, "loss": 0.0, "num_input_tokens_seen": 15557608, "step": 27385 }, { "epoch": 480.53097345132744, "grad_norm": 1.6421818571643598e-08, "learning_rate": 0.0677549208791264, "loss": 0.0, "num_input_tokens_seen": 15560360, "step": 27390 }, { "epoch": 480.6194690265487, "grad_norm": 1.2078074540511352e-08, "learning_rate": 0.06770566618303668, "loss": 0.0, "num_input_tokens_seen": 15562952, "step": 27395 }, { "epoch": 480.7079646017699, "grad_norm": 1.5483969662000163e-08, "learning_rate": 0.06765642417776736, "loss": 0.0, "num_input_tokens_seen": 15565784, "step": 27400 }, { "epoch": 480.7079646017699, "eval_loss": 0.7863607406616211, "eval_runtime": 0.9418, "eval_samples_per_second": 26.546, "eval_steps_per_second": 13.804, "num_input_tokens_seen": 15565784, "step": 27400 }, { "epoch": 480.79646017699116, "grad_norm": 3.369859769009054e-08, "learning_rate": 0.0676071948709122, "loss": 0.0, "num_input_tokens_seen": 15568328, "step": 27405 }, { "epoch": 480.88495575221236, "grad_norm": 8.449854149716884e-09, "learning_rate": 0.06755797827006307, "loss": 0.0, "num_input_tokens_seen": 15571400, "step": 27410 }, { "epoch": 480.9734513274336, "grad_norm": 2.1604401823083208e-08, "learning_rate": 0.06750877438280974, "loss": 0.0, "num_input_tokens_seen": 15574088, "step": 27415 }, { "epoch": 481.05309734513276, "grad_norm": 3.30394094305575e-08, "learning_rate": 0.06745958321673998, "loss": 0.0, "num_input_tokens_seen": 15576984, "step": 27420 }, { "epoch": 481.14159292035396, "grad_norm": 2.9118734090616272e-08, "learning_rate": 0.0674104047794398, "loss": 0.0, "num_input_tokens_seen": 15579496, "step": 27425 }, { "epoch": 481.2300884955752, "grad_norm": 1.8822717606781225e-08, "learning_rate": 0.06736123907849303, "loss": 0.0, "num_input_tokens_seen": 15582376, "step": 27430 }, { "epoch": 481.3185840707965, "grad_norm": 1.6837292449167762e-08, "learning_rate": 0.06731208612148178, "loss": 0.0, "num_input_tokens_seen": 15585416, "step": 27435 }, { "epoch": 481.4070796460177, "grad_norm": 1.7802994634052993e-08, "learning_rate": 0.0672629459159859, "loss": 0.0, "num_input_tokens_seen": 15588184, "step": 27440 }, { "epoch": 481.49557522123894, "grad_norm": 6.902975968614555e-08, "learning_rate": 0.0672138184695835, "loss": 0.0, "num_input_tokens_seen": 15590984, "step": 27445 }, { "epoch": 481.5840707964602, "grad_norm": 1.8512443134000023e-08, "learning_rate": 0.0671647037898507, "loss": 0.0, "num_input_tokens_seen": 15593608, "step": 27450 }, { "epoch": 481.6725663716814, "grad_norm": 1.5314611800931743e-08, "learning_rate": 0.0671156018843615, "loss": 0.0, "num_input_tokens_seen": 15596728, "step": 27455 }, { "epoch": 481.76106194690266, "grad_norm": 4.142725984479512e-08, "learning_rate": 0.06706651276068812, "loss": 0.0, "num_input_tokens_seen": 15599736, "step": 27460 }, { "epoch": 481.8495575221239, "grad_norm": 4.1768966951849507e-08, "learning_rate": 0.06701743642640064, "loss": 0.0, "num_input_tokens_seen": 15602520, "step": 27465 }, { "epoch": 481.9380530973451, "grad_norm": 1.752091449702675e-08, "learning_rate": 0.06696837288906729, "loss": 0.0, "num_input_tokens_seen": 15605320, "step": 27470 }, { "epoch": 482.01769911504425, "grad_norm": 3.1443633474737e-08, "learning_rate": 0.06691932215625432, "loss": 0.0, "num_input_tokens_seen": 15607816, "step": 27475 }, { "epoch": 482.1061946902655, "grad_norm": 1.9127451622580338e-08, "learning_rate": 0.06687028423552589, "loss": 0.0, "num_input_tokens_seen": 15610568, "step": 27480 }, { "epoch": 482.1946902654867, "grad_norm": 2.356654782431633e-08, "learning_rate": 0.06682125913444435, "loss": 0.0, "num_input_tokens_seen": 15613480, "step": 27485 }, { "epoch": 482.283185840708, "grad_norm": 7.140675606365221e-09, "learning_rate": 0.0667722468605699, "loss": 0.0, "num_input_tokens_seen": 15616344, "step": 27490 }, { "epoch": 482.37168141592923, "grad_norm": 2.4066673987022114e-08, "learning_rate": 0.06672324742146094, "loss": 0.0, "num_input_tokens_seen": 15618984, "step": 27495 }, { "epoch": 482.46017699115043, "grad_norm": 2.5545725534925623e-08, "learning_rate": 0.06667426082467373, "loss": 0.0, "num_input_tokens_seen": 15621560, "step": 27500 }, { "epoch": 482.5486725663717, "grad_norm": 2.811770194455221e-08, "learning_rate": 0.0666252870777626, "loss": 0.0, "num_input_tokens_seen": 15624664, "step": 27505 }, { "epoch": 482.6371681415929, "grad_norm": 2.852794445118434e-08, "learning_rate": 0.06657632618827995, "loss": 0.0, "num_input_tokens_seen": 15627640, "step": 27510 }, { "epoch": 482.72566371681415, "grad_norm": 3.7351075121705435e-08, "learning_rate": 0.06652737816377623, "loss": 0.0, "num_input_tokens_seen": 15631112, "step": 27515 }, { "epoch": 482.8141592920354, "grad_norm": 5.0921499905598466e-08, "learning_rate": 0.06647844301179971, "loss": 0.0, "num_input_tokens_seen": 15633832, "step": 27520 }, { "epoch": 482.9026548672566, "grad_norm": 1.5320024360221396e-08, "learning_rate": 0.06642952073989689, "loss": 0.0, "num_input_tokens_seen": 15636792, "step": 27525 }, { "epoch": 482.9911504424779, "grad_norm": 2.2708201541377093e-08, "learning_rate": 0.06638061135561223, "loss": 0.0, "num_input_tokens_seen": 15639560, "step": 27530 }, { "epoch": 483.070796460177, "grad_norm": 2.5551171844995224e-08, "learning_rate": 0.06633171486648808, "loss": 0.0, "num_input_tokens_seen": 15642000, "step": 27535 }, { "epoch": 483.1592920353982, "grad_norm": 2.150577138593235e-08, "learning_rate": 0.06628283128006499, "loss": 0.0, "num_input_tokens_seen": 15645056, "step": 27540 }, { "epoch": 483.24778761061947, "grad_norm": 4.3691319007166385e-08, "learning_rate": 0.0662339606038813, "loss": 0.0, "num_input_tokens_seen": 15648016, "step": 27545 }, { "epoch": 483.3362831858407, "grad_norm": 5.009442816117371e-08, "learning_rate": 0.06618510284547358, "loss": 0.0, "num_input_tokens_seen": 15650528, "step": 27550 }, { "epoch": 483.42477876106193, "grad_norm": 2.7134323232758106e-08, "learning_rate": 0.06613625801237633, "loss": 0.0, "num_input_tokens_seen": 15653824, "step": 27555 }, { "epoch": 483.5132743362832, "grad_norm": 2.52503191688902e-08, "learning_rate": 0.066087426112122, "loss": 0.0, "num_input_tokens_seen": 15656496, "step": 27560 }, { "epoch": 483.60176991150445, "grad_norm": 3.938269799164118e-08, "learning_rate": 0.06603860715224101, "loss": 0.0, "num_input_tokens_seen": 15659120, "step": 27565 }, { "epoch": 483.69026548672565, "grad_norm": 4.00681834378247e-08, "learning_rate": 0.06598980114026198, "loss": 0.0, "num_input_tokens_seen": 15662032, "step": 27570 }, { "epoch": 483.7787610619469, "grad_norm": 3.038986662318166e-08, "learning_rate": 0.06594100808371128, "loss": 0.0, "num_input_tokens_seen": 15664544, "step": 27575 }, { "epoch": 483.86725663716817, "grad_norm": 2.09384296567805e-08, "learning_rate": 0.06589222799011357, "loss": 0.0, "num_input_tokens_seen": 15667200, "step": 27580 }, { "epoch": 483.95575221238937, "grad_norm": 1.6550583126218044e-08, "learning_rate": 0.0658434608669912, "loss": 0.0, "num_input_tokens_seen": 15670000, "step": 27585 }, { "epoch": 484.0353982300885, "grad_norm": 2.7876955854821972e-08, "learning_rate": 0.06579470672186473, "loss": 0.0, "num_input_tokens_seen": 15672776, "step": 27590 }, { "epoch": 484.12389380530976, "grad_norm": 3.6109415901819375e-08, "learning_rate": 0.06574596556225275, "loss": 0.0, "num_input_tokens_seen": 15676328, "step": 27595 }, { "epoch": 484.21238938053096, "grad_norm": 1.9713812804411646e-08, "learning_rate": 0.06569723739567161, "loss": 0.0, "num_input_tokens_seen": 15679720, "step": 27600 }, { "epoch": 484.21238938053096, "eval_loss": 0.7917570471763611, "eval_runtime": 0.9429, "eval_samples_per_second": 26.515, "eval_steps_per_second": 13.788, "num_input_tokens_seen": 15679720, "step": 27600 }, { "epoch": 484.3008849557522, "grad_norm": 3.1416043100307434e-08, "learning_rate": 0.06564852222963588, "loss": 0.0, "num_input_tokens_seen": 15682680, "step": 27605 }, { "epoch": 484.3893805309734, "grad_norm": 1.3314604530023644e-08, "learning_rate": 0.06559982007165813, "loss": 0.0, "num_input_tokens_seen": 15685336, "step": 27610 }, { "epoch": 484.4778761061947, "grad_norm": 1.3376977747725505e-08, "learning_rate": 0.06555113092924868, "loss": 0.0, "num_input_tokens_seen": 15687944, "step": 27615 }, { "epoch": 484.56637168141594, "grad_norm": 8.574632559543716e-09, "learning_rate": 0.06550245480991615, "loss": 0.0, "num_input_tokens_seen": 15690776, "step": 27620 }, { "epoch": 484.65486725663715, "grad_norm": 2.8876039337433212e-08, "learning_rate": 0.0654537917211669, "loss": 0.0, "num_input_tokens_seen": 15693704, "step": 27625 }, { "epoch": 484.7433628318584, "grad_norm": 2.2639429886339713e-08, "learning_rate": 0.0654051416705055, "loss": 0.0, "num_input_tokens_seen": 15696456, "step": 27630 }, { "epoch": 484.83185840707966, "grad_norm": 2.965112599895292e-08, "learning_rate": 0.06535650466543427, "loss": 0.0, "num_input_tokens_seen": 15699304, "step": 27635 }, { "epoch": 484.92035398230087, "grad_norm": 2.835233559039807e-08, "learning_rate": 0.0653078807134538, "loss": 0.0, "num_input_tokens_seen": 15701880, "step": 27640 }, { "epoch": 485.0, "grad_norm": 1.805410754229797e-08, "learning_rate": 0.06525926982206236, "loss": 0.0, "num_input_tokens_seen": 15704328, "step": 27645 }, { "epoch": 485.08849557522126, "grad_norm": 2.415598743255032e-08, "learning_rate": 0.06521067199875648, "loss": 0.0, "num_input_tokens_seen": 15707336, "step": 27650 }, { "epoch": 485.17699115044246, "grad_norm": 6.43802451349984e-08, "learning_rate": 0.06516208725103047, "loss": 0.0, "num_input_tokens_seen": 15709944, "step": 27655 }, { "epoch": 485.2654867256637, "grad_norm": 1.2468158949729968e-08, "learning_rate": 0.06511351558637678, "loss": 0.0, "num_input_tokens_seen": 15712472, "step": 27660 }, { "epoch": 485.353982300885, "grad_norm": 1.5650506668407616e-08, "learning_rate": 0.06506495701228569, "loss": 0.0, "num_input_tokens_seen": 15715112, "step": 27665 }, { "epoch": 485.4424778761062, "grad_norm": 9.567106218355548e-09, "learning_rate": 0.06501641153624559, "loss": 0.0, "num_input_tokens_seen": 15717704, "step": 27670 }, { "epoch": 485.53097345132744, "grad_norm": 2.889923322868526e-08, "learning_rate": 0.06496787916574286, "loss": 0.0, "num_input_tokens_seen": 15720360, "step": 27675 }, { "epoch": 485.6194690265487, "grad_norm": 1.3565387924074912e-08, "learning_rate": 0.06491935990826168, "loss": 0.0, "num_input_tokens_seen": 15723352, "step": 27680 }, { "epoch": 485.7079646017699, "grad_norm": 3.368909062828607e-08, "learning_rate": 0.0648708537712844, "loss": 0.0, "num_input_tokens_seen": 15726600, "step": 27685 }, { "epoch": 485.79646017699116, "grad_norm": 1.4761341482483203e-08, "learning_rate": 0.06482236076229132, "loss": 0.0, "num_input_tokens_seen": 15729752, "step": 27690 }, { "epoch": 485.88495575221236, "grad_norm": 2.0190856986346262e-08, "learning_rate": 0.06477388088876056, "loss": 0.0, "num_input_tokens_seen": 15732568, "step": 27695 }, { "epoch": 485.9734513274336, "grad_norm": 2.1439051423044475e-08, "learning_rate": 0.06472541415816846, "loss": 0.0, "num_input_tokens_seen": 15735464, "step": 27700 }, { "epoch": 486.05309734513276, "grad_norm": 1.1702649516109886e-08, "learning_rate": 0.06467696057798909, "loss": 0.0, "num_input_tokens_seen": 15737976, "step": 27705 }, { "epoch": 486.14159292035396, "grad_norm": 2.1986894083170228e-08, "learning_rate": 0.0646285201556946, "loss": 0.0, "num_input_tokens_seen": 15740776, "step": 27710 }, { "epoch": 486.2300884955752, "grad_norm": 2.2645885167094093e-08, "learning_rate": 0.06458009289875521, "loss": 0.0, "num_input_tokens_seen": 15744008, "step": 27715 }, { "epoch": 486.3185840707965, "grad_norm": 1.5002578734879535e-08, "learning_rate": 0.0645316788146389, "loss": 0.0, "num_input_tokens_seen": 15746824, "step": 27720 }, { "epoch": 486.4070796460177, "grad_norm": 2.9855009131551924e-08, "learning_rate": 0.06448327791081175, "loss": 0.0, "num_input_tokens_seen": 15749496, "step": 27725 }, { "epoch": 486.49557522123894, "grad_norm": 1.9493542779969175e-08, "learning_rate": 0.0644348901947379, "loss": 0.0, "num_input_tokens_seen": 15751848, "step": 27730 }, { "epoch": 486.5840707964602, "grad_norm": 4.596095593001337e-08, "learning_rate": 0.06438651567387917, "loss": 0.0, "num_input_tokens_seen": 15755016, "step": 27735 }, { "epoch": 486.6725663716814, "grad_norm": 1.6629547516799903e-08, "learning_rate": 0.0643381543556957, "loss": 0.0, "num_input_tokens_seen": 15758056, "step": 27740 }, { "epoch": 486.76106194690266, "grad_norm": 2.371603180506554e-08, "learning_rate": 0.06428980624764526, "loss": 0.0, "num_input_tokens_seen": 15760712, "step": 27745 }, { "epoch": 486.8495575221239, "grad_norm": 4.484798310500082e-08, "learning_rate": 0.06424147135718378, "loss": 0.0, "num_input_tokens_seen": 15763544, "step": 27750 }, { "epoch": 486.9380530973451, "grad_norm": 2.0263472677584105e-08, "learning_rate": 0.06419314969176519, "loss": 0.0, "num_input_tokens_seen": 15766648, "step": 27755 }, { "epoch": 487.01769911504425, "grad_norm": 1.879711497565495e-08, "learning_rate": 0.06414484125884118, "loss": 0.0, "num_input_tokens_seen": 15769352, "step": 27760 }, { "epoch": 487.1061946902655, "grad_norm": 7.728368167647659e-09, "learning_rate": 0.06409654606586157, "loss": 0.0, "num_input_tokens_seen": 15771960, "step": 27765 }, { "epoch": 487.1946902654867, "grad_norm": 1.8745836882771982e-08, "learning_rate": 0.06404826412027415, "loss": 0.0, "num_input_tokens_seen": 15774872, "step": 27770 }, { "epoch": 487.283185840708, "grad_norm": 2.445794144989577e-08, "learning_rate": 0.06399999542952453, "loss": 0.0, "num_input_tokens_seen": 15777672, "step": 27775 }, { "epoch": 487.37168141592923, "grad_norm": 9.112805798849877e-08, "learning_rate": 0.0639517400010563, "loss": 0.0, "num_input_tokens_seen": 15780872, "step": 27780 }, { "epoch": 487.46017699115043, "grad_norm": 3.012747740172017e-08, "learning_rate": 0.06390349784231118, "loss": 0.0, "num_input_tokens_seen": 15784040, "step": 27785 }, { "epoch": 487.5486725663717, "grad_norm": 4.162907529803306e-08, "learning_rate": 0.06385526896072859, "loss": 0.0, "num_input_tokens_seen": 15786696, "step": 27790 }, { "epoch": 487.6371681415929, "grad_norm": 4.07349531883483e-08, "learning_rate": 0.06380705336374613, "loss": 0.0, "num_input_tokens_seen": 15789272, "step": 27795 }, { "epoch": 487.72566371681415, "grad_norm": 1.9945302298651768e-08, "learning_rate": 0.06375885105879918, "loss": 0.0, "num_input_tokens_seen": 15792680, "step": 27800 }, { "epoch": 487.72566371681415, "eval_loss": 0.8236088156700134, "eval_runtime": 0.9399, "eval_samples_per_second": 26.599, "eval_steps_per_second": 13.831, "num_input_tokens_seen": 15792680, "step": 27800 }, { "epoch": 487.8141592920354, "grad_norm": 1.9408627593975325e-08, "learning_rate": 0.06371066205332115, "loss": 0.0, "num_input_tokens_seen": 15795880, "step": 27805 }, { "epoch": 487.9026548672566, "grad_norm": 9.965536840184086e-09, "learning_rate": 0.06366248635474347, "loss": 0.0, "num_input_tokens_seen": 15798424, "step": 27810 }, { "epoch": 487.9911504424779, "grad_norm": 2.6575495581937503e-08, "learning_rate": 0.06361432397049532, "loss": 0.0, "num_input_tokens_seen": 15801624, "step": 27815 }, { "epoch": 488.070796460177, "grad_norm": 2.4796168673901775e-08, "learning_rate": 0.06356617490800408, "loss": 0.0, "num_input_tokens_seen": 15803736, "step": 27820 }, { "epoch": 488.1592920353982, "grad_norm": 4.6748819926278884e-08, "learning_rate": 0.06351803917469478, "loss": 0.0, "num_input_tokens_seen": 15806408, "step": 27825 }, { "epoch": 488.24778761061947, "grad_norm": 2.189625725179667e-08, "learning_rate": 0.06346991677799067, "loss": 0.0, "num_input_tokens_seen": 15809416, "step": 27830 }, { "epoch": 488.3362831858407, "grad_norm": 2.7210502295815786e-08, "learning_rate": 0.06342180772531283, "loss": 0.0, "num_input_tokens_seen": 15812392, "step": 27835 }, { "epoch": 488.42477876106193, "grad_norm": 3.458385222643301e-08, "learning_rate": 0.06337371202408021, "loss": 0.0, "num_input_tokens_seen": 15815896, "step": 27840 }, { "epoch": 488.5132743362832, "grad_norm": 1.2597342724518512e-08, "learning_rate": 0.06332562968170984, "loss": 0.0, "num_input_tokens_seen": 15819336, "step": 27845 }, { "epoch": 488.60176991150445, "grad_norm": 3.673816806326613e-08, "learning_rate": 0.06327756070561656, "loss": 0.0, "num_input_tokens_seen": 15822296, "step": 27850 }, { "epoch": 488.69026548672565, "grad_norm": 2.837514223585913e-08, "learning_rate": 0.06322950510321329, "loss": 0.0, "num_input_tokens_seen": 15825352, "step": 27855 }, { "epoch": 488.7787610619469, "grad_norm": 4.3403726834867484e-08, "learning_rate": 0.06318146288191076, "loss": 0.0, "num_input_tokens_seen": 15827672, "step": 27860 }, { "epoch": 488.86725663716817, "grad_norm": 3.095751210935305e-08, "learning_rate": 0.06313343404911763, "loss": 0.0, "num_input_tokens_seen": 15830072, "step": 27865 }, { "epoch": 488.95575221238937, "grad_norm": 1.8648185218239632e-08, "learning_rate": 0.0630854186122406, "loss": 0.0, "num_input_tokens_seen": 15832920, "step": 27870 }, { "epoch": 489.0353982300885, "grad_norm": 2.1929654536734233e-08, "learning_rate": 0.06303741657868431, "loss": 0.0, "num_input_tokens_seen": 15835384, "step": 27875 }, { "epoch": 489.12389380530976, "grad_norm": 5.489443566375485e-08, "learning_rate": 0.06298942795585115, "loss": 0.0, "num_input_tokens_seen": 15838136, "step": 27880 }, { "epoch": 489.21238938053096, "grad_norm": 1.785471681614581e-08, "learning_rate": 0.06294145275114167, "loss": 0.0, "num_input_tokens_seen": 15841240, "step": 27885 }, { "epoch": 489.3008849557522, "grad_norm": 2.340670413047974e-08, "learning_rate": 0.06289349097195428, "loss": 0.0, "num_input_tokens_seen": 15843800, "step": 27890 }, { "epoch": 489.3893805309734, "grad_norm": 5.529674496074222e-08, "learning_rate": 0.06284554262568516, "loss": 0.0, "num_input_tokens_seen": 15846568, "step": 27895 }, { "epoch": 489.4778761061947, "grad_norm": 1.0953157492110677e-08, "learning_rate": 0.06279760771972868, "loss": 0.0, "num_input_tokens_seen": 15849160, "step": 27900 }, { "epoch": 489.56637168141594, "grad_norm": 6.143937980596093e-08, "learning_rate": 0.06274968626147688, "loss": 0.0, "num_input_tokens_seen": 15852296, "step": 27905 }, { "epoch": 489.65486725663715, "grad_norm": 2.6424698873483976e-08, "learning_rate": 0.06270177825831993, "loss": 0.0, "num_input_tokens_seen": 15855480, "step": 27910 }, { "epoch": 489.7433628318584, "grad_norm": 2.735449911028809e-08, "learning_rate": 0.06265388371764587, "loss": 0.0, "num_input_tokens_seen": 15857880, "step": 27915 }, { "epoch": 489.83185840707966, "grad_norm": 9.226087449576426e-09, "learning_rate": 0.0626060026468406, "loss": 0.0, "num_input_tokens_seen": 15861416, "step": 27920 }, { "epoch": 489.92035398230087, "grad_norm": 2.228159168282673e-08, "learning_rate": 0.06255813505328794, "loss": 0.0, "num_input_tokens_seen": 15863928, "step": 27925 }, { "epoch": 490.0, "grad_norm": 2.348622274439549e-08, "learning_rate": 0.06251028094436978, "loss": 0.0, "num_input_tokens_seen": 15866288, "step": 27930 }, { "epoch": 490.08849557522126, "grad_norm": 2.2664737642230648e-08, "learning_rate": 0.06246244032746568, "loss": 0.0, "num_input_tokens_seen": 15868624, "step": 27935 }, { "epoch": 490.17699115044246, "grad_norm": 6.58412124820984e-09, "learning_rate": 0.06241461320995342, "loss": 0.0, "num_input_tokens_seen": 15871104, "step": 27940 }, { "epoch": 490.2654867256637, "grad_norm": 2.4284016575393252e-08, "learning_rate": 0.062366799599208426, "loss": 0.0, "num_input_tokens_seen": 15874144, "step": 27945 }, { "epoch": 490.353982300885, "grad_norm": 1.7138152230700143e-08, "learning_rate": 0.06231899950260418, "loss": 0.0, "num_input_tokens_seen": 15877168, "step": 27950 }, { "epoch": 490.4424778761062, "grad_norm": 2.356945394410559e-08, "learning_rate": 0.06227121292751214, "loss": 0.0, "num_input_tokens_seen": 15879920, "step": 27955 }, { "epoch": 490.53097345132744, "grad_norm": 3.791276981246483e-08, "learning_rate": 0.062223439881301496, "loss": 0.0, "num_input_tokens_seen": 15882768, "step": 27960 }, { "epoch": 490.6194690265487, "grad_norm": 1.990859743727924e-08, "learning_rate": 0.06217568037133948, "loss": 0.0, "num_input_tokens_seen": 15885952, "step": 27965 }, { "epoch": 490.7079646017699, "grad_norm": 4.34998845832979e-08, "learning_rate": 0.06212793440499126, "loss": 0.0, "num_input_tokens_seen": 15889264, "step": 27970 }, { "epoch": 490.79646017699116, "grad_norm": 7.3873769323995475e-09, "learning_rate": 0.062080201989619783, "loss": 0.0, "num_input_tokens_seen": 15891904, "step": 27975 }, { "epoch": 490.88495575221236, "grad_norm": 1.5405792197498158e-08, "learning_rate": 0.062032483132586094, "loss": 0.0, "num_input_tokens_seen": 15895296, "step": 27980 }, { "epoch": 490.9734513274336, "grad_norm": 9.528178246398511e-09, "learning_rate": 0.0619847778412489, "loss": 0.0, "num_input_tokens_seen": 15897952, "step": 27985 }, { "epoch": 491.05309734513276, "grad_norm": 5.402615954608336e-08, "learning_rate": 0.06193708612296509, "loss": 0.0, "num_input_tokens_seen": 15900272, "step": 27990 }, { "epoch": 491.14159292035396, "grad_norm": 3.92483592293047e-08, "learning_rate": 0.06188940798508923, "loss": 0.0, "num_input_tokens_seen": 15903680, "step": 27995 }, { "epoch": 491.2300884955752, "grad_norm": 1.2660685833054686e-08, "learning_rate": 0.06184174343497397, "loss": 0.0, "num_input_tokens_seen": 15906624, "step": 28000 }, { "epoch": 491.2300884955752, "eval_loss": 0.8142685294151306, "eval_runtime": 0.9239, "eval_samples_per_second": 27.059, "eval_steps_per_second": 14.071, "num_input_tokens_seen": 15906624, "step": 28000 }, { "epoch": 491.3185840707965, "grad_norm": 8.694552633414787e-09, "learning_rate": 0.061794092479969726, "loss": 0.0, "num_input_tokens_seen": 15909952, "step": 28005 }, { "epoch": 491.4070796460177, "grad_norm": 1.289255457948002e-08, "learning_rate": 0.06174645512742485, "loss": 0.0, "num_input_tokens_seen": 15912832, "step": 28010 }, { "epoch": 491.49557522123894, "grad_norm": 1.0240805536909647e-08, "learning_rate": 0.06169883138468565, "loss": 0.0, "num_input_tokens_seen": 15915616, "step": 28015 }, { "epoch": 491.5840707964602, "grad_norm": 2.0840547065859027e-08, "learning_rate": 0.06165122125909637, "loss": 0.0, "num_input_tokens_seen": 15918304, "step": 28020 }, { "epoch": 491.6725663716814, "grad_norm": 5.386306867194435e-08, "learning_rate": 0.061603624757998965, "loss": 0.0, "num_input_tokens_seen": 15921024, "step": 28025 }, { "epoch": 491.76106194690266, "grad_norm": 1.7394617302102233e-08, "learning_rate": 0.0615560418887335, "loss": 0.0, "num_input_tokens_seen": 15924064, "step": 28030 }, { "epoch": 491.8495575221239, "grad_norm": 4.1831796693259093e-08, "learning_rate": 0.06150847265863787, "loss": 0.0, "num_input_tokens_seen": 15926880, "step": 28035 }, { "epoch": 491.9380530973451, "grad_norm": 3.253825298088486e-08, "learning_rate": 0.061460917075047757, "loss": 0.0, "num_input_tokens_seen": 15929184, "step": 28040 }, { "epoch": 492.01769911504425, "grad_norm": 2.224761352920268e-08, "learning_rate": 0.06141337514529694, "loss": 0.0, "num_input_tokens_seen": 15931552, "step": 28045 }, { "epoch": 492.1061946902655, "grad_norm": 1.1628559448695341e-08, "learning_rate": 0.06136584687671687, "loss": 0.0, "num_input_tokens_seen": 15934192, "step": 28050 }, { "epoch": 492.1946902654867, "grad_norm": 1.1007839972876354e-08, "learning_rate": 0.061318332276637064, "loss": 0.0, "num_input_tokens_seen": 15937104, "step": 28055 }, { "epoch": 492.283185840708, "grad_norm": 3.031972894973478e-08, "learning_rate": 0.06127083135238491, "loss": 0.0, "num_input_tokens_seen": 15939728, "step": 28060 }, { "epoch": 492.37168141592923, "grad_norm": 2.0503948761074753e-08, "learning_rate": 0.06122334411128555, "loss": 0.0, "num_input_tokens_seen": 15943264, "step": 28065 }, { "epoch": 492.46017699115043, "grad_norm": 2.228597573150637e-08, "learning_rate": 0.06117587056066223, "loss": 0.0, "num_input_tokens_seen": 15945904, "step": 28070 }, { "epoch": 492.5486725663717, "grad_norm": 1.5795006191865468e-08, "learning_rate": 0.06112841070783589, "loss": 0.0, "num_input_tokens_seen": 15949104, "step": 28075 }, { "epoch": 492.6371681415929, "grad_norm": 7.878153240881147e-08, "learning_rate": 0.061080964560125406, "loss": 0.0, "num_input_tokens_seen": 15952032, "step": 28080 }, { "epoch": 492.72566371681415, "grad_norm": 3.1131495603631265e-08, "learning_rate": 0.06103353212484766, "loss": 0.0, "num_input_tokens_seen": 15954944, "step": 28085 }, { "epoch": 492.8141592920354, "grad_norm": 6.106213135126382e-08, "learning_rate": 0.06098611340931722, "loss": 0.0, "num_input_tokens_seen": 15957568, "step": 28090 }, { "epoch": 492.9026548672566, "grad_norm": 2.354839345741766e-08, "learning_rate": 0.06093870842084672, "loss": 0.0, "num_input_tokens_seen": 15960352, "step": 28095 }, { "epoch": 492.9911504424779, "grad_norm": 1.8488929498516882e-08, "learning_rate": 0.06089131716674666, "loss": 0.0, "num_input_tokens_seen": 15963360, "step": 28100 }, { "epoch": 493.070796460177, "grad_norm": 3.493559219691633e-08, "learning_rate": 0.060843939654325226, "loss": 0.0, "num_input_tokens_seen": 15966096, "step": 28105 }, { "epoch": 493.1592920353982, "grad_norm": 2.542613941614036e-08, "learning_rate": 0.06079657589088873, "loss": 0.0, "num_input_tokens_seen": 15968816, "step": 28110 }, { "epoch": 493.24778761061947, "grad_norm": 2.5576811779615127e-08, "learning_rate": 0.06074922588374126, "loss": 0.0, "num_input_tokens_seen": 15972160, "step": 28115 }, { "epoch": 493.3362831858407, "grad_norm": 2.2905075169887823e-08, "learning_rate": 0.06070188964018472, "loss": 0.0, "num_input_tokens_seen": 15974976, "step": 28120 }, { "epoch": 493.42477876106193, "grad_norm": 9.6494066070818e-09, "learning_rate": 0.06065456716751902, "loss": 0.0, "num_input_tokens_seen": 15977984, "step": 28125 }, { "epoch": 493.5132743362832, "grad_norm": 2.1603394628755268e-08, "learning_rate": 0.06060725847304182, "loss": 0.0, "num_input_tokens_seen": 15981376, "step": 28130 }, { "epoch": 493.60176991150445, "grad_norm": 3.144153026823915e-08, "learning_rate": 0.06055996356404877, "loss": 0.0, "num_input_tokens_seen": 15983952, "step": 28135 }, { "epoch": 493.69026548672565, "grad_norm": 2.370323670675134e-08, "learning_rate": 0.06051268244783327, "loss": 0.0, "num_input_tokens_seen": 15987056, "step": 28140 }, { "epoch": 493.7787610619469, "grad_norm": 4.5777778012734416e-08, "learning_rate": 0.06046541513168676, "loss": 0.0, "num_input_tokens_seen": 15989728, "step": 28145 }, { "epoch": 493.86725663716817, "grad_norm": 1.752186307157899e-08, "learning_rate": 0.060418161622898356, "loss": 0.0, "num_input_tokens_seen": 15992624, "step": 28150 }, { "epoch": 493.95575221238937, "grad_norm": 1.6456723983537813e-08, "learning_rate": 0.06037092192875521, "loss": 0.0, "num_input_tokens_seen": 15995184, "step": 28155 }, { "epoch": 494.0353982300885, "grad_norm": 1.7588114076261263e-08, "learning_rate": 0.060323696056542225, "loss": 0.0, "num_input_tokens_seen": 15997584, "step": 28160 }, { "epoch": 494.12389380530976, "grad_norm": 2.1456079579706966e-08, "learning_rate": 0.06027648401354229, "loss": 0.0, "num_input_tokens_seen": 16000352, "step": 28165 }, { "epoch": 494.21238938053096, "grad_norm": 2.761967188291692e-08, "learning_rate": 0.06022928580703601, "loss": 0.0, "num_input_tokens_seen": 16003136, "step": 28170 }, { "epoch": 494.3008849557522, "grad_norm": 3.057186859223293e-08, "learning_rate": 0.060182101444301986, "loss": 0.0, "num_input_tokens_seen": 16005744, "step": 28175 }, { "epoch": 494.3893805309734, "grad_norm": 2.4746283244780898e-08, "learning_rate": 0.06013493093261669, "loss": 0.0, "num_input_tokens_seen": 16008176, "step": 28180 }, { "epoch": 494.4778761061947, "grad_norm": 1.6186842088927733e-08, "learning_rate": 0.06008777427925432, "loss": 0.0, "num_input_tokens_seen": 16010960, "step": 28185 }, { "epoch": 494.56637168141594, "grad_norm": 2.602211068847282e-08, "learning_rate": 0.06004063149148705, "loss": 0.0, "num_input_tokens_seen": 16013568, "step": 28190 }, { "epoch": 494.65486725663715, "grad_norm": 1.6462561092112082e-08, "learning_rate": 0.05999350257658497, "loss": 0.0, "num_input_tokens_seen": 16016880, "step": 28195 }, { "epoch": 494.7433628318584, "grad_norm": 3.4576402185848565e-08, "learning_rate": 0.05994638754181582, "loss": 0.0, "num_input_tokens_seen": 16019936, "step": 28200 }, { "epoch": 494.7433628318584, "eval_loss": 0.8141090869903564, "eval_runtime": 0.9456, "eval_samples_per_second": 26.439, "eval_steps_per_second": 13.748, "num_input_tokens_seen": 16019936, "step": 28200 }, { "epoch": 494.83185840707966, "grad_norm": 7.953757652501281e-09, "learning_rate": 0.059899286394445445, "loss": 0.0, "num_input_tokens_seen": 16022880, "step": 28205 }, { "epoch": 494.92035398230087, "grad_norm": 2.8022602904798077e-08, "learning_rate": 0.059852199141737346, "loss": 0.0, "num_input_tokens_seen": 16025536, "step": 28210 }, { "epoch": 495.0, "grad_norm": 6.728201107364384e-09, "learning_rate": 0.05980512579095304, "loss": 0.0, "num_input_tokens_seen": 16028400, "step": 28215 }, { "epoch": 495.08849557522126, "grad_norm": 3.6913878176392245e-08, "learning_rate": 0.05975806634935181, "loss": 0.0, "num_input_tokens_seen": 16031440, "step": 28220 }, { "epoch": 495.17699115044246, "grad_norm": 1.9348675550645567e-08, "learning_rate": 0.05971102082419076, "loss": 0.0, "num_input_tokens_seen": 16034208, "step": 28225 }, { "epoch": 495.2654867256637, "grad_norm": 5.969614846890181e-08, "learning_rate": 0.05966398922272492, "loss": 0.0, "num_input_tokens_seen": 16037008, "step": 28230 }, { "epoch": 495.353982300885, "grad_norm": 1.1078350681259508e-08, "learning_rate": 0.059616971552207236, "loss": 0.0, "num_input_tokens_seen": 16039616, "step": 28235 }, { "epoch": 495.4424778761062, "grad_norm": 9.736626616074773e-09, "learning_rate": 0.059569967819888305, "loss": 0.0, "num_input_tokens_seen": 16042784, "step": 28240 }, { "epoch": 495.53097345132744, "grad_norm": 1.0250367665776139e-08, "learning_rate": 0.05952297803301681, "loss": 0.0, "num_input_tokens_seen": 16045504, "step": 28245 }, { "epoch": 495.6194690265487, "grad_norm": 3.726715647189849e-08, "learning_rate": 0.059476002198839056, "loss": 0.0, "num_input_tokens_seen": 16048560, "step": 28250 }, { "epoch": 495.7079646017699, "grad_norm": 2.0539919987072608e-08, "learning_rate": 0.05942904032459935, "loss": 0.0, "num_input_tokens_seen": 16051424, "step": 28255 }, { "epoch": 495.79646017699116, "grad_norm": 4.3088018486514557e-08, "learning_rate": 0.05938209241753987, "loss": 0.0, "num_input_tokens_seen": 16054320, "step": 28260 }, { "epoch": 495.88495575221236, "grad_norm": 4.867461100843684e-08, "learning_rate": 0.05933515848490046, "loss": 0.0, "num_input_tokens_seen": 16057616, "step": 28265 }, { "epoch": 495.9734513274336, "grad_norm": 1.3896435113736061e-08, "learning_rate": 0.059288238533918985, "loss": 0.0, "num_input_tokens_seen": 16060192, "step": 28270 }, { "epoch": 496.05309734513276, "grad_norm": 1.1646048569957657e-08, "learning_rate": 0.05924133257183113, "loss": 0.0, "num_input_tokens_seen": 16062768, "step": 28275 }, { "epoch": 496.14159292035396, "grad_norm": 3.000115711415674e-08, "learning_rate": 0.059194440605870285, "loss": 0.0, "num_input_tokens_seen": 16065344, "step": 28280 }, { "epoch": 496.2300884955752, "grad_norm": 2.2600032067998654e-08, "learning_rate": 0.059147562643267884, "loss": 0.0, "num_input_tokens_seen": 16068416, "step": 28285 }, { "epoch": 496.3185840707965, "grad_norm": 1.3439295010186925e-08, "learning_rate": 0.059100698691253055, "loss": 0.0, "num_input_tokens_seen": 16071088, "step": 28290 }, { "epoch": 496.4070796460177, "grad_norm": 2.6902037930653933e-08, "learning_rate": 0.05905384875705273, "loss": 0.0, "num_input_tokens_seen": 16073616, "step": 28295 }, { "epoch": 496.49557522123894, "grad_norm": 4.9219959663560076e-08, "learning_rate": 0.05900701284789189, "loss": 0.0, "num_input_tokens_seen": 16076064, "step": 28300 }, { "epoch": 496.5840707964602, "grad_norm": 3.5713867418962764e-08, "learning_rate": 0.058960190970993115, "loss": 0.0, "num_input_tokens_seen": 16079136, "step": 28305 }, { "epoch": 496.6725663716814, "grad_norm": 3.053920138995636e-08, "learning_rate": 0.058913383133576955, "loss": 0.0, "num_input_tokens_seen": 16081920, "step": 28310 }, { "epoch": 496.76106194690266, "grad_norm": 2.0759999941333263e-08, "learning_rate": 0.05886658934286185, "loss": 0.0, "num_input_tokens_seen": 16085072, "step": 28315 }, { "epoch": 496.8495575221239, "grad_norm": 1.439461971841638e-08, "learning_rate": 0.058819809606063846, "loss": 0.0, "num_input_tokens_seen": 16088336, "step": 28320 }, { "epoch": 496.9380530973451, "grad_norm": 2.7589235784830635e-08, "learning_rate": 0.05877304393039711, "loss": 0.0, "num_input_tokens_seen": 16091408, "step": 28325 }, { "epoch": 497.01769911504425, "grad_norm": 1.142614980409462e-08, "learning_rate": 0.05872629232307338, "loss": 0.0, "num_input_tokens_seen": 16094072, "step": 28330 }, { "epoch": 497.1061946902655, "grad_norm": 1.3095178275079888e-08, "learning_rate": 0.05867955479130239, "loss": 0.0, "num_input_tokens_seen": 16096984, "step": 28335 }, { "epoch": 497.1946902654867, "grad_norm": 1.2280130690101032e-08, "learning_rate": 0.058632831342291705, "loss": 0.0, "num_input_tokens_seen": 16099432, "step": 28340 }, { "epoch": 497.283185840708, "grad_norm": 1.3576397783765515e-08, "learning_rate": 0.05858612198324655, "loss": 0.0, "num_input_tokens_seen": 16102472, "step": 28345 }, { "epoch": 497.37168141592923, "grad_norm": 3.565315154219206e-08, "learning_rate": 0.05853942672137025, "loss": 0.0, "num_input_tokens_seen": 16105208, "step": 28350 }, { "epoch": 497.46017699115043, "grad_norm": 2.4666229947456486e-08, "learning_rate": 0.05849274556386363, "loss": 0.0, "num_input_tokens_seen": 16108248, "step": 28355 }, { "epoch": 497.5486725663717, "grad_norm": 2.1424915175316528e-08, "learning_rate": 0.05844607851792567, "loss": 0.0, "num_input_tokens_seen": 16111864, "step": 28360 }, { "epoch": 497.6371681415929, "grad_norm": 1.9844481613517928e-08, "learning_rate": 0.058399425590752924, "loss": 0.0, "num_input_tokens_seen": 16114456, "step": 28365 }, { "epoch": 497.72566371681415, "grad_norm": 2.148550493075163e-08, "learning_rate": 0.05835278678953985, "loss": 0.0, "num_input_tokens_seen": 16117240, "step": 28370 }, { "epoch": 497.8141592920354, "grad_norm": 1.4044058360695999e-08, "learning_rate": 0.05830616212147874, "loss": 0.0, "num_input_tokens_seen": 16119992, "step": 28375 }, { "epoch": 497.9026548672566, "grad_norm": 1.9756232205736524e-08, "learning_rate": 0.058259551593759784, "loss": 0.0, "num_input_tokens_seen": 16122744, "step": 28380 }, { "epoch": 497.9911504424779, "grad_norm": 3.442849916268642e-08, "learning_rate": 0.058212955213570804, "loss": 0.0, "num_input_tokens_seen": 16125528, "step": 28385 }, { "epoch": 498.070796460177, "grad_norm": 1.3224599193506492e-08, "learning_rate": 0.0581663729880976, "loss": 0.0, "num_input_tokens_seen": 16127848, "step": 28390 }, { "epoch": 498.1592920353982, "grad_norm": 3.631077305499275e-08, "learning_rate": 0.05811980492452379, "loss": 0.0, "num_input_tokens_seen": 16130488, "step": 28395 }, { "epoch": 498.24778761061947, "grad_norm": 2.4789184038809253e-08, "learning_rate": 0.058073251030030644, "loss": 0.0, "num_input_tokens_seen": 16133784, "step": 28400 }, { "epoch": 498.24778761061947, "eval_loss": 0.8328125476837158, "eval_runtime": 0.9273, "eval_samples_per_second": 26.961, "eval_steps_per_second": 14.02, "num_input_tokens_seen": 16133784, "step": 28400 }, { "epoch": 498.3362831858407, "grad_norm": 1.4849216078971494e-08, "learning_rate": 0.05802671131179747, "loss": 0.0, "num_input_tokens_seen": 16137032, "step": 28405 }, { "epoch": 498.42477876106193, "grad_norm": 3.991146613202545e-08, "learning_rate": 0.057980185777001154, "loss": 0.0, "num_input_tokens_seen": 16139944, "step": 28410 }, { "epoch": 498.5132743362832, "grad_norm": 1.8264449508365033e-08, "learning_rate": 0.057933674432816606, "loss": 0.0, "num_input_tokens_seen": 16142856, "step": 28415 }, { "epoch": 498.60176991150445, "grad_norm": 2.6703254718540848e-08, "learning_rate": 0.05788717728641648, "loss": 0.0, "num_input_tokens_seen": 16145848, "step": 28420 }, { "epoch": 498.69026548672565, "grad_norm": 3.6586325080634197e-08, "learning_rate": 0.057840694344971126, "loss": 0.0, "num_input_tokens_seen": 16148488, "step": 28425 }, { "epoch": 498.7787610619469, "grad_norm": 2.8330678247812102e-08, "learning_rate": 0.0577942256156489, "loss": 0.0, "num_input_tokens_seen": 16151384, "step": 28430 }, { "epoch": 498.86725663716817, "grad_norm": 2.2354765150112144e-08, "learning_rate": 0.057747771105615804, "loss": 0.0, "num_input_tokens_seen": 16154248, "step": 28435 }, { "epoch": 498.95575221238937, "grad_norm": 2.8928404560701892e-08, "learning_rate": 0.05770133082203568, "loss": 0.0, "num_input_tokens_seen": 16157208, "step": 28440 }, { "epoch": 499.0353982300885, "grad_norm": 2.0818939461264563e-08, "learning_rate": 0.0576549047720703, "loss": 0.0, "num_input_tokens_seen": 16159192, "step": 28445 }, { "epoch": 499.12389380530976, "grad_norm": 2.9270646351164942e-08, "learning_rate": 0.05760849296287902, "loss": 0.0, "num_input_tokens_seen": 16161672, "step": 28450 }, { "epoch": 499.21238938053096, "grad_norm": 4.931879260539063e-08, "learning_rate": 0.05756209540161919, "loss": 0.0, "num_input_tokens_seen": 16164632, "step": 28455 }, { "epoch": 499.3008849557522, "grad_norm": 3.217629895857499e-08, "learning_rate": 0.05751571209544595, "loss": 0.0, "num_input_tokens_seen": 16167432, "step": 28460 }, { "epoch": 499.3893805309734, "grad_norm": 2.0592622718140774e-08, "learning_rate": 0.057469343051512085, "loss": 0.0, "num_input_tokens_seen": 16170008, "step": 28465 }, { "epoch": 499.4778761061947, "grad_norm": 2.5735415576377818e-08, "learning_rate": 0.057422988276968324, "loss": 0.0, "num_input_tokens_seen": 16172872, "step": 28470 }, { "epoch": 499.56637168141594, "grad_norm": 2.3149690164814274e-08, "learning_rate": 0.05737664777896323, "loss": 0.0, "num_input_tokens_seen": 16176408, "step": 28475 }, { "epoch": 499.65486725663715, "grad_norm": 2.550867961303993e-08, "learning_rate": 0.057330321564642975, "loss": 0.0, "num_input_tokens_seen": 16179272, "step": 28480 }, { "epoch": 499.7433628318584, "grad_norm": 3.6891190546839425e-08, "learning_rate": 0.05728400964115174, "loss": 0.0, "num_input_tokens_seen": 16182472, "step": 28485 }, { "epoch": 499.83185840707966, "grad_norm": 2.0965389424532077e-08, "learning_rate": 0.057237712015631305, "loss": 0.0, "num_input_tokens_seen": 16185496, "step": 28490 }, { "epoch": 499.92035398230087, "grad_norm": 1.5196134128814265e-08, "learning_rate": 0.057191428695221425, "loss": 0.0, "num_input_tokens_seen": 16188728, "step": 28495 }, { "epoch": 500.0, "grad_norm": 3.021751382448201e-08, "learning_rate": 0.05714515968705958, "loss": 0.0, "num_input_tokens_seen": 16190856, "step": 28500 }, { "epoch": 500.08849557522126, "grad_norm": 1.1447442105350092e-08, "learning_rate": 0.05709890499828099, "loss": 0.0, "num_input_tokens_seen": 16194104, "step": 28505 }, { "epoch": 500.17699115044246, "grad_norm": 2.6149480802928338e-08, "learning_rate": 0.05705266463601868, "loss": 0.0, "num_input_tokens_seen": 16196776, "step": 28510 }, { "epoch": 500.2654867256637, "grad_norm": 3.982164642479802e-08, "learning_rate": 0.057006438607403565, "loss": 0.0, "num_input_tokens_seen": 16199848, "step": 28515 }, { "epoch": 500.353982300885, "grad_norm": 1.975698182832275e-08, "learning_rate": 0.056960226919564205, "loss": 0.0, "num_input_tokens_seen": 16202504, "step": 28520 }, { "epoch": 500.4424778761062, "grad_norm": 2.6764048755012482e-08, "learning_rate": 0.05691402957962713, "loss": 0.0, "num_input_tokens_seen": 16205448, "step": 28525 }, { "epoch": 500.53097345132744, "grad_norm": 3.057009223539353e-08, "learning_rate": 0.05686784659471642, "loss": 0.0, "num_input_tokens_seen": 16208456, "step": 28530 }, { "epoch": 500.6194690265487, "grad_norm": 4.706728162773288e-08, "learning_rate": 0.056821677971954136, "loss": 0.0, "num_input_tokens_seen": 16211032, "step": 28535 }, { "epoch": 500.7079646017699, "grad_norm": 1.940224159113768e-08, "learning_rate": 0.05677552371846012, "loss": 0.0, "num_input_tokens_seen": 16214136, "step": 28540 }, { "epoch": 500.79646017699116, "grad_norm": 3.322316288745242e-08, "learning_rate": 0.05672938384135182, "loss": 0.0, "num_input_tokens_seen": 16216984, "step": 28545 }, { "epoch": 500.88495575221236, "grad_norm": 2.349199590412354e-08, "learning_rate": 0.05668325834774465, "loss": 0.0, "num_input_tokens_seen": 16220072, "step": 28550 }, { "epoch": 500.9734513274336, "grad_norm": 2.1922977211374928e-08, "learning_rate": 0.05663714724475177, "loss": 0.0, "num_input_tokens_seen": 16222648, "step": 28555 }, { "epoch": 501.05309734513276, "grad_norm": 4.224552085929645e-08, "learning_rate": 0.05659105053948403, "loss": 0.0, "num_input_tokens_seen": 16225112, "step": 28560 }, { "epoch": 501.14159292035396, "grad_norm": 2.9150140079536868e-08, "learning_rate": 0.056544968239050176, "loss": 0.0, "num_input_tokens_seen": 16228200, "step": 28565 }, { "epoch": 501.2300884955752, "grad_norm": 6.210702707676319e-08, "learning_rate": 0.056498900350556616, "loss": 0.0, "num_input_tokens_seen": 16231048, "step": 28570 }, { "epoch": 501.3185840707965, "grad_norm": 3.161594008815882e-08, "learning_rate": 0.05645284688110766, "loss": 0.0, "num_input_tokens_seen": 16233848, "step": 28575 }, { "epoch": 501.4070796460177, "grad_norm": 2.6499064276208628e-08, "learning_rate": 0.05640680783780532, "loss": 0.0, "num_input_tokens_seen": 16236648, "step": 28580 }, { "epoch": 501.49557522123894, "grad_norm": 1.4709315543370849e-08, "learning_rate": 0.056360783227749324, "loss": 0.0, "num_input_tokens_seen": 16239512, "step": 28585 }, { "epoch": 501.5840707964602, "grad_norm": 7.689310166369978e-08, "learning_rate": 0.05631477305803728, "loss": 0.0, "num_input_tokens_seen": 16242424, "step": 28590 }, { "epoch": 501.6725663716814, "grad_norm": 2.3818163441546858e-08, "learning_rate": 0.05626877733576462, "loss": 0.0, "num_input_tokens_seen": 16245352, "step": 28595 }, { "epoch": 501.76106194690266, "grad_norm": 2.467539594874779e-08, "learning_rate": 0.05622279606802435, "loss": 0.0, "num_input_tokens_seen": 16248200, "step": 28600 }, { "epoch": 501.76106194690266, "eval_loss": 0.818134605884552, "eval_runtime": 0.9336, "eval_samples_per_second": 26.778, "eval_steps_per_second": 13.924, "num_input_tokens_seen": 16248200, "step": 28600 }, { "epoch": 501.8495575221239, "grad_norm": 1.694440321386992e-08, "learning_rate": 0.05617682926190744, "loss": 0.0, "num_input_tokens_seen": 16251432, "step": 28605 }, { "epoch": 501.9380530973451, "grad_norm": 3.18505790630752e-08, "learning_rate": 0.05613087692450248, "loss": 0.0, "num_input_tokens_seen": 16254008, "step": 28610 }, { "epoch": 502.01769911504425, "grad_norm": 2.2534404564567012e-08, "learning_rate": 0.05608493906289592, "loss": 0.0, "num_input_tokens_seen": 16256376, "step": 28615 }, { "epoch": 502.1061946902655, "grad_norm": 2.131108445269092e-08, "learning_rate": 0.05603901568417201, "loss": 0.0, "num_input_tokens_seen": 16258984, "step": 28620 }, { "epoch": 502.1946902654867, "grad_norm": 4.512608242634997e-08, "learning_rate": 0.055993106795412625, "loss": 0.0, "num_input_tokens_seen": 16261784, "step": 28625 }, { "epoch": 502.283185840708, "grad_norm": 2.373664464982994e-08, "learning_rate": 0.05594721240369759, "loss": 0.0, "num_input_tokens_seen": 16264840, "step": 28630 }, { "epoch": 502.37168141592923, "grad_norm": 3.599945586074682e-08, "learning_rate": 0.055901332516104296, "loss": 0.0, "num_input_tokens_seen": 16267768, "step": 28635 }, { "epoch": 502.46017699115043, "grad_norm": 2.127608134117054e-08, "learning_rate": 0.05585546713970804, "loss": 0.0, "num_input_tokens_seen": 16270488, "step": 28640 }, { "epoch": 502.5486725663717, "grad_norm": 2.104763119348263e-08, "learning_rate": 0.05580961628158189, "loss": 0.0, "num_input_tokens_seen": 16273240, "step": 28645 }, { "epoch": 502.6371681415929, "grad_norm": 3.63764414146317e-08, "learning_rate": 0.05576377994879659, "loss": 0.0, "num_input_tokens_seen": 16276024, "step": 28650 }, { "epoch": 502.72566371681415, "grad_norm": 7.436902205171236e-09, "learning_rate": 0.05571795814842063, "loss": 0.0, "num_input_tokens_seen": 16278952, "step": 28655 }, { "epoch": 502.8141592920354, "grad_norm": 3.817187987920079e-08, "learning_rate": 0.05567215088752037, "loss": 0.0, "num_input_tokens_seen": 16282248, "step": 28660 }, { "epoch": 502.9026548672566, "grad_norm": 4.1582907783777046e-08, "learning_rate": 0.05562635817315981, "loss": 0.0, "num_input_tokens_seen": 16285032, "step": 28665 }, { "epoch": 502.9911504424779, "grad_norm": 2.5961027105836365e-08, "learning_rate": 0.05558058001240083, "loss": 0.0, "num_input_tokens_seen": 16287864, "step": 28670 }, { "epoch": 503.070796460177, "grad_norm": 1.8941047841281033e-08, "learning_rate": 0.055534816412302915, "loss": 0.0, "num_input_tokens_seen": 16290344, "step": 28675 }, { "epoch": 503.1592920353982, "grad_norm": 2.0407469492056407e-08, "learning_rate": 0.055489067379923436, "loss": 0.0, "num_input_tokens_seen": 16292936, "step": 28680 }, { "epoch": 503.24778761061947, "grad_norm": 4.07561024928782e-08, "learning_rate": 0.055443332922317505, "loss": 0.0, "num_input_tokens_seen": 16296264, "step": 28685 }, { "epoch": 503.3362831858407, "grad_norm": 4.481668725020427e-08, "learning_rate": 0.055397613046537876, "loss": 0.0, "num_input_tokens_seen": 16298648, "step": 28690 }, { "epoch": 503.42477876106193, "grad_norm": 3.546748672533795e-08, "learning_rate": 0.055351907759635145, "loss": 0.0, "num_input_tokens_seen": 16301512, "step": 28695 }, { "epoch": 503.5132743362832, "grad_norm": 1.4936768266693434e-08, "learning_rate": 0.05530621706865772, "loss": 0.0, "num_input_tokens_seen": 16304440, "step": 28700 }, { "epoch": 503.60176991150445, "grad_norm": 1.9022813546598627e-08, "learning_rate": 0.055260540980651564, "loss": 0.0, "num_input_tokens_seen": 16307096, "step": 28705 }, { "epoch": 503.69026548672565, "grad_norm": 3.676198190305513e-08, "learning_rate": 0.05521487950266062, "loss": 0.0, "num_input_tokens_seen": 16310216, "step": 28710 }, { "epoch": 503.7787610619469, "grad_norm": 1.7432252974458606e-08, "learning_rate": 0.055169232641726344, "loss": 0.0, "num_input_tokens_seen": 16312904, "step": 28715 }, { "epoch": 503.86725663716817, "grad_norm": 4.344085979823831e-08, "learning_rate": 0.055123600404888166, "loss": 0.0, "num_input_tokens_seen": 16316040, "step": 28720 }, { "epoch": 503.95575221238937, "grad_norm": 1.5790016405503593e-08, "learning_rate": 0.05507798279918309, "loss": 0.0, "num_input_tokens_seen": 16319224, "step": 28725 }, { "epoch": 504.0353982300885, "grad_norm": 2.304961732590982e-08, "learning_rate": 0.0550323798316459, "loss": 0.0, "num_input_tokens_seen": 16321928, "step": 28730 }, { "epoch": 504.12389380530976, "grad_norm": 1.9296637177035336e-08, "learning_rate": 0.05498679150930916, "loss": 0.0, "num_input_tokens_seen": 16324424, "step": 28735 }, { "epoch": 504.21238938053096, "grad_norm": 1.720329123600095e-08, "learning_rate": 0.05494121783920323, "loss": 0.0, "num_input_tokens_seen": 16327096, "step": 28740 }, { "epoch": 504.3008849557522, "grad_norm": 3.848321483701511e-08, "learning_rate": 0.05489565882835605, "loss": 0.0, "num_input_tokens_seen": 16329848, "step": 28745 }, { "epoch": 504.3893805309734, "grad_norm": 1.5898670824299188e-08, "learning_rate": 0.05485011448379348, "loss": 0.0, "num_input_tokens_seen": 16332984, "step": 28750 }, { "epoch": 504.4778761061947, "grad_norm": 4.4323787307121165e-08, "learning_rate": 0.05480458481253893, "loss": 0.0, "num_input_tokens_seen": 16335496, "step": 28755 }, { "epoch": 504.56637168141594, "grad_norm": 9.92130289034776e-09, "learning_rate": 0.054759069821613715, "loss": 0.0, "num_input_tokens_seen": 16338520, "step": 28760 }, { "epoch": 504.65486725663715, "grad_norm": 2.261002229886344e-08, "learning_rate": 0.05471356951803683, "loss": 0.0, "num_input_tokens_seen": 16341640, "step": 28765 }, { "epoch": 504.7433628318584, "grad_norm": 1.652521319783773e-08, "learning_rate": 0.054668083908824945, "loss": 0.0, "num_input_tokens_seen": 16344840, "step": 28770 }, { "epoch": 504.83185840707966, "grad_norm": 2.936182141866084e-08, "learning_rate": 0.054622613000992526, "loss": 0.0, "num_input_tokens_seen": 16347896, "step": 28775 }, { "epoch": 504.92035398230087, "grad_norm": 2.476798321993101e-08, "learning_rate": 0.05457715680155182, "loss": 0.0, "num_input_tokens_seen": 16350840, "step": 28780 }, { "epoch": 505.0, "grad_norm": 1.8739035567705287e-07, "learning_rate": 0.05453171531751265, "loss": 0.0, "num_input_tokens_seen": 16353448, "step": 28785 }, { "epoch": 505.08849557522126, "grad_norm": 6.131436691703129e-09, "learning_rate": 0.05448628855588276, "loss": 0.0, "num_input_tokens_seen": 16356024, "step": 28790 }, { "epoch": 505.17699115044246, "grad_norm": 4.4934754583891845e-08, "learning_rate": 0.05444087652366746, "loss": 0.0, "num_input_tokens_seen": 16359016, "step": 28795 }, { "epoch": 505.2654867256637, "grad_norm": 1.5114716589437194e-08, "learning_rate": 0.05439547922786984, "loss": 0.0, "num_input_tokens_seen": 16361560, "step": 28800 }, { "epoch": 505.2654867256637, "eval_loss": 0.8467451333999634, "eval_runtime": 0.9417, "eval_samples_per_second": 26.549, "eval_steps_per_second": 13.805, "num_input_tokens_seen": 16361560, "step": 28800 }, { "epoch": 505.353982300885, "grad_norm": 2.672634558109621e-08, "learning_rate": 0.0543500966754908, "loss": 0.0, "num_input_tokens_seen": 16364184, "step": 28805 }, { "epoch": 505.4424778761062, "grad_norm": 3.228395684118368e-08, "learning_rate": 0.05430472887352882, "loss": 0.0, "num_input_tokens_seen": 16367160, "step": 28810 }, { "epoch": 505.53097345132744, "grad_norm": 2.6152303433946145e-08, "learning_rate": 0.05425937582898023, "loss": 0.0, "num_input_tokens_seen": 16370264, "step": 28815 }, { "epoch": 505.6194690265487, "grad_norm": 2.8168180676857446e-08, "learning_rate": 0.054214037548839085, "loss": 0.0, "num_input_tokens_seen": 16373224, "step": 28820 }, { "epoch": 505.7079646017699, "grad_norm": 3.929902092636439e-08, "learning_rate": 0.05416871404009703, "loss": 0.0, "num_input_tokens_seen": 16375992, "step": 28825 }, { "epoch": 505.79646017699116, "grad_norm": 2.5480760612595077e-08, "learning_rate": 0.054123405309743605, "loss": 0.0, "num_input_tokens_seen": 16379000, "step": 28830 }, { "epoch": 505.88495575221236, "grad_norm": 2.3334717269563043e-08, "learning_rate": 0.0540781113647659, "loss": 0.0, "num_input_tokens_seen": 16381640, "step": 28835 }, { "epoch": 505.9734513274336, "grad_norm": 1.1558757506691109e-08, "learning_rate": 0.054032832212148836, "loss": 0.0, "num_input_tokens_seen": 16384680, "step": 28840 }, { "epoch": 506.05309734513276, "grad_norm": 3.1090152674551064e-08, "learning_rate": 0.0539875678588751, "loss": 0.0, "num_input_tokens_seen": 16387568, "step": 28845 }, { "epoch": 506.14159292035396, "grad_norm": 2.8511927041563467e-08, "learning_rate": 0.05394231831192492, "loss": 0.0, "num_input_tokens_seen": 16390688, "step": 28850 }, { "epoch": 506.2300884955752, "grad_norm": 8.384094485336391e-08, "learning_rate": 0.05389708357827639, "loss": 0.0, "num_input_tokens_seen": 16393184, "step": 28855 }, { "epoch": 506.3185840707965, "grad_norm": 2.509328922428722e-08, "learning_rate": 0.05385186366490533, "loss": 0.0, "num_input_tokens_seen": 16396352, "step": 28860 }, { "epoch": 506.4070796460177, "grad_norm": 2.4643224350029413e-08, "learning_rate": 0.053806658578785166, "loss": 0.0, "num_input_tokens_seen": 16399376, "step": 28865 }, { "epoch": 506.49557522123894, "grad_norm": 2.6407469988498633e-08, "learning_rate": 0.05376146832688705, "loss": 0.0, "num_input_tokens_seen": 16402032, "step": 28870 }, { "epoch": 506.5840707964602, "grad_norm": 2.2452860903854344e-08, "learning_rate": 0.053716292916179964, "loss": 0.0, "num_input_tokens_seen": 16405216, "step": 28875 }, { "epoch": 506.6725663716814, "grad_norm": 4.3088057566365023e-08, "learning_rate": 0.05367113235363045, "loss": 0.0, "num_input_tokens_seen": 16408240, "step": 28880 }, { "epoch": 506.76106194690266, "grad_norm": 3.5525879127362714e-08, "learning_rate": 0.05362598664620289, "loss": 0.0, "num_input_tokens_seen": 16410608, "step": 28885 }, { "epoch": 506.8495575221239, "grad_norm": 3.1250678489413986e-08, "learning_rate": 0.053580855800859285, "loss": 0.0, "num_input_tokens_seen": 16413552, "step": 28890 }, { "epoch": 506.9380530973451, "grad_norm": 1.1197639260274173e-08, "learning_rate": 0.05353573982455938, "loss": 0.0, "num_input_tokens_seen": 16416544, "step": 28895 }, { "epoch": 507.01769911504425, "grad_norm": 1.572987429199202e-08, "learning_rate": 0.053490638724260686, "loss": 0.0, "num_input_tokens_seen": 16419008, "step": 28900 }, { "epoch": 507.1061946902655, "grad_norm": 3.269613557677076e-08, "learning_rate": 0.05344555250691827, "loss": 0.0, "num_input_tokens_seen": 16422096, "step": 28905 }, { "epoch": 507.1946902654867, "grad_norm": 2.189130654528526e-08, "learning_rate": 0.053400481179485086, "loss": 0.0, "num_input_tokens_seen": 16424464, "step": 28910 }, { "epoch": 507.283185840708, "grad_norm": 2.6447834144960325e-08, "learning_rate": 0.05335542474891159, "loss": 0.0, "num_input_tokens_seen": 16427456, "step": 28915 }, { "epoch": 507.37168141592923, "grad_norm": 2.484727268381448e-08, "learning_rate": 0.053310383222146124, "loss": 0.0, "num_input_tokens_seen": 16429840, "step": 28920 }, { "epoch": 507.46017699115043, "grad_norm": 2.6976909595077814e-08, "learning_rate": 0.053265356606134684, "loss": 0.0, "num_input_tokens_seen": 16432976, "step": 28925 }, { "epoch": 507.5486725663717, "grad_norm": 2.7755039155863415e-08, "learning_rate": 0.053220344907820856, "loss": 0.0, "num_input_tokens_seen": 16435584, "step": 28930 }, { "epoch": 507.6371681415929, "grad_norm": 2.863412440490265e-08, "learning_rate": 0.05317534813414608, "loss": 0.0, "num_input_tokens_seen": 16438512, "step": 28935 }, { "epoch": 507.72566371681415, "grad_norm": 2.9745237384304346e-08, "learning_rate": 0.05313036629204942, "loss": 0.0, "num_input_tokens_seen": 16441184, "step": 28940 }, { "epoch": 507.8141592920354, "grad_norm": 2.8400426899111153e-08, "learning_rate": 0.05308539938846756, "loss": 0.0, "num_input_tokens_seen": 16443776, "step": 28945 }, { "epoch": 507.9026548672566, "grad_norm": 8.082785996066377e-09, "learning_rate": 0.05304044743033507, "loss": 0.0, "num_input_tokens_seen": 16446544, "step": 28950 }, { "epoch": 507.9911504424779, "grad_norm": 1.980573394178009e-08, "learning_rate": 0.05299551042458401, "loss": 0.0, "num_input_tokens_seen": 16450080, "step": 28955 }, { "epoch": 508.070796460177, "grad_norm": 1.5059377744819358e-08, "learning_rate": 0.052950588378144266, "loss": 0.0, "num_input_tokens_seen": 16452536, "step": 28960 }, { "epoch": 508.1592920353982, "grad_norm": 4.387280228002055e-08, "learning_rate": 0.052905681297943465, "loss": 0.0, "num_input_tokens_seen": 16455144, "step": 28965 }, { "epoch": 508.24778761061947, "grad_norm": 3.6386680335454e-08, "learning_rate": 0.0528607891909067, "loss": 0.0, "num_input_tokens_seen": 16458168, "step": 28970 }, { "epoch": 508.3362831858407, "grad_norm": 2.744015503708397e-08, "learning_rate": 0.05281591206395697, "loss": 0.0, "num_input_tokens_seen": 16461112, "step": 28975 }, { "epoch": 508.42477876106193, "grad_norm": 2.9299224379997213e-08, "learning_rate": 0.05277104992401496, "loss": 0.0, "num_input_tokens_seen": 16464376, "step": 28980 }, { "epoch": 508.5132743362832, "grad_norm": 3.4909103163727195e-08, "learning_rate": 0.05272620277799884, "loss": 0.0, "num_input_tokens_seen": 16467480, "step": 28985 }, { "epoch": 508.60176991150445, "grad_norm": 3.998922082359968e-08, "learning_rate": 0.05268137063282473, "loss": 0.0, "num_input_tokens_seen": 16470344, "step": 28990 }, { "epoch": 508.69026548672565, "grad_norm": 4.239520379201167e-08, "learning_rate": 0.0526365534954062, "loss": 0.0, "num_input_tokens_seen": 16472792, "step": 28995 }, { "epoch": 508.7787610619469, "grad_norm": 3.674533743946995e-08, "learning_rate": 0.052591751372654656, "loss": 0.0, "num_input_tokens_seen": 16475624, "step": 29000 }, { "epoch": 508.7787610619469, "eval_loss": 0.844280481338501, "eval_runtime": 0.9382, "eval_samples_per_second": 26.647, "eval_steps_per_second": 13.857, "num_input_tokens_seen": 16475624, "step": 29000 }, { "epoch": 508.86725663716817, "grad_norm": 2.8284812714218788e-08, "learning_rate": 0.05254696427147921, "loss": 0.0, "num_input_tokens_seen": 16478728, "step": 29005 }, { "epoch": 508.95575221238937, "grad_norm": 2.916135777297768e-08, "learning_rate": 0.052502192198786546, "loss": 0.0, "num_input_tokens_seen": 16481352, "step": 29010 }, { "epoch": 509.0353982300885, "grad_norm": 2.9801244139093797e-08, "learning_rate": 0.05245743516148103, "loss": 0.0, "num_input_tokens_seen": 16483560, "step": 29015 }, { "epoch": 509.12389380530976, "grad_norm": 1.0416578710703561e-08, "learning_rate": 0.05241269316646486, "loss": 0.0, "num_input_tokens_seen": 16486200, "step": 29020 }, { "epoch": 509.21238938053096, "grad_norm": 2.9459432226985882e-08, "learning_rate": 0.052367966220637725, "loss": 0.0, "num_input_tokens_seen": 16488856, "step": 29025 }, { "epoch": 509.3008849557522, "grad_norm": 3.927716818452609e-08, "learning_rate": 0.05232325433089716, "loss": 0.0, "num_input_tokens_seen": 16491672, "step": 29030 }, { "epoch": 509.3893805309734, "grad_norm": 8.482283675448343e-08, "learning_rate": 0.052278557504138214, "loss": 0.0, "num_input_tokens_seen": 16494904, "step": 29035 }, { "epoch": 509.4778761061947, "grad_norm": 4.7782826584352733e-08, "learning_rate": 0.05223387574725372, "loss": 0.0, "num_input_tokens_seen": 16497816, "step": 29040 }, { "epoch": 509.56637168141594, "grad_norm": 3.986857777249497e-08, "learning_rate": 0.05218920906713428, "loss": 0.0, "num_input_tokens_seen": 16500984, "step": 29045 }, { "epoch": 509.65486725663715, "grad_norm": 2.0131782463295167e-08, "learning_rate": 0.05214455747066789, "loss": 0.0, "num_input_tokens_seen": 16503864, "step": 29050 }, { "epoch": 509.7433628318584, "grad_norm": 1.9117724292527782e-08, "learning_rate": 0.05209992096474048, "loss": 0.0, "num_input_tokens_seen": 16507080, "step": 29055 }, { "epoch": 509.83185840707966, "grad_norm": 2.2873281935176237e-08, "learning_rate": 0.05205529955623559, "loss": 0.0, "num_input_tokens_seen": 16509768, "step": 29060 }, { "epoch": 509.92035398230087, "grad_norm": 3.72949067184436e-08, "learning_rate": 0.052010693252034314, "loss": 0.0, "num_input_tokens_seen": 16512296, "step": 29065 }, { "epoch": 510.0, "grad_norm": 8.723119293563286e-08, "learning_rate": 0.0519661020590156, "loss": 0.0, "num_input_tokens_seen": 16514688, "step": 29070 }, { "epoch": 510.08849557522126, "grad_norm": 1.6141571634875618e-08, "learning_rate": 0.05192152598405586, "loss": 0.0, "num_input_tokens_seen": 16517760, "step": 29075 }, { "epoch": 510.17699115044246, "grad_norm": 2.649319696956809e-08, "learning_rate": 0.05187696503402941, "loss": 0.0, "num_input_tokens_seen": 16520672, "step": 29080 }, { "epoch": 510.2654867256637, "grad_norm": 2.7812822267492265e-08, "learning_rate": 0.05183241921580798, "loss": 0.0, "num_input_tokens_seen": 16523504, "step": 29085 }, { "epoch": 510.353982300885, "grad_norm": 4.02806143995349e-08, "learning_rate": 0.051787888536261206, "loss": 0.0, "num_input_tokens_seen": 16526496, "step": 29090 }, { "epoch": 510.4424778761062, "grad_norm": 2.1580598641435245e-08, "learning_rate": 0.051743373002256184, "loss": 0.0, "num_input_tokens_seen": 16529376, "step": 29095 }, { "epoch": 510.53097345132744, "grad_norm": 1.849710784540548e-08, "learning_rate": 0.05169887262065787, "loss": 0.0, "num_input_tokens_seen": 16532480, "step": 29100 }, { "epoch": 510.6194690265487, "grad_norm": 2.0600948502647043e-08, "learning_rate": 0.051654387398328665, "loss": 0.0, "num_input_tokens_seen": 16535088, "step": 29105 }, { "epoch": 510.7079646017699, "grad_norm": 1.8335377660605445e-08, "learning_rate": 0.05160991734212888, "loss": 0.0, "num_input_tokens_seen": 16538384, "step": 29110 }, { "epoch": 510.79646017699116, "grad_norm": 2.7240655953164605e-08, "learning_rate": 0.051565462458916224, "loss": 0.0, "num_input_tokens_seen": 16541168, "step": 29115 }, { "epoch": 510.88495575221236, "grad_norm": 8.116147753867153e-09, "learning_rate": 0.05152102275554627, "loss": 0.0, "num_input_tokens_seen": 16543808, "step": 29120 }, { "epoch": 510.9734513274336, "grad_norm": 1.8128849532672575e-08, "learning_rate": 0.05147659823887222, "loss": 0.0, "num_input_tokens_seen": 16546624, "step": 29125 }, { "epoch": 511.05309734513276, "grad_norm": 3.000089066063083e-08, "learning_rate": 0.05143218891574479, "loss": 0.0, "num_input_tokens_seen": 16548824, "step": 29130 }, { "epoch": 511.14159292035396, "grad_norm": 2.809193588859671e-08, "learning_rate": 0.0513877947930125, "loss": 0.0, "num_input_tokens_seen": 16551416, "step": 29135 }, { "epoch": 511.2300884955752, "grad_norm": 5.4165720797527683e-08, "learning_rate": 0.051343415877521566, "loss": 0.0, "num_input_tokens_seen": 16554536, "step": 29140 }, { "epoch": 511.3185840707965, "grad_norm": 3.113231272777739e-08, "learning_rate": 0.051299052176115634, "loss": 0.0, "num_input_tokens_seen": 16557032, "step": 29145 }, { "epoch": 511.4070796460177, "grad_norm": 1.5424014065956726e-08, "learning_rate": 0.051254703695636256, "loss": 0.0, "num_input_tokens_seen": 16559880, "step": 29150 }, { "epoch": 511.49557522123894, "grad_norm": 2.0386742960454285e-08, "learning_rate": 0.05121037044292249, "loss": 0.0, "num_input_tokens_seen": 16562824, "step": 29155 }, { "epoch": 511.5840707964602, "grad_norm": 2.165664270137313e-08, "learning_rate": 0.05116605242481101, "loss": 0.0, "num_input_tokens_seen": 16565800, "step": 29160 }, { "epoch": 511.6725663716814, "grad_norm": 3.212280930142697e-08, "learning_rate": 0.05112174964813634, "loss": 0.0, "num_input_tokens_seen": 16568408, "step": 29165 }, { "epoch": 511.76106194690266, "grad_norm": 2.8603329482734807e-08, "learning_rate": 0.05107746211973038, "loss": 0.0, "num_input_tokens_seen": 16571496, "step": 29170 }, { "epoch": 511.8495575221239, "grad_norm": 3.3619109274241055e-08, "learning_rate": 0.05103318984642291, "loss": 0.0, "num_input_tokens_seen": 16575064, "step": 29175 }, { "epoch": 511.9380530973451, "grad_norm": 1.4348926491436487e-08, "learning_rate": 0.05098893283504131, "loss": 0.0, "num_input_tokens_seen": 16577848, "step": 29180 }, { "epoch": 512.0176991150443, "grad_norm": 3.472871767939978e-08, "learning_rate": 0.050944691092410475, "loss": 0.0, "num_input_tokens_seen": 16580344, "step": 29185 }, { "epoch": 512.1061946902655, "grad_norm": 2.0656349519754258e-08, "learning_rate": 0.05090046462535313, "loss": 0.0, "num_input_tokens_seen": 16583176, "step": 29190 }, { "epoch": 512.1946902654868, "grad_norm": 2.828059741943889e-08, "learning_rate": 0.050856253440689454, "loss": 0.0, "num_input_tokens_seen": 16586280, "step": 29195 }, { "epoch": 512.2831858407079, "grad_norm": 1.752897915707763e-08, "learning_rate": 0.050812057545237405, "loss": 0.0, "num_input_tokens_seen": 16588984, "step": 29200 }, { "epoch": 512.2831858407079, "eval_loss": 0.8483059406280518, "eval_runtime": 0.9324, "eval_samples_per_second": 26.812, "eval_steps_per_second": 13.942, "num_input_tokens_seen": 16588984, "step": 29200 }, { "epoch": 512.3716814159292, "grad_norm": 1.4201051001805354e-08, "learning_rate": 0.0507678769458126, "loss": 0.0, "num_input_tokens_seen": 16591880, "step": 29205 }, { "epoch": 512.4601769911504, "grad_norm": 5.1089703134721276e-08, "learning_rate": 0.050723711649228155, "loss": 0.0, "num_input_tokens_seen": 16595352, "step": 29210 }, { "epoch": 512.5486725663717, "grad_norm": 2.5490205501910168e-08, "learning_rate": 0.05067956166229496, "loss": 0.0, "num_input_tokens_seen": 16597560, "step": 29215 }, { "epoch": 512.637168141593, "grad_norm": 3.743438270475963e-08, "learning_rate": 0.05063542699182155, "loss": 0.0, "num_input_tokens_seen": 16600440, "step": 29220 }, { "epoch": 512.7256637168142, "grad_norm": 2.9439753745919006e-08, "learning_rate": 0.050591307644613996, "loss": 0.0, "num_input_tokens_seen": 16603464, "step": 29225 }, { "epoch": 512.8141592920354, "grad_norm": 2.175112889801767e-08, "learning_rate": 0.05054720362747599, "loss": 0.0, "num_input_tokens_seen": 16606360, "step": 29230 }, { "epoch": 512.9026548672566, "grad_norm": 5.2740748657242875e-08, "learning_rate": 0.050503114947209035, "loss": 0.0, "num_input_tokens_seen": 16609224, "step": 29235 }, { "epoch": 512.9911504424779, "grad_norm": 3.6781099055360755e-08, "learning_rate": 0.05045904161061207, "loss": 0.0, "num_input_tokens_seen": 16612152, "step": 29240 }, { "epoch": 513.070796460177, "grad_norm": 4.0774597920290034e-08, "learning_rate": 0.05041498362448185, "loss": 0.0, "num_input_tokens_seen": 16614920, "step": 29245 }, { "epoch": 513.1592920353983, "grad_norm": 3.065213860509175e-08, "learning_rate": 0.05037094099561256, "loss": 0.0, "num_input_tokens_seen": 16617624, "step": 29250 }, { "epoch": 513.2477876106195, "grad_norm": 3.9994269229737256e-08, "learning_rate": 0.05032691373079624, "loss": 0.0, "num_input_tokens_seen": 16620296, "step": 29255 }, { "epoch": 513.3362831858407, "grad_norm": 3.560701955507284e-08, "learning_rate": 0.05028290183682234, "loss": 0.0, "num_input_tokens_seen": 16623352, "step": 29260 }, { "epoch": 513.4247787610619, "grad_norm": 3.003927773193027e-08, "learning_rate": 0.050238905320478096, "loss": 0.0, "num_input_tokens_seen": 16626120, "step": 29265 }, { "epoch": 513.5132743362832, "grad_norm": 5.296265470633443e-08, "learning_rate": 0.05019492418854838, "loss": 0.0, "num_input_tokens_seen": 16629112, "step": 29270 }, { "epoch": 513.6017699115044, "grad_norm": 1.1777527397782706e-08, "learning_rate": 0.05015095844781554, "loss": 0.0, "num_input_tokens_seen": 16631800, "step": 29275 }, { "epoch": 513.6902654867257, "grad_norm": 1.597134691166957e-08, "learning_rate": 0.05010700810505968, "loss": 0.0, "num_input_tokens_seen": 16634680, "step": 29280 }, { "epoch": 513.7787610619469, "grad_norm": 1.0867720057206043e-08, "learning_rate": 0.05006307316705856, "loss": 0.0, "num_input_tokens_seen": 16637736, "step": 29285 }, { "epoch": 513.8672566371681, "grad_norm": 2.098987117449269e-08, "learning_rate": 0.0500191536405874, "loss": 0.0, "num_input_tokens_seen": 16640120, "step": 29290 }, { "epoch": 513.9557522123894, "grad_norm": 3.402622894554952e-08, "learning_rate": 0.04997524953241922, "loss": 0.0, "num_input_tokens_seen": 16643400, "step": 29295 }, { "epoch": 514.0353982300885, "grad_norm": 2.429042211815613e-08, "learning_rate": 0.049931360849324556, "loss": 0.0, "num_input_tokens_seen": 16645576, "step": 29300 }, { "epoch": 514.1238938053098, "grad_norm": 3.951849336658597e-08, "learning_rate": 0.04988748759807155, "loss": 0.0, "num_input_tokens_seen": 16648184, "step": 29305 }, { "epoch": 514.212389380531, "grad_norm": 2.726404346731215e-08, "learning_rate": 0.0498436297854261, "loss": 0.0, "num_input_tokens_seen": 16650536, "step": 29310 }, { "epoch": 514.3008849557522, "grad_norm": 2.015318401049626e-08, "learning_rate": 0.04979978741815152, "loss": 0.0, "num_input_tokens_seen": 16653352, "step": 29315 }, { "epoch": 514.3893805309734, "grad_norm": 4.053950775073645e-08, "learning_rate": 0.04975596050300891, "loss": 0.0, "num_input_tokens_seen": 16656040, "step": 29320 }, { "epoch": 514.4778761061947, "grad_norm": 5.401480507316592e-08, "learning_rate": 0.049712149046757005, "loss": 0.0, "num_input_tokens_seen": 16659160, "step": 29325 }, { "epoch": 514.566371681416, "grad_norm": 3.0904171666179536e-08, "learning_rate": 0.04966835305615194, "loss": 0.0, "num_input_tokens_seen": 16662312, "step": 29330 }, { "epoch": 514.6548672566372, "grad_norm": 3.263906123152083e-08, "learning_rate": 0.049624572537947755, "loss": 0.0, "num_input_tokens_seen": 16665432, "step": 29335 }, { "epoch": 514.7433628318585, "grad_norm": 5.911403277991667e-08, "learning_rate": 0.04958080749889582, "loss": 0.0, "num_input_tokens_seen": 16668440, "step": 29340 }, { "epoch": 514.8318584070796, "grad_norm": 3.7967637922520225e-08, "learning_rate": 0.049537057945745304, "loss": 0.0, "num_input_tokens_seen": 16671416, "step": 29345 }, { "epoch": 514.9203539823009, "grad_norm": 4.673431419632834e-08, "learning_rate": 0.049493323885243, "loss": 0.0, "num_input_tokens_seen": 16674584, "step": 29350 }, { "epoch": 515.0, "grad_norm": 1.0012173845552752e-07, "learning_rate": 0.04944960532413318, "loss": 0.0, "num_input_tokens_seen": 16676800, "step": 29355 }, { "epoch": 515.0884955752213, "grad_norm": 2.979025026661475e-08, "learning_rate": 0.049405902269157774, "loss": 0.0, "num_input_tokens_seen": 16679280, "step": 29360 }, { "epoch": 515.1769911504425, "grad_norm": 1.896069257156796e-08, "learning_rate": 0.04936221472705646, "loss": 0.0, "num_input_tokens_seen": 16681936, "step": 29365 }, { "epoch": 515.2654867256637, "grad_norm": 7.168839744053912e-09, "learning_rate": 0.04931854270456632, "loss": 0.0, "num_input_tokens_seen": 16684608, "step": 29370 }, { "epoch": 515.3539823008849, "grad_norm": 3.334028875201511e-08, "learning_rate": 0.049274886208422075, "loss": 0.0, "num_input_tokens_seen": 16688048, "step": 29375 }, { "epoch": 515.4424778761062, "grad_norm": 3.951340232788425e-08, "learning_rate": 0.049231245245356235, "loss": 0.0, "num_input_tokens_seen": 16691232, "step": 29380 }, { "epoch": 515.5309734513274, "grad_norm": 6.65333246274713e-08, "learning_rate": 0.049187619822098655, "loss": 0.0, "num_input_tokens_seen": 16693840, "step": 29385 }, { "epoch": 515.6194690265487, "grad_norm": 2.4580064206247698e-08, "learning_rate": 0.04914400994537705, "loss": 0.0, "num_input_tokens_seen": 16696720, "step": 29390 }, { "epoch": 515.70796460177, "grad_norm": 1.3508534735251487e-08, "learning_rate": 0.049100415621916485, "loss": 0.0, "num_input_tokens_seen": 16699984, "step": 29395 }, { "epoch": 515.7964601769911, "grad_norm": 3.430937667303624e-08, "learning_rate": 0.04905683685843981, "loss": 0.0, "num_input_tokens_seen": 16702496, "step": 29400 }, { "epoch": 515.7964601769911, "eval_loss": 0.8532394766807556, "eval_runtime": 0.9436, "eval_samples_per_second": 26.494, "eval_steps_per_second": 13.777, "num_input_tokens_seen": 16702496, "step": 29400 }, { "epoch": 515.8849557522124, "grad_norm": 4.999023417440185e-08, "learning_rate": 0.049013273661667495, "loss": 0.0, "num_input_tokens_seen": 16705664, "step": 29405 }, { "epoch": 515.9734513274336, "grad_norm": 4.204308012845104e-08, "learning_rate": 0.048969726038317396, "loss": 0.0, "num_input_tokens_seen": 16708528, "step": 29410 }, { "epoch": 516.0530973451328, "grad_norm": 8.744005697280954e-08, "learning_rate": 0.048926193995105206, "loss": 0.0, "num_input_tokens_seen": 16710584, "step": 29415 }, { "epoch": 516.141592920354, "grad_norm": 1.793705450836569e-08, "learning_rate": 0.048882677538744035, "loss": 0.0, "num_input_tokens_seen": 16713224, "step": 29420 }, { "epoch": 516.2300884955753, "grad_norm": 5.983815754007082e-08, "learning_rate": 0.048839176675944715, "loss": 0.0, "num_input_tokens_seen": 16716648, "step": 29425 }, { "epoch": 516.3185840707964, "grad_norm": 6.715271183566074e-08, "learning_rate": 0.04879569141341566, "loss": 0.0, "num_input_tokens_seen": 16719608, "step": 29430 }, { "epoch": 516.4070796460177, "grad_norm": 2.9746280105769074e-08, "learning_rate": 0.04875222175786274, "loss": 0.0, "num_input_tokens_seen": 16722216, "step": 29435 }, { "epoch": 516.4955752212389, "grad_norm": 6.658923012992091e-08, "learning_rate": 0.04870876771598966, "loss": 0.0, "num_input_tokens_seen": 16725176, "step": 29440 }, { "epoch": 516.5840707964602, "grad_norm": 2.517070640806196e-08, "learning_rate": 0.04866532929449744, "loss": 0.0, "num_input_tokens_seen": 16727832, "step": 29445 }, { "epoch": 516.6725663716815, "grad_norm": 2.5854749452491887e-08, "learning_rate": 0.048621906500084945, "loss": 0.0, "num_input_tokens_seen": 16730760, "step": 29450 }, { "epoch": 516.7610619469026, "grad_norm": 2.742712013059645e-08, "learning_rate": 0.04857849933944845, "loss": 0.0, "num_input_tokens_seen": 16733416, "step": 29455 }, { "epoch": 516.8495575221239, "grad_norm": 1.4542333559575127e-08, "learning_rate": 0.048535107819281866, "loss": 0.0, "num_input_tokens_seen": 16736632, "step": 29460 }, { "epoch": 516.9380530973451, "grad_norm": 3.3786982100991736e-08, "learning_rate": 0.04849173194627675, "loss": 0.0, "num_input_tokens_seen": 16739592, "step": 29465 }, { "epoch": 517.0176991150443, "grad_norm": 2.862393344571501e-08, "learning_rate": 0.04844837172712223, "loss": 0.0, "num_input_tokens_seen": 16741720, "step": 29470 }, { "epoch": 517.1061946902655, "grad_norm": 4.980795509368363e-08, "learning_rate": 0.04840502716850494, "loss": 0.0, "num_input_tokens_seen": 16744920, "step": 29475 }, { "epoch": 517.1946902654868, "grad_norm": 3.2123029569675055e-08, "learning_rate": 0.04836169827710916, "loss": 0.0, "num_input_tokens_seen": 16747704, "step": 29480 }, { "epoch": 517.2831858407079, "grad_norm": 4.1118287441577195e-08, "learning_rate": 0.04831838505961684, "loss": 0.0, "num_input_tokens_seen": 16750760, "step": 29485 }, { "epoch": 517.3716814159292, "grad_norm": 5.207387943073627e-08, "learning_rate": 0.048275087522707295, "loss": 0.0, "num_input_tokens_seen": 16753672, "step": 29490 }, { "epoch": 517.4601769911504, "grad_norm": 5.071173703186105e-08, "learning_rate": 0.04823180567305766, "loss": 0.0, "num_input_tokens_seen": 16756328, "step": 29495 }, { "epoch": 517.5486725663717, "grad_norm": 2.4260797815145452e-08, "learning_rate": 0.04818853951734244, "loss": 0.0, "num_input_tokens_seen": 16758728, "step": 29500 }, { "epoch": 517.637168141593, "grad_norm": 2.6837739142138162e-08, "learning_rate": 0.04814528906223387, "loss": 0.0, "num_input_tokens_seen": 16761656, "step": 29505 }, { "epoch": 517.7256637168142, "grad_norm": 4.9602871143861194e-08, "learning_rate": 0.04810205431440177, "loss": 0.0, "num_input_tokens_seen": 16764280, "step": 29510 }, { "epoch": 517.8141592920354, "grad_norm": 2.4037381862740403e-08, "learning_rate": 0.04805883528051341, "loss": 0.0, "num_input_tokens_seen": 16767160, "step": 29515 }, { "epoch": 517.9026548672566, "grad_norm": 3.178397278702505e-08, "learning_rate": 0.048015631967233685, "loss": 0.0, "num_input_tokens_seen": 16770168, "step": 29520 }, { "epoch": 517.9911504424779, "grad_norm": 2.292821577043469e-08, "learning_rate": 0.04797244438122517, "loss": 0.0, "num_input_tokens_seen": 16773192, "step": 29525 }, { "epoch": 518.070796460177, "grad_norm": 2.4429491318755936e-08, "learning_rate": 0.04792927252914784, "loss": 0.0, "num_input_tokens_seen": 16775816, "step": 29530 }, { "epoch": 518.1592920353983, "grad_norm": 4.6681964960271216e-08, "learning_rate": 0.04788611641765944, "loss": 0.0, "num_input_tokens_seen": 16778696, "step": 29535 }, { "epoch": 518.2477876106195, "grad_norm": 1.8904907861383435e-08, "learning_rate": 0.04784297605341508, "loss": 0.0, "num_input_tokens_seen": 16781720, "step": 29540 }, { "epoch": 518.3362831858407, "grad_norm": 4.786094720543588e-08, "learning_rate": 0.04779985144306761, "loss": 0.0, "num_input_tokens_seen": 16785320, "step": 29545 }, { "epoch": 518.4247787610619, "grad_norm": 3.780314727919176e-08, "learning_rate": 0.047756742593267405, "loss": 0.0, "num_input_tokens_seen": 16788184, "step": 29550 }, { "epoch": 518.5132743362832, "grad_norm": 2.8370763516250008e-08, "learning_rate": 0.047713649510662315, "loss": 0.0, "num_input_tokens_seen": 16790488, "step": 29555 }, { "epoch": 518.6017699115044, "grad_norm": 1.6876551711675347e-08, "learning_rate": 0.04767057220189789, "loss": 0.0, "num_input_tokens_seen": 16793080, "step": 29560 }, { "epoch": 518.6902654867257, "grad_norm": 9.829182090470567e-08, "learning_rate": 0.04762751067361722, "loss": 0.0, "num_input_tokens_seen": 16796024, "step": 29565 }, { "epoch": 518.7787610619469, "grad_norm": 4.740511627687738e-08, "learning_rate": 0.04758446493246086, "loss": 0.0, "num_input_tokens_seen": 16799032, "step": 29570 }, { "epoch": 518.8672566371681, "grad_norm": 3.457608599433115e-08, "learning_rate": 0.047541434985067084, "loss": 0.0, "num_input_tokens_seen": 16802152, "step": 29575 }, { "epoch": 518.9557522123894, "grad_norm": 4.6436429812501956e-08, "learning_rate": 0.047498420838071556, "loss": 0.0, "num_input_tokens_seen": 16804712, "step": 29580 }, { "epoch": 519.0353982300885, "grad_norm": 2.8709404631399593e-08, "learning_rate": 0.04745542249810772, "loss": 0.0, "num_input_tokens_seen": 16806992, "step": 29585 }, { "epoch": 519.1238938053098, "grad_norm": 3.0813335882839965e-08, "learning_rate": 0.047412439971806324, "loss": 0.0, "num_input_tokens_seen": 16810432, "step": 29590 }, { "epoch": 519.212389380531, "grad_norm": 4.479647586208557e-08, "learning_rate": 0.04736947326579592, "loss": 0.0, "num_input_tokens_seen": 16813504, "step": 29595 }, { "epoch": 519.3008849557522, "grad_norm": 5.5190778169844634e-08, "learning_rate": 0.04732652238670245, "loss": 0.0, "num_input_tokens_seen": 16816272, "step": 29600 }, { "epoch": 519.3008849557522, "eval_loss": 0.8267478346824646, "eval_runtime": 0.9426, "eval_samples_per_second": 26.522, "eval_steps_per_second": 13.792, "num_input_tokens_seen": 16816272, "step": 29600 }, { "epoch": 519.3893805309734, "grad_norm": 3.44584840661355e-08, "learning_rate": 0.04728358734114952, "loss": 0.0, "num_input_tokens_seen": 16818864, "step": 29605 }, { "epoch": 519.4778761061947, "grad_norm": 5.3015135392797674e-08, "learning_rate": 0.04724066813575821, "loss": 0.0, "num_input_tokens_seen": 16821888, "step": 29610 }, { "epoch": 519.566371681416, "grad_norm": 3.833364203842393e-08, "learning_rate": 0.04719776477714729, "loss": 0.0, "num_input_tokens_seen": 16824464, "step": 29615 }, { "epoch": 519.6548672566372, "grad_norm": 2.3377213054232016e-08, "learning_rate": 0.047154877271932856, "loss": 0.0, "num_input_tokens_seen": 16827456, "step": 29620 }, { "epoch": 519.7433628318585, "grad_norm": 1.2596831133748765e-08, "learning_rate": 0.0471120056267288, "loss": 0.0, "num_input_tokens_seen": 16830080, "step": 29625 }, { "epoch": 519.8318584070796, "grad_norm": 2.1195917909722084e-08, "learning_rate": 0.047069149848146495, "loss": 0.0, "num_input_tokens_seen": 16832912, "step": 29630 }, { "epoch": 519.9203539823009, "grad_norm": 3.0343471735250205e-08, "learning_rate": 0.04702630994279473, "loss": 0.0, "num_input_tokens_seen": 16835984, "step": 29635 }, { "epoch": 520.0, "grad_norm": 1.8847362781571064e-08, "learning_rate": 0.046983485917280035, "loss": 0.0, "num_input_tokens_seen": 16838416, "step": 29640 }, { "epoch": 520.0884955752213, "grad_norm": 2.3111866198632924e-08, "learning_rate": 0.04694067777820644, "loss": 0.0, "num_input_tokens_seen": 16841680, "step": 29645 }, { "epoch": 520.1769911504425, "grad_norm": 3.839771878233478e-08, "learning_rate": 0.046897885532175415, "loss": 0.0, "num_input_tokens_seen": 16844688, "step": 29650 }, { "epoch": 520.2654867256637, "grad_norm": 2.0054155669413376e-08, "learning_rate": 0.04685510918578613, "loss": 0.0, "num_input_tokens_seen": 16847440, "step": 29655 }, { "epoch": 520.3539823008849, "grad_norm": 2.5513697821111236e-08, "learning_rate": 0.04681234874563519, "loss": 0.0, "num_input_tokens_seen": 16850000, "step": 29660 }, { "epoch": 520.4424778761062, "grad_norm": 6.973971267143497e-08, "learning_rate": 0.046769604218316836, "loss": 0.0, "num_input_tokens_seen": 16852880, "step": 29665 }, { "epoch": 520.5309734513274, "grad_norm": 3.7972913702333244e-08, "learning_rate": 0.04672687561042279, "loss": 0.0, "num_input_tokens_seen": 16855152, "step": 29670 }, { "epoch": 520.6194690265487, "grad_norm": 2.3902821055798995e-08, "learning_rate": 0.046684162928542286, "loss": 0.0, "num_input_tokens_seen": 16857888, "step": 29675 }, { "epoch": 520.70796460177, "grad_norm": 3.1049424364937295e-08, "learning_rate": 0.04664146617926222, "loss": 0.0, "num_input_tokens_seen": 16860784, "step": 29680 }, { "epoch": 520.7964601769911, "grad_norm": 5.769940614186453e-08, "learning_rate": 0.046598785369167, "loss": 0.0, "num_input_tokens_seen": 16863808, "step": 29685 }, { "epoch": 520.8849557522124, "grad_norm": 2.6933346219948362e-08, "learning_rate": 0.046556120504838434, "loss": 0.0, "num_input_tokens_seen": 16866656, "step": 29690 }, { "epoch": 520.9734513274336, "grad_norm": 5.3680867750927064e-08, "learning_rate": 0.04651347159285609, "loss": 0.0, "num_input_tokens_seen": 16869920, "step": 29695 }, { "epoch": 521.0530973451328, "grad_norm": 7.420442216243828e-08, "learning_rate": 0.04647083863979688, "loss": 0.0, "num_input_tokens_seen": 16872592, "step": 29700 }, { "epoch": 521.141592920354, "grad_norm": 2.2767252971789276e-08, "learning_rate": 0.04642822165223538, "loss": 0.0, "num_input_tokens_seen": 16875248, "step": 29705 }, { "epoch": 521.2300884955753, "grad_norm": 2.310086166801284e-08, "learning_rate": 0.046385620636743716, "loss": 0.0, "num_input_tokens_seen": 16877952, "step": 29710 }, { "epoch": 521.3185840707964, "grad_norm": 4.741790249340738e-08, "learning_rate": 0.04634303559989141, "loss": 0.0, "num_input_tokens_seen": 16880848, "step": 29715 }, { "epoch": 521.4070796460177, "grad_norm": 3.698026063148063e-08, "learning_rate": 0.046300466548245635, "loss": 0.0, "num_input_tokens_seen": 16883664, "step": 29720 }, { "epoch": 521.4955752212389, "grad_norm": 1.3696088707604304e-08, "learning_rate": 0.04625791348837114, "loss": 0.0, "num_input_tokens_seen": 16886560, "step": 29725 }, { "epoch": 521.5840707964602, "grad_norm": 1.4152764293839937e-08, "learning_rate": 0.046215376426830095, "loss": 0.0, "num_input_tokens_seen": 16889408, "step": 29730 }, { "epoch": 521.6725663716815, "grad_norm": 3.6289883098561404e-08, "learning_rate": 0.04617285537018219, "loss": 0.0, "num_input_tokens_seen": 16892192, "step": 29735 }, { "epoch": 521.7610619469026, "grad_norm": 3.5760123751060746e-08, "learning_rate": 0.046130350324984803, "loss": 0.0, "num_input_tokens_seen": 16894688, "step": 29740 }, { "epoch": 521.8495575221239, "grad_norm": 3.4232353840479846e-08, "learning_rate": 0.046087861297792666, "loss": 0.0, "num_input_tokens_seen": 16897888, "step": 29745 }, { "epoch": 521.9380530973451, "grad_norm": 3.545966364981723e-08, "learning_rate": 0.0460453882951582, "loss": 0.0, "num_input_tokens_seen": 16901152, "step": 29750 }, { "epoch": 522.0176991150443, "grad_norm": 3.7371506778072217e-08, "learning_rate": 0.04600293132363119, "loss": 0.0, "num_input_tokens_seen": 16903504, "step": 29755 }, { "epoch": 522.1061946902655, "grad_norm": 5.1345608653718955e-08, "learning_rate": 0.045960490389759086, "loss": 0.0, "num_input_tokens_seen": 16905968, "step": 29760 }, { "epoch": 522.1946902654868, "grad_norm": 1.703772944949833e-08, "learning_rate": 0.04591806550008685, "loss": 0.0, "num_input_tokens_seen": 16908688, "step": 29765 }, { "epoch": 522.2831858407079, "grad_norm": 3.310366736286596e-08, "learning_rate": 0.045875656661156825, "loss": 0.0, "num_input_tokens_seen": 16911840, "step": 29770 }, { "epoch": 522.3716814159292, "grad_norm": 2.2744281125142152e-08, "learning_rate": 0.04583326387950911, "loss": 0.0, "num_input_tokens_seen": 16915072, "step": 29775 }, { "epoch": 522.4601769911504, "grad_norm": 3.007609095107e-08, "learning_rate": 0.0457908871616811, "loss": 0.0, "num_input_tokens_seen": 16917680, "step": 29780 }, { "epoch": 522.5486725663717, "grad_norm": 2.8211539770950367e-08, "learning_rate": 0.04574852651420786, "loss": 0.0, "num_input_tokens_seen": 16920240, "step": 29785 }, { "epoch": 522.637168141593, "grad_norm": 1.0891797685985694e-08, "learning_rate": 0.045706181943621985, "loss": 0.0, "num_input_tokens_seen": 16922992, "step": 29790 }, { "epoch": 522.7256637168142, "grad_norm": 3.354238842234736e-08, "learning_rate": 0.04566385345645344, "loss": 0.0, "num_input_tokens_seen": 16925824, "step": 29795 }, { "epoch": 522.8141592920354, "grad_norm": 3.038427820456491e-08, "learning_rate": 0.04562154105922993, "loss": 0.0, "num_input_tokens_seen": 16929072, "step": 29800 }, { "epoch": 522.8141592920354, "eval_loss": 0.8346766233444214, "eval_runtime": 0.9421, "eval_samples_per_second": 26.536, "eval_steps_per_second": 13.799, "num_input_tokens_seen": 16929072, "step": 29800 }, { "epoch": 522.9026548672566, "grad_norm": 5.133327363182616e-08, "learning_rate": 0.04557924475847642, "loss": 0.0, "num_input_tokens_seen": 16932176, "step": 29805 }, { "epoch": 522.9911504424779, "grad_norm": 2.6894479532302284e-08, "learning_rate": 0.04553696456071567, "loss": 0.0, "num_input_tokens_seen": 16934736, "step": 29810 }, { "epoch": 523.070796460177, "grad_norm": 2.3539461935229156e-08, "learning_rate": 0.045494700472467724, "loss": 0.0, "num_input_tokens_seen": 16937232, "step": 29815 }, { "epoch": 523.1592920353983, "grad_norm": 2.3968286910758252e-08, "learning_rate": 0.04545245250025024, "loss": 0.0, "num_input_tokens_seen": 16940544, "step": 29820 }, { "epoch": 523.2477876106195, "grad_norm": 3.3039601277096153e-08, "learning_rate": 0.045410220650578384, "loss": 0.0, "num_input_tokens_seen": 16943328, "step": 29825 }, { "epoch": 523.3362831858407, "grad_norm": 3.189406072579004e-08, "learning_rate": 0.04536800492996492, "loss": 0.0, "num_input_tokens_seen": 16946032, "step": 29830 }, { "epoch": 523.4247787610619, "grad_norm": 4.444512669010692e-08, "learning_rate": 0.04532580534491994, "loss": 0.0, "num_input_tokens_seen": 16949232, "step": 29835 }, { "epoch": 523.5132743362832, "grad_norm": 2.9004690205169936e-08, "learning_rate": 0.045283621901951183, "loss": 0.0, "num_input_tokens_seen": 16952080, "step": 29840 }, { "epoch": 523.6017699115044, "grad_norm": 2.6571099098759987e-08, "learning_rate": 0.04524145460756393, "loss": 0.0, "num_input_tokens_seen": 16955328, "step": 29845 }, { "epoch": 523.6902654867257, "grad_norm": 2.2561501111795224e-08, "learning_rate": 0.045199303468260794, "loss": 0.0, "num_input_tokens_seen": 16957632, "step": 29850 }, { "epoch": 523.7787610619469, "grad_norm": 2.216491878925808e-08, "learning_rate": 0.04515716849054214, "loss": 0.0, "num_input_tokens_seen": 16960688, "step": 29855 }, { "epoch": 523.8672566371681, "grad_norm": 3.598429998419306e-08, "learning_rate": 0.04511504968090558, "loss": 0.0, "num_input_tokens_seen": 16963424, "step": 29860 }, { "epoch": 523.9557522123894, "grad_norm": 3.7788421280993134e-08, "learning_rate": 0.04507294704584644, "loss": 0.0, "num_input_tokens_seen": 16966576, "step": 29865 }, { "epoch": 524.0353982300885, "grad_norm": 3.0785965776658486e-08, "learning_rate": 0.04503086059185749, "loss": 0.0, "num_input_tokens_seen": 16968544, "step": 29870 }, { "epoch": 524.1238938053098, "grad_norm": 3.671055992526817e-08, "learning_rate": 0.04498879032542893, "loss": 0.0, "num_input_tokens_seen": 16971152, "step": 29875 }, { "epoch": 524.212389380531, "grad_norm": 4.923289509406459e-08, "learning_rate": 0.0449467362530486, "loss": 0.0, "num_input_tokens_seen": 16973984, "step": 29880 }, { "epoch": 524.3008849557522, "grad_norm": 2.1601596955633795e-08, "learning_rate": 0.04490469838120171, "loss": 0.0, "num_input_tokens_seen": 16977440, "step": 29885 }, { "epoch": 524.3893805309734, "grad_norm": 5.584973550298855e-08, "learning_rate": 0.04486267671637101, "loss": 0.0, "num_input_tokens_seen": 16980368, "step": 29890 }, { "epoch": 524.4778761061947, "grad_norm": 3.744687759876797e-08, "learning_rate": 0.04482067126503683, "loss": 0.0, "num_input_tokens_seen": 16982768, "step": 29895 }, { "epoch": 524.566371681416, "grad_norm": 3.5290170785629016e-08, "learning_rate": 0.04477868203367687, "loss": 0.0, "num_input_tokens_seen": 16985440, "step": 29900 }, { "epoch": 524.6548672566372, "grad_norm": 2.7590207452021787e-08, "learning_rate": 0.044736709028766426, "loss": 0.0, "num_input_tokens_seen": 16988608, "step": 29905 }, { "epoch": 524.7433628318585, "grad_norm": 2.639009366589562e-08, "learning_rate": 0.04469475225677832, "loss": 0.0, "num_input_tokens_seen": 16991488, "step": 29910 }, { "epoch": 524.8318584070796, "grad_norm": 1.6971421601397196e-08, "learning_rate": 0.04465281172418273, "loss": 0.0, "num_input_tokens_seen": 16994688, "step": 29915 }, { "epoch": 524.9203539823009, "grad_norm": 1.9176601639969704e-08, "learning_rate": 0.044610887437447476, "loss": 0.0, "num_input_tokens_seen": 16997584, "step": 29920 }, { "epoch": 525.0, "grad_norm": 1.1036419067522729e-07, "learning_rate": 0.044568979403037744, "loss": 0.0, "num_input_tokens_seen": 16999896, "step": 29925 }, { "epoch": 525.0884955752213, "grad_norm": 1.431159812881333e-08, "learning_rate": 0.04452708762741631, "loss": 0.0, "num_input_tokens_seen": 17002184, "step": 29930 }, { "epoch": 525.1769911504425, "grad_norm": 6.116609796436023e-08, "learning_rate": 0.044485212117043475, "loss": 0.0, "num_input_tokens_seen": 17004472, "step": 29935 }, { "epoch": 525.2654867256637, "grad_norm": 2.2071755978458896e-08, "learning_rate": 0.04444335287837687, "loss": 0.0, "num_input_tokens_seen": 17007528, "step": 29940 }, { "epoch": 525.3539823008849, "grad_norm": 4.1353096946750156e-08, "learning_rate": 0.04440150991787179, "loss": 0.0, "num_input_tokens_seen": 17010824, "step": 29945 }, { "epoch": 525.4424778761062, "grad_norm": 3.4516663305339534e-08, "learning_rate": 0.04435968324198088, "loss": 0.0, "num_input_tokens_seen": 17013688, "step": 29950 }, { "epoch": 525.5309734513274, "grad_norm": 3.3870481530584584e-08, "learning_rate": 0.04431787285715442, "loss": 0.0, "num_input_tokens_seen": 17016264, "step": 29955 }, { "epoch": 525.6194690265487, "grad_norm": 3.81201274990417e-08, "learning_rate": 0.04427607876984004, "loss": 0.0, "num_input_tokens_seen": 17019576, "step": 29960 }, { "epoch": 525.70796460177, "grad_norm": 2.1900358859738844e-08, "learning_rate": 0.044234300986482886, "loss": 0.0, "num_input_tokens_seen": 17022472, "step": 29965 }, { "epoch": 525.7964601769911, "grad_norm": 2.1081550727330978e-08, "learning_rate": 0.04419253951352566, "loss": 0.0, "num_input_tokens_seen": 17025880, "step": 29970 }, { "epoch": 525.8849557522124, "grad_norm": 7.032826943031978e-08, "learning_rate": 0.044150794357408533, "loss": 0.0, "num_input_tokens_seen": 17028856, "step": 29975 }, { "epoch": 525.9734513274336, "grad_norm": 1.0127313210261946e-08, "learning_rate": 0.044109065524569065, "loss": 0.0, "num_input_tokens_seen": 17031848, "step": 29980 }, { "epoch": 526.0530973451328, "grad_norm": 1.469257604469476e-08, "learning_rate": 0.0440673530214424, "loss": 0.0, "num_input_tokens_seen": 17034336, "step": 29985 }, { "epoch": 526.141592920354, "grad_norm": 2.4382469376860172e-08, "learning_rate": 0.04402565685446117, "loss": 0.0, "num_input_tokens_seen": 17036992, "step": 29990 }, { "epoch": 526.2300884955753, "grad_norm": 1.2174481867077702e-08, "learning_rate": 0.04398397703005536, "loss": 0.0, "num_input_tokens_seen": 17039888, "step": 29995 }, { "epoch": 526.3185840707964, "grad_norm": 3.4689634276219294e-08, "learning_rate": 0.043942313554652626, "loss": 0.0, "num_input_tokens_seen": 17043120, "step": 30000 }, { "epoch": 526.3185840707964, "eval_loss": 0.8453069925308228, "eval_runtime": 0.9342, "eval_samples_per_second": 26.761, "eval_steps_per_second": 13.916, "num_input_tokens_seen": 17043120, "step": 30000 }, { "epoch": 526.4070796460177, "grad_norm": 2.2309293967737176e-08, "learning_rate": 0.0439006664346779, "loss": 0.0, "num_input_tokens_seen": 17046224, "step": 30005 }, { "epoch": 526.4955752212389, "grad_norm": 4.186296820307689e-08, "learning_rate": 0.043859035676553755, "loss": 0.0, "num_input_tokens_seen": 17049360, "step": 30010 }, { "epoch": 526.5840707964602, "grad_norm": 6.36867127923324e-08, "learning_rate": 0.043817421286700194, "loss": 0.0, "num_input_tokens_seen": 17052352, "step": 30015 }, { "epoch": 526.6725663716815, "grad_norm": 2.9082153574222502e-08, "learning_rate": 0.043775823271534585, "loss": 0.0, "num_input_tokens_seen": 17055312, "step": 30020 }, { "epoch": 526.7610619469026, "grad_norm": 1.735475940733977e-08, "learning_rate": 0.04373424163747197, "loss": 0.0, "num_input_tokens_seen": 17057760, "step": 30025 }, { "epoch": 526.8495575221239, "grad_norm": 1.9995599842559386e-08, "learning_rate": 0.04369267639092473, "loss": 0.0, "num_input_tokens_seen": 17060432, "step": 30030 }, { "epoch": 526.9380530973451, "grad_norm": 4.241191575715675e-08, "learning_rate": 0.04365112753830268, "loss": 0.0, "num_input_tokens_seen": 17063104, "step": 30035 }, { "epoch": 527.0176991150443, "grad_norm": 2.6602270608577783e-08, "learning_rate": 0.04360959508601327, "loss": 0.0, "num_input_tokens_seen": 17065312, "step": 30040 }, { "epoch": 527.1061946902655, "grad_norm": 2.829878376076067e-08, "learning_rate": 0.04356807904046123, "loss": 0.0, "num_input_tokens_seen": 17068096, "step": 30045 }, { "epoch": 527.1946902654868, "grad_norm": 5.735458685762751e-08, "learning_rate": 0.04352657940804892, "loss": 0.0, "num_input_tokens_seen": 17070832, "step": 30050 }, { "epoch": 527.2831858407079, "grad_norm": 3.031236417427863e-08, "learning_rate": 0.04348509619517613, "loss": 0.0, "num_input_tokens_seen": 17073680, "step": 30055 }, { "epoch": 527.3716814159292, "grad_norm": 3.452461783126637e-08, "learning_rate": 0.04344362940824002, "loss": 0.0, "num_input_tokens_seen": 17076512, "step": 30060 }, { "epoch": 527.4601769911504, "grad_norm": 5.480072218233545e-08, "learning_rate": 0.04340217905363533, "loss": 0.0, "num_input_tokens_seen": 17079632, "step": 30065 }, { "epoch": 527.5486725663717, "grad_norm": 1.3834167589266144e-08, "learning_rate": 0.04336074513775425, "loss": 0.0, "num_input_tokens_seen": 17082176, "step": 30070 }, { "epoch": 527.637168141593, "grad_norm": 1.8299987303294074e-08, "learning_rate": 0.04331932766698636, "loss": 0.0, "num_input_tokens_seen": 17085872, "step": 30075 }, { "epoch": 527.7256637168142, "grad_norm": 2.9905233844829127e-08, "learning_rate": 0.0432779266477188, "loss": 0.0, "num_input_tokens_seen": 17089216, "step": 30080 }, { "epoch": 527.8141592920354, "grad_norm": 2.292353862287655e-08, "learning_rate": 0.04323654208633607, "loss": 0.0, "num_input_tokens_seen": 17091968, "step": 30085 }, { "epoch": 527.9026548672566, "grad_norm": 1.1361504625995167e-08, "learning_rate": 0.04319517398922024, "loss": 0.0, "num_input_tokens_seen": 17094224, "step": 30090 }, { "epoch": 527.9911504424779, "grad_norm": 3.015043859022626e-08, "learning_rate": 0.04315382236275079, "loss": 0.0, "num_input_tokens_seen": 17097264, "step": 30095 }, { "epoch": 528.070796460177, "grad_norm": 1.498019663870309e-08, "learning_rate": 0.043112487213304664, "loss": 0.0, "num_input_tokens_seen": 17099152, "step": 30100 }, { "epoch": 528.1592920353983, "grad_norm": 3.257978420379004e-08, "learning_rate": 0.04307116854725618, "loss": 0.0, "num_input_tokens_seen": 17101872, "step": 30105 }, { "epoch": 528.2477876106195, "grad_norm": 3.181616747838234e-08, "learning_rate": 0.043029866370977325, "loss": 0.0, "num_input_tokens_seen": 17105024, "step": 30110 }, { "epoch": 528.3362831858407, "grad_norm": 3.9364557835597225e-08, "learning_rate": 0.04298858069083728, "loss": 0.0, "num_input_tokens_seen": 17107536, "step": 30115 }, { "epoch": 528.4247787610619, "grad_norm": 2.7584645678757624e-08, "learning_rate": 0.04294731151320295, "loss": 0.0, "num_input_tokens_seen": 17111008, "step": 30120 }, { "epoch": 528.5132743362832, "grad_norm": 4.6382641727404916e-08, "learning_rate": 0.04290605884443841, "loss": 0.0, "num_input_tokens_seen": 17113520, "step": 30125 }, { "epoch": 528.6017699115044, "grad_norm": 1.3384648056558035e-08, "learning_rate": 0.04286482269090545, "loss": 0.0, "num_input_tokens_seen": 17116144, "step": 30130 }, { "epoch": 528.6902654867257, "grad_norm": 5.289134463737355e-08, "learning_rate": 0.04282360305896323, "loss": 0.0, "num_input_tokens_seen": 17118912, "step": 30135 }, { "epoch": 528.7787610619469, "grad_norm": 3.90367489444543e-08, "learning_rate": 0.04278239995496822, "loss": 0.0, "num_input_tokens_seen": 17121584, "step": 30140 }, { "epoch": 528.8672566371681, "grad_norm": 2.5517637780581026e-08, "learning_rate": 0.042741213385274514, "loss": 0.0, "num_input_tokens_seen": 17124800, "step": 30145 }, { "epoch": 528.9557522123894, "grad_norm": 5.5791467445942544e-08, "learning_rate": 0.04270004335623366, "loss": 0.0, "num_input_tokens_seen": 17127600, "step": 30150 }, { "epoch": 529.0353982300885, "grad_norm": 3.1629291186163755e-08, "learning_rate": 0.04265888987419448, "loss": 0.0, "num_input_tokens_seen": 17130488, "step": 30155 }, { "epoch": 529.1238938053098, "grad_norm": 1.7164248689027772e-08, "learning_rate": 0.04261775294550346, "loss": 0.0, "num_input_tokens_seen": 17133512, "step": 30160 }, { "epoch": 529.212389380531, "grad_norm": 4.892986282811762e-08, "learning_rate": 0.042576632576504354, "loss": 0.0, "num_input_tokens_seen": 17135688, "step": 30165 }, { "epoch": 529.3008849557522, "grad_norm": 4.623865024200313e-08, "learning_rate": 0.0425355287735385, "loss": 0.0, "num_input_tokens_seen": 17138888, "step": 30170 }, { "epoch": 529.3893805309734, "grad_norm": 4.397288222435236e-08, "learning_rate": 0.0424944415429446, "loss": 0.0, "num_input_tokens_seen": 17141656, "step": 30175 }, { "epoch": 529.4778761061947, "grad_norm": 2.931733256161806e-08, "learning_rate": 0.04245337089105877, "loss": 0.0, "num_input_tokens_seen": 17144344, "step": 30180 }, { "epoch": 529.566371681416, "grad_norm": 6.252415118979116e-08, "learning_rate": 0.04241231682421467, "loss": 0.0, "num_input_tokens_seen": 17147272, "step": 30185 }, { "epoch": 529.6548672566372, "grad_norm": 3.012245386457835e-08, "learning_rate": 0.04237127934874337, "loss": 0.0, "num_input_tokens_seen": 17150568, "step": 30190 }, { "epoch": 529.7433628318585, "grad_norm": 2.9811292989734284e-08, "learning_rate": 0.042330258470973305, "loss": 0.0, "num_input_tokens_seen": 17153800, "step": 30195 }, { "epoch": 529.8318584070796, "grad_norm": 3.261952841171478e-08, "learning_rate": 0.042289254197230515, "loss": 0.0, "num_input_tokens_seen": 17156344, "step": 30200 }, { "epoch": 529.8318584070796, "eval_loss": 0.8593949675559998, "eval_runtime": 0.9405, "eval_samples_per_second": 26.581, "eval_steps_per_second": 13.822, "num_input_tokens_seen": 17156344, "step": 30200 }, { "epoch": 529.9203539823009, "grad_norm": 2.3842584795374933e-08, "learning_rate": 0.04224826653383823, "loss": 0.0, "num_input_tokens_seen": 17159240, "step": 30205 }, { "epoch": 530.0, "grad_norm": 1.9002403917056654e-07, "learning_rate": 0.04220729548711735, "loss": 0.0, "num_input_tokens_seen": 17161240, "step": 30210 }, { "epoch": 530.0884955752213, "grad_norm": 2.3626236966833858e-08, "learning_rate": 0.04216634106338616, "loss": 0.0, "num_input_tokens_seen": 17164104, "step": 30215 }, { "epoch": 530.1769911504425, "grad_norm": 2.656368458531233e-08, "learning_rate": 0.04212540326896025, "loss": 0.0, "num_input_tokens_seen": 17167256, "step": 30220 }, { "epoch": 530.2654867256637, "grad_norm": 2.9524969136218715e-08, "learning_rate": 0.0420844821101528, "loss": 0.0, "num_input_tokens_seen": 17170360, "step": 30225 }, { "epoch": 530.3539823008849, "grad_norm": 2.0288666746637318e-08, "learning_rate": 0.04204357759327441, "loss": 0.0, "num_input_tokens_seen": 17173128, "step": 30230 }, { "epoch": 530.4424778761062, "grad_norm": 4.009054777043275e-08, "learning_rate": 0.042002689724632954, "loss": 0.0, "num_input_tokens_seen": 17176024, "step": 30235 }, { "epoch": 530.5309734513274, "grad_norm": 5.783320844443551e-08, "learning_rate": 0.04196181851053398, "loss": 0.0, "num_input_tokens_seen": 17178920, "step": 30240 }, { "epoch": 530.6194690265487, "grad_norm": 1.742279742700248e-08, "learning_rate": 0.041920963957280295, "loss": 0.0, "num_input_tokens_seen": 17181768, "step": 30245 }, { "epoch": 530.70796460177, "grad_norm": 2.8896593562421913e-08, "learning_rate": 0.04188012607117212, "loss": 0.0, "num_input_tokens_seen": 17184536, "step": 30250 }, { "epoch": 530.7964601769911, "grad_norm": 2.117886488406384e-08, "learning_rate": 0.04183930485850725, "loss": 0.0, "num_input_tokens_seen": 17187192, "step": 30255 }, { "epoch": 530.8849557522124, "grad_norm": 3.528489145310232e-08, "learning_rate": 0.04179850032558078, "loss": 0.0, "num_input_tokens_seen": 17190344, "step": 30260 }, { "epoch": 530.9734513274336, "grad_norm": 5.74377274631388e-08, "learning_rate": 0.041757712478685295, "loss": 0.0, "num_input_tokens_seen": 17192936, "step": 30265 }, { "epoch": 531.0530973451328, "grad_norm": 4.88250755381614e-08, "learning_rate": 0.04171694132411085, "loss": 0.0, "num_input_tokens_seen": 17195168, "step": 30270 }, { "epoch": 531.141592920354, "grad_norm": 2.3258065695586083e-08, "learning_rate": 0.04167618686814479, "loss": 0.0, "num_input_tokens_seen": 17198016, "step": 30275 }, { "epoch": 531.2300884955753, "grad_norm": 2.6014577159116925e-08, "learning_rate": 0.041635449117072024, "loss": 0.0, "num_input_tokens_seen": 17200832, "step": 30280 }, { "epoch": 531.3185840707964, "grad_norm": 3.183180652399642e-08, "learning_rate": 0.04159472807717477, "loss": 0.0, "num_input_tokens_seen": 17205024, "step": 30285 }, { "epoch": 531.4070796460177, "grad_norm": 2.432869017354733e-08, "learning_rate": 0.041554023754732744, "loss": 0.0, "num_input_tokens_seen": 17207392, "step": 30290 }, { "epoch": 531.4955752212389, "grad_norm": 3.5500132611332447e-08, "learning_rate": 0.04151333615602311, "loss": 0.0, "num_input_tokens_seen": 17210176, "step": 30295 }, { "epoch": 531.5840707964602, "grad_norm": 4.9134960988794774e-08, "learning_rate": 0.04147266528732034, "loss": 0.0, "num_input_tokens_seen": 17212864, "step": 30300 }, { "epoch": 531.6725663716815, "grad_norm": 6.704637200982688e-08, "learning_rate": 0.0414320111548964, "loss": 0.0, "num_input_tokens_seen": 17215360, "step": 30305 }, { "epoch": 531.7610619469026, "grad_norm": 4.3165769625375106e-08, "learning_rate": 0.04139137376502076, "loss": 0.0, "num_input_tokens_seen": 17218048, "step": 30310 }, { "epoch": 531.8495575221239, "grad_norm": 4.6039577483725225e-08, "learning_rate": 0.04135075312396014, "loss": 0.0, "num_input_tokens_seen": 17220688, "step": 30315 }, { "epoch": 531.9380530973451, "grad_norm": 3.999831932333109e-08, "learning_rate": 0.04131014923797875, "loss": 0.0, "num_input_tokens_seen": 17224112, "step": 30320 }, { "epoch": 532.0176991150443, "grad_norm": 2.9569726223144244e-08, "learning_rate": 0.04126956211333819, "loss": 0.0, "num_input_tokens_seen": 17226920, "step": 30325 }, { "epoch": 532.1061946902655, "grad_norm": 1.7049474720920443e-08, "learning_rate": 0.041228991756297545, "loss": 0.0, "num_input_tokens_seen": 17229416, "step": 30330 }, { "epoch": 532.1946902654868, "grad_norm": 4.28885016390268e-08, "learning_rate": 0.04118843817311332, "loss": 0.0, "num_input_tokens_seen": 17232344, "step": 30335 }, { "epoch": 532.2831858407079, "grad_norm": 3.997123698695759e-08, "learning_rate": 0.0411479013700393, "loss": 0.0, "num_input_tokens_seen": 17235544, "step": 30340 }, { "epoch": 532.3716814159292, "grad_norm": 2.3179870467515684e-08, "learning_rate": 0.0411073813533268, "loss": 0.0, "num_input_tokens_seen": 17238456, "step": 30345 }, { "epoch": 532.4601769911504, "grad_norm": 2.848982205705397e-08, "learning_rate": 0.04106687812922456, "loss": 0.0, "num_input_tokens_seen": 17241512, "step": 30350 }, { "epoch": 532.5486725663717, "grad_norm": 2.9639437570949667e-08, "learning_rate": 0.041026391703978635, "loss": 0.0, "num_input_tokens_seen": 17244456, "step": 30355 }, { "epoch": 532.637168141593, "grad_norm": 4.6895252125978004e-08, "learning_rate": 0.04098592208383259, "loss": 0.0, "num_input_tokens_seen": 17246920, "step": 30360 }, { "epoch": 532.7256637168142, "grad_norm": 5.388551471696701e-08, "learning_rate": 0.040945469275027256, "loss": 0.0, "num_input_tokens_seen": 17249880, "step": 30365 }, { "epoch": 532.8141592920354, "grad_norm": 1.890628986700449e-08, "learning_rate": 0.04090503328380104, "loss": 0.0, "num_input_tokens_seen": 17252920, "step": 30370 }, { "epoch": 532.9026548672566, "grad_norm": 3.4279597826980535e-08, "learning_rate": 0.04086461411638971, "loss": 0.0, "num_input_tokens_seen": 17255336, "step": 30375 }, { "epoch": 532.9911504424779, "grad_norm": 4.0175688553745204e-08, "learning_rate": 0.04082421177902631, "loss": 0.0, "num_input_tokens_seen": 17258136, "step": 30380 }, { "epoch": 533.070796460177, "grad_norm": 2.0590750438032046e-08, "learning_rate": 0.04078382627794149, "loss": 0.0, "num_input_tokens_seen": 17260336, "step": 30385 }, { "epoch": 533.1592920353983, "grad_norm": 3.100868894989617e-08, "learning_rate": 0.04074345761936316, "loss": 0.0, "num_input_tokens_seen": 17262688, "step": 30390 }, { "epoch": 533.2477876106195, "grad_norm": 3.874368559309005e-08, "learning_rate": 0.04070310580951663, "loss": 0.0, "num_input_tokens_seen": 17266016, "step": 30395 }, { "epoch": 533.3362831858407, "grad_norm": 2.2068034510880352e-08, "learning_rate": 0.040662770854624726, "loss": 0.0, "num_input_tokens_seen": 17268656, "step": 30400 }, { "epoch": 533.3362831858407, "eval_loss": 0.8479840159416199, "eval_runtime": 0.9458, "eval_samples_per_second": 26.432, "eval_steps_per_second": 13.744, "num_input_tokens_seen": 17268656, "step": 30400 }, { "epoch": 533.4247787610619, "grad_norm": 3.772976242544246e-08, "learning_rate": 0.040622452760907535, "loss": 0.0, "num_input_tokens_seen": 17271920, "step": 30405 }, { "epoch": 533.5132743362832, "grad_norm": 3.317260421908941e-08, "learning_rate": 0.04058215153458265, "loss": 0.0, "num_input_tokens_seen": 17274720, "step": 30410 }, { "epoch": 533.6017699115044, "grad_norm": 3.926645320007083e-08, "learning_rate": 0.04054186718186507, "loss": 0.0, "num_input_tokens_seen": 17277680, "step": 30415 }, { "epoch": 533.6902654867257, "grad_norm": 1.0326212418476644e-07, "learning_rate": 0.04050159970896708, "loss": 0.0, "num_input_tokens_seen": 17280192, "step": 30420 }, { "epoch": 533.7787610619469, "grad_norm": 2.822047129313887e-08, "learning_rate": 0.04046134912209843, "loss": 0.0, "num_input_tokens_seen": 17283296, "step": 30425 }, { "epoch": 533.8672566371681, "grad_norm": 5.4026394025186164e-08, "learning_rate": 0.040421115427466354, "loss": 0.0, "num_input_tokens_seen": 17286688, "step": 30430 }, { "epoch": 533.9557522123894, "grad_norm": 3.829650907505311e-08, "learning_rate": 0.04038089863127529, "loss": 0.0, "num_input_tokens_seen": 17289952, "step": 30435 }, { "epoch": 534.0353982300885, "grad_norm": 2.2439321512024435e-08, "learning_rate": 0.04034069873972727, "loss": 0.0, "num_input_tokens_seen": 17292320, "step": 30440 }, { "epoch": 534.1238938053098, "grad_norm": 5.6094929590244647e-08, "learning_rate": 0.040300515759021514, "loss": 0.0, "num_input_tokens_seen": 17294800, "step": 30445 }, { "epoch": 534.212389380531, "grad_norm": 3.666077574848714e-08, "learning_rate": 0.04026034969535478, "loss": 0.0, "num_input_tokens_seen": 17297888, "step": 30450 }, { "epoch": 534.3008849557522, "grad_norm": 2.8543153618443284e-08, "learning_rate": 0.040220200554921266, "loss": 0.0, "num_input_tokens_seen": 17300704, "step": 30455 }, { "epoch": 534.3893805309734, "grad_norm": 3.4917931657219015e-08, "learning_rate": 0.0401800683439124, "loss": 0.0, "num_input_tokens_seen": 17303120, "step": 30460 }, { "epoch": 534.4778761061947, "grad_norm": 4.2677367417809364e-08, "learning_rate": 0.04013995306851704, "loss": 0.0, "num_input_tokens_seen": 17305888, "step": 30465 }, { "epoch": 534.566371681416, "grad_norm": 2.5832742167608558e-08, "learning_rate": 0.040099854734921545, "loss": 0.0, "num_input_tokens_seen": 17309120, "step": 30470 }, { "epoch": 534.6548672566372, "grad_norm": 5.166131700207188e-08, "learning_rate": 0.0400597733493095, "loss": 0.0, "num_input_tokens_seen": 17312464, "step": 30475 }, { "epoch": 534.7433628318585, "grad_norm": 4.166736289334949e-08, "learning_rate": 0.04001970891786203, "loss": 0.0, "num_input_tokens_seen": 17315360, "step": 30480 }, { "epoch": 534.8318584070796, "grad_norm": 2.3584780350915935e-08, "learning_rate": 0.03997966144675752, "loss": 0.0, "num_input_tokens_seen": 17318048, "step": 30485 }, { "epoch": 534.9203539823009, "grad_norm": 4.088485638931161e-08, "learning_rate": 0.039939630942171796, "loss": 0.0, "num_input_tokens_seen": 17320976, "step": 30490 }, { "epoch": 535.0, "grad_norm": 1.7726721424082825e-08, "learning_rate": 0.03989961741027815, "loss": 0.0, "num_input_tokens_seen": 17323544, "step": 30495 }, { "epoch": 535.0884955752213, "grad_norm": 2.3595703169121407e-08, "learning_rate": 0.03985962085724704, "loss": 0.0, "num_input_tokens_seen": 17326568, "step": 30500 }, { "epoch": 535.1769911504425, "grad_norm": 2.625287542912247e-08, "learning_rate": 0.03981964128924656, "loss": 0.0, "num_input_tokens_seen": 17329160, "step": 30505 }, { "epoch": 535.2654867256637, "grad_norm": 1.1916505116005283e-08, "learning_rate": 0.03977967871244197, "loss": 0.0, "num_input_tokens_seen": 17331704, "step": 30510 }, { "epoch": 535.3539823008849, "grad_norm": 2.4710063328825527e-08, "learning_rate": 0.03973973313299602, "loss": 0.0, "num_input_tokens_seen": 17334616, "step": 30515 }, { "epoch": 535.4424778761062, "grad_norm": 1.0128180605306625e-07, "learning_rate": 0.0396998045570689, "loss": 0.0, "num_input_tokens_seen": 17337672, "step": 30520 }, { "epoch": 535.5309734513274, "grad_norm": 6.665836593811036e-08, "learning_rate": 0.03965989299081798, "loss": 0.0, "num_input_tokens_seen": 17340328, "step": 30525 }, { "epoch": 535.6194690265487, "grad_norm": 1.943642402579826e-08, "learning_rate": 0.039619998440398235, "loss": 0.0, "num_input_tokens_seen": 17343304, "step": 30530 }, { "epoch": 535.70796460177, "grad_norm": 2.3033901896951647e-08, "learning_rate": 0.03958012091196184, "loss": 0.0, "num_input_tokens_seen": 17346472, "step": 30535 }, { "epoch": 535.7964601769911, "grad_norm": 4.2769926267283154e-08, "learning_rate": 0.039540260411658396, "loss": 0.0, "num_input_tokens_seen": 17349448, "step": 30540 }, { "epoch": 535.8849557522124, "grad_norm": 2.2525632914494054e-08, "learning_rate": 0.03950041694563496, "loss": 0.0, "num_input_tokens_seen": 17352248, "step": 30545 }, { "epoch": 535.9734513274336, "grad_norm": 3.301075324202429e-08, "learning_rate": 0.0394605905200358, "loss": 0.0, "num_input_tokens_seen": 17355080, "step": 30550 }, { "epoch": 536.0530973451328, "grad_norm": 4.365591621535714e-08, "learning_rate": 0.03942078114100272, "loss": 0.0, "num_input_tokens_seen": 17357600, "step": 30555 }, { "epoch": 536.141592920354, "grad_norm": 2.629039741464112e-08, "learning_rate": 0.03938098881467485, "loss": 0.0, "num_input_tokens_seen": 17360256, "step": 30560 }, { "epoch": 536.2300884955753, "grad_norm": 1.1203712624308082e-08, "learning_rate": 0.039341213547188586, "loss": 0.0, "num_input_tokens_seen": 17363392, "step": 30565 }, { "epoch": 536.3185840707964, "grad_norm": 6.203335090049222e-08, "learning_rate": 0.03930145534467782, "loss": 0.0, "num_input_tokens_seen": 17366256, "step": 30570 }, { "epoch": 536.4070796460177, "grad_norm": 1.1802469224164724e-08, "learning_rate": 0.0392617142132738, "loss": 0.0, "num_input_tokens_seen": 17369040, "step": 30575 }, { "epoch": 536.4955752212389, "grad_norm": 3.533337888939059e-08, "learning_rate": 0.03922199015910504, "loss": 0.0, "num_input_tokens_seen": 17372240, "step": 30580 }, { "epoch": 536.5840707964602, "grad_norm": 1.5790133645054993e-08, "learning_rate": 0.039182283188297556, "loss": 0.0, "num_input_tokens_seen": 17375264, "step": 30585 }, { "epoch": 536.6725663716815, "grad_norm": 4.344568083070044e-08, "learning_rate": 0.039142593306974595, "loss": 0.0, "num_input_tokens_seen": 17378016, "step": 30590 }, { "epoch": 536.7610619469026, "grad_norm": 4.8057675172685776e-08, "learning_rate": 0.039102920521256856, "loss": 0.0, "num_input_tokens_seen": 17380928, "step": 30595 }, { "epoch": 536.8495575221239, "grad_norm": 1.8951032743075302e-08, "learning_rate": 0.03906326483726243, "loss": 0.0, "num_input_tokens_seen": 17383696, "step": 30600 }, { "epoch": 536.8495575221239, "eval_loss": 0.8396360874176025, "eval_runtime": 0.9413, "eval_samples_per_second": 26.558, "eval_steps_per_second": 13.81, "num_input_tokens_seen": 17383696, "step": 30600 }, { "epoch": 536.9380530973451, "grad_norm": 3.8259294399267674e-08, "learning_rate": 0.039023626261106704, "loss": 0.0, "num_input_tokens_seen": 17386624, "step": 30605 }, { "epoch": 537.0176991150443, "grad_norm": 7.001153790042736e-08, "learning_rate": 0.03898400479890237, "loss": 0.0, "num_input_tokens_seen": 17389112, "step": 30610 }, { "epoch": 537.1061946902655, "grad_norm": 1.4266182013500384e-08, "learning_rate": 0.038944400456759655, "loss": 0.0, "num_input_tokens_seen": 17392024, "step": 30615 }, { "epoch": 537.1946902654868, "grad_norm": 3.2257016613357337e-08, "learning_rate": 0.038904813240785964, "loss": 0.0, "num_input_tokens_seen": 17394344, "step": 30620 }, { "epoch": 537.2831858407079, "grad_norm": 2.9296947090529102e-08, "learning_rate": 0.03886524315708621, "loss": 0.0, "num_input_tokens_seen": 17397416, "step": 30625 }, { "epoch": 537.3716814159292, "grad_norm": 5.163908056715627e-08, "learning_rate": 0.03882569021176255, "loss": 0.0, "num_input_tokens_seen": 17400328, "step": 30630 }, { "epoch": 537.4601769911504, "grad_norm": 2.241670493674519e-08, "learning_rate": 0.038786154410914535, "loss": 0.0, "num_input_tokens_seen": 17403080, "step": 30635 }, { "epoch": 537.5486725663717, "grad_norm": 1.773529589854661e-08, "learning_rate": 0.03874663576063917, "loss": 0.0, "num_input_tokens_seen": 17405448, "step": 30640 }, { "epoch": 537.637168141593, "grad_norm": 1.5271179876208407e-08, "learning_rate": 0.038707134267030624, "loss": 0.0, "num_input_tokens_seen": 17408312, "step": 30645 }, { "epoch": 537.7256637168142, "grad_norm": 3.644825596893497e-08, "learning_rate": 0.038667649936180555, "loss": 0.0, "num_input_tokens_seen": 17410936, "step": 30650 }, { "epoch": 537.8141592920354, "grad_norm": 2.58982524314888e-08, "learning_rate": 0.038628182774178, "loss": 0.0, "num_input_tokens_seen": 17413768, "step": 30655 }, { "epoch": 537.9026548672566, "grad_norm": 1.4474243137385656e-08, "learning_rate": 0.038588732787109226, "loss": 0.0, "num_input_tokens_seen": 17416952, "step": 30660 }, { "epoch": 537.9911504424779, "grad_norm": 2.943834864765904e-08, "learning_rate": 0.03854929998105795, "loss": 0.0, "num_input_tokens_seen": 17419864, "step": 30665 }, { "epoch": 538.070796460177, "grad_norm": 2.1845057318614636e-08, "learning_rate": 0.03850988436210518, "loss": 0.0, "num_input_tokens_seen": 17422224, "step": 30670 }, { "epoch": 538.1592920353983, "grad_norm": 3.004060644684614e-08, "learning_rate": 0.03847048593632933, "loss": 0.0, "num_input_tokens_seen": 17424848, "step": 30675 }, { "epoch": 538.2477876106195, "grad_norm": 2.9340915475017937e-08, "learning_rate": 0.038431104709806096, "loss": 0.0, "num_input_tokens_seen": 17427600, "step": 30680 }, { "epoch": 538.3362831858407, "grad_norm": 2.470131477139148e-08, "learning_rate": 0.0383917406886086, "loss": 0.0, "num_input_tokens_seen": 17429824, "step": 30685 }, { "epoch": 538.4247787610619, "grad_norm": 2.6874486636074835e-08, "learning_rate": 0.03835239387880722, "loss": 0.0, "num_input_tokens_seen": 17431984, "step": 30690 }, { "epoch": 538.5132743362832, "grad_norm": 1.9907522741391404e-08, "learning_rate": 0.03831306428646979, "loss": 0.0, "num_input_tokens_seen": 17435168, "step": 30695 }, { "epoch": 538.6017699115044, "grad_norm": 4.233086770000227e-08, "learning_rate": 0.03827375191766135, "loss": 0.0, "num_input_tokens_seen": 17438368, "step": 30700 }, { "epoch": 538.6902654867257, "grad_norm": 3.315192387276511e-08, "learning_rate": 0.03823445677844446, "loss": 0.0, "num_input_tokens_seen": 17441264, "step": 30705 }, { "epoch": 538.7787610619469, "grad_norm": 7.578910299343988e-08, "learning_rate": 0.03819517887487881, "loss": 0.0, "num_input_tokens_seen": 17444256, "step": 30710 }, { "epoch": 538.8672566371681, "grad_norm": 2.29587566735745e-08, "learning_rate": 0.03815591821302161, "loss": 0.0, "num_input_tokens_seen": 17446960, "step": 30715 }, { "epoch": 538.9557522123894, "grad_norm": 5.681452819317201e-08, "learning_rate": 0.03811667479892739, "loss": 0.0, "num_input_tokens_seen": 17450416, "step": 30720 }, { "epoch": 539.0353982300885, "grad_norm": 4.069336156931058e-08, "learning_rate": 0.03807744863864788, "loss": 0.0, "num_input_tokens_seen": 17453288, "step": 30725 }, { "epoch": 539.1238938053098, "grad_norm": 1.1174649650058655e-08, "learning_rate": 0.03803823973823229, "loss": 0.0, "num_input_tokens_seen": 17456168, "step": 30730 }, { "epoch": 539.212389380531, "grad_norm": 7.005844082641488e-08, "learning_rate": 0.03799904810372719, "loss": 0.0, "num_input_tokens_seen": 17459080, "step": 30735 }, { "epoch": 539.3008849557522, "grad_norm": 4.437412570723609e-08, "learning_rate": 0.03795987374117632, "loss": 0.0, "num_input_tokens_seen": 17462104, "step": 30740 }, { "epoch": 539.3893805309734, "grad_norm": 2.7218057141453755e-08, "learning_rate": 0.03792071665662093, "loss": 0.0, "num_input_tokens_seen": 17464792, "step": 30745 }, { "epoch": 539.4778761061947, "grad_norm": 4.5633658629640195e-08, "learning_rate": 0.03788157685609952, "loss": 0.0, "num_input_tokens_seen": 17467960, "step": 30750 }, { "epoch": 539.566371681416, "grad_norm": 5.566777261378775e-08, "learning_rate": 0.037842454345647876, "loss": 0.0, "num_input_tokens_seen": 17470616, "step": 30755 }, { "epoch": 539.6548672566372, "grad_norm": 1.6966039240173814e-08, "learning_rate": 0.03780334913129929, "loss": 0.0, "num_input_tokens_seen": 17473512, "step": 30760 }, { "epoch": 539.7433628318585, "grad_norm": 3.429098782703477e-08, "learning_rate": 0.037764261219084175, "loss": 0.0, "num_input_tokens_seen": 17475992, "step": 30765 }, { "epoch": 539.8318584070796, "grad_norm": 3.7960621313004594e-08, "learning_rate": 0.037725190615030414, "loss": 0.0, "num_input_tokens_seen": 17479144, "step": 30770 }, { "epoch": 539.9203539823009, "grad_norm": 7.69148513768414e-08, "learning_rate": 0.037686137325163224, "loss": 0.0, "num_input_tokens_seen": 17481544, "step": 30775 }, { "epoch": 540.0, "grad_norm": 2.1673645278497133e-07, "learning_rate": 0.037647101355505065, "loss": 0.0, "num_input_tokens_seen": 17483856, "step": 30780 }, { "epoch": 540.0884955752213, "grad_norm": 3.602072951025548e-08, "learning_rate": 0.03760808271207581, "loss": 0.0, "num_input_tokens_seen": 17486816, "step": 30785 }, { "epoch": 540.1769911504425, "grad_norm": 2.9547752689040863e-08, "learning_rate": 0.03756908140089258, "loss": 0.0, "num_input_tokens_seen": 17489920, "step": 30790 }, { "epoch": 540.2654867256637, "grad_norm": 2.885857064427455e-08, "learning_rate": 0.03753009742796989, "loss": 0.0, "num_input_tokens_seen": 17493120, "step": 30795 }, { "epoch": 540.3539823008849, "grad_norm": 9.006566870084498e-08, "learning_rate": 0.037491130799319615, "loss": 0.0, "num_input_tokens_seen": 17495648, "step": 30800 }, { "epoch": 540.3539823008849, "eval_loss": 0.8238071203231812, "eval_runtime": 0.9403, "eval_samples_per_second": 26.588, "eval_steps_per_second": 13.826, "num_input_tokens_seen": 17495648, "step": 30800 }, { "epoch": 540.4424778761062, "grad_norm": 3.9104893545527375e-08, "learning_rate": 0.03745218152095079, "loss": 0.0, "num_input_tokens_seen": 17498352, "step": 30805 }, { "epoch": 540.5309734513274, "grad_norm": 2.3799181292361027e-08, "learning_rate": 0.037413249598869935, "loss": 0.0, "num_input_tokens_seen": 17501120, "step": 30810 }, { "epoch": 540.6194690265487, "grad_norm": 1.5952405618691046e-08, "learning_rate": 0.037374335039080886, "loss": 0.0, "num_input_tokens_seen": 17504240, "step": 30815 }, { "epoch": 540.70796460177, "grad_norm": 6.995137624699055e-08, "learning_rate": 0.037335437847584724, "loss": 0.0, "num_input_tokens_seen": 17507136, "step": 30820 }, { "epoch": 540.7964601769911, "grad_norm": 5.477207665194328e-08, "learning_rate": 0.03729655803037983, "loss": 0.0, "num_input_tokens_seen": 17509952, "step": 30825 }, { "epoch": 540.8849557522124, "grad_norm": 2.7077252440221855e-08, "learning_rate": 0.03725769559346207, "loss": 0.0, "num_input_tokens_seen": 17512608, "step": 30830 }, { "epoch": 540.9734513274336, "grad_norm": 4.5790756075803074e-08, "learning_rate": 0.03721885054282439, "loss": 0.0, "num_input_tokens_seen": 17515712, "step": 30835 }, { "epoch": 541.0530973451328, "grad_norm": 4.63393234895193e-08, "learning_rate": 0.03718002288445731, "loss": 0.0, "num_input_tokens_seen": 17518920, "step": 30840 }, { "epoch": 541.141592920354, "grad_norm": 4.0495507391824503e-08, "learning_rate": 0.03714121262434844, "loss": 0.0, "num_input_tokens_seen": 17521688, "step": 30845 }, { "epoch": 541.2300884955753, "grad_norm": 1.2810676075503125e-08, "learning_rate": 0.037102419768482844, "loss": 0.0, "num_input_tokens_seen": 17524584, "step": 30850 }, { "epoch": 541.3185840707964, "grad_norm": 3.165702011642679e-08, "learning_rate": 0.03706364432284293, "loss": 0.0, "num_input_tokens_seen": 17527096, "step": 30855 }, { "epoch": 541.4070796460177, "grad_norm": 1.6560621318717494e-08, "learning_rate": 0.03702488629340828, "loss": 0.0, "num_input_tokens_seen": 17530072, "step": 30860 }, { "epoch": 541.4955752212389, "grad_norm": 4.631999317439295e-08, "learning_rate": 0.036986145686155915, "loss": 0.0, "num_input_tokens_seen": 17532920, "step": 30865 }, { "epoch": 541.5840707964602, "grad_norm": 1.845766028907292e-08, "learning_rate": 0.036947422507060075, "loss": 0.0, "num_input_tokens_seen": 17535288, "step": 30870 }, { "epoch": 541.6725663716815, "grad_norm": 1.466239840652861e-08, "learning_rate": 0.0369087167620924, "loss": 0.0, "num_input_tokens_seen": 17538088, "step": 30875 }, { "epoch": 541.7610619469026, "grad_norm": 8.869184142668018e-08, "learning_rate": 0.03687002845722183, "loss": 0.0, "num_input_tokens_seen": 17541112, "step": 30880 }, { "epoch": 541.8495575221239, "grad_norm": 2.774811846961711e-08, "learning_rate": 0.03683135759841451, "loss": 0.0, "num_input_tokens_seen": 17543800, "step": 30885 }, { "epoch": 541.9380530973451, "grad_norm": 3.294646333529272e-08, "learning_rate": 0.03679270419163406, "loss": 0.0, "num_input_tokens_seen": 17546808, "step": 30890 }, { "epoch": 542.0176991150443, "grad_norm": 2.7817140590968847e-08, "learning_rate": 0.03675406824284127, "loss": 0.0, "num_input_tokens_seen": 17549200, "step": 30895 }, { "epoch": 542.1061946902655, "grad_norm": 2.4936021247867757e-08, "learning_rate": 0.03671544975799425, "loss": 0.0, "num_input_tokens_seen": 17551856, "step": 30900 }, { "epoch": 542.1946902654868, "grad_norm": 4.340290615800768e-08, "learning_rate": 0.03667684874304854, "loss": 0.0, "num_input_tokens_seen": 17554480, "step": 30905 }, { "epoch": 542.2831858407079, "grad_norm": 4.062257374926048e-08, "learning_rate": 0.03663826520395683, "loss": 0.0, "num_input_tokens_seen": 17556944, "step": 30910 }, { "epoch": 542.3716814159292, "grad_norm": 3.032761597410172e-08, "learning_rate": 0.03659969914666922, "loss": 0.0, "num_input_tokens_seen": 17559408, "step": 30915 }, { "epoch": 542.4601769911504, "grad_norm": 2.90691239968055e-08, "learning_rate": 0.036561150577133106, "loss": 0.0, "num_input_tokens_seen": 17562704, "step": 30920 }, { "epoch": 542.5486725663717, "grad_norm": 3.3326333692684784e-08, "learning_rate": 0.036522619501293103, "loss": 0.0, "num_input_tokens_seen": 17566128, "step": 30925 }, { "epoch": 542.637168141593, "grad_norm": 3.8087360820782123e-08, "learning_rate": 0.03648410592509122, "loss": 0.0, "num_input_tokens_seen": 17569056, "step": 30930 }, { "epoch": 542.7256637168142, "grad_norm": 3.946432158841162e-08, "learning_rate": 0.03644560985446676, "loss": 0.0, "num_input_tokens_seen": 17571920, "step": 30935 }, { "epoch": 542.8141592920354, "grad_norm": 4.466085457011104e-08, "learning_rate": 0.036407131295356256, "loss": 0.0, "num_input_tokens_seen": 17574656, "step": 30940 }, { "epoch": 542.9026548672566, "grad_norm": 2.172793323040878e-08, "learning_rate": 0.03636867025369362, "loss": 0.0, "num_input_tokens_seen": 17577936, "step": 30945 }, { "epoch": 542.9911504424779, "grad_norm": 2.2984563585737305e-08, "learning_rate": 0.03633022673540999, "loss": 0.0, "num_input_tokens_seen": 17580992, "step": 30950 }, { "epoch": 543.070796460177, "grad_norm": 4.3926217330181316e-08, "learning_rate": 0.03629180074643385, "loss": 0.0, "num_input_tokens_seen": 17583632, "step": 30955 }, { "epoch": 543.1592920353983, "grad_norm": 8.652971672518106e-08, "learning_rate": 0.03625339229269102, "loss": 0.0, "num_input_tokens_seen": 17586720, "step": 30960 }, { "epoch": 543.2477876106195, "grad_norm": 1.3439332313680552e-08, "learning_rate": 0.036215001380104535, "loss": 0.0, "num_input_tokens_seen": 17589520, "step": 30965 }, { "epoch": 543.3362831858407, "grad_norm": 3.953411464863166e-08, "learning_rate": 0.03617662801459471, "loss": 0.0, "num_input_tokens_seen": 17592240, "step": 30970 }, { "epoch": 543.4247787610619, "grad_norm": 2.1940850913892973e-08, "learning_rate": 0.036138272202079276, "loss": 0.0, "num_input_tokens_seen": 17595472, "step": 30975 }, { "epoch": 543.5132743362832, "grad_norm": 3.5493293637500756e-08, "learning_rate": 0.036099933948473106, "loss": 0.0, "num_input_tokens_seen": 17598656, "step": 30980 }, { "epoch": 543.6017699115044, "grad_norm": 5.849438977634236e-08, "learning_rate": 0.03606161325968851, "loss": 0.0, "num_input_tokens_seen": 17601600, "step": 30985 }, { "epoch": 543.6902654867257, "grad_norm": 4.781225371175424e-08, "learning_rate": 0.03602331014163496, "loss": 0.0, "num_input_tokens_seen": 17604112, "step": 30990 }, { "epoch": 543.7787610619469, "grad_norm": 5.9467808455337945e-08, "learning_rate": 0.035985024600219295, "loss": 0.0, "num_input_tokens_seen": 17606848, "step": 30995 }, { "epoch": 543.8672566371681, "grad_norm": 2.6160455135482152e-08, "learning_rate": 0.03594675664134569, "loss": 0.0, "num_input_tokens_seen": 17609616, "step": 31000 }, { "epoch": 543.8672566371681, "eval_loss": 0.8566185832023621, "eval_runtime": 0.9274, "eval_samples_per_second": 26.958, "eval_steps_per_second": 14.018, "num_input_tokens_seen": 17609616, "step": 31000 }, { "epoch": 543.9557522123894, "grad_norm": 1.3883992622254482e-08, "learning_rate": 0.03590850627091545, "loss": 0.0, "num_input_tokens_seen": 17612352, "step": 31005 }, { "epoch": 544.0353982300885, "grad_norm": 3.7168774724705145e-08, "learning_rate": 0.03587027349482731, "loss": 0.0, "num_input_tokens_seen": 17614896, "step": 31010 }, { "epoch": 544.1238938053098, "grad_norm": 3.182884000807462e-08, "learning_rate": 0.035832058318977275, "loss": 0.0, "num_input_tokens_seen": 17617664, "step": 31015 }, { "epoch": 544.212389380531, "grad_norm": 1.7536212482127667e-08, "learning_rate": 0.03579386074925853, "loss": 0.0, "num_input_tokens_seen": 17620752, "step": 31020 }, { "epoch": 544.3008849557522, "grad_norm": 9.159756331200697e-08, "learning_rate": 0.035755680791561696, "loss": 0.0, "num_input_tokens_seen": 17623200, "step": 31025 }, { "epoch": 544.3893805309734, "grad_norm": 2.5169116568690697e-08, "learning_rate": 0.03571751845177454, "loss": 0.0, "num_input_tokens_seen": 17625744, "step": 31030 }, { "epoch": 544.4778761061947, "grad_norm": 4.8361922466710894e-08, "learning_rate": 0.03567937373578225, "loss": 0.0, "num_input_tokens_seen": 17628736, "step": 31035 }, { "epoch": 544.566371681416, "grad_norm": 4.049661583849229e-08, "learning_rate": 0.03564124664946711, "loss": 0.0, "num_input_tokens_seen": 17631328, "step": 31040 }, { "epoch": 544.6548672566372, "grad_norm": 9.25178067490151e-09, "learning_rate": 0.035603137198708924, "loss": 0.0, "num_input_tokens_seen": 17634928, "step": 31045 }, { "epoch": 544.7433628318585, "grad_norm": 2.7220419696050158e-08, "learning_rate": 0.035565045389384514, "loss": 0.0, "num_input_tokens_seen": 17637808, "step": 31050 }, { "epoch": 544.8318584070796, "grad_norm": 3.681022775481324e-08, "learning_rate": 0.03552697122736823, "loss": 0.0, "num_input_tokens_seen": 17640384, "step": 31055 }, { "epoch": 544.9203539823009, "grad_norm": 5.8117507251154166e-08, "learning_rate": 0.03548891471853153, "loss": 0.0, "num_input_tokens_seen": 17643376, "step": 31060 }, { "epoch": 545.0, "grad_norm": 5.037058414814055e-08, "learning_rate": 0.03545087586874322, "loss": 0.0, "num_input_tokens_seen": 17646072, "step": 31065 }, { "epoch": 545.0884955752213, "grad_norm": 3.628938571864637e-08, "learning_rate": 0.03541285468386935, "loss": 0.0, "num_input_tokens_seen": 17648888, "step": 31070 }, { "epoch": 545.1769911504425, "grad_norm": 3.014700666881254e-08, "learning_rate": 0.03537485116977327, "loss": 0.0, "num_input_tokens_seen": 17651992, "step": 31075 }, { "epoch": 545.2654867256637, "grad_norm": 1.078429257006519e-08, "learning_rate": 0.03533686533231565, "loss": 0.0, "num_input_tokens_seen": 17654792, "step": 31080 }, { "epoch": 545.3539823008849, "grad_norm": 1.9376816595695345e-08, "learning_rate": 0.0352988971773543, "loss": 0.0, "num_input_tokens_seen": 17657592, "step": 31085 }, { "epoch": 545.4424778761062, "grad_norm": 2.490453177017571e-08, "learning_rate": 0.03526094671074443, "loss": 0.0, "num_input_tokens_seen": 17660344, "step": 31090 }, { "epoch": 545.5309734513274, "grad_norm": 3.357365940814816e-08, "learning_rate": 0.03522301393833852, "loss": 0.0, "num_input_tokens_seen": 17663032, "step": 31095 }, { "epoch": 545.6194690265487, "grad_norm": 3.5713895840672194e-08, "learning_rate": 0.035185098865986204, "loss": 0.0, "num_input_tokens_seen": 17666024, "step": 31100 }, { "epoch": 545.70796460177, "grad_norm": 3.3485139994127167e-08, "learning_rate": 0.03514720149953453, "loss": 0.0, "num_input_tokens_seen": 17669080, "step": 31105 }, { "epoch": 545.7964601769911, "grad_norm": 5.826250060181337e-08, "learning_rate": 0.03510932184482773, "loss": 0.0, "num_input_tokens_seen": 17672040, "step": 31110 }, { "epoch": 545.8849557522124, "grad_norm": 5.163633431948256e-08, "learning_rate": 0.03507145990770724, "loss": 0.0, "num_input_tokens_seen": 17675288, "step": 31115 }, { "epoch": 545.9734513274336, "grad_norm": 9.21094045480686e-09, "learning_rate": 0.035033615694011984, "loss": 0.0, "num_input_tokens_seen": 17678200, "step": 31120 }, { "epoch": 546.0530973451328, "grad_norm": 1.5970718081348423e-08, "learning_rate": 0.03499578920957788, "loss": 0.0, "num_input_tokens_seen": 17680416, "step": 31125 }, { "epoch": 546.141592920354, "grad_norm": 3.033219897474737e-08, "learning_rate": 0.034957980460238375, "loss": 0.0, "num_input_tokens_seen": 17683328, "step": 31130 }, { "epoch": 546.2300884955753, "grad_norm": 3.6943855974413964e-08, "learning_rate": 0.03492018945182393, "loss": 0.0, "num_input_tokens_seen": 17686576, "step": 31135 }, { "epoch": 546.3185840707964, "grad_norm": 3.6947497505934734e-08, "learning_rate": 0.03488241619016247, "loss": 0.0, "num_input_tokens_seen": 17689184, "step": 31140 }, { "epoch": 546.4070796460177, "grad_norm": 4.0759790209676794e-08, "learning_rate": 0.03484466068107913, "loss": 0.0, "num_input_tokens_seen": 17691776, "step": 31145 }, { "epoch": 546.4955752212389, "grad_norm": 2.872407911524988e-08, "learning_rate": 0.034806922930396195, "loss": 0.0, "num_input_tokens_seen": 17694752, "step": 31150 }, { "epoch": 546.5840707964602, "grad_norm": 2.5599362629691313e-08, "learning_rate": 0.03476920294393337, "loss": 0.0, "num_input_tokens_seen": 17697488, "step": 31155 }, { "epoch": 546.6725663716815, "grad_norm": 2.376644481216772e-08, "learning_rate": 0.03473150072750755, "loss": 0.0, "num_input_tokens_seen": 17700560, "step": 31160 }, { "epoch": 546.7610619469026, "grad_norm": 1.8179228788994806e-08, "learning_rate": 0.03469381628693284, "loss": 0.0, "num_input_tokens_seen": 17703584, "step": 31165 }, { "epoch": 546.8495575221239, "grad_norm": 2.8041927890853913e-08, "learning_rate": 0.03465614962802072, "loss": 0.0, "num_input_tokens_seen": 17706320, "step": 31170 }, { "epoch": 546.9380530973451, "grad_norm": 9.38844024744867e-09, "learning_rate": 0.0346185007565798, "loss": 0.0, "num_input_tokens_seen": 17709136, "step": 31175 }, { "epoch": 547.0176991150443, "grad_norm": 5.596477237190811e-08, "learning_rate": 0.03458086967841609, "loss": 0.0, "num_input_tokens_seen": 17712368, "step": 31180 }, { "epoch": 547.1061946902655, "grad_norm": 1.981118202820653e-08, "learning_rate": 0.03454325639933266, "loss": 0.0, "num_input_tokens_seen": 17715392, "step": 31185 }, { "epoch": 547.1946902654868, "grad_norm": 3.531403081069584e-08, "learning_rate": 0.03450566092513007, "loss": 0.0, "num_input_tokens_seen": 17718640, "step": 31190 }, { "epoch": 547.2831858407079, "grad_norm": 1.440098795768563e-08, "learning_rate": 0.034468083261605914, "loss": 0.0, "num_input_tokens_seen": 17721200, "step": 31195 }, { "epoch": 547.3716814159292, "grad_norm": 5.371366285089607e-08, "learning_rate": 0.03443052341455522, "loss": 0.0, "num_input_tokens_seen": 17723600, "step": 31200 }, { "epoch": 547.3716814159292, "eval_loss": 0.8399250507354736, "eval_runtime": 0.9391, "eval_samples_per_second": 26.622, "eval_steps_per_second": 13.844, "num_input_tokens_seen": 17723600, "step": 31200 }, { "epoch": 547.4601769911504, "grad_norm": 4.252002483440265e-08, "learning_rate": 0.0343929813897701, "loss": 0.0, "num_input_tokens_seen": 17726800, "step": 31205 }, { "epoch": 547.5486725663717, "grad_norm": 2.8403734475546116e-08, "learning_rate": 0.034355457193040125, "loss": 0.0, "num_input_tokens_seen": 17729728, "step": 31210 }, { "epoch": 547.637168141593, "grad_norm": 5.0726054467986614e-08, "learning_rate": 0.03431795083015186, "loss": 0.0, "num_input_tokens_seen": 17732624, "step": 31215 }, { "epoch": 547.7256637168142, "grad_norm": 1.86413533498353e-08, "learning_rate": 0.03428046230688936, "loss": 0.0, "num_input_tokens_seen": 17735488, "step": 31220 }, { "epoch": 547.8141592920354, "grad_norm": 5.762649024632083e-08, "learning_rate": 0.034242991629033805, "loss": 0.0, "num_input_tokens_seen": 17737824, "step": 31225 }, { "epoch": 547.9026548672566, "grad_norm": 3.433035189459588e-08, "learning_rate": 0.03420553880236362, "loss": 0.0, "num_input_tokens_seen": 17740432, "step": 31230 }, { "epoch": 547.9911504424779, "grad_norm": 3.1796215438362196e-08, "learning_rate": 0.03416810383265449, "loss": 0.0, "num_input_tokens_seen": 17743584, "step": 31235 }, { "epoch": 548.070796460177, "grad_norm": 2.5893761801398796e-08, "learning_rate": 0.03413068672567944, "loss": 0.0, "num_input_tokens_seen": 17746264, "step": 31240 }, { "epoch": 548.1592920353983, "grad_norm": 3.165169459862227e-08, "learning_rate": 0.034093287487208565, "loss": 0.0, "num_input_tokens_seen": 17748776, "step": 31245 }, { "epoch": 548.2477876106195, "grad_norm": 1.7191043255593286e-08, "learning_rate": 0.03405590612300937, "loss": 0.0, "num_input_tokens_seen": 17751256, "step": 31250 }, { "epoch": 548.3362831858407, "grad_norm": 3.7597761348706626e-08, "learning_rate": 0.03401854263884646, "loss": 0.0, "num_input_tokens_seen": 17754136, "step": 31255 }, { "epoch": 548.4247787610619, "grad_norm": 2.8538433838320998e-08, "learning_rate": 0.033981197040481824, "loss": 0.0, "num_input_tokens_seen": 17757288, "step": 31260 }, { "epoch": 548.5132743362832, "grad_norm": 3.107983204131415e-08, "learning_rate": 0.03394386933367459, "loss": 0.0, "num_input_tokens_seen": 17760312, "step": 31265 }, { "epoch": 548.6017699115044, "grad_norm": 4.832480016148111e-08, "learning_rate": 0.033906559524181104, "loss": 0.0, "num_input_tokens_seen": 17762920, "step": 31270 }, { "epoch": 548.6902654867257, "grad_norm": 2.2438721103412718e-08, "learning_rate": 0.033869267617755085, "loss": 0.0, "num_input_tokens_seen": 17765608, "step": 31275 }, { "epoch": 548.7787610619469, "grad_norm": 3.2907532698800424e-08, "learning_rate": 0.0338319936201474, "loss": 0.0, "num_input_tokens_seen": 17768024, "step": 31280 }, { "epoch": 548.8672566371681, "grad_norm": 8.947360186084552e-09, "learning_rate": 0.033794737537106136, "loss": 0.0, "num_input_tokens_seen": 17771368, "step": 31285 }, { "epoch": 548.9557522123894, "grad_norm": 2.2511027708560505e-08, "learning_rate": 0.03375749937437671, "loss": 0.0, "num_input_tokens_seen": 17774648, "step": 31290 }, { "epoch": 549.0353982300885, "grad_norm": 2.7076419328864176e-08, "learning_rate": 0.033720279137701634, "loss": 0.0, "num_input_tokens_seen": 17776816, "step": 31295 }, { "epoch": 549.1238938053098, "grad_norm": 2.6090201998840712e-08, "learning_rate": 0.03368307683282078, "loss": 0.0, "num_input_tokens_seen": 17779472, "step": 31300 }, { "epoch": 549.212389380531, "grad_norm": 1.8019884251430085e-08, "learning_rate": 0.033645892465471235, "loss": 0.0, "num_input_tokens_seen": 17782336, "step": 31305 }, { "epoch": 549.3008849557522, "grad_norm": 1.9626821057272537e-08, "learning_rate": 0.03360872604138724, "loss": 0.0, "num_input_tokens_seen": 17784976, "step": 31310 }, { "epoch": 549.3893805309734, "grad_norm": 4.0561754843793096e-08, "learning_rate": 0.03357157756630034, "loss": 0.0, "num_input_tokens_seen": 17787536, "step": 31315 }, { "epoch": 549.4778761061947, "grad_norm": 1.499696899998071e-08, "learning_rate": 0.033534447045939365, "loss": 0.0, "num_input_tokens_seen": 17790512, "step": 31320 }, { "epoch": 549.566371681416, "grad_norm": 1.836124852161447e-08, "learning_rate": 0.03349733448603026, "loss": 0.0, "num_input_tokens_seen": 17793200, "step": 31325 }, { "epoch": 549.6548672566372, "grad_norm": 4.2785885057128326e-08, "learning_rate": 0.03346023989229619, "loss": 0.0, "num_input_tokens_seen": 17796080, "step": 31330 }, { "epoch": 549.7433628318585, "grad_norm": 4.649458773542392e-08, "learning_rate": 0.03342316327045769, "loss": 0.0, "num_input_tokens_seen": 17798960, "step": 31335 }, { "epoch": 549.8318584070796, "grad_norm": 3.0656714500310045e-08, "learning_rate": 0.033386104626232385, "loss": 0.0, "num_input_tokens_seen": 17801680, "step": 31340 }, { "epoch": 549.9203539823009, "grad_norm": 3.9996077560999765e-08, "learning_rate": 0.03334906396533525, "loss": 0.0, "num_input_tokens_seen": 17805264, "step": 31345 }, { "epoch": 550.0, "grad_norm": 2.4193161252128448e-08, "learning_rate": 0.033312041293478326, "loss": 0.0, "num_input_tokens_seen": 17807696, "step": 31350 }, { "epoch": 550.0884955752213, "grad_norm": 5.8012279424701774e-08, "learning_rate": 0.03327503661637103, "loss": 0.0, "num_input_tokens_seen": 17810464, "step": 31355 }, { "epoch": 550.1769911504425, "grad_norm": 2.2500366014810425e-08, "learning_rate": 0.03323804993971998, "loss": 0.0, "num_input_tokens_seen": 17813504, "step": 31360 }, { "epoch": 550.2654867256637, "grad_norm": 1.3214218164137037e-08, "learning_rate": 0.033201081269228924, "loss": 0.0, "num_input_tokens_seen": 17816384, "step": 31365 }, { "epoch": 550.3539823008849, "grad_norm": 5.556846360832424e-08, "learning_rate": 0.03316413061059895, "loss": 0.0, "num_input_tokens_seen": 17819248, "step": 31370 }, { "epoch": 550.4424778761062, "grad_norm": 3.774916024212871e-08, "learning_rate": 0.03312719796952827, "loss": 0.0, "num_input_tokens_seen": 17822096, "step": 31375 }, { "epoch": 550.5309734513274, "grad_norm": 5.7359073935003835e-08, "learning_rate": 0.03309028335171236, "loss": 0.0, "num_input_tokens_seen": 17825216, "step": 31380 }, { "epoch": 550.6194690265487, "grad_norm": 2.7910477484738294e-08, "learning_rate": 0.03305338676284398, "loss": 0.0, "num_input_tokens_seen": 17827856, "step": 31385 }, { "epoch": 550.70796460177, "grad_norm": 7.590455908257354e-08, "learning_rate": 0.03301650820861296, "loss": 0.0, "num_input_tokens_seen": 17830752, "step": 31390 }, { "epoch": 550.7964601769911, "grad_norm": 2.4801002140861783e-08, "learning_rate": 0.03297964769470652, "loss": 0.0, "num_input_tokens_seen": 17833584, "step": 31395 }, { "epoch": 550.8849557522124, "grad_norm": 4.9061831930430344e-08, "learning_rate": 0.032942805226808945, "loss": 0.0, "num_input_tokens_seen": 17836576, "step": 31400 }, { "epoch": 550.8849557522124, "eval_loss": 0.8476734757423401, "eval_runtime": 0.9389, "eval_samples_per_second": 26.627, "eval_steps_per_second": 13.846, "num_input_tokens_seen": 17836576, "step": 31400 }, { "epoch": 550.9734513274336, "grad_norm": 2.5127473435304637e-08, "learning_rate": 0.03290598081060187, "loss": 0.0, "num_input_tokens_seen": 17839360, "step": 31405 }, { "epoch": 551.0530973451328, "grad_norm": 7.082100950128734e-08, "learning_rate": 0.03286917445176407, "loss": 0.0, "num_input_tokens_seen": 17841592, "step": 31410 }, { "epoch": 551.141592920354, "grad_norm": 2.289892364615298e-08, "learning_rate": 0.032832386155971456, "loss": 0.0, "num_input_tokens_seen": 17844552, "step": 31415 }, { "epoch": 551.2300884955753, "grad_norm": 1.6438567840282303e-08, "learning_rate": 0.032795615928897334, "loss": 0.0, "num_input_tokens_seen": 17847512, "step": 31420 }, { "epoch": 551.3185840707964, "grad_norm": 1.1411323441734567e-07, "learning_rate": 0.03275886377621215, "loss": 0.0, "num_input_tokens_seen": 17850616, "step": 31425 }, { "epoch": 551.4070796460177, "grad_norm": 3.739004128533452e-08, "learning_rate": 0.03272212970358348, "loss": 0.0, "num_input_tokens_seen": 17853432, "step": 31430 }, { "epoch": 551.4955752212389, "grad_norm": 3.041462193209554e-08, "learning_rate": 0.032685413716676215, "loss": 0.0, "num_input_tokens_seen": 17856856, "step": 31435 }, { "epoch": 551.5840707964602, "grad_norm": 3.998993136633544e-08, "learning_rate": 0.032648715821152474, "loss": 0.0, "num_input_tokens_seen": 17860120, "step": 31440 }, { "epoch": 551.6725663716815, "grad_norm": 1.9666817507868473e-08, "learning_rate": 0.03261203602267143, "loss": 0.0, "num_input_tokens_seen": 17862840, "step": 31445 }, { "epoch": 551.7610619469026, "grad_norm": 1.1997927096274452e-08, "learning_rate": 0.03257537432688966, "loss": 0.0, "num_input_tokens_seen": 17865560, "step": 31450 }, { "epoch": 551.8495575221239, "grad_norm": 6.111061878755208e-08, "learning_rate": 0.03253873073946077, "loss": 0.0, "num_input_tokens_seen": 17868056, "step": 31455 }, { "epoch": 551.9380530973451, "grad_norm": 3.2120215820441445e-08, "learning_rate": 0.03250210526603572, "loss": 0.0, "num_input_tokens_seen": 17870984, "step": 31460 }, { "epoch": 552.0176991150443, "grad_norm": 3.304077367261016e-08, "learning_rate": 0.03246549791226266, "loss": 0.0, "num_input_tokens_seen": 17873320, "step": 31465 }, { "epoch": 552.1061946902655, "grad_norm": 3.412790405832311e-08, "learning_rate": 0.03242890868378679, "loss": 0.0, "num_input_tokens_seen": 17876056, "step": 31470 }, { "epoch": 552.1946902654868, "grad_norm": 3.385157398838601e-08, "learning_rate": 0.03239233758625074, "loss": 0.0, "num_input_tokens_seen": 17879000, "step": 31475 }, { "epoch": 552.2831858407079, "grad_norm": 6.846718036968014e-08, "learning_rate": 0.032355784625294204, "loss": 0.0, "num_input_tokens_seen": 17881992, "step": 31480 }, { "epoch": 552.3716814159292, "grad_norm": 8.533422146683733e-08, "learning_rate": 0.03231924980655402, "loss": 0.0, "num_input_tokens_seen": 17884488, "step": 31485 }, { "epoch": 552.4601769911504, "grad_norm": 4.2557292800893265e-08, "learning_rate": 0.032282733135664446, "loss": 0.0, "num_input_tokens_seen": 17887144, "step": 31490 }, { "epoch": 552.5486725663717, "grad_norm": 2.732546633410493e-08, "learning_rate": 0.03224623461825669, "loss": 0.0, "num_input_tokens_seen": 17889704, "step": 31495 }, { "epoch": 552.637168141593, "grad_norm": 3.731884135049768e-08, "learning_rate": 0.03220975425995937, "loss": 0.0, "num_input_tokens_seen": 17892952, "step": 31500 }, { "epoch": 552.7256637168142, "grad_norm": 1.95855847096027e-08, "learning_rate": 0.032173292066398206, "loss": 0.0, "num_input_tokens_seen": 17895880, "step": 31505 }, { "epoch": 552.8141592920354, "grad_norm": 3.9044206090466105e-08, "learning_rate": 0.03213684804319606, "loss": 0.0, "num_input_tokens_seen": 17898680, "step": 31510 }, { "epoch": 552.9026548672566, "grad_norm": 1.7305790578348024e-08, "learning_rate": 0.03210042219597312, "loss": 0.0, "num_input_tokens_seen": 17902360, "step": 31515 }, { "epoch": 552.9911504424779, "grad_norm": 2.646485164348178e-08, "learning_rate": 0.03206401453034675, "loss": 0.0, "num_input_tokens_seen": 17904840, "step": 31520 }, { "epoch": 553.070796460177, "grad_norm": 2.8611649938170558e-08, "learning_rate": 0.03202762505193136, "loss": 0.0, "num_input_tokens_seen": 17907416, "step": 31525 }, { "epoch": 553.1592920353983, "grad_norm": 2.5846130569107117e-08, "learning_rate": 0.031991253766338754, "loss": 0.0, "num_input_tokens_seen": 17910024, "step": 31530 }, { "epoch": 553.2477876106195, "grad_norm": 2.6244679318665476e-08, "learning_rate": 0.03195490067917778, "loss": 0.0, "num_input_tokens_seen": 17913384, "step": 31535 }, { "epoch": 553.3362831858407, "grad_norm": 2.0899900476933908e-08, "learning_rate": 0.03191856579605461, "loss": 0.0, "num_input_tokens_seen": 17916520, "step": 31540 }, { "epoch": 553.4247787610619, "grad_norm": 5.597342322971599e-08, "learning_rate": 0.031882249122572454, "loss": 0.0, "num_input_tokens_seen": 17919768, "step": 31545 }, { "epoch": 553.5132743362832, "grad_norm": 5.620238141545997e-08, "learning_rate": 0.03184595066433188, "loss": 0.0, "num_input_tokens_seen": 17922728, "step": 31550 }, { "epoch": 553.6017699115044, "grad_norm": 2.35721504537878e-08, "learning_rate": 0.03180967042693049, "loss": 0.0, "num_input_tokens_seen": 17925240, "step": 31555 }, { "epoch": 553.6902654867257, "grad_norm": 3.47364057518007e-08, "learning_rate": 0.03177340841596323, "loss": 0.0, "num_input_tokens_seen": 17928088, "step": 31560 }, { "epoch": 553.7787610619469, "grad_norm": 2.63867789840333e-08, "learning_rate": 0.03173716463702209, "loss": 0.0, "num_input_tokens_seen": 17930728, "step": 31565 }, { "epoch": 553.8672566371681, "grad_norm": 4.45282850591866e-08, "learning_rate": 0.03170093909569638, "loss": 0.0, "num_input_tokens_seen": 17933432, "step": 31570 }, { "epoch": 553.9557522123894, "grad_norm": 1.686256112520823e-08, "learning_rate": 0.03166473179757246, "loss": 0.0, "num_input_tokens_seen": 17936280, "step": 31575 }, { "epoch": 554.0353982300885, "grad_norm": 2.6738042890883662e-08, "learning_rate": 0.031628542748234005, "loss": 0.0, "num_input_tokens_seen": 17938712, "step": 31580 }, { "epoch": 554.1238938053098, "grad_norm": 5.158285887318925e-08, "learning_rate": 0.03159237195326184, "loss": 0.0, "num_input_tokens_seen": 17941640, "step": 31585 }, { "epoch": 554.212389380531, "grad_norm": 2.9914776433770385e-08, "learning_rate": 0.031556219418233875, "loss": 0.0, "num_input_tokens_seen": 17944392, "step": 31590 }, { "epoch": 554.3008849557522, "grad_norm": 2.835413859259006e-08, "learning_rate": 0.03152008514872533, "loss": 0.0, "num_input_tokens_seen": 17947416, "step": 31595 }, { "epoch": 554.3893805309734, "grad_norm": 2.5087224742037506e-08, "learning_rate": 0.03148396915030862, "loss": 0.0, "num_input_tokens_seen": 17949928, "step": 31600 }, { "epoch": 554.3893805309734, "eval_loss": 0.8595802783966064, "eval_runtime": 0.9324, "eval_samples_per_second": 26.812, "eval_steps_per_second": 13.942, "num_input_tokens_seen": 17949928, "step": 31600 }, { "epoch": 554.4778761061947, "grad_norm": 6.19829449988174e-08, "learning_rate": 0.03144787142855318, "loss": 0.0, "num_input_tokens_seen": 17952648, "step": 31605 }, { "epoch": 554.566371681416, "grad_norm": 6.947912112309496e-08, "learning_rate": 0.031411791989025835, "loss": 0.0, "num_input_tokens_seen": 17955656, "step": 31610 }, { "epoch": 554.6548672566372, "grad_norm": 6.083919856791908e-08, "learning_rate": 0.031375730837290394, "loss": 0.0, "num_input_tokens_seen": 17958504, "step": 31615 }, { "epoch": 554.7433628318585, "grad_norm": 5.6194210173998727e-08, "learning_rate": 0.031339687978908015, "loss": 0.0, "num_input_tokens_seen": 17961272, "step": 31620 }, { "epoch": 554.8318584070796, "grad_norm": 3.8240870026129414e-08, "learning_rate": 0.03130366341943694, "loss": 0.0, "num_input_tokens_seen": 17964104, "step": 31625 }, { "epoch": 554.9203539823009, "grad_norm": 3.0447075971551385e-08, "learning_rate": 0.031267657164432555, "loss": 0.0, "num_input_tokens_seen": 17967416, "step": 31630 }, { "epoch": 555.0, "grad_norm": 2.56572736390126e-08, "learning_rate": 0.03123166921944752, "loss": 0.0, "num_input_tokens_seen": 17969928, "step": 31635 }, { "epoch": 555.0884955752213, "grad_norm": 1.8560474046580566e-08, "learning_rate": 0.031195699590031666, "loss": 0.0, "num_input_tokens_seen": 17973176, "step": 31640 }, { "epoch": 555.1769911504425, "grad_norm": 4.826016564152269e-08, "learning_rate": 0.031159748281731885, "loss": 0.0, "num_input_tokens_seen": 17976056, "step": 31645 }, { "epoch": 555.2654867256637, "grad_norm": 1.0812028605755586e-08, "learning_rate": 0.031123815300092394, "loss": 0.0, "num_input_tokens_seen": 17979336, "step": 31650 }, { "epoch": 555.3539823008849, "grad_norm": 1.869250354502583e-08, "learning_rate": 0.031087900650654424, "loss": 0.0, "num_input_tokens_seen": 17982168, "step": 31655 }, { "epoch": 555.4424778761062, "grad_norm": 2.573244906045602e-08, "learning_rate": 0.031052004338956534, "loss": 0.0, "num_input_tokens_seen": 17985144, "step": 31660 }, { "epoch": 555.5309734513274, "grad_norm": 2.3535612569958175e-08, "learning_rate": 0.031016126370534407, "loss": 0.0, "num_input_tokens_seen": 17988200, "step": 31665 }, { "epoch": 555.6194690265487, "grad_norm": 2.6288189403089746e-08, "learning_rate": 0.030980266750920804, "loss": 0.0, "num_input_tokens_seen": 17990808, "step": 31670 }, { "epoch": 555.70796460177, "grad_norm": 2.3900422974065805e-08, "learning_rate": 0.030944425485645747, "loss": 0.0, "num_input_tokens_seen": 17993320, "step": 31675 }, { "epoch": 555.7964601769911, "grad_norm": 2.417526268061465e-08, "learning_rate": 0.03090860258023647, "loss": 0.0, "num_input_tokens_seen": 17995992, "step": 31680 }, { "epoch": 555.8849557522124, "grad_norm": 2.234373219778263e-08, "learning_rate": 0.030872798040217236, "loss": 0.0, "num_input_tokens_seen": 17998856, "step": 31685 }, { "epoch": 555.9734513274336, "grad_norm": 6.266418495215476e-08, "learning_rate": 0.03083701187110964, "loss": 0.0, "num_input_tokens_seen": 18001784, "step": 31690 }, { "epoch": 556.0530973451328, "grad_norm": 3.0253762162146813e-08, "learning_rate": 0.030801244078432294, "loss": 0.0, "num_input_tokens_seen": 18003784, "step": 31695 }, { "epoch": 556.141592920354, "grad_norm": 4.093229932777831e-08, "learning_rate": 0.030765494667701024, "loss": 0.0, "num_input_tokens_seen": 18007000, "step": 31700 }, { "epoch": 556.2300884955753, "grad_norm": 2.7375516964411872e-08, "learning_rate": 0.030729763644428913, "loss": 0.0, "num_input_tokens_seen": 18009592, "step": 31705 }, { "epoch": 556.3185840707964, "grad_norm": 8.043864596629646e-08, "learning_rate": 0.030694051014126048, "loss": 0.0, "num_input_tokens_seen": 18012728, "step": 31710 }, { "epoch": 556.4070796460177, "grad_norm": 1.828237294887458e-08, "learning_rate": 0.030658356782299792, "loss": 0.0, "num_input_tokens_seen": 18015144, "step": 31715 }, { "epoch": 556.4955752212389, "grad_norm": 3.643975787781528e-08, "learning_rate": 0.030622680954454726, "loss": 0.0, "num_input_tokens_seen": 18018264, "step": 31720 }, { "epoch": 556.5840707964602, "grad_norm": 2.6426654642364156e-08, "learning_rate": 0.030587023536092398, "loss": 0.0, "num_input_tokens_seen": 18020776, "step": 31725 }, { "epoch": 556.6725663716815, "grad_norm": 3.458106689890883e-08, "learning_rate": 0.03055138453271171, "loss": 0.0, "num_input_tokens_seen": 18024152, "step": 31730 }, { "epoch": 556.7610619469026, "grad_norm": 3.006467963473369e-08, "learning_rate": 0.03051576394980858, "loss": 0.0, "num_input_tokens_seen": 18026984, "step": 31735 }, { "epoch": 556.8495575221239, "grad_norm": 2.9623114627952418e-08, "learning_rate": 0.030480161792876187, "loss": 0.0, "num_input_tokens_seen": 18029896, "step": 31740 }, { "epoch": 556.9380530973451, "grad_norm": 2.5514179213814714e-08, "learning_rate": 0.030444578067404846, "loss": 0.0, "num_input_tokens_seen": 18033000, "step": 31745 }, { "epoch": 557.0176991150443, "grad_norm": 2.4994012193246817e-08, "learning_rate": 0.030409012778881975, "loss": 0.0, "num_input_tokens_seen": 18035296, "step": 31750 }, { "epoch": 557.1061946902655, "grad_norm": 6.215282866151028e-08, "learning_rate": 0.030373465932792235, "loss": 0.0, "num_input_tokens_seen": 18038320, "step": 31755 }, { "epoch": 557.1946902654868, "grad_norm": 2.907481899683262e-08, "learning_rate": 0.030337937534617342, "loss": 0.0, "num_input_tokens_seen": 18041040, "step": 31760 }, { "epoch": 557.2831858407079, "grad_norm": 1.764807144866154e-08, "learning_rate": 0.030302427589836277, "loss": 0.0, "num_input_tokens_seen": 18044080, "step": 31765 }, { "epoch": 557.3716814159292, "grad_norm": 3.404825932307176e-08, "learning_rate": 0.030266936103925095, "loss": 0.0, "num_input_tokens_seen": 18046656, "step": 31770 }, { "epoch": 557.4601769911504, "grad_norm": 2.076536631534509e-08, "learning_rate": 0.030231463082356982, "loss": 0.0, "num_input_tokens_seen": 18049968, "step": 31775 }, { "epoch": 557.5486725663717, "grad_norm": 5.153796678314393e-08, "learning_rate": 0.030196008530602367, "loss": 0.0, "num_input_tokens_seen": 18052640, "step": 31780 }, { "epoch": 557.637168141593, "grad_norm": 2.960424083653379e-08, "learning_rate": 0.030160572454128842, "loss": 0.0, "num_input_tokens_seen": 18055680, "step": 31785 }, { "epoch": 557.7256637168142, "grad_norm": 2.4410988785916743e-08, "learning_rate": 0.03012515485840098, "loss": 0.0, "num_input_tokens_seen": 18058944, "step": 31790 }, { "epoch": 557.8141592920354, "grad_norm": 4.005080711522169e-08, "learning_rate": 0.030089755748880734, "loss": 0.0, "num_input_tokens_seen": 18062080, "step": 31795 }, { "epoch": 557.9026548672566, "grad_norm": 2.4467023962415624e-08, "learning_rate": 0.030054375131027003, "loss": 0.0, "num_input_tokens_seen": 18064576, "step": 31800 }, { "epoch": 557.9026548672566, "eval_loss": 0.8298357725143433, "eval_runtime": 0.942, "eval_samples_per_second": 26.539, "eval_steps_per_second": 13.8, "num_input_tokens_seen": 18064576, "step": 31800 }, { "epoch": 557.9911504424779, "grad_norm": 2.7345832265268655e-08, "learning_rate": 0.030019013010295942, "loss": 0.0, "num_input_tokens_seen": 18067184, "step": 31805 }, { "epoch": 558.070796460177, "grad_norm": 6.256162521367514e-08, "learning_rate": 0.029983669392140897, "loss": 0.0, "num_input_tokens_seen": 18069656, "step": 31810 }, { "epoch": 558.1592920353983, "grad_norm": 3.381957824899473e-08, "learning_rate": 0.029948344282012217, "loss": 0.0, "num_input_tokens_seen": 18072440, "step": 31815 }, { "epoch": 558.2477876106195, "grad_norm": 3.403372517141179e-08, "learning_rate": 0.029913037685357507, "loss": 0.0, "num_input_tokens_seen": 18075960, "step": 31820 }, { "epoch": 558.3362831858407, "grad_norm": 3.2477558420396235e-08, "learning_rate": 0.029877749607621528, "loss": 0.0, "num_input_tokens_seen": 18079208, "step": 31825 }, { "epoch": 558.4247787610619, "grad_norm": 1.88536581902099e-08, "learning_rate": 0.029842480054246077, "loss": 0.0, "num_input_tokens_seen": 18082024, "step": 31830 }, { "epoch": 558.5132743362832, "grad_norm": 2.714562263861353e-08, "learning_rate": 0.02980722903067022, "loss": 0.0, "num_input_tokens_seen": 18084456, "step": 31835 }, { "epoch": 558.6017699115044, "grad_norm": 5.2803123651301576e-08, "learning_rate": 0.029771996542330113, "loss": 0.0, "num_input_tokens_seen": 18087352, "step": 31840 }, { "epoch": 558.6902654867257, "grad_norm": 3.9160958920092526e-08, "learning_rate": 0.029736782594658954, "loss": 0.0, "num_input_tokens_seen": 18090072, "step": 31845 }, { "epoch": 558.7787610619469, "grad_norm": 2.982779179205863e-08, "learning_rate": 0.029701587193087284, "loss": 0.0, "num_input_tokens_seen": 18093160, "step": 31850 }, { "epoch": 558.8672566371681, "grad_norm": 2.153304734520134e-08, "learning_rate": 0.0296664103430426, "loss": 0.0, "num_input_tokens_seen": 18095976, "step": 31855 }, { "epoch": 558.9557522123894, "grad_norm": 1.8007920488116724e-08, "learning_rate": 0.029631252049949652, "loss": 0.0, "num_input_tokens_seen": 18098824, "step": 31860 }, { "epoch": 559.0353982300885, "grad_norm": 3.839394935312157e-08, "learning_rate": 0.02959611231923031, "loss": 0.0, "num_input_tokens_seen": 18101160, "step": 31865 }, { "epoch": 559.1238938053098, "grad_norm": 1.2693112338979518e-08, "learning_rate": 0.029560991156303507, "loss": 0.0, "num_input_tokens_seen": 18103528, "step": 31870 }, { "epoch": 559.212389380531, "grad_norm": 2.7216882969582912e-08, "learning_rate": 0.02952588856658544, "loss": 0.0, "num_input_tokens_seen": 18106248, "step": 31875 }, { "epoch": 559.3008849557522, "grad_norm": 2.5439712558750216e-08, "learning_rate": 0.029490804555489296, "loss": 0.0, "num_input_tokens_seen": 18109000, "step": 31880 }, { "epoch": 559.3893805309734, "grad_norm": 4.557321986453644e-08, "learning_rate": 0.029455739128425484, "loss": 0.0, "num_input_tokens_seen": 18112120, "step": 31885 }, { "epoch": 559.4778761061947, "grad_norm": 3.218249133851714e-08, "learning_rate": 0.029420692290801607, "loss": 0.0, "num_input_tokens_seen": 18115448, "step": 31890 }, { "epoch": 559.566371681416, "grad_norm": 2.5356014177191355e-08, "learning_rate": 0.02938566404802223, "loss": 0.0, "num_input_tokens_seen": 18118536, "step": 31895 }, { "epoch": 559.6548672566372, "grad_norm": 2.936116061391658e-08, "learning_rate": 0.029350654405489195, "loss": 0.0, "num_input_tokens_seen": 18121528, "step": 31900 }, { "epoch": 559.7433628318585, "grad_norm": 2.6928661966962864e-08, "learning_rate": 0.02931566336860145, "loss": 0.0, "num_input_tokens_seen": 18124504, "step": 31905 }, { "epoch": 559.8318584070796, "grad_norm": 1.3868465487121284e-08, "learning_rate": 0.02928069094275505, "loss": 0.0, "num_input_tokens_seen": 18127512, "step": 31910 }, { "epoch": 559.9203539823009, "grad_norm": 2.4330798709115697e-08, "learning_rate": 0.02924573713334314, "loss": 0.0, "num_input_tokens_seen": 18130072, "step": 31915 }, { "epoch": 560.0, "grad_norm": 3.2770607560905773e-08, "learning_rate": 0.02921080194575603, "loss": 0.0, "num_input_tokens_seen": 18132392, "step": 31920 }, { "epoch": 560.0884955752213, "grad_norm": 2.9676643364950905e-08, "learning_rate": 0.029175885385381177, "loss": 0.0, "num_input_tokens_seen": 18134904, "step": 31925 }, { "epoch": 560.1769911504425, "grad_norm": 3.825673289270526e-08, "learning_rate": 0.029140987457603223, "loss": 0.0, "num_input_tokens_seen": 18138536, "step": 31930 }, { "epoch": 560.2654867256637, "grad_norm": 2.9683103974775804e-08, "learning_rate": 0.029106108167803763, "loss": 0.0, "num_input_tokens_seen": 18141096, "step": 31935 }, { "epoch": 560.3539823008849, "grad_norm": 4.779053597303573e-08, "learning_rate": 0.029071247521361674, "loss": 0.0, "num_input_tokens_seen": 18143880, "step": 31940 }, { "epoch": 560.4424778761062, "grad_norm": 4.9870823914943685e-08, "learning_rate": 0.029036405523652945, "loss": 0.0, "num_input_tokens_seen": 18147144, "step": 31945 }, { "epoch": 560.5309734513274, "grad_norm": 3.9890583281021463e-08, "learning_rate": 0.029001582180050577, "loss": 0.0, "num_input_tokens_seen": 18149880, "step": 31950 }, { "epoch": 560.6194690265487, "grad_norm": 2.5825666938317227e-08, "learning_rate": 0.02896677749592482, "loss": 0.0, "num_input_tokens_seen": 18153160, "step": 31955 }, { "epoch": 560.70796460177, "grad_norm": 1.9866073230900838e-08, "learning_rate": 0.028931991476642938, "loss": 0.0, "num_input_tokens_seen": 18155784, "step": 31960 }, { "epoch": 560.7964601769911, "grad_norm": 3.027640005370813e-08, "learning_rate": 0.028897224127569412, "loss": 0.0, "num_input_tokens_seen": 18158344, "step": 31965 }, { "epoch": 560.8849557522124, "grad_norm": 2.915602870245948e-08, "learning_rate": 0.028862475454065832, "loss": 0.0, "num_input_tokens_seen": 18161032, "step": 31970 }, { "epoch": 560.9734513274336, "grad_norm": 2.4197587933372233e-08, "learning_rate": 0.028827745461490806, "loss": 0.0, "num_input_tokens_seen": 18163976, "step": 31975 }, { "epoch": 561.0530973451328, "grad_norm": 2.728687853448264e-08, "learning_rate": 0.028793034155200212, "loss": 0.0, "num_input_tokens_seen": 18166088, "step": 31980 }, { "epoch": 561.141592920354, "grad_norm": 9.924986166254257e-09, "learning_rate": 0.028758341540546944, "loss": 0.0, "num_input_tokens_seen": 18168776, "step": 31985 }, { "epoch": 561.2300884955753, "grad_norm": 3.079158261698467e-08, "learning_rate": 0.02872366762288098, "loss": 0.0, "num_input_tokens_seen": 18171368, "step": 31990 }, { "epoch": 561.3185840707964, "grad_norm": 3.646580282179457e-08, "learning_rate": 0.028689012407549567, "loss": 0.0, "num_input_tokens_seen": 18174280, "step": 31995 }, { "epoch": 561.4070796460177, "grad_norm": 2.737489879223176e-08, "learning_rate": 0.028654375899896892, "loss": 0.0, "num_input_tokens_seen": 18177096, "step": 32000 }, { "epoch": 561.4070796460177, "eval_loss": 0.8073485493659973, "eval_runtime": 0.9421, "eval_samples_per_second": 26.535, "eval_steps_per_second": 13.798, "num_input_tokens_seen": 18177096, "step": 32000 }, { "epoch": 561.4955752212389, "grad_norm": 6.593278101263422e-08, "learning_rate": 0.02861975810526437, "loss": 0.0, "num_input_tokens_seen": 18180024, "step": 32005 }, { "epoch": 561.5840707964602, "grad_norm": 3.013640181848132e-08, "learning_rate": 0.02858515902899056, "loss": 0.0, "num_input_tokens_seen": 18182696, "step": 32010 }, { "epoch": 561.6725663716815, "grad_norm": 4.189232427620482e-08, "learning_rate": 0.028550578676410976, "loss": 0.0, "num_input_tokens_seen": 18185784, "step": 32015 }, { "epoch": 561.7610619469026, "grad_norm": 3.2263297811141456e-08, "learning_rate": 0.02851601705285837, "loss": 0.0, "num_input_tokens_seen": 18188264, "step": 32020 }, { "epoch": 561.8495575221239, "grad_norm": 3.2265280225374227e-08, "learning_rate": 0.028481474163662666, "loss": 0.0, "num_input_tokens_seen": 18191592, "step": 32025 }, { "epoch": 561.9380530973451, "grad_norm": 2.8984558753109013e-08, "learning_rate": 0.028446950014150683, "loss": 0.0, "num_input_tokens_seen": 18194616, "step": 32030 }, { "epoch": 562.0176991150443, "grad_norm": 2.788895514527212e-08, "learning_rate": 0.028412444609646596, "loss": 0.0, "num_input_tokens_seen": 18197592, "step": 32035 }, { "epoch": 562.1061946902655, "grad_norm": 3.237154899693451e-08, "learning_rate": 0.028377957955471465, "loss": 0.0, "num_input_tokens_seen": 18200152, "step": 32040 }, { "epoch": 562.1946902654868, "grad_norm": 5.2415952467299576e-08, "learning_rate": 0.0283434900569436, "loss": 0.0, "num_input_tokens_seen": 18202856, "step": 32045 }, { "epoch": 562.2831858407079, "grad_norm": 6.5770898061146e-08, "learning_rate": 0.028309040919378456, "loss": 0.0, "num_input_tokens_seen": 18206424, "step": 32050 }, { "epoch": 562.3716814159292, "grad_norm": 4.403535669439407e-08, "learning_rate": 0.02827461054808848, "loss": 0.0, "num_input_tokens_seen": 18208904, "step": 32055 }, { "epoch": 562.4601769911504, "grad_norm": 2.3453424091712805e-08, "learning_rate": 0.028240198948383186, "loss": 0.0, "num_input_tokens_seen": 18211496, "step": 32060 }, { "epoch": 562.5486725663717, "grad_norm": 2.679263744198579e-08, "learning_rate": 0.028205806125569402, "loss": 0.0, "num_input_tokens_seen": 18214104, "step": 32065 }, { "epoch": 562.637168141593, "grad_norm": 4.4978946789342444e-08, "learning_rate": 0.028171432084950834, "loss": 0.0, "num_input_tokens_seen": 18217016, "step": 32070 }, { "epoch": 562.7256637168142, "grad_norm": 2.7222410992067125e-08, "learning_rate": 0.028137076831828478, "loss": 0.0, "num_input_tokens_seen": 18219688, "step": 32075 }, { "epoch": 562.8141592920354, "grad_norm": 3.742650633853373e-08, "learning_rate": 0.028102740371500238, "loss": 0.0, "num_input_tokens_seen": 18222680, "step": 32080 }, { "epoch": 562.9026548672566, "grad_norm": 3.6969943550957396e-08, "learning_rate": 0.0280684227092613, "loss": 0.0, "num_input_tokens_seen": 18226072, "step": 32085 }, { "epoch": 562.9911504424779, "grad_norm": 6.437138466708348e-08, "learning_rate": 0.02803412385040392, "loss": 0.0, "num_input_tokens_seen": 18228840, "step": 32090 }, { "epoch": 563.070796460177, "grad_norm": 6.781178996106973e-08, "learning_rate": 0.027999843800217306, "loss": 0.0, "num_input_tokens_seen": 18231328, "step": 32095 }, { "epoch": 563.1592920353983, "grad_norm": 1.0549150886163261e-08, "learning_rate": 0.027965582563987932, "loss": 0.0, "num_input_tokens_seen": 18234000, "step": 32100 }, { "epoch": 563.2477876106195, "grad_norm": 2.5789802293729736e-08, "learning_rate": 0.027931340146999346, "loss": 0.0, "num_input_tokens_seen": 18236352, "step": 32105 }, { "epoch": 563.3362831858407, "grad_norm": 3.195966868929645e-08, "learning_rate": 0.02789711655453208, "loss": 0.0, "num_input_tokens_seen": 18239312, "step": 32110 }, { "epoch": 563.4247787610619, "grad_norm": 2.4339726678590523e-08, "learning_rate": 0.02786291179186392, "loss": 0.0, "num_input_tokens_seen": 18242208, "step": 32115 }, { "epoch": 563.5132743362832, "grad_norm": 2.7263903135121836e-08, "learning_rate": 0.02782872586426961, "loss": 0.0, "num_input_tokens_seen": 18244624, "step": 32120 }, { "epoch": 563.6017699115044, "grad_norm": 2.0251736287946187e-08, "learning_rate": 0.027794558777021083, "loss": 0.0, "num_input_tokens_seen": 18247712, "step": 32125 }, { "epoch": 563.6902654867257, "grad_norm": 4.2750105677669126e-08, "learning_rate": 0.02776041053538734, "loss": 0.0, "num_input_tokens_seen": 18251168, "step": 32130 }, { "epoch": 563.7787610619469, "grad_norm": 2.8182634892459646e-08, "learning_rate": 0.027726281144634407, "loss": 0.0, "num_input_tokens_seen": 18254016, "step": 32135 }, { "epoch": 563.8672566371681, "grad_norm": 4.1281658980096836e-08, "learning_rate": 0.02769217061002552, "loss": 0.0, "num_input_tokens_seen": 18257392, "step": 32140 }, { "epoch": 563.9557522123894, "grad_norm": 2.923917108432761e-08, "learning_rate": 0.027658078936820967, "loss": 0.0, "num_input_tokens_seen": 18260528, "step": 32145 }, { "epoch": 564.0353982300885, "grad_norm": 2.9200583284705317e-08, "learning_rate": 0.02762400613027805, "loss": 0.0, "num_input_tokens_seen": 18263056, "step": 32150 }, { "epoch": 564.1238938053098, "grad_norm": 9.65773736538722e-08, "learning_rate": 0.027589952195651295, "loss": 0.0, "num_input_tokens_seen": 18265872, "step": 32155 }, { "epoch": 564.212389380531, "grad_norm": 3.699310013871582e-08, "learning_rate": 0.027555917138192186, "loss": 0.0, "num_input_tokens_seen": 18268544, "step": 32160 }, { "epoch": 564.3008849557522, "grad_norm": 1.1616115358492607e-07, "learning_rate": 0.027521900963149375, "loss": 0.0, "num_input_tokens_seen": 18271472, "step": 32165 }, { "epoch": 564.3893805309734, "grad_norm": 3.3389301989927844e-08, "learning_rate": 0.027487903675768633, "loss": 0.0, "num_input_tokens_seen": 18274480, "step": 32170 }, { "epoch": 564.4778761061947, "grad_norm": 1.9287098140807757e-08, "learning_rate": 0.027453925281292677, "loss": 0.0, "num_input_tokens_seen": 18277248, "step": 32175 }, { "epoch": 564.566371681416, "grad_norm": 4.8059312973691704e-08, "learning_rate": 0.027419965784961475, "loss": 0.0, "num_input_tokens_seen": 18279808, "step": 32180 }, { "epoch": 564.6548672566372, "grad_norm": 2.216758687723086e-08, "learning_rate": 0.027386025192012015, "loss": 0.0, "num_input_tokens_seen": 18282448, "step": 32185 }, { "epoch": 564.7433628318585, "grad_norm": 3.0641785997431725e-08, "learning_rate": 0.027352103507678277, "loss": 0.0, "num_input_tokens_seen": 18285472, "step": 32190 }, { "epoch": 564.8318584070796, "grad_norm": 5.520942636394466e-08, "learning_rate": 0.027318200737191527, "loss": 0.0, "num_input_tokens_seen": 18287728, "step": 32195 }, { "epoch": 564.9203539823009, "grad_norm": 1.777510050260389e-08, "learning_rate": 0.027284316885779935, "loss": 0.0, "num_input_tokens_seen": 18290608, "step": 32200 }, { "epoch": 564.9203539823009, "eval_loss": 0.8398646116256714, "eval_runtime": 0.9257, "eval_samples_per_second": 27.008, "eval_steps_per_second": 14.044, "num_input_tokens_seen": 18290608, "step": 32200 }, { "epoch": 565.0, "grad_norm": 5.1819675661590736e-08, "learning_rate": 0.027250451958668785, "loss": 0.0, "num_input_tokens_seen": 18293712, "step": 32205 }, { "epoch": 565.0884955752213, "grad_norm": 2.6454848978119117e-08, "learning_rate": 0.027216605961080536, "loss": 0.0, "num_input_tokens_seen": 18296784, "step": 32210 }, { "epoch": 565.1769911504425, "grad_norm": 8.71311954142584e-07, "learning_rate": 0.02718277889823461, "loss": 0.0, "num_input_tokens_seen": 18299248, "step": 32215 }, { "epoch": 565.2654867256637, "grad_norm": 8.797027106766109e-08, "learning_rate": 0.027148970775347604, "loss": 0.0, "num_input_tokens_seen": 18302000, "step": 32220 }, { "epoch": 565.3539823008849, "grad_norm": 3.618352906187283e-08, "learning_rate": 0.027115181597633174, "loss": 0.0, "num_input_tokens_seen": 18304720, "step": 32225 }, { "epoch": 565.4424778761062, "grad_norm": 4.6224425176433215e-08, "learning_rate": 0.027081411370301976, "loss": 0.0, "num_input_tokens_seen": 18307456, "step": 32230 }, { "epoch": 565.5309734513274, "grad_norm": 6.115289608032981e-08, "learning_rate": 0.027047660098561875, "loss": 0.0, "num_input_tokens_seen": 18310592, "step": 32235 }, { "epoch": 565.6194690265487, "grad_norm": 8.40072900132327e-08, "learning_rate": 0.02701392778761766, "loss": 0.0, "num_input_tokens_seen": 18313648, "step": 32240 }, { "epoch": 565.70796460177, "grad_norm": 5.3477950956448694e-08, "learning_rate": 0.02698021444267133, "loss": 0.0, "num_input_tokens_seen": 18316544, "step": 32245 }, { "epoch": 565.7964601769911, "grad_norm": 2.821511024819756e-08, "learning_rate": 0.026946520068921915, "loss": 0.0, "num_input_tokens_seen": 18319248, "step": 32250 }, { "epoch": 565.8849557522124, "grad_norm": 4.013490695342625e-08, "learning_rate": 0.02691284467156547, "loss": 0.0, "num_input_tokens_seen": 18322176, "step": 32255 }, { "epoch": 565.9734513274336, "grad_norm": 1.6650979262067267e-08, "learning_rate": 0.026879188255795182, "loss": 0.0, "num_input_tokens_seen": 18325200, "step": 32260 }, { "epoch": 566.0530973451328, "grad_norm": 4.401104192197636e-08, "learning_rate": 0.026845550826801328, "loss": 0.0, "num_input_tokens_seen": 18327664, "step": 32265 }, { "epoch": 566.141592920354, "grad_norm": 3.2155664797528516e-08, "learning_rate": 0.02681193238977121, "loss": 0.0, "num_input_tokens_seen": 18330784, "step": 32270 }, { "epoch": 566.2300884955753, "grad_norm": 2.5164494488194578e-08, "learning_rate": 0.026778332949889145, "loss": 0.0, "num_input_tokens_seen": 18333424, "step": 32275 }, { "epoch": 566.3185840707964, "grad_norm": 3.759303268680014e-08, "learning_rate": 0.026744752512336673, "loss": 0.0, "num_input_tokens_seen": 18336512, "step": 32280 }, { "epoch": 566.4070796460177, "grad_norm": 2.6826187493611542e-08, "learning_rate": 0.02671119108229225, "loss": 0.0, "num_input_tokens_seen": 18339616, "step": 32285 }, { "epoch": 566.4955752212389, "grad_norm": 4.1719687260410865e-08, "learning_rate": 0.026677648664931556, "loss": 0.0, "num_input_tokens_seen": 18342144, "step": 32290 }, { "epoch": 566.5840707964602, "grad_norm": 4.476478210335699e-08, "learning_rate": 0.026644125265427154, "loss": 0.0, "num_input_tokens_seen": 18344944, "step": 32295 }, { "epoch": 566.6725663716815, "grad_norm": 2.6721707513388537e-08, "learning_rate": 0.026610620888948822, "loss": 0.0, "num_input_tokens_seen": 18347792, "step": 32300 }, { "epoch": 566.7610619469026, "grad_norm": 7.628389653291379e-08, "learning_rate": 0.026577135540663408, "loss": 0.0, "num_input_tokens_seen": 18350816, "step": 32305 }, { "epoch": 566.8495575221239, "grad_norm": 1.5132718189647676e-08, "learning_rate": 0.026543669225734673, "loss": 0.0, "num_input_tokens_seen": 18353712, "step": 32310 }, { "epoch": 566.9380530973451, "grad_norm": 1.565828711136419e-08, "learning_rate": 0.02651022194932363, "loss": 0.0, "num_input_tokens_seen": 18356640, "step": 32315 }, { "epoch": 567.0176991150443, "grad_norm": 3.9371933269194415e-08, "learning_rate": 0.026476793716588194, "loss": 0.0, "num_input_tokens_seen": 18358896, "step": 32320 }, { "epoch": 567.1061946902655, "grad_norm": 3.6551973892073875e-08, "learning_rate": 0.026443384532683467, "loss": 0.0, "num_input_tokens_seen": 18361824, "step": 32325 }, { "epoch": 567.1946902654868, "grad_norm": 3.1842095182810226e-08, "learning_rate": 0.026409994402761584, "loss": 0.0, "num_input_tokens_seen": 18364736, "step": 32330 }, { "epoch": 567.2831858407079, "grad_norm": 1.6364456456585685e-08, "learning_rate": 0.026376623331971653, "loss": 0.0, "num_input_tokens_seen": 18367536, "step": 32335 }, { "epoch": 567.3716814159292, "grad_norm": 3.315091134936665e-08, "learning_rate": 0.026343271325459997, "loss": 0.0, "num_input_tokens_seen": 18370400, "step": 32340 }, { "epoch": 567.4601769911504, "grad_norm": 3.7247289697006636e-08, "learning_rate": 0.02630993838836987, "loss": 0.0, "num_input_tokens_seen": 18373152, "step": 32345 }, { "epoch": 567.5486725663717, "grad_norm": 2.8088948056392837e-08, "learning_rate": 0.026276624525841584, "loss": 0.0, "num_input_tokens_seen": 18376144, "step": 32350 }, { "epoch": 567.637168141593, "grad_norm": 3.939155490684243e-08, "learning_rate": 0.026243329743012637, "loss": 0.0, "num_input_tokens_seen": 18378544, "step": 32355 }, { "epoch": 567.7256637168142, "grad_norm": 3.4171399931892665e-08, "learning_rate": 0.026210054045017438, "loss": 0.0, "num_input_tokens_seen": 18381360, "step": 32360 }, { "epoch": 567.8141592920354, "grad_norm": 1.4622363764260626e-08, "learning_rate": 0.02617679743698755, "loss": 0.0, "num_input_tokens_seen": 18384032, "step": 32365 }, { "epoch": 567.9026548672566, "grad_norm": 2.8346343938778773e-08, "learning_rate": 0.02614355992405158, "loss": 0.0, "num_input_tokens_seen": 18387872, "step": 32370 }, { "epoch": 567.9911504424779, "grad_norm": 2.8952751307542712e-08, "learning_rate": 0.026110341511335115, "loss": 0.0, "num_input_tokens_seen": 18390832, "step": 32375 }, { "epoch": 568.070796460177, "grad_norm": 3.524535330257095e-08, "learning_rate": 0.02607714220396093, "loss": 0.0, "num_input_tokens_seen": 18393512, "step": 32380 }, { "epoch": 568.1592920353983, "grad_norm": 2.264764553672194e-08, "learning_rate": 0.02604396200704869, "loss": 0.0, "num_input_tokens_seen": 18396328, "step": 32385 }, { "epoch": 568.2477876106195, "grad_norm": 2.538227406034821e-08, "learning_rate": 0.02601080092571523, "loss": 0.0, "num_input_tokens_seen": 18399416, "step": 32390 }, { "epoch": 568.3362831858407, "grad_norm": 1.7929195905708184e-08, "learning_rate": 0.025977658965074455, "loss": 0.0, "num_input_tokens_seen": 18402056, "step": 32395 }, { "epoch": 568.4247787610619, "grad_norm": 1.2288979611696504e-08, "learning_rate": 0.02594453613023719, "loss": 0.0, "num_input_tokens_seen": 18404648, "step": 32400 }, { "epoch": 568.4247787610619, "eval_loss": 0.8064342141151428, "eval_runtime": 0.9491, "eval_samples_per_second": 26.341, "eval_steps_per_second": 13.698, "num_input_tokens_seen": 18404648, "step": 32400 }, { "epoch": 568.5132743362832, "grad_norm": 2.4374736895538263e-08, "learning_rate": 0.025911432426311443, "loss": 0.0, "num_input_tokens_seen": 18407176, "step": 32405 }, { "epoch": 568.6017699115044, "grad_norm": 4.249823959412424e-08, "learning_rate": 0.025878347858402234, "loss": 0.0, "num_input_tokens_seen": 18410408, "step": 32410 }, { "epoch": 568.6902654867257, "grad_norm": 6.509661432119174e-08, "learning_rate": 0.025845282431611598, "loss": 0.0, "num_input_tokens_seen": 18413048, "step": 32415 }, { "epoch": 568.7787610619469, "grad_norm": 3.148695526533629e-08, "learning_rate": 0.025812236151038608, "loss": 0.0, "num_input_tokens_seen": 18416088, "step": 32420 }, { "epoch": 568.8672566371681, "grad_norm": 4.357064398163857e-08, "learning_rate": 0.025779209021779468, "loss": 0.0, "num_input_tokens_seen": 18418936, "step": 32425 }, { "epoch": 568.9557522123894, "grad_norm": 3.8739312202551446e-08, "learning_rate": 0.025746201048927324, "loss": 0.0, "num_input_tokens_seen": 18421816, "step": 32430 }, { "epoch": 569.0353982300885, "grad_norm": 3.8993999140757296e-08, "learning_rate": 0.025713212237572485, "loss": 0.0, "num_input_tokens_seen": 18424008, "step": 32435 }, { "epoch": 569.1238938053098, "grad_norm": 4.583429813465045e-08, "learning_rate": 0.025680242592802164, "loss": 0.0, "num_input_tokens_seen": 18427272, "step": 32440 }, { "epoch": 569.212389380531, "grad_norm": 2.3276712113329268e-08, "learning_rate": 0.02564729211970073, "loss": 0.0, "num_input_tokens_seen": 18429656, "step": 32445 }, { "epoch": 569.3008849557522, "grad_norm": 2.673321830570785e-08, "learning_rate": 0.025614360823349617, "loss": 0.0, "num_input_tokens_seen": 18432760, "step": 32450 }, { "epoch": 569.3893805309734, "grad_norm": 4.569829314959861e-08, "learning_rate": 0.025581448708827146, "loss": 0.0, "num_input_tokens_seen": 18435352, "step": 32455 }, { "epoch": 569.4778761061947, "grad_norm": 5.877436137780023e-08, "learning_rate": 0.025548555781208876, "loss": 0.0, "num_input_tokens_seen": 18438264, "step": 32460 }, { "epoch": 569.566371681416, "grad_norm": 3.424300132337521e-08, "learning_rate": 0.02551568204556721, "loss": 0.0, "num_input_tokens_seen": 18441176, "step": 32465 }, { "epoch": 569.6548672566372, "grad_norm": 2.2227427010079737e-08, "learning_rate": 0.02548282750697173, "loss": 0.0, "num_input_tokens_seen": 18444120, "step": 32470 }, { "epoch": 569.7433628318585, "grad_norm": 3.912763446578538e-08, "learning_rate": 0.02544999217048909, "loss": 0.0, "num_input_tokens_seen": 18447480, "step": 32475 }, { "epoch": 569.8318584070796, "grad_norm": 7.86404630304105e-09, "learning_rate": 0.025417176041182793, "loss": 0.0, "num_input_tokens_seen": 18450216, "step": 32480 }, { "epoch": 569.9203539823009, "grad_norm": 1.8243085264657566e-08, "learning_rate": 0.025384379124113596, "loss": 0.0, "num_input_tokens_seen": 18452776, "step": 32485 }, { "epoch": 570.0, "grad_norm": 4.561353250664979e-08, "learning_rate": 0.025351601424339124, "loss": 0.0, "num_input_tokens_seen": 18455456, "step": 32490 }, { "epoch": 570.0884955752213, "grad_norm": 2.160119372263125e-08, "learning_rate": 0.025318842946914184, "loss": 0.0, "num_input_tokens_seen": 18458320, "step": 32495 }, { "epoch": 570.1769911504425, "grad_norm": 4.4841126367600737e-08, "learning_rate": 0.025286103696890494, "loss": 0.0, "num_input_tokens_seen": 18461104, "step": 32500 }, { "epoch": 570.2654867256637, "grad_norm": 4.565321276572831e-08, "learning_rate": 0.025253383679316836, "loss": 0.0, "num_input_tokens_seen": 18464144, "step": 32505 }, { "epoch": 570.3539823008849, "grad_norm": 3.064692677412495e-08, "learning_rate": 0.025220682899239077, "loss": 0.0, "num_input_tokens_seen": 18466960, "step": 32510 }, { "epoch": 570.4424778761062, "grad_norm": 2.9505649479233398e-08, "learning_rate": 0.02518800136170013, "loss": 0.0, "num_input_tokens_seen": 18469600, "step": 32515 }, { "epoch": 570.5309734513274, "grad_norm": 1.699448404224313e-08, "learning_rate": 0.02515533907173981, "loss": 0.0, "num_input_tokens_seen": 18472368, "step": 32520 }, { "epoch": 570.6194690265487, "grad_norm": 3.202852738581896e-08, "learning_rate": 0.025122696034395115, "loss": 0.0, "num_input_tokens_seen": 18474832, "step": 32525 }, { "epoch": 570.70796460177, "grad_norm": 1.0117230431205826e-07, "learning_rate": 0.025090072254700023, "loss": 0.0, "num_input_tokens_seen": 18478032, "step": 32530 }, { "epoch": 570.7964601769911, "grad_norm": 5.329284746835583e-08, "learning_rate": 0.025057467737685468, "loss": 0.0, "num_input_tokens_seen": 18481072, "step": 32535 }, { "epoch": 570.8849557522124, "grad_norm": 3.915934598808235e-08, "learning_rate": 0.025024882488379557, "loss": 0.0, "num_input_tokens_seen": 18484096, "step": 32540 }, { "epoch": 570.9734513274336, "grad_norm": 2.3318298403296467e-08, "learning_rate": 0.02499231651180727, "loss": 0.0, "num_input_tokens_seen": 18487328, "step": 32545 }, { "epoch": 571.0530973451328, "grad_norm": 2.9327864581318863e-08, "learning_rate": 0.024959769812990713, "loss": 0.0, "num_input_tokens_seen": 18490064, "step": 32550 }, { "epoch": 571.141592920354, "grad_norm": 2.7031818561340515e-08, "learning_rate": 0.024927242396949045, "loss": 0.0, "num_input_tokens_seen": 18492528, "step": 32555 }, { "epoch": 571.2300884955753, "grad_norm": 4.288986943379314e-08, "learning_rate": 0.02489473426869836, "loss": 0.0, "num_input_tokens_seen": 18495648, "step": 32560 }, { "epoch": 571.3185840707964, "grad_norm": 4.415762333564999e-08, "learning_rate": 0.024862245433251776, "loss": 0.0, "num_input_tokens_seen": 18498288, "step": 32565 }, { "epoch": 571.4070796460177, "grad_norm": 3.2704079444556555e-08, "learning_rate": 0.024829775895619577, "loss": 0.0, "num_input_tokens_seen": 18500960, "step": 32570 }, { "epoch": 571.4955752212389, "grad_norm": 5.661850721594419e-08, "learning_rate": 0.024797325660808882, "loss": 0.0, "num_input_tokens_seen": 18503616, "step": 32575 }, { "epoch": 571.5840707964602, "grad_norm": 1.1621274609296961e-08, "learning_rate": 0.02476489473382401, "loss": 0.0, "num_input_tokens_seen": 18506528, "step": 32580 }, { "epoch": 571.6725663716815, "grad_norm": 6.148153630647357e-08, "learning_rate": 0.024732483119666127, "loss": 0.0, "num_input_tokens_seen": 18509696, "step": 32585 }, { "epoch": 571.7610619469026, "grad_norm": 3.9085314540443505e-08, "learning_rate": 0.024700090823333548, "loss": 0.0, "num_input_tokens_seen": 18512336, "step": 32590 }, { "epoch": 571.8495575221239, "grad_norm": 1.3702083379030228e-07, "learning_rate": 0.02466771784982163, "loss": 0.0, "num_input_tokens_seen": 18514880, "step": 32595 }, { "epoch": 571.9380530973451, "grad_norm": 2.2061934501493852e-08, "learning_rate": 0.024635364204122594, "loss": 0.0, "num_input_tokens_seen": 18517216, "step": 32600 }, { "epoch": 571.9380530973451, "eval_loss": 0.8300523161888123, "eval_runtime": 0.9362, "eval_samples_per_second": 26.705, "eval_steps_per_second": 13.886, "num_input_tokens_seen": 18517216, "step": 32600 }, { "epoch": 572.0176991150443, "grad_norm": 3.810940540915908e-08, "learning_rate": 0.024603029891225852, "loss": 0.0, "num_input_tokens_seen": 18520328, "step": 32605 }, { "epoch": 572.1061946902655, "grad_norm": 3.867392450729312e-08, "learning_rate": 0.024570714916117748, "loss": 0.0, "num_input_tokens_seen": 18523496, "step": 32610 }, { "epoch": 572.1946902654868, "grad_norm": 3.486623256776511e-08, "learning_rate": 0.024538419283781625, "loss": 0.0, "num_input_tokens_seen": 18526248, "step": 32615 }, { "epoch": 572.2831858407079, "grad_norm": 3.9771745008465587e-08, "learning_rate": 0.024506142999197938, "loss": 0.0, "num_input_tokens_seen": 18528968, "step": 32620 }, { "epoch": 572.3716814159292, "grad_norm": 2.3197344489744864e-08, "learning_rate": 0.024473886067344002, "loss": 0.0, "num_input_tokens_seen": 18532024, "step": 32625 }, { "epoch": 572.4601769911504, "grad_norm": 1.4141008364276786e-08, "learning_rate": 0.02444164849319434, "loss": 0.0, "num_input_tokens_seen": 18534840, "step": 32630 }, { "epoch": 572.5486725663717, "grad_norm": 4.011571874684705e-08, "learning_rate": 0.024409430281720306, "loss": 0.0, "num_input_tokens_seen": 18537848, "step": 32635 }, { "epoch": 572.637168141593, "grad_norm": 3.42027490773944e-08, "learning_rate": 0.024377231437890428, "loss": 0.0, "num_input_tokens_seen": 18540392, "step": 32640 }, { "epoch": 572.7256637168142, "grad_norm": 7.630064402519565e-08, "learning_rate": 0.024345051966670115, "loss": 0.0, "num_input_tokens_seen": 18543512, "step": 32645 }, { "epoch": 572.8141592920354, "grad_norm": 2.401521115302785e-08, "learning_rate": 0.024312891873021884, "loss": 0.0, "num_input_tokens_seen": 18546328, "step": 32650 }, { "epoch": 572.9026548672566, "grad_norm": 2.8659934869779136e-08, "learning_rate": 0.024280751161905183, "loss": 0.0, "num_input_tokens_seen": 18549272, "step": 32655 }, { "epoch": 572.9911504424779, "grad_norm": 2.2277582445440203e-08, "learning_rate": 0.02424862983827658, "loss": 0.0, "num_input_tokens_seen": 18552280, "step": 32660 }, { "epoch": 573.070796460177, "grad_norm": 6.892199166941282e-08, "learning_rate": 0.024216527907089495, "loss": 0.0, "num_input_tokens_seen": 18554696, "step": 32665 }, { "epoch": 573.1592920353983, "grad_norm": 3.356663214049149e-08, "learning_rate": 0.024184445373294505, "loss": 0.0, "num_input_tokens_seen": 18557464, "step": 32670 }, { "epoch": 573.2477876106195, "grad_norm": 3.436805684486899e-08, "learning_rate": 0.02415238224183918, "loss": 0.0, "num_input_tokens_seen": 18560728, "step": 32675 }, { "epoch": 573.3362831858407, "grad_norm": 2.6632571703544272e-08, "learning_rate": 0.024120338517667973, "loss": 0.0, "num_input_tokens_seen": 18563304, "step": 32680 }, { "epoch": 573.4247787610619, "grad_norm": 6.964615550941744e-08, "learning_rate": 0.02408831420572247, "loss": 0.0, "num_input_tokens_seen": 18566248, "step": 32685 }, { "epoch": 573.5132743362832, "grad_norm": 3.350388055878284e-08, "learning_rate": 0.024056309310941264, "loss": 0.0, "num_input_tokens_seen": 18569528, "step": 32690 }, { "epoch": 573.6017699115044, "grad_norm": 1.6335230057507033e-08, "learning_rate": 0.02402432383825982, "loss": 0.0, "num_input_tokens_seen": 18571992, "step": 32695 }, { "epoch": 573.6902654867257, "grad_norm": 2.749369620858033e-08, "learning_rate": 0.023992357792610792, "loss": 0.0, "num_input_tokens_seen": 18574296, "step": 32700 }, { "epoch": 573.7787610619469, "grad_norm": 9.928473332365684e-08, "learning_rate": 0.0239604111789237, "loss": 0.0, "num_input_tokens_seen": 18577624, "step": 32705 }, { "epoch": 573.8672566371681, "grad_norm": 1.4345087784306543e-08, "learning_rate": 0.023928484002125095, "loss": 0.0, "num_input_tokens_seen": 18581016, "step": 32710 }, { "epoch": 573.9557522123894, "grad_norm": 3.1131055067135094e-08, "learning_rate": 0.023896576267138595, "loss": 0.0, "num_input_tokens_seen": 18583576, "step": 32715 }, { "epoch": 574.0353982300885, "grad_norm": 3.827015149227009e-08, "learning_rate": 0.02386468797888471, "loss": 0.0, "num_input_tokens_seen": 18585736, "step": 32720 }, { "epoch": 574.1238938053098, "grad_norm": 2.43714293191033e-08, "learning_rate": 0.023832819142281057, "loss": 0.0, "num_input_tokens_seen": 18588888, "step": 32725 }, { "epoch": 574.212389380531, "grad_norm": 2.878126714733753e-08, "learning_rate": 0.02380096976224225, "loss": 0.0, "num_input_tokens_seen": 18592088, "step": 32730 }, { "epoch": 574.3008849557522, "grad_norm": 1.6510384170942416e-08, "learning_rate": 0.023769139843679777, "loss": 0.0, "num_input_tokens_seen": 18594728, "step": 32735 }, { "epoch": 574.3893805309734, "grad_norm": 1.8461784989654006e-08, "learning_rate": 0.023737329391502287, "loss": 0.0, "num_input_tokens_seen": 18598120, "step": 32740 }, { "epoch": 574.4778761061947, "grad_norm": 4.290354738145652e-08, "learning_rate": 0.023705538410615293, "loss": 0.0, "num_input_tokens_seen": 18600968, "step": 32745 }, { "epoch": 574.566371681416, "grad_norm": 1.0395989846756493e-07, "learning_rate": 0.023673766905921396, "loss": 0.0, "num_input_tokens_seen": 18603944, "step": 32750 }, { "epoch": 574.6548672566372, "grad_norm": 2.8595421142085797e-08, "learning_rate": 0.0236420148823202, "loss": 0.0, "num_input_tokens_seen": 18606520, "step": 32755 }, { "epoch": 574.7433628318585, "grad_norm": 3.7216370429860035e-08, "learning_rate": 0.02361028234470816, "loss": 0.0, "num_input_tokens_seen": 18609512, "step": 32760 }, { "epoch": 574.8318584070796, "grad_norm": 7.77057849177254e-08, "learning_rate": 0.023578569297978913, "loss": 0.0, "num_input_tokens_seen": 18612184, "step": 32765 }, { "epoch": 574.9203539823009, "grad_norm": 1.9879667689792768e-08, "learning_rate": 0.023546875747023025, "loss": 0.0, "num_input_tokens_seen": 18614952, "step": 32770 }, { "epoch": 575.0, "grad_norm": 2.0458056582128847e-08, "learning_rate": 0.02351520169672801, "loss": 0.0, "num_input_tokens_seen": 18617264, "step": 32775 }, { "epoch": 575.0884955752213, "grad_norm": 7.770520227268207e-08, "learning_rate": 0.023483547151978357, "loss": 0.0, "num_input_tokens_seen": 18619920, "step": 32780 }, { "epoch": 575.1769911504425, "grad_norm": 8.760043357369796e-08, "learning_rate": 0.023451912117655675, "loss": 0.0, "num_input_tokens_seen": 18622800, "step": 32785 }, { "epoch": 575.2654867256637, "grad_norm": 3.2108722791690525e-08, "learning_rate": 0.023420296598638417, "loss": 0.0, "num_input_tokens_seen": 18625872, "step": 32790 }, { "epoch": 575.3539823008849, "grad_norm": 3.0624310198845706e-08, "learning_rate": 0.023388700599802165, "loss": 0.0, "num_input_tokens_seen": 18628464, "step": 32795 }, { "epoch": 575.4424778761062, "grad_norm": 1.9383962879260253e-08, "learning_rate": 0.023357124126019334, "loss": 0.0, "num_input_tokens_seen": 18631296, "step": 32800 }, { "epoch": 575.4424778761062, "eval_loss": 0.8266446590423584, "eval_runtime": 0.9399, "eval_samples_per_second": 26.597, "eval_steps_per_second": 13.831, "num_input_tokens_seen": 18631296, "step": 32800 }, { "epoch": 575.5309734513274, "grad_norm": 8.214475144541211e-08, "learning_rate": 0.02332556718215945, "loss": 0.0, "num_input_tokens_seen": 18634368, "step": 32805 }, { "epoch": 575.6194690265487, "grad_norm": 4.868726222184705e-08, "learning_rate": 0.023294029773089035, "loss": 0.0, "num_input_tokens_seen": 18637280, "step": 32810 }, { "epoch": 575.70796460177, "grad_norm": 4.4034806023773854e-08, "learning_rate": 0.023262511903671484, "loss": 0.0, "num_input_tokens_seen": 18640048, "step": 32815 }, { "epoch": 575.7964601769911, "grad_norm": 1.9528227923615304e-08, "learning_rate": 0.023231013578767324, "loss": 0.0, "num_input_tokens_seen": 18642432, "step": 32820 }, { "epoch": 575.8849557522124, "grad_norm": 5.9432824883742796e-08, "learning_rate": 0.0231995348032339, "loss": 0.0, "num_input_tokens_seen": 18645696, "step": 32825 }, { "epoch": 575.9734513274336, "grad_norm": 2.0487650687073256e-08, "learning_rate": 0.023168075581925685, "loss": 0.0, "num_input_tokens_seen": 18649200, "step": 32830 }, { "epoch": 576.0530973451328, "grad_norm": 6.523509910039138e-08, "learning_rate": 0.023136635919694126, "loss": 0.0, "num_input_tokens_seen": 18651216, "step": 32835 }, { "epoch": 576.141592920354, "grad_norm": 2.8305162658170957e-08, "learning_rate": 0.02310521582138753, "loss": 0.0, "num_input_tokens_seen": 18653952, "step": 32840 }, { "epoch": 576.2300884955753, "grad_norm": 3.687975080879369e-08, "learning_rate": 0.023073815291851357, "loss": 0.0, "num_input_tokens_seen": 18656352, "step": 32845 }, { "epoch": 576.3185840707964, "grad_norm": 3.313226670798031e-08, "learning_rate": 0.02304243433592788, "loss": 0.0, "num_input_tokens_seen": 18660192, "step": 32850 }, { "epoch": 576.4070796460177, "grad_norm": 3.709507012672475e-08, "learning_rate": 0.023011072958456513, "loss": 0.0, "num_input_tokens_seen": 18663600, "step": 32855 }, { "epoch": 576.4955752212389, "grad_norm": 3.892803590588301e-08, "learning_rate": 0.022979731164273536, "loss": 0.0, "num_input_tokens_seen": 18666096, "step": 32860 }, { "epoch": 576.5840707964602, "grad_norm": 5.393455282387549e-08, "learning_rate": 0.022948408958212218, "loss": 0.0, "num_input_tokens_seen": 18668576, "step": 32865 }, { "epoch": 576.6725663716815, "grad_norm": 3.763681633017768e-08, "learning_rate": 0.022917106345102876, "loss": 0.0, "num_input_tokens_seen": 18671504, "step": 32870 }, { "epoch": 576.7610619469026, "grad_norm": 3.3128120691117147e-08, "learning_rate": 0.022885823329772785, "loss": 0.0, "num_input_tokens_seen": 18674448, "step": 32875 }, { "epoch": 576.8495575221239, "grad_norm": 2.713363578266126e-08, "learning_rate": 0.02285455991704612, "loss": 0.0, "num_input_tokens_seen": 18677680, "step": 32880 }, { "epoch": 576.9380530973451, "grad_norm": 1.3707372126248174e-08, "learning_rate": 0.022823316111744117, "loss": 0.0, "num_input_tokens_seen": 18680368, "step": 32885 }, { "epoch": 577.0176991150443, "grad_norm": 2.506114782363511e-08, "learning_rate": 0.022792091918685014, "loss": 0.0, "num_input_tokens_seen": 18682512, "step": 32890 }, { "epoch": 577.1061946902655, "grad_norm": 1.3443615998198766e-08, "learning_rate": 0.022760887342683906, "loss": 0.0, "num_input_tokens_seen": 18685056, "step": 32895 }, { "epoch": 577.1946902654868, "grad_norm": 1.3644072538454566e-08, "learning_rate": 0.022729702388552975, "loss": 0.0, "num_input_tokens_seen": 18688352, "step": 32900 }, { "epoch": 577.2831858407079, "grad_norm": 2.430778067719075e-08, "learning_rate": 0.022698537061101292, "loss": 0.0, "num_input_tokens_seen": 18691200, "step": 32905 }, { "epoch": 577.3716814159292, "grad_norm": 2.278237509756309e-08, "learning_rate": 0.022667391365134962, "loss": 0.0, "num_input_tokens_seen": 18693472, "step": 32910 }, { "epoch": 577.4601769911504, "grad_norm": 3.43506094679924e-08, "learning_rate": 0.022636265305457065, "loss": 0.0, "num_input_tokens_seen": 18696320, "step": 32915 }, { "epoch": 577.5486725663717, "grad_norm": 3.028706174745821e-08, "learning_rate": 0.02260515888686764, "loss": 0.0, "num_input_tokens_seen": 18699424, "step": 32920 }, { "epoch": 577.637168141593, "grad_norm": 5.34940376439863e-08, "learning_rate": 0.022574072114163596, "loss": 0.0, "num_input_tokens_seen": 18702352, "step": 32925 }, { "epoch": 577.7256637168142, "grad_norm": 1.7653098538517042e-08, "learning_rate": 0.022543004992139005, "loss": 0.0, "num_input_tokens_seen": 18705824, "step": 32930 }, { "epoch": 577.8141592920354, "grad_norm": 2.9083150110409406e-08, "learning_rate": 0.022511957525584745, "loss": 0.0, "num_input_tokens_seen": 18708848, "step": 32935 }, { "epoch": 577.9026548672566, "grad_norm": 4.0491411112952846e-08, "learning_rate": 0.022480929719288778, "loss": 0.0, "num_input_tokens_seen": 18711600, "step": 32940 }, { "epoch": 577.9911504424779, "grad_norm": 1.3237036355917553e-08, "learning_rate": 0.02244992157803592, "loss": 0.0, "num_input_tokens_seen": 18714512, "step": 32945 }, { "epoch": 578.070796460177, "grad_norm": 3.4463468523426855e-08, "learning_rate": 0.022418933106608047, "loss": 0.0, "num_input_tokens_seen": 18716952, "step": 32950 }, { "epoch": 578.1592920353983, "grad_norm": 2.81897563070288e-08, "learning_rate": 0.022387964309784018, "loss": 0.0, "num_input_tokens_seen": 18719576, "step": 32955 }, { "epoch": 578.2477876106195, "grad_norm": 3.93933525799639e-08, "learning_rate": 0.022357015192339517, "loss": 0.0, "num_input_tokens_seen": 18722424, "step": 32960 }, { "epoch": 578.3362831858407, "grad_norm": 2.1646968662025756e-08, "learning_rate": 0.02232608575904734, "loss": 0.0, "num_input_tokens_seen": 18725784, "step": 32965 }, { "epoch": 578.4247787610619, "grad_norm": 3.3252820941243044e-08, "learning_rate": 0.022295176014677225, "loss": 0.0, "num_input_tokens_seen": 18728648, "step": 32970 }, { "epoch": 578.5132743362832, "grad_norm": 4.0535486078852045e-08, "learning_rate": 0.02226428596399577, "loss": 0.0, "num_input_tokens_seen": 18731080, "step": 32975 }, { "epoch": 578.6017699115044, "grad_norm": 1.2499777213292873e-08, "learning_rate": 0.02223341561176669, "loss": 0.0, "num_input_tokens_seen": 18733704, "step": 32980 }, { "epoch": 578.6902654867257, "grad_norm": 1.4350925781059232e-08, "learning_rate": 0.0222025649627505, "loss": 0.0, "num_input_tokens_seen": 18736664, "step": 32985 }, { "epoch": 578.7787610619469, "grad_norm": 2.3933978354762075e-08, "learning_rate": 0.022171734021704814, "loss": 0.0, "num_input_tokens_seen": 18739928, "step": 32990 }, { "epoch": 578.8672566371681, "grad_norm": 3.0690504360109117e-08, "learning_rate": 0.022140922793384116, "loss": 0.0, "num_input_tokens_seen": 18742552, "step": 32995 }, { "epoch": 578.9557522123894, "grad_norm": 9.433294678728998e-08, "learning_rate": 0.022110131282539934, "loss": 0.0, "num_input_tokens_seen": 18745416, "step": 33000 }, { "epoch": 578.9557522123894, "eval_loss": 0.8064060807228088, "eval_runtime": 0.9353, "eval_samples_per_second": 26.728, "eval_steps_per_second": 13.899, "num_input_tokens_seen": 18745416, "step": 33000 }, { "epoch": 579.0353982300885, "grad_norm": 4.561278643677724e-08, "learning_rate": 0.022079359493920675, "loss": 0.0, "num_input_tokens_seen": 18747704, "step": 33005 }, { "epoch": 579.1238938053098, "grad_norm": 7.839597060410597e-08, "learning_rate": 0.02204860743227169, "loss": 0.0, "num_input_tokens_seen": 18750712, "step": 33010 }, { "epoch": 579.212389380531, "grad_norm": 4.3777475866590976e-08, "learning_rate": 0.022017875102335365, "loss": 0.0, "num_input_tokens_seen": 18753192, "step": 33015 }, { "epoch": 579.3008849557522, "grad_norm": 3.1071223816070415e-08, "learning_rate": 0.02198716250885108, "loss": 0.0, "num_input_tokens_seen": 18756168, "step": 33020 }, { "epoch": 579.3893805309734, "grad_norm": 2.4335571779943166e-08, "learning_rate": 0.021956469656555, "loss": 0.0, "num_input_tokens_seen": 18759176, "step": 33025 }, { "epoch": 579.4778761061947, "grad_norm": 2.1685004014670994e-08, "learning_rate": 0.0219257965501804, "loss": 0.0, "num_input_tokens_seen": 18761576, "step": 33030 }, { "epoch": 579.566371681416, "grad_norm": 3.811739901493638e-08, "learning_rate": 0.021895143194457494, "loss": 0.0, "num_input_tokens_seen": 18764488, "step": 33035 }, { "epoch": 579.6548672566372, "grad_norm": 1.8267517276626677e-08, "learning_rate": 0.021864509594113322, "loss": 0.0, "num_input_tokens_seen": 18767128, "step": 33040 }, { "epoch": 579.7433628318585, "grad_norm": 4.0056733041637926e-08, "learning_rate": 0.02183389575387207, "loss": 0.0, "num_input_tokens_seen": 18770376, "step": 33045 }, { "epoch": 579.8318584070796, "grad_norm": 9.194931038791765e-08, "learning_rate": 0.021803301678454682, "loss": 0.0, "num_input_tokens_seen": 18773064, "step": 33050 }, { "epoch": 579.9203539823009, "grad_norm": 2.902517692859874e-08, "learning_rate": 0.021772727372579213, "loss": 0.0, "num_input_tokens_seen": 18776232, "step": 33055 }, { "epoch": 580.0, "grad_norm": 4.246261653406691e-08, "learning_rate": 0.02174217284096061, "loss": 0.0, "num_input_tokens_seen": 18778768, "step": 33060 }, { "epoch": 580.0884955752213, "grad_norm": 3.27967804025775e-08, "learning_rate": 0.0217116380883107, "loss": 0.0, "num_input_tokens_seen": 18781232, "step": 33065 }, { "epoch": 580.1769911504425, "grad_norm": 3.4815631266837954e-08, "learning_rate": 0.021681123119338425, "loss": 0.0, "num_input_tokens_seen": 18784032, "step": 33070 }, { "epoch": 580.2654867256637, "grad_norm": 1.936228777310589e-08, "learning_rate": 0.02165062793874951, "loss": 0.0, "num_input_tokens_seen": 18786496, "step": 33075 }, { "epoch": 580.3539823008849, "grad_norm": 2.428509127128109e-08, "learning_rate": 0.021620152551246666, "loss": 0.0, "num_input_tokens_seen": 18789728, "step": 33080 }, { "epoch": 580.4424778761062, "grad_norm": 1.3759491324094597e-08, "learning_rate": 0.02158969696152967, "loss": 0.0, "num_input_tokens_seen": 18792496, "step": 33085 }, { "epoch": 580.5309734513274, "grad_norm": 2.2603783733643468e-08, "learning_rate": 0.021559261174295057, "loss": 0.0, "num_input_tokens_seen": 18795040, "step": 33090 }, { "epoch": 580.6194690265487, "grad_norm": 6.647105266210929e-08, "learning_rate": 0.02152884519423646, "loss": 0.0, "num_input_tokens_seen": 18798128, "step": 33095 }, { "epoch": 580.70796460177, "grad_norm": 5.505593847487944e-08, "learning_rate": 0.021498449026044447, "loss": 0.0, "num_input_tokens_seen": 18801024, "step": 33100 }, { "epoch": 580.7964601769911, "grad_norm": 3.197598630322318e-08, "learning_rate": 0.021468072674406414, "loss": 0.0, "num_input_tokens_seen": 18803984, "step": 33105 }, { "epoch": 580.8849557522124, "grad_norm": 3.206887910778278e-08, "learning_rate": 0.021437716144006795, "loss": 0.0, "num_input_tokens_seen": 18806832, "step": 33110 }, { "epoch": 580.9734513274336, "grad_norm": 5.1960629576797146e-08, "learning_rate": 0.021407379439527002, "loss": 0.0, "num_input_tokens_seen": 18809840, "step": 33115 }, { "epoch": 581.0530973451328, "grad_norm": 2.0184886651009037e-08, "learning_rate": 0.021377062565645255, "loss": 0.0, "num_input_tokens_seen": 18812080, "step": 33120 }, { "epoch": 581.141592920354, "grad_norm": 2.8578758914932223e-08, "learning_rate": 0.02134676552703688, "loss": 0.0, "num_input_tokens_seen": 18814976, "step": 33125 }, { "epoch": 581.2300884955753, "grad_norm": 3.902390588450544e-08, "learning_rate": 0.02131648832837398, "loss": 0.0, "num_input_tokens_seen": 18817808, "step": 33130 }, { "epoch": 581.3185840707964, "grad_norm": 1.9326996891777526e-08, "learning_rate": 0.02128623097432574, "loss": 0.0, "num_input_tokens_seen": 18820512, "step": 33135 }, { "epoch": 581.4070796460177, "grad_norm": 3.8728888540617845e-08, "learning_rate": 0.021255993469558192, "loss": 0.0, "num_input_tokens_seen": 18823232, "step": 33140 }, { "epoch": 581.4955752212389, "grad_norm": 2.793134434853073e-08, "learning_rate": 0.021225775818734364, "loss": 0.0, "num_input_tokens_seen": 18825856, "step": 33145 }, { "epoch": 581.5840707964602, "grad_norm": 4.8859803314371675e-08, "learning_rate": 0.021195578026514166, "loss": 0.0, "num_input_tokens_seen": 18828608, "step": 33150 }, { "epoch": 581.6725663716815, "grad_norm": 3.295809491987711e-08, "learning_rate": 0.02116540009755452, "loss": 0.0, "num_input_tokens_seen": 18831824, "step": 33155 }, { "epoch": 581.7610619469026, "grad_norm": 4.7761314903027596e-08, "learning_rate": 0.021135242036509173, "loss": 0.0, "num_input_tokens_seen": 18834416, "step": 33160 }, { "epoch": 581.8495575221239, "grad_norm": 6.672532038010104e-08, "learning_rate": 0.021105103848028967, "loss": 0.0, "num_input_tokens_seen": 18837776, "step": 33165 }, { "epoch": 581.9380530973451, "grad_norm": 7.822969649851075e-08, "learning_rate": 0.021074985536761504, "loss": 0.0, "num_input_tokens_seen": 18840480, "step": 33170 }, { "epoch": 582.0176991150443, "grad_norm": 4.0053386385352496e-08, "learning_rate": 0.021044887107351435, "loss": 0.0, "num_input_tokens_seen": 18843368, "step": 33175 }, { "epoch": 582.1061946902655, "grad_norm": 4.239971573838375e-08, "learning_rate": 0.021014808564440362, "loss": 0.0, "num_input_tokens_seen": 18846072, "step": 33180 }, { "epoch": 582.1946902654868, "grad_norm": 1.145107404454393e-07, "learning_rate": 0.02098474991266671, "loss": 0.0, "num_input_tokens_seen": 18849240, "step": 33185 }, { "epoch": 582.2831858407079, "grad_norm": 6.087695680889738e-08, "learning_rate": 0.02095471115666592, "loss": 0.0, "num_input_tokens_seen": 18851896, "step": 33190 }, { "epoch": 582.3716814159292, "grad_norm": 2.6855319745777706e-08, "learning_rate": 0.020924692301070406, "loss": 0.0, "num_input_tokens_seen": 18855000, "step": 33195 }, { "epoch": 582.4601769911504, "grad_norm": 3.614686505670761e-08, "learning_rate": 0.020894693350509346, "loss": 0.0, "num_input_tokens_seen": 18857896, "step": 33200 }, { "epoch": 582.4601769911504, "eval_loss": 0.8053354024887085, "eval_runtime": 0.9378, "eval_samples_per_second": 26.657, "eval_steps_per_second": 13.862, "num_input_tokens_seen": 18857896, "step": 33200 }, { "epoch": 582.5486725663717, "grad_norm": 3.525465430698205e-08, "learning_rate": 0.020864714309609057, "loss": 0.0, "num_input_tokens_seen": 18860696, "step": 33205 }, { "epoch": 582.637168141593, "grad_norm": 6.400674834594611e-08, "learning_rate": 0.020834755182992604, "loss": 0.0, "num_input_tokens_seen": 18863448, "step": 33210 }, { "epoch": 582.7256637168142, "grad_norm": 1.732410481736224e-08, "learning_rate": 0.02080481597528011, "loss": 0.0, "num_input_tokens_seen": 18866344, "step": 33215 }, { "epoch": 582.8141592920354, "grad_norm": 1.8322291239769584e-08, "learning_rate": 0.020774896691088583, "loss": 0.0, "num_input_tokens_seen": 18869256, "step": 33220 }, { "epoch": 582.9026548672566, "grad_norm": 4.440773437863754e-08, "learning_rate": 0.020744997335031882, "loss": 0.0, "num_input_tokens_seen": 18871912, "step": 33225 }, { "epoch": 582.9911504424779, "grad_norm": 3.361461864415105e-08, "learning_rate": 0.02071511791172092, "loss": 0.0, "num_input_tokens_seen": 18874824, "step": 33230 }, { "epoch": 583.070796460177, "grad_norm": 3.6656341961816e-08, "learning_rate": 0.02068525842576351, "loss": 0.0, "num_input_tokens_seen": 18877168, "step": 33235 }, { "epoch": 583.1592920353983, "grad_norm": 8.482483337957092e-08, "learning_rate": 0.020655418881764264, "loss": 0.0, "num_input_tokens_seen": 18879680, "step": 33240 }, { "epoch": 583.2477876106195, "grad_norm": 4.6300129952214775e-08, "learning_rate": 0.020625599284324923, "loss": 0.0, "num_input_tokens_seen": 18882672, "step": 33245 }, { "epoch": 583.3362831858407, "grad_norm": 4.437966794057502e-08, "learning_rate": 0.02059579963804396, "loss": 0.0, "num_input_tokens_seen": 18885328, "step": 33250 }, { "epoch": 583.4247787610619, "grad_norm": 3.786076163692087e-08, "learning_rate": 0.02056601994751688, "loss": 0.0, "num_input_tokens_seen": 18888608, "step": 33255 }, { "epoch": 583.5132743362832, "grad_norm": 2.1147542383914697e-08, "learning_rate": 0.02053626021733614, "loss": 0.0, "num_input_tokens_seen": 18891328, "step": 33260 }, { "epoch": 583.6017699115044, "grad_norm": 4.345436366293143e-08, "learning_rate": 0.02050652045209097, "loss": 0.0, "num_input_tokens_seen": 18893664, "step": 33265 }, { "epoch": 583.6902654867257, "grad_norm": 6.313324973916679e-08, "learning_rate": 0.020476800656367672, "loss": 0.0, "num_input_tokens_seen": 18896768, "step": 33270 }, { "epoch": 583.7787610619469, "grad_norm": 4.85277631412373e-08, "learning_rate": 0.020447100834749425, "loss": 0.0, "num_input_tokens_seen": 18899664, "step": 33275 }, { "epoch": 583.8672566371681, "grad_norm": 3.6376604839460924e-08, "learning_rate": 0.02041742099181627, "loss": 0.0, "num_input_tokens_seen": 18902896, "step": 33280 }, { "epoch": 583.9557522123894, "grad_norm": 1.9471771750545486e-08, "learning_rate": 0.02038776113214526, "loss": 0.0, "num_input_tokens_seen": 18906096, "step": 33285 }, { "epoch": 584.0353982300885, "grad_norm": 2.512257601949841e-08, "learning_rate": 0.0203581212603103, "loss": 0.0, "num_input_tokens_seen": 18908552, "step": 33290 }, { "epoch": 584.1238938053098, "grad_norm": 2.8062096646408463e-08, "learning_rate": 0.02032850138088219, "loss": 0.0, "num_input_tokens_seen": 18911352, "step": 33295 }, { "epoch": 584.212389380531, "grad_norm": 3.236671375361766e-08, "learning_rate": 0.020298901498428754, "loss": 0.0, "num_input_tokens_seen": 18914264, "step": 33300 }, { "epoch": 584.3008849557522, "grad_norm": 2.803760779102049e-08, "learning_rate": 0.020269321617514595, "loss": 0.0, "num_input_tokens_seen": 18916792, "step": 33305 }, { "epoch": 584.3893805309734, "grad_norm": 3.6190570540384215e-08, "learning_rate": 0.020239761742701343, "loss": 0.0, "num_input_tokens_seen": 18919640, "step": 33310 }, { "epoch": 584.4778761061947, "grad_norm": 3.511195245664567e-08, "learning_rate": 0.02021022187854754, "loss": 0.0, "num_input_tokens_seen": 18922456, "step": 33315 }, { "epoch": 584.566371681416, "grad_norm": 3.435371098703399e-08, "learning_rate": 0.020180702029608522, "loss": 0.0, "num_input_tokens_seen": 18925608, "step": 33320 }, { "epoch": 584.6548672566372, "grad_norm": 3.3487154382783046e-08, "learning_rate": 0.020151202200436695, "loss": 0.0, "num_input_tokens_seen": 18928568, "step": 33325 }, { "epoch": 584.7433628318585, "grad_norm": 2.550681266200172e-08, "learning_rate": 0.020121722395581226, "loss": 0.0, "num_input_tokens_seen": 18931592, "step": 33330 }, { "epoch": 584.8318584070796, "grad_norm": 6.279096709249643e-08, "learning_rate": 0.020092262619588342, "loss": 0.0, "num_input_tokens_seen": 18933800, "step": 33335 }, { "epoch": 584.9203539823009, "grad_norm": 1.1999500770798477e-07, "learning_rate": 0.02006282287700109, "loss": 0.0, "num_input_tokens_seen": 18936952, "step": 33340 }, { "epoch": 585.0, "grad_norm": 5.337918551617804e-09, "learning_rate": 0.020033403172359427, "loss": 0.0, "num_input_tokens_seen": 18939504, "step": 33345 }, { "epoch": 585.0884955752213, "grad_norm": 1.3576371138412924e-08, "learning_rate": 0.020004003510200284, "loss": 0.0, "num_input_tokens_seen": 18942224, "step": 33350 }, { "epoch": 585.1769911504425, "grad_norm": 2.6340652325984593e-08, "learning_rate": 0.019974623895057407, "loss": 0.0, "num_input_tokens_seen": 18944976, "step": 33355 }, { "epoch": 585.2654867256637, "grad_norm": 3.017911254232786e-08, "learning_rate": 0.019945264331461553, "loss": 0.0, "num_input_tokens_seen": 18947680, "step": 33360 }, { "epoch": 585.3539823008849, "grad_norm": 2.0215717100313668e-08, "learning_rate": 0.019915924823940317, "loss": 0.0, "num_input_tokens_seen": 18950320, "step": 33365 }, { "epoch": 585.4424778761062, "grad_norm": 3.243862778390394e-08, "learning_rate": 0.01988660537701816, "loss": 0.0, "num_input_tokens_seen": 18953200, "step": 33370 }, { "epoch": 585.5309734513274, "grad_norm": 3.250254820841292e-08, "learning_rate": 0.01985730599521659, "loss": 0.0, "num_input_tokens_seen": 18955760, "step": 33375 }, { "epoch": 585.6194690265487, "grad_norm": 8.767333525838694e-08, "learning_rate": 0.019828026683053918, "loss": 0.0, "num_input_tokens_seen": 18959216, "step": 33380 }, { "epoch": 585.70796460177, "grad_norm": 4.211726789549175e-08, "learning_rate": 0.01979876744504535, "loss": 0.0, "num_input_tokens_seen": 18962608, "step": 33385 }, { "epoch": 585.7964601769911, "grad_norm": 3.674260184993727e-08, "learning_rate": 0.019769528285703046, "loss": 0.0, "num_input_tokens_seen": 18966064, "step": 33390 }, { "epoch": 585.8849557522124, "grad_norm": 1.803787341714269e-08, "learning_rate": 0.019740309209536098, "loss": 0.0, "num_input_tokens_seen": 18968752, "step": 33395 }, { "epoch": 585.9734513274336, "grad_norm": 6.683244180294423e-08, "learning_rate": 0.019711110221050387, "loss": 0.0, "num_input_tokens_seen": 18971344, "step": 33400 }, { "epoch": 585.9734513274336, "eval_loss": 0.825111985206604, "eval_runtime": 0.9451, "eval_samples_per_second": 26.451, "eval_steps_per_second": 13.754, "num_input_tokens_seen": 18971344, "step": 33400 }, { "epoch": 586.0530973451328, "grad_norm": 4.59213502779221e-08, "learning_rate": 0.019681931324748825, "loss": 0.0, "num_input_tokens_seen": 18973736, "step": 33405 }, { "epoch": 586.141592920354, "grad_norm": 3.744888132928281e-08, "learning_rate": 0.019652772525131094, "loss": 0.0, "num_input_tokens_seen": 18976392, "step": 33410 }, { "epoch": 586.2300884955753, "grad_norm": 4.132190056793661e-08, "learning_rate": 0.019623633826693885, "loss": 0.0, "num_input_tokens_seen": 18979576, "step": 33415 }, { "epoch": 586.3185840707964, "grad_norm": 3.739424769833022e-08, "learning_rate": 0.019594515233930788, "loss": 0.0, "num_input_tokens_seen": 18982136, "step": 33420 }, { "epoch": 586.4070796460177, "grad_norm": 3.733569187147623e-08, "learning_rate": 0.019565416751332186, "loss": 0.0, "num_input_tokens_seen": 18985288, "step": 33425 }, { "epoch": 586.4955752212389, "grad_norm": 5.660006863195122e-08, "learning_rate": 0.019536338383385497, "loss": 0.0, "num_input_tokens_seen": 18987864, "step": 33430 }, { "epoch": 586.5840707964602, "grad_norm": 2.4174513058028424e-08, "learning_rate": 0.019507280134574933, "loss": 0.0, "num_input_tokens_seen": 18991144, "step": 33435 }, { "epoch": 586.6725663716815, "grad_norm": 4.439606371420268e-08, "learning_rate": 0.019478242009381624, "loss": 0.0, "num_input_tokens_seen": 18993976, "step": 33440 }, { "epoch": 586.7610619469026, "grad_norm": 7.264313950372525e-08, "learning_rate": 0.01944922401228367, "loss": 0.0, "num_input_tokens_seen": 18997032, "step": 33445 }, { "epoch": 586.8495575221239, "grad_norm": 2.5145034499018948e-08, "learning_rate": 0.01942022614775593, "loss": 0.0, "num_input_tokens_seen": 18999736, "step": 33450 }, { "epoch": 586.9380530973451, "grad_norm": 1.4579027762806618e-08, "learning_rate": 0.01939124842027029, "loss": 0.0, "num_input_tokens_seen": 19002936, "step": 33455 }, { "epoch": 587.0176991150443, "grad_norm": 2.8067978163903717e-08, "learning_rate": 0.01936229083429551, "loss": 0.0, "num_input_tokens_seen": 19005400, "step": 33460 }, { "epoch": 587.1061946902655, "grad_norm": 4.707524681180075e-08, "learning_rate": 0.019333353394297148, "loss": 0.0, "num_input_tokens_seen": 19007928, "step": 33465 }, { "epoch": 587.1946902654868, "grad_norm": 3.6698079242114545e-08, "learning_rate": 0.019304436104737754, "loss": 0.0, "num_input_tokens_seen": 19010600, "step": 33470 }, { "epoch": 587.2831858407079, "grad_norm": 3.985255503380358e-08, "learning_rate": 0.019275538970076778, "loss": 0.0, "num_input_tokens_seen": 19014456, "step": 33475 }, { "epoch": 587.3716814159292, "grad_norm": 5.961628346540238e-08, "learning_rate": 0.019246661994770434, "loss": 0.0, "num_input_tokens_seen": 19016920, "step": 33480 }, { "epoch": 587.4601769911504, "grad_norm": 3.4593309550245976e-08, "learning_rate": 0.019217805183271985, "loss": 0.0, "num_input_tokens_seen": 19019432, "step": 33485 }, { "epoch": 587.5486725663717, "grad_norm": 1.953532446918871e-08, "learning_rate": 0.019188968540031465, "loss": 0.0, "num_input_tokens_seen": 19022376, "step": 33490 }, { "epoch": 587.637168141593, "grad_norm": 2.6066350855558085e-08, "learning_rate": 0.019160152069495867, "loss": 0.0, "num_input_tokens_seen": 19025416, "step": 33495 }, { "epoch": 587.7256637168142, "grad_norm": 3.321051167404221e-08, "learning_rate": 0.019131355776109103, "loss": 0.0, "num_input_tokens_seen": 19028360, "step": 33500 }, { "epoch": 587.8141592920354, "grad_norm": 4.229014294310218e-08, "learning_rate": 0.019102579664311857, "loss": 0.0, "num_input_tokens_seen": 19031432, "step": 33505 }, { "epoch": 587.9026548672566, "grad_norm": 2.823850842048614e-08, "learning_rate": 0.019073823738541763, "loss": 0.0, "num_input_tokens_seen": 19034456, "step": 33510 }, { "epoch": 587.9911504424779, "grad_norm": 5.61822659506106e-08, "learning_rate": 0.0190450880032334, "loss": 0.0, "num_input_tokens_seen": 19037032, "step": 33515 }, { "epoch": 588.070796460177, "grad_norm": 1.7747391112266087e-08, "learning_rate": 0.019016372462818114, "loss": 0.0, "num_input_tokens_seen": 19039232, "step": 33520 }, { "epoch": 588.1592920353983, "grad_norm": 2.1038021102981475e-08, "learning_rate": 0.018987677121724278, "loss": 0.0, "num_input_tokens_seen": 19042032, "step": 33525 }, { "epoch": 588.2477876106195, "grad_norm": 2.1408277817158705e-08, "learning_rate": 0.018959001984377, "loss": 0.0, "num_input_tokens_seen": 19045184, "step": 33530 }, { "epoch": 588.3362831858407, "grad_norm": 3.2540821592874636e-08, "learning_rate": 0.018930347055198377, "loss": 0.0, "num_input_tokens_seen": 19048288, "step": 33535 }, { "epoch": 588.4247787610619, "grad_norm": 4.6715939561181585e-08, "learning_rate": 0.01890171233860739, "loss": 0.0, "num_input_tokens_seen": 19051520, "step": 33540 }, { "epoch": 588.5132743362832, "grad_norm": 4.914562623525853e-08, "learning_rate": 0.018873097839019807, "loss": 0.0, "num_input_tokens_seen": 19054576, "step": 33545 }, { "epoch": 588.6017699115044, "grad_norm": 3.270942627864315e-08, "learning_rate": 0.0188445035608484, "loss": 0.0, "num_input_tokens_seen": 19057552, "step": 33550 }, { "epoch": 588.6902654867257, "grad_norm": 4.055561575455613e-08, "learning_rate": 0.018815929508502777, "loss": 0.0, "num_input_tokens_seen": 19060400, "step": 33555 }, { "epoch": 588.7787610619469, "grad_norm": 6.182366973916942e-08, "learning_rate": 0.01878737568638934, "loss": 0.0, "num_input_tokens_seen": 19062864, "step": 33560 }, { "epoch": 588.8672566371681, "grad_norm": 4.15416394616841e-08, "learning_rate": 0.01875884209891152, "loss": 0.0, "num_input_tokens_seen": 19065840, "step": 33565 }, { "epoch": 588.9557522123894, "grad_norm": 2.043438307453016e-08, "learning_rate": 0.018730328750469514, "loss": 0.0, "num_input_tokens_seen": 19068432, "step": 33570 }, { "epoch": 589.0353982300885, "grad_norm": 7.736015561476961e-08, "learning_rate": 0.018701835645460473, "loss": 0.0, "num_input_tokens_seen": 19070672, "step": 33575 }, { "epoch": 589.1238938053098, "grad_norm": 1.1331991345286951e-07, "learning_rate": 0.01867336278827838, "loss": 0.0, "num_input_tokens_seen": 19073664, "step": 33580 }, { "epoch": 589.212389380531, "grad_norm": 3.2448326692247065e-08, "learning_rate": 0.018644910183314056, "loss": 0.0, "num_input_tokens_seen": 19076400, "step": 33585 }, { "epoch": 589.3008849557522, "grad_norm": 1.7112533612362313e-08, "learning_rate": 0.01861647783495531, "loss": 0.0, "num_input_tokens_seen": 19079264, "step": 33590 }, { "epoch": 589.3893805309734, "grad_norm": 4.081685034407201e-08, "learning_rate": 0.01858806574758676, "loss": 0.0, "num_input_tokens_seen": 19081904, "step": 33595 }, { "epoch": 589.4778761061947, "grad_norm": 2.3020753303626407e-08, "learning_rate": 0.01855967392558988, "loss": 0.0, "num_input_tokens_seen": 19085248, "step": 33600 }, { "epoch": 589.4778761061947, "eval_loss": 0.8042502999305725, "eval_runtime": 0.9229, "eval_samples_per_second": 27.087, "eval_steps_per_second": 14.085, "num_input_tokens_seen": 19085248, "step": 33600 }, { "epoch": 589.566371681416, "grad_norm": 6.197748803060676e-08, "learning_rate": 0.018531302373343096, "loss": 0.0, "num_input_tokens_seen": 19087968, "step": 33605 }, { "epoch": 589.6548672566372, "grad_norm": 1.650734837710388e-08, "learning_rate": 0.018502951095221588, "loss": 0.0, "num_input_tokens_seen": 19090672, "step": 33610 }, { "epoch": 589.7433628318585, "grad_norm": 4.1191679400753856e-08, "learning_rate": 0.01847462009559751, "loss": 0.0, "num_input_tokens_seen": 19093360, "step": 33615 }, { "epoch": 589.8318584070796, "grad_norm": 5.1916622112457844e-08, "learning_rate": 0.01844630937883992, "loss": 0.0, "num_input_tokens_seen": 19096592, "step": 33620 }, { "epoch": 589.9203539823009, "grad_norm": 8.469802992294717e-08, "learning_rate": 0.018418018949314573, "loss": 0.0, "num_input_tokens_seen": 19099872, "step": 33625 }, { "epoch": 590.0, "grad_norm": 3.449208918482327e-08, "learning_rate": 0.018389748811384315, "loss": 0.0, "num_input_tokens_seen": 19101944, "step": 33630 }, { "epoch": 590.0884955752213, "grad_norm": 3.6248295032237365e-08, "learning_rate": 0.018361498969408658, "loss": 0.0, "num_input_tokens_seen": 19105160, "step": 33635 }, { "epoch": 590.1769911504425, "grad_norm": 3.4686344463352725e-08, "learning_rate": 0.01833326942774415, "loss": 0.0, "num_input_tokens_seen": 19108088, "step": 33640 }, { "epoch": 590.2654867256637, "grad_norm": 2.3227768153333272e-08, "learning_rate": 0.018305060190744155, "loss": 0.0, "num_input_tokens_seen": 19111032, "step": 33645 }, { "epoch": 590.3539823008849, "grad_norm": 4.0627924136060756e-08, "learning_rate": 0.018276871262758846, "loss": 0.0, "num_input_tokens_seen": 19113704, "step": 33650 }, { "epoch": 590.4424778761062, "grad_norm": 4.1117335314311276e-08, "learning_rate": 0.0182487026481353, "loss": 0.0, "num_input_tokens_seen": 19116456, "step": 33655 }, { "epoch": 590.5309734513274, "grad_norm": 4.2765158525526203e-08, "learning_rate": 0.018220554351217538, "loss": 0.0, "num_input_tokens_seen": 19119416, "step": 33660 }, { "epoch": 590.6194690265487, "grad_norm": 6.612514624748655e-08, "learning_rate": 0.01819242637634629, "loss": 0.0, "num_input_tokens_seen": 19122520, "step": 33665 }, { "epoch": 590.70796460177, "grad_norm": 3.485343214038039e-08, "learning_rate": 0.01816431872785933, "loss": 0.0, "num_input_tokens_seen": 19125240, "step": 33670 }, { "epoch": 590.7964601769911, "grad_norm": 2.7006162639509057e-08, "learning_rate": 0.018136231410091148, "loss": 0.0, "num_input_tokens_seen": 19127912, "step": 33675 }, { "epoch": 590.8849557522124, "grad_norm": 3.821957861305236e-08, "learning_rate": 0.018108164427373175, "loss": 0.0, "num_input_tokens_seen": 19130696, "step": 33680 }, { "epoch": 590.9734513274336, "grad_norm": 3.4309096008655615e-08, "learning_rate": 0.01808011778403375, "loss": 0.0, "num_input_tokens_seen": 19133816, "step": 33685 }, { "epoch": 591.0530973451328, "grad_norm": 2.23403269217215e-08, "learning_rate": 0.01805209148439793, "loss": 0.0, "num_input_tokens_seen": 19136248, "step": 33690 }, { "epoch": 591.141592920354, "grad_norm": 5.6722488039895325e-08, "learning_rate": 0.018024085532787757, "loss": 0.0, "num_input_tokens_seen": 19138872, "step": 33695 }, { "epoch": 591.2300884955753, "grad_norm": 1.87525959205459e-08, "learning_rate": 0.017996099933522164, "loss": 0.0, "num_input_tokens_seen": 19141784, "step": 33700 }, { "epoch": 591.3185840707964, "grad_norm": 5.397211921831513e-08, "learning_rate": 0.017968134690916775, "loss": 0.0, "num_input_tokens_seen": 19144808, "step": 33705 }, { "epoch": 591.4070796460177, "grad_norm": 3.013817817532072e-08, "learning_rate": 0.017940189809284263, "loss": 0.0, "num_input_tokens_seen": 19147336, "step": 33710 }, { "epoch": 591.4955752212389, "grad_norm": 2.5857371355186842e-08, "learning_rate": 0.017912265292934024, "loss": 0.0, "num_input_tokens_seen": 19149992, "step": 33715 }, { "epoch": 591.5840707964602, "grad_norm": 2.49486244996433e-08, "learning_rate": 0.017884361146172423, "loss": 0.0, "num_input_tokens_seen": 19153048, "step": 33720 }, { "epoch": 591.6725663716815, "grad_norm": 1.9978255494379482e-08, "learning_rate": 0.01785647737330261, "loss": 0.0, "num_input_tokens_seen": 19156120, "step": 33725 }, { "epoch": 591.7610619469026, "grad_norm": 2.540214616431058e-08, "learning_rate": 0.017828613978624563, "loss": 0.0, "num_input_tokens_seen": 19159192, "step": 33730 }, { "epoch": 591.8495575221239, "grad_norm": 5.505745193090661e-08, "learning_rate": 0.01780077096643523, "loss": 0.0, "num_input_tokens_seen": 19162104, "step": 33735 }, { "epoch": 591.9380530973451, "grad_norm": 4.223941019176891e-08, "learning_rate": 0.017772948341028345, "loss": 0.0, "num_input_tokens_seen": 19165496, "step": 33740 }, { "epoch": 592.0176991150443, "grad_norm": 7.353830966394526e-08, "learning_rate": 0.01774514610669447, "loss": 0.0, "num_input_tokens_seen": 19168032, "step": 33745 }, { "epoch": 592.1061946902655, "grad_norm": 5.3071136818516607e-08, "learning_rate": 0.017717364267721112, "loss": 0.0, "num_input_tokens_seen": 19170752, "step": 33750 }, { "epoch": 592.1946902654868, "grad_norm": 8.303129561681999e-08, "learning_rate": 0.017689602828392513, "loss": 0.0, "num_input_tokens_seen": 19173712, "step": 33755 }, { "epoch": 592.2831858407079, "grad_norm": 3.943177873111381e-08, "learning_rate": 0.017661861792989897, "loss": 0.0, "num_input_tokens_seen": 19176624, "step": 33760 }, { "epoch": 592.3716814159292, "grad_norm": 2.8203135826743164e-08, "learning_rate": 0.017634141165791272, "loss": 0.0, "num_input_tokens_seen": 19179296, "step": 33765 }, { "epoch": 592.4601769911504, "grad_norm": 3.4449129771019216e-08, "learning_rate": 0.017606440951071455, "loss": 0.0, "num_input_tokens_seen": 19182064, "step": 33770 }, { "epoch": 592.5486725663717, "grad_norm": 3.5270122822339545e-08, "learning_rate": 0.017578761153102213, "loss": 0.0, "num_input_tokens_seen": 19184848, "step": 33775 }, { "epoch": 592.637168141593, "grad_norm": 3.145044047414558e-08, "learning_rate": 0.017551101776152146, "loss": 0.0, "num_input_tokens_seen": 19187488, "step": 33780 }, { "epoch": 592.7256637168142, "grad_norm": 5.000208247452065e-08, "learning_rate": 0.017523462824486608, "loss": 0.0, "num_input_tokens_seen": 19190832, "step": 33785 }, { "epoch": 592.8141592920354, "grad_norm": 5.5709094226585876e-08, "learning_rate": 0.01749584430236794, "loss": 0.0, "num_input_tokens_seen": 19193536, "step": 33790 }, { "epoch": 592.9026548672566, "grad_norm": 6.691288945148699e-08, "learning_rate": 0.01746824621405524, "loss": 0.0, "num_input_tokens_seen": 19196176, "step": 33795 }, { "epoch": 592.9911504424779, "grad_norm": 2.3795967862838552e-08, "learning_rate": 0.017440668563804412, "loss": 0.0, "num_input_tokens_seen": 19199136, "step": 33800 }, { "epoch": 592.9911504424779, "eval_loss": 0.801821768283844, "eval_runtime": 0.931, "eval_samples_per_second": 26.852, "eval_steps_per_second": 13.963, "num_input_tokens_seen": 19199136, "step": 33800 }, { "epoch": 593.070796460177, "grad_norm": 1.3524123687602696e-07, "learning_rate": 0.017413111355868392, "loss": 0.0, "num_input_tokens_seen": 19201480, "step": 33805 }, { "epoch": 593.1592920353983, "grad_norm": 3.6091588384579154e-08, "learning_rate": 0.017385574594496748, "loss": 0.0, "num_input_tokens_seen": 19204632, "step": 33810 }, { "epoch": 593.2477876106195, "grad_norm": 5.1029079628506224e-08, "learning_rate": 0.01735805828393605, "loss": 0.0, "num_input_tokens_seen": 19207288, "step": 33815 }, { "epoch": 593.3362831858407, "grad_norm": 4.808816811419092e-08, "learning_rate": 0.017330562428429667, "loss": 0.0, "num_input_tokens_seen": 19210072, "step": 33820 }, { "epoch": 593.4247787610619, "grad_norm": 7.892942477383258e-08, "learning_rate": 0.01730308703221776, "loss": 0.0, "num_input_tokens_seen": 19212712, "step": 33825 }, { "epoch": 593.5132743362832, "grad_norm": 4.0078155905121093e-08, "learning_rate": 0.01727563209953744, "loss": 0.0, "num_input_tokens_seen": 19215864, "step": 33830 }, { "epoch": 593.6017699115044, "grad_norm": 2.3389475245494395e-08, "learning_rate": 0.017248197634622535, "loss": 0.0, "num_input_tokens_seen": 19218776, "step": 33835 }, { "epoch": 593.6902654867257, "grad_norm": 4.139816667247942e-08, "learning_rate": 0.01722078364170383, "loss": 0.0, "num_input_tokens_seen": 19221544, "step": 33840 }, { "epoch": 593.7787610619469, "grad_norm": 5.1973039205677196e-08, "learning_rate": 0.017193390125008905, "loss": 0.0, "num_input_tokens_seen": 19224920, "step": 33845 }, { "epoch": 593.8672566371681, "grad_norm": 4.039899081931253e-08, "learning_rate": 0.017166017088762153, "loss": 0.0, "num_input_tokens_seen": 19227400, "step": 33850 }, { "epoch": 593.9557522123894, "grad_norm": 2.9167159354415162e-08, "learning_rate": 0.017138664537184878, "loss": 0.0, "num_input_tokens_seen": 19230360, "step": 33855 }, { "epoch": 594.0353982300885, "grad_norm": 3.147283322846306e-08, "learning_rate": 0.017111332474495172, "loss": 0.0, "num_input_tokens_seen": 19232576, "step": 33860 }, { "epoch": 594.1238938053098, "grad_norm": 2.9758442821048448e-08, "learning_rate": 0.017084020904907998, "loss": 0.0, "num_input_tokens_seen": 19235184, "step": 33865 }, { "epoch": 594.212389380531, "grad_norm": 5.626391441637679e-08, "learning_rate": 0.017056729832635103, "loss": 0.0, "num_input_tokens_seen": 19237568, "step": 33870 }, { "epoch": 594.3008849557522, "grad_norm": 3.172694817976662e-08, "learning_rate": 0.017029459261885153, "loss": 0.0, "num_input_tokens_seen": 19240160, "step": 33875 }, { "epoch": 594.3893805309734, "grad_norm": 2.9742821539002762e-08, "learning_rate": 0.01700220919686359, "loss": 0.0, "num_input_tokens_seen": 19242960, "step": 33880 }, { "epoch": 594.4778761061947, "grad_norm": 4.1837211028905585e-08, "learning_rate": 0.016974979641772723, "loss": 0.0, "num_input_tokens_seen": 19245824, "step": 33885 }, { "epoch": 594.566371681416, "grad_norm": 2.6871381564319563e-08, "learning_rate": 0.01694777060081169, "loss": 0.0, "num_input_tokens_seen": 19248544, "step": 33890 }, { "epoch": 594.6548672566372, "grad_norm": 3.3365754603664755e-08, "learning_rate": 0.016920582078176444, "loss": 0.0, "num_input_tokens_seen": 19252112, "step": 33895 }, { "epoch": 594.7433628318585, "grad_norm": 6.217400283503594e-08, "learning_rate": 0.016893414078059863, "loss": 0.0, "num_input_tokens_seen": 19255504, "step": 33900 }, { "epoch": 594.8318584070796, "grad_norm": 5.302649697114248e-08, "learning_rate": 0.016866266604651535, "loss": 0.0, "num_input_tokens_seen": 19258432, "step": 33905 }, { "epoch": 594.9203539823009, "grad_norm": 3.104529966435621e-08, "learning_rate": 0.016839139662137976, "loss": 0.0, "num_input_tokens_seen": 19262000, "step": 33910 }, { "epoch": 595.0, "grad_norm": 2.0906432141032383e-08, "learning_rate": 0.01681203325470245, "loss": 0.0, "num_input_tokens_seen": 19264208, "step": 33915 }, { "epoch": 595.0884955752213, "grad_norm": 4.591467117620596e-08, "learning_rate": 0.016784947386525157, "loss": 0.0, "num_input_tokens_seen": 19266768, "step": 33920 }, { "epoch": 595.1769911504425, "grad_norm": 3.779815571647305e-08, "learning_rate": 0.01675788206178308, "loss": 0.0, "num_input_tokens_seen": 19269536, "step": 33925 }, { "epoch": 595.2654867256637, "grad_norm": 4.8437183153282604e-08, "learning_rate": 0.016730837284649986, "loss": 0.0, "num_input_tokens_seen": 19272336, "step": 33930 }, { "epoch": 595.3539823008849, "grad_norm": 2.2709031100021093e-08, "learning_rate": 0.016703813059296583, "loss": 0.0, "num_input_tokens_seen": 19275168, "step": 33935 }, { "epoch": 595.4424778761062, "grad_norm": 8.979455401458836e-08, "learning_rate": 0.016676809389890294, "loss": 0.0, "num_input_tokens_seen": 19278048, "step": 33940 }, { "epoch": 595.5309734513274, "grad_norm": 2.9489573449836826e-08, "learning_rate": 0.016649826280595435, "loss": 0.0, "num_input_tokens_seen": 19280864, "step": 33945 }, { "epoch": 595.6194690265487, "grad_norm": 5.9400342422577523e-08, "learning_rate": 0.016622863735573163, "loss": 0.0, "num_input_tokens_seen": 19283920, "step": 33950 }, { "epoch": 595.70796460177, "grad_norm": 4.8348411496590415e-08, "learning_rate": 0.016595921758981395, "loss": 0.0, "num_input_tokens_seen": 19287024, "step": 33955 }, { "epoch": 595.7964601769911, "grad_norm": 1.4757672417431422e-08, "learning_rate": 0.01656900035497495, "loss": 0.0, "num_input_tokens_seen": 19289616, "step": 33960 }, { "epoch": 595.8849557522124, "grad_norm": 4.382055962537379e-08, "learning_rate": 0.016542099527705485, "loss": 0.0, "num_input_tokens_seen": 19292528, "step": 33965 }, { "epoch": 595.9734513274336, "grad_norm": 2.105832308529898e-08, "learning_rate": 0.01651521928132138, "loss": 0.0, "num_input_tokens_seen": 19295648, "step": 33970 }, { "epoch": 596.0530973451328, "grad_norm": 2.88289836447575e-08, "learning_rate": 0.01648835961996794, "loss": 0.0, "num_input_tokens_seen": 19297952, "step": 33975 }, { "epoch": 596.141592920354, "grad_norm": 2.4784018393120277e-08, "learning_rate": 0.016461520547787285, "loss": 0.0, "num_input_tokens_seen": 19300784, "step": 33980 }, { "epoch": 596.2300884955753, "grad_norm": 2.8511818683796264e-08, "learning_rate": 0.016434702068918266, "loss": 0.0, "num_input_tokens_seen": 19303312, "step": 33985 }, { "epoch": 596.3185840707964, "grad_norm": 1.148695716324255e-07, "learning_rate": 0.01640790418749673, "loss": 0.0, "num_input_tokens_seen": 19306544, "step": 33990 }, { "epoch": 596.4070796460177, "grad_norm": 4.711025525239165e-08, "learning_rate": 0.016381126907655134, "loss": 0.0, "num_input_tokens_seen": 19308912, "step": 33995 }, { "epoch": 596.4955752212389, "grad_norm": 2.217750605382207e-08, "learning_rate": 0.016354370233522948, "loss": 0.0, "num_input_tokens_seen": 19311344, "step": 34000 }, { "epoch": 596.4955752212389, "eval_loss": 0.8218154311180115, "eval_runtime": 0.9422, "eval_samples_per_second": 26.533, "eval_steps_per_second": 13.797, "num_input_tokens_seen": 19311344, "step": 34000 }, { "epoch": 596.5840707964602, "grad_norm": 2.166191848118615e-08, "learning_rate": 0.016327634169226394, "loss": 0.0, "num_input_tokens_seen": 19313920, "step": 34005 }, { "epoch": 596.6725663716815, "grad_norm": 4.9452729911081406e-08, "learning_rate": 0.016300918718888485, "loss": 0.0, "num_input_tokens_seen": 19317408, "step": 34010 }, { "epoch": 596.7610619469026, "grad_norm": 3.5323111546858854e-08, "learning_rate": 0.016274223886629052, "loss": 0.0, "num_input_tokens_seen": 19320352, "step": 34015 }, { "epoch": 596.8495575221239, "grad_norm": 3.6161985406124586e-08, "learning_rate": 0.01624754967656482, "loss": 0.0, "num_input_tokens_seen": 19323360, "step": 34020 }, { "epoch": 596.9380530973451, "grad_norm": 6.497041482589339e-08, "learning_rate": 0.016220896092809235, "loss": 0.0, "num_input_tokens_seen": 19326640, "step": 34025 }, { "epoch": 597.0176991150443, "grad_norm": 7.145755631654538e-08, "learning_rate": 0.01619426313947267, "loss": 0.0, "num_input_tokens_seen": 19328992, "step": 34030 }, { "epoch": 597.1061946902655, "grad_norm": 7.355523479191106e-08, "learning_rate": 0.016167650820662228, "loss": 0.0, "num_input_tokens_seen": 19331680, "step": 34035 }, { "epoch": 597.1946902654868, "grad_norm": 4.4806657939489014e-08, "learning_rate": 0.016141059140481855, "loss": 0.0, "num_input_tokens_seen": 19334352, "step": 34040 }, { "epoch": 597.2831858407079, "grad_norm": 5.163122551721244e-08, "learning_rate": 0.016114488103032374, "loss": 0.0, "num_input_tokens_seen": 19337600, "step": 34045 }, { "epoch": 597.3716814159292, "grad_norm": 3.1968454550224124e-08, "learning_rate": 0.016087937712411293, "loss": 0.0, "num_input_tokens_seen": 19340288, "step": 34050 }, { "epoch": 597.4601769911504, "grad_norm": 4.618501492359428e-08, "learning_rate": 0.01606140797271308, "loss": 0.0, "num_input_tokens_seen": 19343552, "step": 34055 }, { "epoch": 597.5486725663717, "grad_norm": 5.4706518426428374e-08, "learning_rate": 0.01603489888802897, "loss": 0.0, "num_input_tokens_seen": 19346272, "step": 34060 }, { "epoch": 597.637168141593, "grad_norm": 3.3993991621628084e-08, "learning_rate": 0.016008410462446918, "loss": 0.0, "num_input_tokens_seen": 19349104, "step": 34065 }, { "epoch": 597.7256637168142, "grad_norm": 4.23997406073795e-08, "learning_rate": 0.01598194270005185, "loss": 0.0, "num_input_tokens_seen": 19352032, "step": 34070 }, { "epoch": 597.8141592920354, "grad_norm": 3.4767968060123167e-08, "learning_rate": 0.015955495604925356, "loss": 0.0, "num_input_tokens_seen": 19354688, "step": 34075 }, { "epoch": 597.9026548672566, "grad_norm": 3.18040740410197e-08, "learning_rate": 0.01592906918114598, "loss": 0.0, "num_input_tokens_seen": 19357392, "step": 34080 }, { "epoch": 597.9911504424779, "grad_norm": 4.059582892068647e-08, "learning_rate": 0.015902663432788965, "loss": 0.0, "num_input_tokens_seen": 19360480, "step": 34085 }, { "epoch": 598.070796460177, "grad_norm": 3.594236019921482e-08, "learning_rate": 0.01587627836392643, "loss": 0.0, "num_input_tokens_seen": 19362624, "step": 34090 }, { "epoch": 598.1592920353983, "grad_norm": 2.1414141571085565e-08, "learning_rate": 0.01584991397862726, "loss": 0.0, "num_input_tokens_seen": 19365200, "step": 34095 }, { "epoch": 598.2477876106195, "grad_norm": 2.5895806388120945e-08, "learning_rate": 0.015823570280957214, "loss": 0.0, "num_input_tokens_seen": 19368704, "step": 34100 }, { "epoch": 598.3362831858407, "grad_norm": 3.2858682885716917e-08, "learning_rate": 0.015797247274978766, "loss": 0.0, "num_input_tokens_seen": 19371456, "step": 34105 }, { "epoch": 598.4247787610619, "grad_norm": 5.6344998000668056e-08, "learning_rate": 0.015770944964751326, "loss": 0.0, "num_input_tokens_seen": 19374720, "step": 34110 }, { "epoch": 598.5132743362832, "grad_norm": 6.042516531579167e-08, "learning_rate": 0.015744663354330956, "loss": 0.0, "num_input_tokens_seen": 19377728, "step": 34115 }, { "epoch": 598.6017699115044, "grad_norm": 4.385353236102674e-08, "learning_rate": 0.015718402447770664, "loss": 0.0, "num_input_tokens_seen": 19380768, "step": 34120 }, { "epoch": 598.6902654867257, "grad_norm": 2.2873789973232306e-08, "learning_rate": 0.015692162249120224, "loss": 0.0, "num_input_tokens_seen": 19383360, "step": 34125 }, { "epoch": 598.7787610619469, "grad_norm": 4.501018580072014e-08, "learning_rate": 0.01566594276242615, "loss": 0.0, "num_input_tokens_seen": 19386320, "step": 34130 }, { "epoch": 598.8672566371681, "grad_norm": 3.3305653346360486e-08, "learning_rate": 0.015639743991731857, "loss": 0.0, "num_input_tokens_seen": 19389280, "step": 34135 }, { "epoch": 598.9557522123894, "grad_norm": 2.895045092543569e-08, "learning_rate": 0.01561356594107755, "loss": 0.0, "num_input_tokens_seen": 19392048, "step": 34140 }, { "epoch": 599.0353982300885, "grad_norm": 3.2208532729782746e-08, "learning_rate": 0.015587408614500147, "loss": 0.0, "num_input_tokens_seen": 19394200, "step": 34145 }, { "epoch": 599.1238938053098, "grad_norm": 2.4105583307232337e-08, "learning_rate": 0.015561272016033505, "loss": 0.0, "num_input_tokens_seen": 19396904, "step": 34150 }, { "epoch": 599.212389380531, "grad_norm": 3.321252606269809e-08, "learning_rate": 0.015535156149708167, "loss": 0.0, "num_input_tokens_seen": 19399688, "step": 34155 }, { "epoch": 599.3008849557522, "grad_norm": 5.33317994211302e-08, "learning_rate": 0.015509061019551528, "loss": 0.0, "num_input_tokens_seen": 19402568, "step": 34160 }, { "epoch": 599.3893805309734, "grad_norm": 5.3897903029564986e-08, "learning_rate": 0.015482986629587818, "loss": 0.0, "num_input_tokens_seen": 19405336, "step": 34165 }, { "epoch": 599.4778761061947, "grad_norm": 9.562670300056197e-08, "learning_rate": 0.01545693298383799, "loss": 0.0, "num_input_tokens_seen": 19408136, "step": 34170 }, { "epoch": 599.566371681416, "grad_norm": 3.628835187896584e-08, "learning_rate": 0.015430900086319858, "loss": 0.0, "num_input_tokens_seen": 19410840, "step": 34175 }, { "epoch": 599.6548672566372, "grad_norm": 3.3337084204276834e-08, "learning_rate": 0.015404887941048084, "loss": 0.0, "num_input_tokens_seen": 19413720, "step": 34180 }, { "epoch": 599.7433628318585, "grad_norm": 3.048496566293579e-08, "learning_rate": 0.01537889655203397, "loss": 0.0, "num_input_tokens_seen": 19416952, "step": 34185 }, { "epoch": 599.8318584070796, "grad_norm": 5.79546757251137e-08, "learning_rate": 0.015352925923285798, "loss": 0.0, "num_input_tokens_seen": 19419912, "step": 34190 }, { "epoch": 599.9203539823009, "grad_norm": 8.062872325353965e-08, "learning_rate": 0.015326976058808511, "loss": 0.0, "num_input_tokens_seen": 19423032, "step": 34195 }, { "epoch": 600.0, "grad_norm": 2.4700595346871523e-08, "learning_rate": 0.015301046962603908, "loss": 0.0, "num_input_tokens_seen": 19425472, "step": 34200 }, { "epoch": 600.0, "eval_loss": 0.8327299356460571, "eval_runtime": 0.9146, "eval_samples_per_second": 27.336, "eval_steps_per_second": 14.215, "num_input_tokens_seen": 19425472, "step": 34200 }, { "epoch": 600.0884955752213, "grad_norm": 4.320324364925909e-08, "learning_rate": 0.015275138638670626, "loss": 0.0, "num_input_tokens_seen": 19428848, "step": 34205 }, { "epoch": 600.1769911504425, "grad_norm": 3.2690866902385096e-08, "learning_rate": 0.015249251091004001, "loss": 0.0, "num_input_tokens_seen": 19432096, "step": 34210 }, { "epoch": 600.2654867256637, "grad_norm": 2.863466086466815e-08, "learning_rate": 0.01522338432359624, "loss": 0.0, "num_input_tokens_seen": 19434656, "step": 34215 }, { "epoch": 600.3539823008849, "grad_norm": 1.4271901704887568e-07, "learning_rate": 0.01519753834043635, "loss": 0.0, "num_input_tokens_seen": 19437280, "step": 34220 }, { "epoch": 600.4424778761062, "grad_norm": 2.7366452215460413e-08, "learning_rate": 0.015171713145510095, "loss": 0.0, "num_input_tokens_seen": 19440832, "step": 34225 }, { "epoch": 600.5309734513274, "grad_norm": 2.7776865252349126e-08, "learning_rate": 0.01514590874279999, "loss": 0.0, "num_input_tokens_seen": 19443712, "step": 34230 }, { "epoch": 600.6194690265487, "grad_norm": 1.9850736165949456e-08, "learning_rate": 0.015120125136285467, "loss": 0.0, "num_input_tokens_seen": 19446624, "step": 34235 }, { "epoch": 600.70796460177, "grad_norm": 1.6413535419701475e-08, "learning_rate": 0.015094362329942629, "loss": 0.0, "num_input_tokens_seen": 19449424, "step": 34240 }, { "epoch": 600.7964601769911, "grad_norm": 2.5233420686276986e-08, "learning_rate": 0.01506862032774448, "loss": 0.0, "num_input_tokens_seen": 19452176, "step": 34245 }, { "epoch": 600.8849557522124, "grad_norm": 3.56529774592218e-08, "learning_rate": 0.015042899133660697, "loss": 0.0, "num_input_tokens_seen": 19454512, "step": 34250 }, { "epoch": 600.9734513274336, "grad_norm": 2.789635900057874e-08, "learning_rate": 0.01501719875165789, "loss": 0.0, "num_input_tokens_seen": 19457248, "step": 34255 }, { "epoch": 601.0530973451328, "grad_norm": 1.4806207815354355e-08, "learning_rate": 0.014991519185699286, "loss": 0.0, "num_input_tokens_seen": 19459592, "step": 34260 }, { "epoch": 601.141592920354, "grad_norm": 3.874421494742819e-08, "learning_rate": 0.014965860439745054, "loss": 0.0, "num_input_tokens_seen": 19462248, "step": 34265 }, { "epoch": 601.2300884955753, "grad_norm": 2.2196088522719037e-08, "learning_rate": 0.01494022251775211, "loss": 0.0, "num_input_tokens_seen": 19465224, "step": 34270 }, { "epoch": 601.3185840707964, "grad_norm": 4.8973777921901274e-08, "learning_rate": 0.014914605423674109, "loss": 0.0, "num_input_tokens_seen": 19468040, "step": 34275 }, { "epoch": 601.4070796460177, "grad_norm": 2.13692565864676e-08, "learning_rate": 0.014889009161461525, "loss": 0.0, "num_input_tokens_seen": 19470920, "step": 34280 }, { "epoch": 601.4955752212389, "grad_norm": 1.0232218983219354e-07, "learning_rate": 0.014863433735061665, "loss": 0.0, "num_input_tokens_seen": 19473928, "step": 34285 }, { "epoch": 601.5840707964602, "grad_norm": 1.0989203325095787e-08, "learning_rate": 0.014837879148418541, "loss": 0.0, "num_input_tokens_seen": 19477480, "step": 34290 }, { "epoch": 601.6725663716815, "grad_norm": 1.3838260315424122e-07, "learning_rate": 0.01481234540547302, "loss": 0.0, "num_input_tokens_seen": 19479848, "step": 34295 }, { "epoch": 601.7610619469026, "grad_norm": 2.6027333177580658e-08, "learning_rate": 0.014786832510162717, "loss": 0.0, "num_input_tokens_seen": 19482648, "step": 34300 }, { "epoch": 601.8495575221239, "grad_norm": 3.642574952777977e-08, "learning_rate": 0.014761340466422017, "loss": 0.0, "num_input_tokens_seen": 19485432, "step": 34305 }, { "epoch": 601.9380530973451, "grad_norm": 3.509548207603075e-08, "learning_rate": 0.014735869278182144, "loss": 0.0, "num_input_tokens_seen": 19488392, "step": 34310 }, { "epoch": 602.0176991150443, "grad_norm": 2.823318112632478e-08, "learning_rate": 0.014710418949371057, "loss": 0.0, "num_input_tokens_seen": 19490824, "step": 34315 }, { "epoch": 602.1061946902655, "grad_norm": 2.3232180623722343e-08, "learning_rate": 0.014684989483913495, "loss": 0.0, "num_input_tokens_seen": 19493352, "step": 34320 }, { "epoch": 602.1946902654868, "grad_norm": 2.2475546757050324e-08, "learning_rate": 0.014659580885731077, "loss": 0.0, "num_input_tokens_seen": 19496392, "step": 34325 }, { "epoch": 602.2831858407079, "grad_norm": 2.0846218973247232e-08, "learning_rate": 0.014634193158742047, "loss": 0.0, "num_input_tokens_seen": 19499208, "step": 34330 }, { "epoch": 602.3716814159292, "grad_norm": 7.018467584885002e-08, "learning_rate": 0.014608826306861576, "loss": 0.0, "num_input_tokens_seen": 19502056, "step": 34335 }, { "epoch": 602.4601769911504, "grad_norm": 3.861937258875514e-08, "learning_rate": 0.014583480334001486, "loss": 0.0, "num_input_tokens_seen": 19504536, "step": 34340 }, { "epoch": 602.5486725663717, "grad_norm": 7.8321022556338e-08, "learning_rate": 0.014558155244070496, "loss": 0.0, "num_input_tokens_seen": 19507144, "step": 34345 }, { "epoch": 602.637168141593, "grad_norm": 4.482329174493316e-08, "learning_rate": 0.014532851040974036, "loss": 0.0, "num_input_tokens_seen": 19510280, "step": 34350 }, { "epoch": 602.7256637168142, "grad_norm": 3.0667024475405924e-08, "learning_rate": 0.014507567728614335, "loss": 0.0, "num_input_tokens_seen": 19513240, "step": 34355 }, { "epoch": 602.8141592920354, "grad_norm": 2.4785869356946932e-08, "learning_rate": 0.01448230531089037, "loss": 0.0, "num_input_tokens_seen": 19516184, "step": 34360 }, { "epoch": 602.9026548672566, "grad_norm": 2.0210691786815005e-08, "learning_rate": 0.014457063791697993, "loss": 0.0, "num_input_tokens_seen": 19519640, "step": 34365 }, { "epoch": 602.9911504424779, "grad_norm": 3.797019232365528e-08, "learning_rate": 0.01443184317492971, "loss": 0.0, "num_input_tokens_seen": 19522104, "step": 34370 }, { "epoch": 603.070796460177, "grad_norm": 7.321847306229756e-08, "learning_rate": 0.014406643464474822, "loss": 0.0, "num_input_tokens_seen": 19524120, "step": 34375 }, { "epoch": 603.1592920353983, "grad_norm": 2.855558101089173e-08, "learning_rate": 0.014381464664219539, "loss": 0.0, "num_input_tokens_seen": 19526824, "step": 34380 }, { "epoch": 603.2477876106195, "grad_norm": 9.317967908373248e-08, "learning_rate": 0.014356306778046656, "loss": 0.0, "num_input_tokens_seen": 19529816, "step": 34385 }, { "epoch": 603.3362831858407, "grad_norm": 1.1371088959322151e-08, "learning_rate": 0.014331169809835885, "loss": 0.0, "num_input_tokens_seen": 19532840, "step": 34390 }, { "epoch": 603.4247787610619, "grad_norm": 2.363742801492208e-08, "learning_rate": 0.014306053763463644, "loss": 0.0, "num_input_tokens_seen": 19536040, "step": 34395 }, { "epoch": 603.5132743362832, "grad_norm": 2.125154807686158e-08, "learning_rate": 0.014280958642803147, "loss": 0.0, "num_input_tokens_seen": 19539112, "step": 34400 }, { "epoch": 603.5132743362832, "eval_loss": 0.7973053455352783, "eval_runtime": 0.9341, "eval_samples_per_second": 26.763, "eval_steps_per_second": 13.917, "num_input_tokens_seen": 19539112, "step": 34400 }, { "epoch": 603.6017699115044, "grad_norm": 3.959589989221968e-08, "learning_rate": 0.014255884451724404, "loss": 0.0, "num_input_tokens_seen": 19541960, "step": 34405 }, { "epoch": 603.6902654867257, "grad_norm": 4.577048073883816e-08, "learning_rate": 0.014230831194094101, "loss": 0.0, "num_input_tokens_seen": 19544568, "step": 34410 }, { "epoch": 603.7787610619469, "grad_norm": 7.573844129638019e-08, "learning_rate": 0.014205798873775865, "loss": 0.0, "num_input_tokens_seen": 19547272, "step": 34415 }, { "epoch": 603.8672566371681, "grad_norm": 1.1166066826717724e-07, "learning_rate": 0.014180787494629893, "loss": 0.0, "num_input_tokens_seen": 19550296, "step": 34420 }, { "epoch": 603.9557522123894, "grad_norm": 3.446922036687283e-08, "learning_rate": 0.014155797060513314, "loss": 0.0, "num_input_tokens_seen": 19553336, "step": 34425 }, { "epoch": 604.0353982300885, "grad_norm": 5.899133270759194e-08, "learning_rate": 0.014130827575279963, "loss": 0.0, "num_input_tokens_seen": 19555984, "step": 34430 }, { "epoch": 604.1238938053098, "grad_norm": 4.690216570679695e-08, "learning_rate": 0.014105879042780427, "loss": 0.0, "num_input_tokens_seen": 19559088, "step": 34435 }, { "epoch": 604.212389380531, "grad_norm": 2.4524625885646856e-08, "learning_rate": 0.014080951466862113, "loss": 0.0, "num_input_tokens_seen": 19562400, "step": 34440 }, { "epoch": 604.3008849557522, "grad_norm": 2.854947034336419e-08, "learning_rate": 0.014056044851369126, "loss": 0.0, "num_input_tokens_seen": 19565104, "step": 34445 }, { "epoch": 604.3893805309734, "grad_norm": 4.9980648952896445e-08, "learning_rate": 0.014031159200142428, "loss": 0.0, "num_input_tokens_seen": 19567440, "step": 34450 }, { "epoch": 604.4778761061947, "grad_norm": 8.072211699072795e-08, "learning_rate": 0.014006294517019667, "loss": 0.0, "num_input_tokens_seen": 19570256, "step": 34455 }, { "epoch": 604.566371681416, "grad_norm": 3.6168714245832234e-08, "learning_rate": 0.013981450805835276, "loss": 0.0, "num_input_tokens_seen": 19572928, "step": 34460 }, { "epoch": 604.6548672566372, "grad_norm": 1.2096158741314866e-08, "learning_rate": 0.01395662807042049, "loss": 0.0, "num_input_tokens_seen": 19575920, "step": 34465 }, { "epoch": 604.7433628318585, "grad_norm": 3.621848065904487e-08, "learning_rate": 0.013931826314603296, "loss": 0.0, "num_input_tokens_seen": 19579440, "step": 34470 }, { "epoch": 604.8318584070796, "grad_norm": 4.804483921816427e-08, "learning_rate": 0.013907045542208401, "loss": 0.0, "num_input_tokens_seen": 19581968, "step": 34475 }, { "epoch": 604.9203539823009, "grad_norm": 3.2710030239968546e-08, "learning_rate": 0.013882285757057333, "loss": 0.0, "num_input_tokens_seen": 19584656, "step": 34480 }, { "epoch": 605.0, "grad_norm": 5.822453630344171e-08, "learning_rate": 0.013857546962968403, "loss": 0.0, "num_input_tokens_seen": 19587152, "step": 34485 }, { "epoch": 605.0884955752213, "grad_norm": 2.5866460973134053e-08, "learning_rate": 0.013832829163756577, "loss": 0.0, "num_input_tokens_seen": 19589984, "step": 34490 }, { "epoch": 605.1769911504425, "grad_norm": 5.654170820434956e-08, "learning_rate": 0.013808132363233689, "loss": 0.0, "num_input_tokens_seen": 19592784, "step": 34495 }, { "epoch": 605.2654867256637, "grad_norm": 2.5182297136439047e-08, "learning_rate": 0.013783456565208256, "loss": 0.0, "num_input_tokens_seen": 19595488, "step": 34500 }, { "epoch": 605.3539823008849, "grad_norm": 6.181851830433516e-08, "learning_rate": 0.01375880177348564, "loss": 0.0, "num_input_tokens_seen": 19597968, "step": 34505 }, { "epoch": 605.4424778761062, "grad_norm": 1.9925732175352096e-08, "learning_rate": 0.013734167991867928, "loss": 0.0, "num_input_tokens_seen": 19600800, "step": 34510 }, { "epoch": 605.5309734513274, "grad_norm": 4.933725605837935e-08, "learning_rate": 0.013709555224153935, "loss": 0.0, "num_input_tokens_seen": 19604080, "step": 34515 }, { "epoch": 605.6194690265487, "grad_norm": 2.0059204075550952e-07, "learning_rate": 0.013684963474139222, "loss": 0.0, "num_input_tokens_seen": 19607456, "step": 34520 }, { "epoch": 605.70796460177, "grad_norm": 4.2033938996155484e-08, "learning_rate": 0.013660392745616224, "loss": 0.0, "num_input_tokens_seen": 19610304, "step": 34525 }, { "epoch": 605.7964601769911, "grad_norm": 5.8566079985666875e-08, "learning_rate": 0.013635843042373974, "loss": 0.0, "num_input_tokens_seen": 19612976, "step": 34530 }, { "epoch": 605.8849557522124, "grad_norm": 2.7801563717844147e-08, "learning_rate": 0.01361131436819843, "loss": 0.0, "num_input_tokens_seen": 19616224, "step": 34535 }, { "epoch": 605.9734513274336, "grad_norm": 5.1369802633871586e-08, "learning_rate": 0.013586806726872147, "loss": 0.0, "num_input_tokens_seen": 19618912, "step": 34540 }, { "epoch": 606.0530973451328, "grad_norm": 4.192483515907952e-08, "learning_rate": 0.013562320122174537, "loss": 0.0, "num_input_tokens_seen": 19621232, "step": 34545 }, { "epoch": 606.141592920354, "grad_norm": 4.741044890010926e-08, "learning_rate": 0.013537854557881762, "loss": 0.0, "num_input_tokens_seen": 19623680, "step": 34550 }, { "epoch": 606.2300884955753, "grad_norm": 6.466916602221318e-08, "learning_rate": 0.013513410037766687, "loss": 0.0, "num_input_tokens_seen": 19626768, "step": 34555 }, { "epoch": 606.3185840707964, "grad_norm": 3.2793511905993e-08, "learning_rate": 0.013488986565598998, "loss": 0.0, "num_input_tokens_seen": 19630224, "step": 34560 }, { "epoch": 606.4070796460177, "grad_norm": 6.081109660271977e-08, "learning_rate": 0.013464584145145097, "loss": 0.0, "num_input_tokens_seen": 19633392, "step": 34565 }, { "epoch": 606.4955752212389, "grad_norm": 1.9985655796972424e-08, "learning_rate": 0.013440202780168109, "loss": 0.0, "num_input_tokens_seen": 19635872, "step": 34570 }, { "epoch": 606.5840707964602, "grad_norm": 1.7025310938834082e-08, "learning_rate": 0.01341584247442799, "loss": 0.0, "num_input_tokens_seen": 19638784, "step": 34575 }, { "epoch": 606.6725663716815, "grad_norm": 1.4414576199328621e-08, "learning_rate": 0.013391503231681355, "loss": 0.0, "num_input_tokens_seen": 19641376, "step": 34580 }, { "epoch": 606.7610619469026, "grad_norm": 5.820317383609108e-08, "learning_rate": 0.013367185055681685, "loss": 0.0, "num_input_tokens_seen": 19644176, "step": 34585 }, { "epoch": 606.8495575221239, "grad_norm": 1.5729256119811907e-08, "learning_rate": 0.013342887950179095, "loss": 0.0, "num_input_tokens_seen": 19647680, "step": 34590 }, { "epoch": 606.9380530973451, "grad_norm": 6.455587708842359e-08, "learning_rate": 0.013318611918920554, "loss": 0.0, "num_input_tokens_seen": 19650336, "step": 34595 }, { "epoch": 607.0176991150443, "grad_norm": 4.8529997798141267e-08, "learning_rate": 0.01329435696564965, "loss": 0.0, "num_input_tokens_seen": 19652392, "step": 34600 }, { "epoch": 607.0176991150443, "eval_loss": 0.7865257263183594, "eval_runtime": 0.932, "eval_samples_per_second": 26.823, "eval_steps_per_second": 13.948, "num_input_tokens_seen": 19652392, "step": 34600 }, { "epoch": 607.1061946902655, "grad_norm": 2.528213016717018e-08, "learning_rate": 0.013270123094106894, "loss": 0.0, "num_input_tokens_seen": 19654904, "step": 34605 }, { "epoch": 607.1946902654868, "grad_norm": 5.326219110202146e-08, "learning_rate": 0.013245910308029395, "loss": 0.0, "num_input_tokens_seen": 19657784, "step": 34610 }, { "epoch": 607.2831858407079, "grad_norm": 4.627883853913772e-08, "learning_rate": 0.0132217186111511, "loss": 0.0, "num_input_tokens_seen": 19660856, "step": 34615 }, { "epoch": 607.3716814159292, "grad_norm": 6.936889462849649e-08, "learning_rate": 0.013197548007202626, "loss": 0.0, "num_input_tokens_seen": 19663528, "step": 34620 }, { "epoch": 607.4601769911504, "grad_norm": 3.5578388235535385e-08, "learning_rate": 0.01317339849991142, "loss": 0.0, "num_input_tokens_seen": 19666168, "step": 34625 }, { "epoch": 607.5486725663717, "grad_norm": 4.3130729210361096e-08, "learning_rate": 0.013149270093001675, "loss": 0.0, "num_input_tokens_seen": 19669096, "step": 34630 }, { "epoch": 607.637168141593, "grad_norm": 3.978034612828196e-08, "learning_rate": 0.013125162790194227, "loss": 0.0, "num_input_tokens_seen": 19672424, "step": 34635 }, { "epoch": 607.7256637168142, "grad_norm": 8.650069993620946e-09, "learning_rate": 0.01310107659520674, "loss": 0.0, "num_input_tokens_seen": 19675128, "step": 34640 }, { "epoch": 607.8141592920354, "grad_norm": 4.8252982054464155e-08, "learning_rate": 0.013077011511753655, "loss": 0.0, "num_input_tokens_seen": 19677944, "step": 34645 }, { "epoch": 607.9026548672566, "grad_norm": 5.1047468474507696e-08, "learning_rate": 0.013052967543546056, "loss": 0.0, "num_input_tokens_seen": 19681160, "step": 34650 }, { "epoch": 607.9911504424779, "grad_norm": 3.349270727426301e-08, "learning_rate": 0.01302894469429186, "loss": 0.0, "num_input_tokens_seen": 19684296, "step": 34655 }, { "epoch": 608.070796460177, "grad_norm": 1.819583062001584e-08, "learning_rate": 0.013004942967695653, "loss": 0.0, "num_input_tokens_seen": 19686560, "step": 34660 }, { "epoch": 608.1592920353983, "grad_norm": 2.187780623330582e-08, "learning_rate": 0.012980962367458859, "loss": 0.0, "num_input_tokens_seen": 19689328, "step": 34665 }, { "epoch": 608.2477876106195, "grad_norm": 5.2718384324634826e-08, "learning_rate": 0.012957002897279567, "loss": 0.0, "num_input_tokens_seen": 19692768, "step": 34670 }, { "epoch": 608.3362831858407, "grad_norm": 2.6670706532172517e-08, "learning_rate": 0.012933064560852576, "loss": 0.0, "num_input_tokens_seen": 19695840, "step": 34675 }, { "epoch": 608.4247787610619, "grad_norm": 1.8209431118521024e-07, "learning_rate": 0.012909147361869527, "loss": 0.0, "num_input_tokens_seen": 19699120, "step": 34680 }, { "epoch": 608.5132743362832, "grad_norm": 4.640748230144709e-08, "learning_rate": 0.012885251304018774, "loss": 0.0, "num_input_tokens_seen": 19701616, "step": 34685 }, { "epoch": 608.6017699115044, "grad_norm": 1.3244311425353317e-08, "learning_rate": 0.012861376390985335, "loss": 0.0, "num_input_tokens_seen": 19704512, "step": 34690 }, { "epoch": 608.6902654867257, "grad_norm": 5.488977095069458e-08, "learning_rate": 0.012837522626451063, "loss": 0.0, "num_input_tokens_seen": 19706896, "step": 34695 }, { "epoch": 608.7787610619469, "grad_norm": 6.77704363738485e-08, "learning_rate": 0.01281369001409447, "loss": 0.0, "num_input_tokens_seen": 19709456, "step": 34700 }, { "epoch": 608.8672566371681, "grad_norm": 2.7425150150861555e-08, "learning_rate": 0.012789878557590877, "loss": 0.0, "num_input_tokens_seen": 19712480, "step": 34705 }, { "epoch": 608.9557522123894, "grad_norm": 9.182645754890473e-08, "learning_rate": 0.012766088260612334, "loss": 0.0, "num_input_tokens_seen": 19715312, "step": 34710 }, { "epoch": 609.0353982300885, "grad_norm": 5.572996641944883e-08, "learning_rate": 0.012742319126827523, "loss": 0.0, "num_input_tokens_seen": 19717576, "step": 34715 }, { "epoch": 609.1238938053098, "grad_norm": 3.9513473382157827e-08, "learning_rate": 0.012718571159902008, "loss": 0.0, "num_input_tokens_seen": 19720744, "step": 34720 }, { "epoch": 609.212389380531, "grad_norm": 6.126664686689764e-08, "learning_rate": 0.01269484436349803, "loss": 0.0, "num_input_tokens_seen": 19723784, "step": 34725 }, { "epoch": 609.3008849557522, "grad_norm": 2.438356716538692e-08, "learning_rate": 0.012671138741274528, "loss": 0.0, "num_input_tokens_seen": 19726920, "step": 34730 }, { "epoch": 609.3893805309734, "grad_norm": 6.616791381475196e-08, "learning_rate": 0.012647454296887194, "loss": 0.0, "num_input_tokens_seen": 19730088, "step": 34735 }, { "epoch": 609.4778761061947, "grad_norm": 3.945961424278721e-08, "learning_rate": 0.012623791033988507, "loss": 0.0, "num_input_tokens_seen": 19733432, "step": 34740 }, { "epoch": 609.566371681416, "grad_norm": 3.909693191417318e-08, "learning_rate": 0.012600148956227597, "loss": 0.0, "num_input_tokens_seen": 19736040, "step": 34745 }, { "epoch": 609.6548672566372, "grad_norm": 4.3181085374044414e-08, "learning_rate": 0.012576528067250414, "loss": 0.0, "num_input_tokens_seen": 19738776, "step": 34750 }, { "epoch": 609.7433628318585, "grad_norm": 3.492865729981531e-08, "learning_rate": 0.012552928370699561, "loss": 0.0, "num_input_tokens_seen": 19741384, "step": 34755 }, { "epoch": 609.8318584070796, "grad_norm": 4.0844671644890695e-08, "learning_rate": 0.012529349870214411, "loss": 0.0, "num_input_tokens_seen": 19744232, "step": 34760 }, { "epoch": 609.9203539823009, "grad_norm": 2.2538223731771723e-08, "learning_rate": 0.012505792569431106, "loss": 0.0, "num_input_tokens_seen": 19746936, "step": 34765 }, { "epoch": 610.0, "grad_norm": 4.902017280983273e-08, "learning_rate": 0.012482256471982422, "loss": 0.0, "num_input_tokens_seen": 19749320, "step": 34770 }, { "epoch": 610.0884955752213, "grad_norm": 3.280094418300905e-08, "learning_rate": 0.012458741581497956, "loss": 0.0, "num_input_tokens_seen": 19752776, "step": 34775 }, { "epoch": 610.1769911504425, "grad_norm": 4.829253086313656e-08, "learning_rate": 0.012435247901603974, "loss": 0.0, "num_input_tokens_seen": 19755656, "step": 34780 }, { "epoch": 610.2654867256637, "grad_norm": 1.8944950497257196e-08, "learning_rate": 0.012411775435923528, "loss": 0.0, "num_input_tokens_seen": 19758344, "step": 34785 }, { "epoch": 610.3539823008849, "grad_norm": 3.239629720042103e-08, "learning_rate": 0.012388324188076354, "loss": 0.0, "num_input_tokens_seen": 19761240, "step": 34790 }, { "epoch": 610.4424778761062, "grad_norm": 1.4895189082153593e-08, "learning_rate": 0.012364894161678913, "loss": 0.0, "num_input_tokens_seen": 19764248, "step": 34795 }, { "epoch": 610.5309734513274, "grad_norm": 3.365513734365777e-08, "learning_rate": 0.012341485360344445, "loss": 0.0, "num_input_tokens_seen": 19766904, "step": 34800 }, { "epoch": 610.5309734513274, "eval_loss": 0.8026770353317261, "eval_runtime": 0.9397, "eval_samples_per_second": 26.603, "eval_steps_per_second": 13.834, "num_input_tokens_seen": 19766904, "step": 34800 }, { "epoch": 610.6194690265487, "grad_norm": 4.859318991634609e-08, "learning_rate": 0.01231809778768283, "loss": 0.0, "num_input_tokens_seen": 19769528, "step": 34805 }, { "epoch": 610.70796460177, "grad_norm": 2.663139397895975e-08, "learning_rate": 0.012294731447300799, "loss": 0.0, "num_input_tokens_seen": 19772296, "step": 34810 }, { "epoch": 610.7964601769911, "grad_norm": 4.611985815472508e-08, "learning_rate": 0.012271386342801671, "loss": 0.0, "num_input_tokens_seen": 19775096, "step": 34815 }, { "epoch": 610.8849557522124, "grad_norm": 6.418071762936961e-08, "learning_rate": 0.012248062477785565, "loss": 0.0, "num_input_tokens_seen": 19777944, "step": 34820 }, { "epoch": 610.9734513274336, "grad_norm": 6.66990658260147e-08, "learning_rate": 0.012224759855849305, "loss": 0.0, "num_input_tokens_seen": 19780600, "step": 34825 }, { "epoch": 611.0530973451328, "grad_norm": 3.9752379166202445e-08, "learning_rate": 0.012201478480586513, "loss": 0.0, "num_input_tokens_seen": 19783248, "step": 34830 }, { "epoch": 611.141592920354, "grad_norm": 1.5444573620015944e-08, "learning_rate": 0.012178218355587389, "loss": 0.0, "num_input_tokens_seen": 19786496, "step": 34835 }, { "epoch": 611.2300884955753, "grad_norm": 2.3326267140078016e-08, "learning_rate": 0.01215497948443896, "loss": 0.0, "num_input_tokens_seen": 19789584, "step": 34840 }, { "epoch": 611.3185840707964, "grad_norm": 1.889429057655434e-08, "learning_rate": 0.012131761870724993, "loss": 0.0, "num_input_tokens_seen": 19792288, "step": 34845 }, { "epoch": 611.4070796460177, "grad_norm": 3.698351846992409e-08, "learning_rate": 0.012108565518025893, "loss": 0.0, "num_input_tokens_seen": 19794864, "step": 34850 }, { "epoch": 611.4955752212389, "grad_norm": 2.988483771559913e-08, "learning_rate": 0.012085390429918862, "loss": 0.0, "num_input_tokens_seen": 19797264, "step": 34855 }, { "epoch": 611.5840707964602, "grad_norm": 1.948395755846377e-08, "learning_rate": 0.012062236609977744, "loss": 0.0, "num_input_tokens_seen": 19800448, "step": 34860 }, { "epoch": 611.6725663716815, "grad_norm": 4.1631899705407704e-08, "learning_rate": 0.01203910406177318, "loss": 0.0, "num_input_tokens_seen": 19803584, "step": 34865 }, { "epoch": 611.7610619469026, "grad_norm": 5.5365347861879854e-08, "learning_rate": 0.01201599278887252, "loss": 0.0, "num_input_tokens_seen": 19806336, "step": 34870 }, { "epoch": 611.8495575221239, "grad_norm": 1.2117820524792933e-08, "learning_rate": 0.011992902794839744, "loss": 0.0, "num_input_tokens_seen": 19809456, "step": 34875 }, { "epoch": 611.9380530973451, "grad_norm": 6.346784431343622e-08, "learning_rate": 0.011969834083235703, "loss": 0.0, "num_input_tokens_seen": 19812656, "step": 34880 }, { "epoch": 612.0176991150443, "grad_norm": 3.2140679451231335e-08, "learning_rate": 0.011946786657617836, "loss": 0.0, "num_input_tokens_seen": 19814672, "step": 34885 }, { "epoch": 612.1061946902655, "grad_norm": 6.996198465003545e-08, "learning_rate": 0.011923760521540332, "loss": 0.0, "num_input_tokens_seen": 19817376, "step": 34890 }, { "epoch": 612.1946902654868, "grad_norm": 2.9415412328148705e-08, "learning_rate": 0.011900755678554153, "loss": 0.0, "num_input_tokens_seen": 19820384, "step": 34895 }, { "epoch": 612.2831858407079, "grad_norm": 1.082010214759066e-08, "learning_rate": 0.011877772132206893, "loss": 0.0, "num_input_tokens_seen": 19822992, "step": 34900 }, { "epoch": 612.3716814159292, "grad_norm": 1.7334333080043507e-08, "learning_rate": 0.011854809886042915, "loss": 0.0, "num_input_tokens_seen": 19825552, "step": 34905 }, { "epoch": 612.4601769911504, "grad_norm": 3.836788664557389e-08, "learning_rate": 0.011831868943603325, "loss": 0.0, "num_input_tokens_seen": 19828320, "step": 34910 }, { "epoch": 612.5486725663717, "grad_norm": 5.7816169629631986e-08, "learning_rate": 0.011808949308425836, "loss": 0.0, "num_input_tokens_seen": 19832016, "step": 34915 }, { "epoch": 612.637168141593, "grad_norm": 4.6166842793127216e-08, "learning_rate": 0.01178605098404501, "loss": 0.0, "num_input_tokens_seen": 19835632, "step": 34920 }, { "epoch": 612.7256637168142, "grad_norm": 3.1490738905404214e-08, "learning_rate": 0.011763173973992002, "loss": 0.0, "num_input_tokens_seen": 19838384, "step": 34925 }, { "epoch": 612.8141592920354, "grad_norm": 5.288305970907459e-08, "learning_rate": 0.011740318281794776, "loss": 0.0, "num_input_tokens_seen": 19840784, "step": 34930 }, { "epoch": 612.9026548672566, "grad_norm": 4.346510351638244e-08, "learning_rate": 0.01171748391097796, "loss": 0.0, "num_input_tokens_seen": 19843472, "step": 34935 }, { "epoch": 612.9911504424779, "grad_norm": 4.9419025316410625e-08, "learning_rate": 0.011694670865062873, "loss": 0.0, "num_input_tokens_seen": 19846304, "step": 34940 }, { "epoch": 613.070796460177, "grad_norm": 3.235821210978429e-08, "learning_rate": 0.011671879147567616, "loss": 0.0, "num_input_tokens_seen": 19848624, "step": 34945 }, { "epoch": 613.1592920353983, "grad_norm": 4.659067442958076e-08, "learning_rate": 0.011649108762006893, "loss": 0.0, "num_input_tokens_seen": 19851312, "step": 34950 }, { "epoch": 613.2477876106195, "grad_norm": 2.0164609537687284e-08, "learning_rate": 0.011626359711892265, "loss": 0.0, "num_input_tokens_seen": 19854032, "step": 34955 }, { "epoch": 613.3362831858407, "grad_norm": 3.346449517493966e-08, "learning_rate": 0.01160363200073189, "loss": 0.0, "num_input_tokens_seen": 19856832, "step": 34960 }, { "epoch": 613.4247787610619, "grad_norm": 4.131363340320604e-08, "learning_rate": 0.011580925632030614, "loss": 0.0, "num_input_tokens_seen": 19859984, "step": 34965 }, { "epoch": 613.5132743362832, "grad_norm": 2.9904583698225906e-08, "learning_rate": 0.011558240609290104, "loss": 0.0, "num_input_tokens_seen": 19863152, "step": 34970 }, { "epoch": 613.6017699115044, "grad_norm": 1.0214664314389665e-07, "learning_rate": 0.011535576936008679, "loss": 0.0, "num_input_tokens_seen": 19865824, "step": 34975 }, { "epoch": 613.6902654867257, "grad_norm": 4.3640252300747306e-08, "learning_rate": 0.011512934615681309, "loss": 0.0, "num_input_tokens_seen": 19868768, "step": 34980 }, { "epoch": 613.7787610619469, "grad_norm": 3.753305577447463e-08, "learning_rate": 0.011490313651799765, "loss": 0.0, "num_input_tokens_seen": 19871184, "step": 34985 }, { "epoch": 613.8672566371681, "grad_norm": 3.473439846857218e-08, "learning_rate": 0.011467714047852512, "loss": 0.0, "num_input_tokens_seen": 19874480, "step": 34990 }, { "epoch": 613.9557522123894, "grad_norm": 9.138671686059752e-08, "learning_rate": 0.011445135807324624, "loss": 0.0, "num_input_tokens_seen": 19877808, "step": 34995 }, { "epoch": 614.0353982300885, "grad_norm": 1.5541480991032586e-08, "learning_rate": 0.011422578933698002, "loss": 0.0, "num_input_tokens_seen": 19879808, "step": 35000 }, { "epoch": 614.0353982300885, "eval_loss": 0.8159120082855225, "eval_runtime": 0.9473, "eval_samples_per_second": 26.392, "eval_steps_per_second": 13.724, "num_input_tokens_seen": 19879808, "step": 35000 }, { "epoch": 614.1238938053098, "grad_norm": 5.97066076579722e-08, "learning_rate": 0.011400043430451161, "loss": 0.0, "num_input_tokens_seen": 19882944, "step": 35005 }, { "epoch": 614.212389380531, "grad_norm": 2.3124076875546962e-08, "learning_rate": 0.011377529301059392, "loss": 0.0, "num_input_tokens_seen": 19885712, "step": 35010 }, { "epoch": 614.3008849557522, "grad_norm": 6.204471247883703e-08, "learning_rate": 0.011355036548994646, "loss": 0.0, "num_input_tokens_seen": 19889200, "step": 35015 }, { "epoch": 614.3893805309734, "grad_norm": 1.2840845720063498e-08, "learning_rate": 0.011332565177725584, "loss": 0.0, "num_input_tokens_seen": 19892128, "step": 35020 }, { "epoch": 614.4778761061947, "grad_norm": 2.21002949274407e-08, "learning_rate": 0.011310115190717585, "loss": 0.0, "num_input_tokens_seen": 19894768, "step": 35025 }, { "epoch": 614.566371681416, "grad_norm": 7.042755356678754e-08, "learning_rate": 0.01128768659143271, "loss": 0.0, "num_input_tokens_seen": 19897520, "step": 35030 }, { "epoch": 614.6548672566372, "grad_norm": 4.884065418764294e-08, "learning_rate": 0.011265279383329713, "loss": 0.0, "num_input_tokens_seen": 19899888, "step": 35035 }, { "epoch": 614.7433628318585, "grad_norm": 6.549825570800749e-08, "learning_rate": 0.01124289356986411, "loss": 0.0, "num_input_tokens_seen": 19903024, "step": 35040 }, { "epoch": 614.8318584070796, "grad_norm": 7.186221750998811e-08, "learning_rate": 0.011220529154488023, "loss": 0.0, "num_input_tokens_seen": 19905856, "step": 35045 }, { "epoch": 614.9203539823009, "grad_norm": 1.5788732099508707e-08, "learning_rate": 0.011198186140650346, "loss": 0.0, "num_input_tokens_seen": 19908624, "step": 35050 }, { "epoch": 615.0, "grad_norm": 3.5770401751733516e-08, "learning_rate": 0.011175864531796685, "loss": 0.0, "num_input_tokens_seen": 19911632, "step": 35055 }, { "epoch": 615.0884955752213, "grad_norm": 3.6758141419568346e-08, "learning_rate": 0.011153564331369258, "loss": 0.0, "num_input_tokens_seen": 19914528, "step": 35060 }, { "epoch": 615.1769911504425, "grad_norm": 1.9911778892378607e-08, "learning_rate": 0.011131285542807078, "loss": 0.0, "num_input_tokens_seen": 19917424, "step": 35065 }, { "epoch": 615.2654867256637, "grad_norm": 2.267776011422029e-08, "learning_rate": 0.011109028169545815, "loss": 0.0, "num_input_tokens_seen": 19919952, "step": 35070 }, { "epoch": 615.3539823008849, "grad_norm": 3.426308481380147e-08, "learning_rate": 0.011086792215017804, "loss": 0.0, "num_input_tokens_seen": 19922608, "step": 35075 }, { "epoch": 615.4424778761062, "grad_norm": 2.1737935895771443e-08, "learning_rate": 0.011064577682652137, "loss": 0.0, "num_input_tokens_seen": 19925584, "step": 35080 }, { "epoch": 615.5309734513274, "grad_norm": 4.587133517475195e-08, "learning_rate": 0.011042384575874559, "loss": 0.0, "num_input_tokens_seen": 19928576, "step": 35085 }, { "epoch": 615.6194690265487, "grad_norm": 1.0263689631528905e-07, "learning_rate": 0.011020212898107512, "loss": 0.0, "num_input_tokens_seen": 19931808, "step": 35090 }, { "epoch": 615.70796460177, "grad_norm": 3.592248631889561e-08, "learning_rate": 0.010998062652770197, "loss": 0.0, "num_input_tokens_seen": 19934528, "step": 35095 }, { "epoch": 615.7964601769911, "grad_norm": 4.342821213754178e-08, "learning_rate": 0.010975933843278428, "loss": 0.0, "num_input_tokens_seen": 19937040, "step": 35100 }, { "epoch": 615.8849557522124, "grad_norm": 3.890325928068705e-08, "learning_rate": 0.010953826473044714, "loss": 0.0, "num_input_tokens_seen": 19939840, "step": 35105 }, { "epoch": 615.9734513274336, "grad_norm": 2.5001044789974003e-08, "learning_rate": 0.010931740545478357, "loss": 0.0, "num_input_tokens_seen": 19943088, "step": 35110 }, { "epoch": 616.0530973451328, "grad_norm": 2.819897382266845e-08, "learning_rate": 0.010909676063985218, "loss": 0.0, "num_input_tokens_seen": 19945608, "step": 35115 }, { "epoch": 616.141592920354, "grad_norm": 5.501622268866413e-08, "learning_rate": 0.010887633031967974, "loss": 0.0, "num_input_tokens_seen": 19949160, "step": 35120 }, { "epoch": 616.2300884955753, "grad_norm": 3.439635776203431e-08, "learning_rate": 0.01086561145282589, "loss": 0.0, "num_input_tokens_seen": 19951752, "step": 35125 }, { "epoch": 616.3185840707964, "grad_norm": 2.006376753627137e-08, "learning_rate": 0.010843611329954983, "loss": 0.0, "num_input_tokens_seen": 19954360, "step": 35130 }, { "epoch": 616.4070796460177, "grad_norm": 1.9099939407851707e-08, "learning_rate": 0.010821632666747988, "loss": 0.0, "num_input_tokens_seen": 19957512, "step": 35135 }, { "epoch": 616.4955752212389, "grad_norm": 3.260218761624856e-08, "learning_rate": 0.010799675466594244, "loss": 0.0, "num_input_tokens_seen": 19959912, "step": 35140 }, { "epoch": 616.5840707964602, "grad_norm": 9.55368903987619e-08, "learning_rate": 0.010777739732879826, "loss": 0.0, "num_input_tokens_seen": 19962712, "step": 35145 }, { "epoch": 616.6725663716815, "grad_norm": 6.233225491314442e-08, "learning_rate": 0.010755825468987562, "loss": 0.0, "num_input_tokens_seen": 19965896, "step": 35150 }, { "epoch": 616.7610619469026, "grad_norm": 9.251436949853087e-08, "learning_rate": 0.010733932678296814, "loss": 0.0, "num_input_tokens_seen": 19968424, "step": 35155 }, { "epoch": 616.8495575221239, "grad_norm": 2.561074730067503e-08, "learning_rate": 0.010712061364183817, "loss": 0.0, "num_input_tokens_seen": 19971368, "step": 35160 }, { "epoch": 616.9380530973451, "grad_norm": 3.712296603453069e-08, "learning_rate": 0.010690211530021337, "loss": 0.0, "num_input_tokens_seen": 19974552, "step": 35165 }, { "epoch": 617.0176991150443, "grad_norm": 5.181991014069354e-08, "learning_rate": 0.01066838317917893, "loss": 0.0, "num_input_tokens_seen": 19976784, "step": 35170 }, { "epoch": 617.1061946902655, "grad_norm": 1.2893822898263352e-08, "learning_rate": 0.010646576315022787, "loss": 0.0, "num_input_tokens_seen": 19979376, "step": 35175 }, { "epoch": 617.1946902654868, "grad_norm": 6.653246487076103e-08, "learning_rate": 0.010624790940915785, "loss": 0.0, "num_input_tokens_seen": 19982160, "step": 35180 }, { "epoch": 617.2831858407079, "grad_norm": 3.2854412523875e-08, "learning_rate": 0.0106030270602175, "loss": 0.0, "num_input_tokens_seen": 19985040, "step": 35185 }, { "epoch": 617.3716814159292, "grad_norm": 1.8023262882138624e-08, "learning_rate": 0.010581284676284252, "loss": 0.0, "num_input_tokens_seen": 19987680, "step": 35190 }, { "epoch": 617.4601769911504, "grad_norm": 3.05828677937825e-08, "learning_rate": 0.010559563792468923, "loss": 0.0, "num_input_tokens_seen": 19990416, "step": 35195 }, { "epoch": 617.5486725663717, "grad_norm": 2.6095971605855084e-08, "learning_rate": 0.010537864412121217, "loss": 0.0, "num_input_tokens_seen": 19993952, "step": 35200 }, { "epoch": 617.5486725663717, "eval_loss": 0.8087599873542786, "eval_runtime": 0.9461, "eval_samples_per_second": 26.425, "eval_steps_per_second": 13.741, "num_input_tokens_seen": 19993952, "step": 35200 }, { "epoch": 617.637168141593, "grad_norm": 4.017178767412588e-08, "learning_rate": 0.010516186538587357, "loss": 0.0, "num_input_tokens_seen": 19997328, "step": 35205 }, { "epoch": 617.7256637168142, "grad_norm": 1.970399843287396e-08, "learning_rate": 0.01049453017521042, "loss": 0.0, "num_input_tokens_seen": 20000320, "step": 35210 }, { "epoch": 617.8141592920354, "grad_norm": 3.592591468759565e-08, "learning_rate": 0.010472895325330083, "loss": 0.0, "num_input_tokens_seen": 20003296, "step": 35215 }, { "epoch": 617.9026548672566, "grad_norm": 3.9218154057607535e-08, "learning_rate": 0.010451281992282662, "loss": 0.0, "num_input_tokens_seen": 20006144, "step": 35220 }, { "epoch": 617.9911504424779, "grad_norm": 4.758028282481064e-08, "learning_rate": 0.01042969017940124, "loss": 0.0, "num_input_tokens_seen": 20008832, "step": 35225 }, { "epoch": 618.070796460177, "grad_norm": 1.4923386970622232e-08, "learning_rate": 0.01040811989001557, "loss": 0.0, "num_input_tokens_seen": 20011784, "step": 35230 }, { "epoch": 618.1592920353983, "grad_norm": 5.965829785736787e-08, "learning_rate": 0.010386571127451992, "loss": 0.0, "num_input_tokens_seen": 20014936, "step": 35235 }, { "epoch": 618.2477876106195, "grad_norm": 4.773392703327772e-08, "learning_rate": 0.010365043895033682, "loss": 0.0, "num_input_tokens_seen": 20017608, "step": 35240 }, { "epoch": 618.3362831858407, "grad_norm": 4.6699060618493604e-08, "learning_rate": 0.010343538196080365, "loss": 0.0, "num_input_tokens_seen": 20020616, "step": 35245 }, { "epoch": 618.4247787610619, "grad_norm": 2.3207705979189086e-08, "learning_rate": 0.010322054033908457, "loss": 0.0, "num_input_tokens_seen": 20023352, "step": 35250 }, { "epoch": 618.5132743362832, "grad_norm": 3.117172298061632e-08, "learning_rate": 0.010300591411831156, "loss": 0.0, "num_input_tokens_seen": 20025912, "step": 35255 }, { "epoch": 618.6017699115044, "grad_norm": 3.8519306855278046e-08, "learning_rate": 0.010279150333158198, "loss": 0.0, "num_input_tokens_seen": 20028280, "step": 35260 }, { "epoch": 618.6902654867257, "grad_norm": 2.2896029960861597e-08, "learning_rate": 0.010257730801196107, "loss": 0.0, "num_input_tokens_seen": 20031176, "step": 35265 }, { "epoch": 618.7787610619469, "grad_norm": 2.1631787916476242e-08, "learning_rate": 0.010236332819248056, "loss": 0.0, "num_input_tokens_seen": 20033912, "step": 35270 }, { "epoch": 618.8672566371681, "grad_norm": 2.7715607586742408e-08, "learning_rate": 0.010214956390613854, "loss": 0.0, "num_input_tokens_seen": 20036936, "step": 35275 }, { "epoch": 618.9557522123894, "grad_norm": 5.082806353584601e-08, "learning_rate": 0.010193601518590034, "loss": 0.0, "num_input_tokens_seen": 20039928, "step": 35280 }, { "epoch": 619.0353982300885, "grad_norm": 4.730117808549039e-08, "learning_rate": 0.010172268206469758, "loss": 0.0, "num_input_tokens_seen": 20041984, "step": 35285 }, { "epoch": 619.1238938053098, "grad_norm": 1.6364094079790448e-08, "learning_rate": 0.010150956457542897, "loss": 0.0, "num_input_tokens_seen": 20044912, "step": 35290 }, { "epoch": 619.212389380531, "grad_norm": 3.1816863810263385e-08, "learning_rate": 0.010129666275096054, "loss": 0.0, "num_input_tokens_seen": 20047888, "step": 35295 }, { "epoch": 619.3008849557522, "grad_norm": 5.427827431958576e-08, "learning_rate": 0.010108397662412338, "loss": 0.0, "num_input_tokens_seen": 20050784, "step": 35300 }, { "epoch": 619.3893805309734, "grad_norm": 2.447749203327021e-08, "learning_rate": 0.010087150622771707, "loss": 0.0, "num_input_tokens_seen": 20053376, "step": 35305 }, { "epoch": 619.4778761061947, "grad_norm": 1.2337507371285028e-07, "learning_rate": 0.010065925159450739, "loss": 0.0, "num_input_tokens_seen": 20056288, "step": 35310 }, { "epoch": 619.566371681416, "grad_norm": 1.0604500033650766e-07, "learning_rate": 0.010044721275722618, "loss": 0.0, "num_input_tokens_seen": 20059344, "step": 35315 }, { "epoch": 619.6548672566372, "grad_norm": 7.608757357502327e-08, "learning_rate": 0.01002353897485726, "loss": 0.0, "num_input_tokens_seen": 20062768, "step": 35320 }, { "epoch": 619.7433628318585, "grad_norm": 2.5282563598238994e-08, "learning_rate": 0.010002378260121236, "loss": 0.0, "num_input_tokens_seen": 20065344, "step": 35325 }, { "epoch": 619.8318584070796, "grad_norm": 3.467662423872753e-08, "learning_rate": 0.009981239134777786, "loss": 0.0, "num_input_tokens_seen": 20068240, "step": 35330 }, { "epoch": 619.9203539823009, "grad_norm": 3.0295165487359554e-08, "learning_rate": 0.009960121602086884, "loss": 0.0, "num_input_tokens_seen": 20070784, "step": 35335 }, { "epoch": 620.0, "grad_norm": 2.7444103878337955e-08, "learning_rate": 0.009939025665305062, "loss": 0.0, "num_input_tokens_seen": 20073496, "step": 35340 }, { "epoch": 620.0884955752213, "grad_norm": 1.7573780652924142e-08, "learning_rate": 0.009917951327685597, "loss": 0.0, "num_input_tokens_seen": 20077080, "step": 35345 }, { "epoch": 620.1769911504425, "grad_norm": 1.6410249159548584e-08, "learning_rate": 0.009896898592478425, "loss": 0.0, "num_input_tokens_seen": 20079960, "step": 35350 }, { "epoch": 620.2654867256637, "grad_norm": 5.122269897128717e-08, "learning_rate": 0.009875867462930132, "loss": 0.0, "num_input_tokens_seen": 20082744, "step": 35355 }, { "epoch": 620.3539823008849, "grad_norm": 1.903304891470725e-08, "learning_rate": 0.009854857942284006, "loss": 0.0, "num_input_tokens_seen": 20085416, "step": 35360 }, { "epoch": 620.4424778761062, "grad_norm": 4.9211937636073344e-08, "learning_rate": 0.009833870033779923, "loss": 0.0, "num_input_tokens_seen": 20087928, "step": 35365 }, { "epoch": 620.5309734513274, "grad_norm": 2.1951443329726317e-08, "learning_rate": 0.009812903740654527, "loss": 0.0, "num_input_tokens_seen": 20090600, "step": 35370 }, { "epoch": 620.6194690265487, "grad_norm": 6.082048997768652e-08, "learning_rate": 0.009791959066141097, "loss": 0.0, "num_input_tokens_seen": 20093560, "step": 35375 }, { "epoch": 620.70796460177, "grad_norm": 3.256888803093716e-08, "learning_rate": 0.009771036013469537, "loss": 0.0, "num_input_tokens_seen": 20096488, "step": 35380 }, { "epoch": 620.7964601769911, "grad_norm": 4.282190957383136e-08, "learning_rate": 0.00975013458586646, "loss": 0.0, "num_input_tokens_seen": 20099512, "step": 35385 }, { "epoch": 620.8849557522124, "grad_norm": 2.204463811494861e-08, "learning_rate": 0.009729254786555107, "loss": 0.0, "num_input_tokens_seen": 20102568, "step": 35390 }, { "epoch": 620.9734513274336, "grad_norm": 1.0572760800187098e-07, "learning_rate": 0.009708396618755421, "loss": 0.0, "num_input_tokens_seen": 20105112, "step": 35395 }, { "epoch": 621.0530973451328, "grad_norm": 7.004344126926298e-08, "learning_rate": 0.009687560085683994, "loss": 0.0, "num_input_tokens_seen": 20107560, "step": 35400 }, { "epoch": 621.0530973451328, "eval_loss": 0.8044732809066772, "eval_runtime": 0.9434, "eval_samples_per_second": 26.499, "eval_steps_per_second": 13.78, "num_input_tokens_seen": 20107560, "step": 35400 }, { "epoch": 621.141592920354, "grad_norm": 2.5927237246037294e-08, "learning_rate": 0.009666745190554054, "loss": 0.0, "num_input_tokens_seen": 20110056, "step": 35405 }, { "epoch": 621.2300884955753, "grad_norm": 5.724987772737222e-08, "learning_rate": 0.009645951936575553, "loss": 0.0, "num_input_tokens_seen": 20113336, "step": 35410 }, { "epoch": 621.3185840707964, "grad_norm": 7.078506314428523e-08, "learning_rate": 0.00962518032695509, "loss": 0.0, "num_input_tokens_seen": 20115784, "step": 35415 }, { "epoch": 621.4070796460177, "grad_norm": 3.4440930107848544e-08, "learning_rate": 0.009604430364895855, "loss": 0.0, "num_input_tokens_seen": 20118760, "step": 35420 }, { "epoch": 621.4955752212389, "grad_norm": 5.671471470236611e-08, "learning_rate": 0.00958370205359777, "loss": 0.0, "num_input_tokens_seen": 20121896, "step": 35425 }, { "epoch": 621.5840707964602, "grad_norm": 5.932748337045268e-08, "learning_rate": 0.009562995396257445, "loss": 0.0, "num_input_tokens_seen": 20124536, "step": 35430 }, { "epoch": 621.6725663716815, "grad_norm": 2.9823905123294026e-08, "learning_rate": 0.009542310396068026, "loss": 0.0, "num_input_tokens_seen": 20127448, "step": 35435 }, { "epoch": 621.7610619469026, "grad_norm": 4.3993924947471896e-08, "learning_rate": 0.009521647056219495, "loss": 0.0, "num_input_tokens_seen": 20130504, "step": 35440 }, { "epoch": 621.8495575221239, "grad_norm": 3.279973626035826e-08, "learning_rate": 0.00950100537989832, "loss": 0.0, "num_input_tokens_seen": 20133064, "step": 35445 }, { "epoch": 621.9380530973451, "grad_norm": 6.478993697101032e-08, "learning_rate": 0.00948038537028772, "loss": 0.0, "num_input_tokens_seen": 20136632, "step": 35450 }, { "epoch": 622.0176991150443, "grad_norm": 1.964955487210318e-08, "learning_rate": 0.009459787030567617, "loss": 0.0, "num_input_tokens_seen": 20139096, "step": 35455 }, { "epoch": 622.1061946902655, "grad_norm": 6.54334826322156e-08, "learning_rate": 0.00943921036391449, "loss": 0.0, "num_input_tokens_seen": 20141880, "step": 35460 }, { "epoch": 622.1946902654868, "grad_norm": 5.1866233974351417e-08, "learning_rate": 0.009418655373501483, "loss": 0.0, "num_input_tokens_seen": 20144792, "step": 35465 }, { "epoch": 622.2831858407079, "grad_norm": 5.844966821655362e-08, "learning_rate": 0.00939812206249851, "loss": 0.0, "num_input_tokens_seen": 20147560, "step": 35470 }, { "epoch": 622.3716814159292, "grad_norm": 3.7143713882414886e-08, "learning_rate": 0.009377610434072004, "loss": 0.0, "num_input_tokens_seen": 20150424, "step": 35475 }, { "epoch": 622.4601769911504, "grad_norm": 3.593940078872038e-08, "learning_rate": 0.009357120491385167, "loss": 0.0, "num_input_tokens_seen": 20152920, "step": 35480 }, { "epoch": 622.5486725663717, "grad_norm": 7.042199001716654e-08, "learning_rate": 0.009336652237597743, "loss": 0.0, "num_input_tokens_seen": 20156088, "step": 35485 }, { "epoch": 622.637168141593, "grad_norm": 1.0109066295171942e-07, "learning_rate": 0.009316205675866251, "loss": 0.0, "num_input_tokens_seen": 20158776, "step": 35490 }, { "epoch": 622.7256637168142, "grad_norm": 2.8772509708119287e-08, "learning_rate": 0.00929578080934379, "loss": 0.0, "num_input_tokens_seen": 20161576, "step": 35495 }, { "epoch": 622.8141592920354, "grad_norm": 5.3139267208734964e-08, "learning_rate": 0.00927537764118012, "loss": 0.0, "num_input_tokens_seen": 20164376, "step": 35500 }, { "epoch": 622.9026548672566, "grad_norm": 5.6552266869402956e-08, "learning_rate": 0.009254996174521678, "loss": 0.0, "num_input_tokens_seen": 20167736, "step": 35505 }, { "epoch": 622.9911504424779, "grad_norm": 2.1567437613612128e-08, "learning_rate": 0.009234636412511531, "loss": 0.0, "num_input_tokens_seen": 20170728, "step": 35510 }, { "epoch": 623.070796460177, "grad_norm": 3.3073956018370154e-08, "learning_rate": 0.009214298358289418, "loss": 0.0, "num_input_tokens_seen": 20173000, "step": 35515 }, { "epoch": 623.1592920353983, "grad_norm": 2.995533421312757e-08, "learning_rate": 0.00919398201499173, "loss": 0.0, "num_input_tokens_seen": 20175752, "step": 35520 }, { "epoch": 623.2477876106195, "grad_norm": 6.778059713496987e-08, "learning_rate": 0.009173687385751495, "loss": 0.0, "num_input_tokens_seen": 20178216, "step": 35525 }, { "epoch": 623.3362831858407, "grad_norm": 2.55919410108163e-08, "learning_rate": 0.009153414473698407, "loss": 0.0, "num_input_tokens_seen": 20180904, "step": 35530 }, { "epoch": 623.4247787610619, "grad_norm": 5.2506500480831164e-08, "learning_rate": 0.009133163281958784, "loss": 0.0, "num_input_tokens_seen": 20183448, "step": 35535 }, { "epoch": 623.5132743362832, "grad_norm": 2.921013653178761e-08, "learning_rate": 0.009112933813655627, "loss": 0.0, "num_input_tokens_seen": 20186696, "step": 35540 }, { "epoch": 623.6017699115044, "grad_norm": 2.7071591190974686e-08, "learning_rate": 0.009092726071908573, "loss": 0.0, "num_input_tokens_seen": 20189512, "step": 35545 }, { "epoch": 623.6902654867257, "grad_norm": 1.6254396939530125e-08, "learning_rate": 0.0090725400598339, "loss": 0.0, "num_input_tokens_seen": 20192360, "step": 35550 }, { "epoch": 623.7787610619469, "grad_norm": 3.782583846145826e-08, "learning_rate": 0.009052375780544563, "loss": 0.0, "num_input_tokens_seen": 20195640, "step": 35555 }, { "epoch": 623.8672566371681, "grad_norm": 4.152096622078716e-08, "learning_rate": 0.009032233237150144, "loss": 0.0, "num_input_tokens_seen": 20198856, "step": 35560 }, { "epoch": 623.9557522123894, "grad_norm": 4.2623188534207657e-08, "learning_rate": 0.009012112432756875, "loss": 0.0, "num_input_tokens_seen": 20201464, "step": 35565 }, { "epoch": 624.0353982300885, "grad_norm": 2.980875635216762e-08, "learning_rate": 0.008992013370467605, "loss": 0.0, "num_input_tokens_seen": 20204120, "step": 35570 }, { "epoch": 624.1238938053098, "grad_norm": 3.719810237612364e-08, "learning_rate": 0.008971936053381924, "loss": 0.0, "num_input_tokens_seen": 20206728, "step": 35575 }, { "epoch": 624.212389380531, "grad_norm": 3.724227681800585e-08, "learning_rate": 0.008951880484595953, "loss": 0.0, "num_input_tokens_seen": 20209912, "step": 35580 }, { "epoch": 624.3008849557522, "grad_norm": 4.57954101307223e-08, "learning_rate": 0.008931846667202552, "loss": 0.0, "num_input_tokens_seen": 20212520, "step": 35585 }, { "epoch": 624.3893805309734, "grad_norm": 1.4452055552283127e-08, "learning_rate": 0.008911834604291152, "loss": 0.0, "num_input_tokens_seen": 20215464, "step": 35590 }, { "epoch": 624.4778761061947, "grad_norm": 4.0956809499448354e-08, "learning_rate": 0.008891844298947882, "loss": 0.0, "num_input_tokens_seen": 20218184, "step": 35595 }, { "epoch": 624.566371681416, "grad_norm": 4.347544191318775e-08, "learning_rate": 0.008871875754255508, "loss": 0.0, "num_input_tokens_seen": 20220888, "step": 35600 }, { "epoch": 624.566371681416, "eval_loss": 0.8094174861907959, "eval_runtime": 0.9174, "eval_samples_per_second": 27.25, "eval_steps_per_second": 14.17, "num_input_tokens_seen": 20220888, "step": 35600 }, { "epoch": 624.6548672566372, "grad_norm": 3.153455097049118e-08, "learning_rate": 0.008851928973293422, "loss": 0.0, "num_input_tokens_seen": 20224344, "step": 35605 }, { "epoch": 624.7433628318585, "grad_norm": 1.0101405223394977e-07, "learning_rate": 0.00883200395913764, "loss": 0.0, "num_input_tokens_seen": 20226984, "step": 35610 }, { "epoch": 624.8318584070796, "grad_norm": 2.511393226711789e-08, "learning_rate": 0.00881210071486091, "loss": 0.0, "num_input_tokens_seen": 20229896, "step": 35615 }, { "epoch": 624.9203539823009, "grad_norm": 8.639434412316405e-08, "learning_rate": 0.008792219243532505, "loss": 0.0, "num_input_tokens_seen": 20232712, "step": 35620 }, { "epoch": 625.0, "grad_norm": 7.420732117680018e-08, "learning_rate": 0.008772359548218428, "loss": 0.0, "num_input_tokens_seen": 20235192, "step": 35625 }, { "epoch": 625.0884955752213, "grad_norm": 5.356084287200247e-08, "learning_rate": 0.008752521631981274, "loss": 0.0, "num_input_tokens_seen": 20237512, "step": 35630 }, { "epoch": 625.1769911504425, "grad_norm": 2.1732088129056137e-08, "learning_rate": 0.008732705497880315, "loss": 0.0, "num_input_tokens_seen": 20240280, "step": 35635 }, { "epoch": 625.2654867256637, "grad_norm": 4.843345280391986e-08, "learning_rate": 0.008712911148971459, "loss": 0.0, "num_input_tokens_seen": 20243192, "step": 35640 }, { "epoch": 625.3539823008849, "grad_norm": 2.8718343259015455e-08, "learning_rate": 0.008693138588307208, "loss": 0.0, "num_input_tokens_seen": 20246152, "step": 35645 }, { "epoch": 625.4424778761062, "grad_norm": 2.000305521221435e-08, "learning_rate": 0.008673387818936762, "loss": 0.0, "num_input_tokens_seen": 20248936, "step": 35650 }, { "epoch": 625.5309734513274, "grad_norm": 3.401690662485635e-08, "learning_rate": 0.008653658843905948, "loss": 0.0, "num_input_tokens_seen": 20251816, "step": 35655 }, { "epoch": 625.6194690265487, "grad_norm": 2.207826099720478e-08, "learning_rate": 0.0086339516662572, "loss": 0.0, "num_input_tokens_seen": 20254776, "step": 35660 }, { "epoch": 625.70796460177, "grad_norm": 1.5731281166608824e-08, "learning_rate": 0.008614266289029638, "loss": 0.0, "num_input_tokens_seen": 20257576, "step": 35665 }, { "epoch": 625.7964601769911, "grad_norm": 1.4142019111318405e-08, "learning_rate": 0.008594602715258965, "loss": 0.0, "num_input_tokens_seen": 20260120, "step": 35670 }, { "epoch": 625.8849557522124, "grad_norm": 2.546413924164881e-08, "learning_rate": 0.008574960947977573, "loss": 0.0, "num_input_tokens_seen": 20263528, "step": 35675 }, { "epoch": 625.9734513274336, "grad_norm": 6.332965085675824e-08, "learning_rate": 0.008555340990214438, "loss": 0.0, "num_input_tokens_seen": 20266504, "step": 35680 }, { "epoch": 626.0530973451328, "grad_norm": 2.920368125103323e-08, "learning_rate": 0.008535742844995258, "loss": 0.0, "num_input_tokens_seen": 20269536, "step": 35685 }, { "epoch": 626.141592920354, "grad_norm": 6.489102588602691e-08, "learning_rate": 0.008516166515342266, "loss": 0.0, "num_input_tokens_seen": 20272560, "step": 35690 }, { "epoch": 626.2300884955753, "grad_norm": 4.927399288590095e-08, "learning_rate": 0.008496612004274411, "loss": 0.0, "num_input_tokens_seen": 20275440, "step": 35695 }, { "epoch": 626.3185840707964, "grad_norm": 5.51581216257091e-08, "learning_rate": 0.008477079314807201, "loss": 0.0, "num_input_tokens_seen": 20278272, "step": 35700 }, { "epoch": 626.4070796460177, "grad_norm": 2.2516957187690423e-08, "learning_rate": 0.008457568449952874, "loss": 0.0, "num_input_tokens_seen": 20280976, "step": 35705 }, { "epoch": 626.4955752212389, "grad_norm": 4.316599344633687e-08, "learning_rate": 0.008438079412720189, "loss": 0.0, "num_input_tokens_seen": 20283744, "step": 35710 }, { "epoch": 626.5840707964602, "grad_norm": 4.1895265923130864e-08, "learning_rate": 0.00841861220611466, "loss": 0.0, "num_input_tokens_seen": 20286256, "step": 35715 }, { "epoch": 626.6725663716815, "grad_norm": 9.099159825609604e-08, "learning_rate": 0.008399166833138355, "loss": 0.0, "num_input_tokens_seen": 20288976, "step": 35720 }, { "epoch": 626.7610619469026, "grad_norm": 3.074380572343216e-08, "learning_rate": 0.008379743296789987, "loss": 0.0, "num_input_tokens_seen": 20292032, "step": 35725 }, { "epoch": 626.8495575221239, "grad_norm": 1.9826773112185947e-08, "learning_rate": 0.008360341600064896, "loss": 0.0, "num_input_tokens_seen": 20294672, "step": 35730 }, { "epoch": 626.9380530973451, "grad_norm": 3.884901289552545e-08, "learning_rate": 0.008340961745955121, "loss": 0.0, "num_input_tokens_seen": 20297520, "step": 35735 }, { "epoch": 627.0176991150443, "grad_norm": 3.1170625192089574e-08, "learning_rate": 0.008321603737449224, "loss": 0.0, "num_input_tokens_seen": 20300392, "step": 35740 }, { "epoch": 627.1061946902655, "grad_norm": 4.406467368767153e-08, "learning_rate": 0.008302267577532479, "loss": 0.0, "num_input_tokens_seen": 20303432, "step": 35745 }, { "epoch": 627.1946902654868, "grad_norm": 3.586815111589203e-08, "learning_rate": 0.008282953269186771, "loss": 0.0, "num_input_tokens_seen": 20306616, "step": 35750 }, { "epoch": 627.2831858407079, "grad_norm": 4.4431541112999184e-08, "learning_rate": 0.008263660815390567, "loss": 0.0, "num_input_tokens_seen": 20309384, "step": 35755 }, { "epoch": 627.3716814159292, "grad_norm": 2.1609176670267516e-08, "learning_rate": 0.008244390219119069, "loss": 0.0, "num_input_tokens_seen": 20312280, "step": 35760 }, { "epoch": 627.4601769911504, "grad_norm": 3.723109642805866e-08, "learning_rate": 0.008225141483343967, "loss": 0.0, "num_input_tokens_seen": 20314664, "step": 35765 }, { "epoch": 627.5486725663717, "grad_norm": 5.4024980045142e-08, "learning_rate": 0.00820591461103372, "loss": 0.0, "num_input_tokens_seen": 20317512, "step": 35770 }, { "epoch": 627.637168141593, "grad_norm": 4.8130448959682326e-08, "learning_rate": 0.008186709605153358, "loss": 0.0, "num_input_tokens_seen": 20321016, "step": 35775 }, { "epoch": 627.7256637168142, "grad_norm": 2.0222481467158104e-08, "learning_rate": 0.008167526468664492, "loss": 0.0, "num_input_tokens_seen": 20323928, "step": 35780 }, { "epoch": 627.8141592920354, "grad_norm": 3.620104394030932e-08, "learning_rate": 0.008148365204525443, "loss": 0.0, "num_input_tokens_seen": 20326360, "step": 35785 }, { "epoch": 627.9026548672566, "grad_norm": 5.8218947884824956e-08, "learning_rate": 0.00812922581569106, "loss": 0.0, "num_input_tokens_seen": 20329016, "step": 35790 }, { "epoch": 627.9911504424779, "grad_norm": 5.059553132014116e-08, "learning_rate": 0.008110108305112934, "loss": 0.0, "num_input_tokens_seen": 20331704, "step": 35795 }, { "epoch": 628.070796460177, "grad_norm": 6.420307130383662e-08, "learning_rate": 0.008091012675739223, "loss": 0.0, "num_input_tokens_seen": 20333904, "step": 35800 }, { "epoch": 628.070796460177, "eval_loss": 0.8172165751457214, "eval_runtime": 0.9258, "eval_samples_per_second": 27.002, "eval_steps_per_second": 14.041, "num_input_tokens_seen": 20333904, "step": 35800 }, { "epoch": 628.1592920353983, "grad_norm": 5.507838807261578e-08, "learning_rate": 0.008071938930514671, "loss": 0.0, "num_input_tokens_seen": 20337120, "step": 35805 }, { "epoch": 628.2477876106195, "grad_norm": 7.193754214540604e-08, "learning_rate": 0.008052887072380726, "loss": 0.0, "num_input_tokens_seen": 20339840, "step": 35810 }, { "epoch": 628.3362831858407, "grad_norm": 5.749597420390273e-08, "learning_rate": 0.008033857104275437, "loss": 0.0, "num_input_tokens_seen": 20342368, "step": 35815 }, { "epoch": 628.4247787610619, "grad_norm": 2.355075956472774e-08, "learning_rate": 0.008014849029133424, "loss": 0.0, "num_input_tokens_seen": 20344928, "step": 35820 }, { "epoch": 628.5132743362832, "grad_norm": 6.004756158972668e-08, "learning_rate": 0.007995862849885975, "loss": 0.0, "num_input_tokens_seen": 20347616, "step": 35825 }, { "epoch": 628.6017699115044, "grad_norm": 5.076590170460804e-08, "learning_rate": 0.007976898569461032, "loss": 0.0, "num_input_tokens_seen": 20350736, "step": 35830 }, { "epoch": 628.6902654867257, "grad_norm": 6.563828236494373e-08, "learning_rate": 0.007957956190783088, "loss": 0.0, "num_input_tokens_seen": 20354064, "step": 35835 }, { "epoch": 628.7787610619469, "grad_norm": 5.243937550858391e-08, "learning_rate": 0.007939035716773324, "loss": 0.0, "num_input_tokens_seen": 20356624, "step": 35840 }, { "epoch": 628.8672566371681, "grad_norm": 5.846408157594851e-08, "learning_rate": 0.007920137150349487, "loss": 0.0, "num_input_tokens_seen": 20359232, "step": 35845 }, { "epoch": 628.9557522123894, "grad_norm": 3.7002585884238215e-08, "learning_rate": 0.007901260494425981, "loss": 0.0, "num_input_tokens_seen": 20362432, "step": 35850 }, { "epoch": 629.0353982300885, "grad_norm": 1.683111250372349e-08, "learning_rate": 0.007882405751913861, "loss": 0.0, "num_input_tokens_seen": 20364664, "step": 35855 }, { "epoch": 629.1238938053098, "grad_norm": 3.6837107586507045e-08, "learning_rate": 0.007863572925720702, "loss": 0.0, "num_input_tokens_seen": 20367576, "step": 35860 }, { "epoch": 629.212389380531, "grad_norm": 2.186189718145215e-08, "learning_rate": 0.007844762018750827, "loss": 0.0, "num_input_tokens_seen": 20370120, "step": 35865 }, { "epoch": 629.3008849557522, "grad_norm": 5.7720146884321366e-08, "learning_rate": 0.007825973033905054, "loss": 0.0, "num_input_tokens_seen": 20373448, "step": 35870 }, { "epoch": 629.3893805309734, "grad_norm": 3.829319439319079e-08, "learning_rate": 0.007807205974080927, "loss": 0.0, "num_input_tokens_seen": 20376504, "step": 35875 }, { "epoch": 629.4778761061947, "grad_norm": 5.5878039972867555e-08, "learning_rate": 0.007788460842172551, "loss": 0.0, "num_input_tokens_seen": 20379592, "step": 35880 }, { "epoch": 629.566371681416, "grad_norm": 6.426935073022833e-08, "learning_rate": 0.0077697376410706285, "loss": 0.0, "num_input_tokens_seen": 20382168, "step": 35885 }, { "epoch": 629.6548672566372, "grad_norm": 3.922755098528796e-08, "learning_rate": 0.007751036373662567, "loss": 0.0, "num_input_tokens_seen": 20385176, "step": 35890 }, { "epoch": 629.7433628318585, "grad_norm": 5.2522608484650846e-08, "learning_rate": 0.00773235704283231, "loss": 0.0, "num_input_tokens_seen": 20388248, "step": 35895 }, { "epoch": 629.8318584070796, "grad_norm": 6.217045012135713e-08, "learning_rate": 0.007713699651460437, "loss": 0.0, "num_input_tokens_seen": 20390744, "step": 35900 }, { "epoch": 629.9203539823009, "grad_norm": 3.572539952756415e-08, "learning_rate": 0.007695064202424162, "loss": 0.0, "num_input_tokens_seen": 20393832, "step": 35905 }, { "epoch": 630.0, "grad_norm": 1.321115661312433e-07, "learning_rate": 0.007676450698597286, "loss": 0.0, "num_input_tokens_seen": 20395800, "step": 35910 }, { "epoch": 630.0884955752213, "grad_norm": 2.6222521043450797e-08, "learning_rate": 0.007657859142850265, "loss": 0.0, "num_input_tokens_seen": 20398424, "step": 35915 }, { "epoch": 630.1769911504425, "grad_norm": 2.0579996373726317e-08, "learning_rate": 0.0076392895380501535, "loss": 0.0, "num_input_tokens_seen": 20401128, "step": 35920 }, { "epoch": 630.2654867256637, "grad_norm": 2.975755108991507e-08, "learning_rate": 0.007620741887060611, "loss": 0.0, "num_input_tokens_seen": 20403816, "step": 35925 }, { "epoch": 630.3539823008849, "grad_norm": 1.7064756718809804e-08, "learning_rate": 0.007602216192741901, "loss": 0.0, "num_input_tokens_seen": 20406408, "step": 35930 }, { "epoch": 630.4424778761062, "grad_norm": 2.2949583566855836e-08, "learning_rate": 0.007583712457950969, "loss": 0.0, "num_input_tokens_seen": 20409496, "step": 35935 }, { "epoch": 630.5309734513274, "grad_norm": 2.6943999031914245e-08, "learning_rate": 0.007565230685541269, "loss": 0.0, "num_input_tokens_seen": 20412616, "step": 35940 }, { "epoch": 630.6194690265487, "grad_norm": 6.74678943823892e-08, "learning_rate": 0.007546770878362968, "loss": 0.0, "num_input_tokens_seen": 20415896, "step": 35945 }, { "epoch": 630.70796460177, "grad_norm": 2.810614851966875e-08, "learning_rate": 0.0075283330392627405, "loss": 0.0, "num_input_tokens_seen": 20418376, "step": 35950 }, { "epoch": 630.7964601769911, "grad_norm": 5.6004864745773375e-08, "learning_rate": 0.007509917171083979, "loss": 0.0, "num_input_tokens_seen": 20421608, "step": 35955 }, { "epoch": 630.8849557522124, "grad_norm": 3.7332029023673385e-08, "learning_rate": 0.007491523276666662, "loss": 0.0, "num_input_tokens_seen": 20424360, "step": 35960 }, { "epoch": 630.9734513274336, "grad_norm": 4.374618711722178e-08, "learning_rate": 0.007473151358847318, "loss": 0.0, "num_input_tokens_seen": 20427240, "step": 35965 }, { "epoch": 631.0530973451328, "grad_norm": 4.572062906049723e-08, "learning_rate": 0.007454801420459117, "loss": 0.0, "num_input_tokens_seen": 20429808, "step": 35970 }, { "epoch": 631.141592920354, "grad_norm": 3.8703774407622404e-08, "learning_rate": 0.0074364734643319105, "loss": 0.0, "num_input_tokens_seen": 20433088, "step": 35975 }, { "epoch": 631.2300884955753, "grad_norm": 1.8370185372873493e-08, "learning_rate": 0.007418167493292022, "loss": 0.0, "num_input_tokens_seen": 20435744, "step": 35980 }, { "epoch": 631.3185840707964, "grad_norm": 6.841931821099934e-08, "learning_rate": 0.0073998835101625245, "loss": 0.0, "num_input_tokens_seen": 20438528, "step": 35985 }, { "epoch": 631.4070796460177, "grad_norm": 4.119818797221342e-08, "learning_rate": 0.007381621517762998, "loss": 0.0, "num_input_tokens_seen": 20441504, "step": 35990 }, { "epoch": 631.4955752212389, "grad_norm": 5.981234352248066e-08, "learning_rate": 0.007363381518909689, "loss": 0.0, "num_input_tokens_seen": 20443952, "step": 35995 }, { "epoch": 631.5840707964602, "grad_norm": 1.4563193317940204e-08, "learning_rate": 0.007345163516415448, "loss": 0.0, "num_input_tokens_seen": 20446736, "step": 36000 }, { "epoch": 631.5840707964602, "eval_loss": 0.7858958840370178, "eval_runtime": 0.9419, "eval_samples_per_second": 26.542, "eval_steps_per_second": 13.802, "num_input_tokens_seen": 20446736, "step": 36000 }, { "epoch": 631.6725663716815, "grad_norm": 7.12771139887991e-08, "learning_rate": 0.007326967513089693, "loss": 0.0, "num_input_tokens_seen": 20449360, "step": 36005 }, { "epoch": 631.7610619469026, "grad_norm": 2.6567816391320775e-08, "learning_rate": 0.0073087935117384815, "loss": 0.0, "num_input_tokens_seen": 20452112, "step": 36010 }, { "epoch": 631.8495575221239, "grad_norm": 2.09283577135011e-08, "learning_rate": 0.007290641515164503, "loss": 0.0, "num_input_tokens_seen": 20455136, "step": 36015 }, { "epoch": 631.9380530973451, "grad_norm": 4.701249878280578e-08, "learning_rate": 0.007272511526166986, "loss": 0.0, "num_input_tokens_seen": 20458336, "step": 36020 }, { "epoch": 632.0176991150443, "grad_norm": 7.515038191741041e-08, "learning_rate": 0.0072544035475418265, "loss": 0.0, "num_input_tokens_seen": 20460712, "step": 36025 }, { "epoch": 632.1061946902655, "grad_norm": 2.0163794189898e-08, "learning_rate": 0.007236317582081475, "loss": 0.0, "num_input_tokens_seen": 20463080, "step": 36030 }, { "epoch": 632.1946902654868, "grad_norm": 3.0936693207195276e-08, "learning_rate": 0.007218253632575066, "loss": 0.0, "num_input_tokens_seen": 20465736, "step": 36035 }, { "epoch": 632.2831858407079, "grad_norm": 5.13783469102691e-08, "learning_rate": 0.007200211701808223, "loss": 0.0, "num_input_tokens_seen": 20468808, "step": 36040 }, { "epoch": 632.3716814159292, "grad_norm": 3.802066217417632e-08, "learning_rate": 0.007182191792563286, "loss": 0.0, "num_input_tokens_seen": 20471240, "step": 36045 }, { "epoch": 632.4601769911504, "grad_norm": 2.037796242859713e-08, "learning_rate": 0.0071641939076191145, "loss": 0.0, "num_input_tokens_seen": 20474536, "step": 36050 }, { "epoch": 632.5486725663717, "grad_norm": 6.437270627657199e-08, "learning_rate": 0.007146218049751257, "loss": 0.0, "num_input_tokens_seen": 20477512, "step": 36055 }, { "epoch": 632.637168141593, "grad_norm": 3.494285749638948e-08, "learning_rate": 0.0071282642217317775, "loss": 0.0, "num_input_tokens_seen": 20480408, "step": 36060 }, { "epoch": 632.7256637168142, "grad_norm": 1.5544388887178684e-08, "learning_rate": 0.007110332426329396, "loss": 0.0, "num_input_tokens_seen": 20483864, "step": 36065 }, { "epoch": 632.8141592920354, "grad_norm": 1.0496570723717014e-08, "learning_rate": 0.007092422666309417, "loss": 0.0, "num_input_tokens_seen": 20486632, "step": 36070 }, { "epoch": 632.9026548672566, "grad_norm": 4.546360088397705e-08, "learning_rate": 0.0070745349444337295, "loss": 0.0, "num_input_tokens_seen": 20489480, "step": 36075 }, { "epoch": 632.9911504424779, "grad_norm": 4.0464701811515624e-08, "learning_rate": 0.007056669263460913, "loss": 0.0, "num_input_tokens_seen": 20492392, "step": 36080 }, { "epoch": 633.070796460177, "grad_norm": 4.92800680262917e-08, "learning_rate": 0.007038825626145995, "loss": 0.0, "num_input_tokens_seen": 20495352, "step": 36085 }, { "epoch": 633.1592920353983, "grad_norm": 6.532734175834776e-08, "learning_rate": 0.007021004035240724, "loss": 0.0, "num_input_tokens_seen": 20498184, "step": 36090 }, { "epoch": 633.2477876106195, "grad_norm": 4.218840743419605e-08, "learning_rate": 0.007003204493493453, "loss": 0.0, "num_input_tokens_seen": 20500760, "step": 36095 }, { "epoch": 633.3362831858407, "grad_norm": 4.423942456810437e-08, "learning_rate": 0.006985427003649036, "loss": 0.0, "num_input_tokens_seen": 20503464, "step": 36100 }, { "epoch": 633.4247787610619, "grad_norm": 2.0102675080124754e-08, "learning_rate": 0.006967671568449013, "loss": 0.0, "num_input_tokens_seen": 20506168, "step": 36105 }, { "epoch": 633.5132743362832, "grad_norm": 3.36087957464315e-08, "learning_rate": 0.006949938190631511, "loss": 0.0, "num_input_tokens_seen": 20509784, "step": 36110 }, { "epoch": 633.6017699115044, "grad_norm": 7.435996707272352e-08, "learning_rate": 0.0069322268729311905, "loss": 0.0, "num_input_tokens_seen": 20512824, "step": 36115 }, { "epoch": 633.6902654867257, "grad_norm": 1.3678011612228147e-07, "learning_rate": 0.006914537618079403, "loss": 0.0, "num_input_tokens_seen": 20515640, "step": 36120 }, { "epoch": 633.7787610619469, "grad_norm": 1.4579753404575513e-08, "learning_rate": 0.006896870428804031, "loss": 0.0, "num_input_tokens_seen": 20518248, "step": 36125 }, { "epoch": 633.8672566371681, "grad_norm": 3.5141603405008937e-08, "learning_rate": 0.006879225307829595, "loss": 0.0, "num_input_tokens_seen": 20520824, "step": 36130 }, { "epoch": 633.9557522123894, "grad_norm": 3.6077253184885194e-08, "learning_rate": 0.00686160225787717, "loss": 0.0, "num_input_tokens_seen": 20523560, "step": 36135 }, { "epoch": 634.0353982300885, "grad_norm": 2.2600390892080213e-08, "learning_rate": 0.006844001281664463, "loss": 0.0, "num_input_tokens_seen": 20526128, "step": 36140 }, { "epoch": 634.1238938053098, "grad_norm": 2.9158233161297176e-08, "learning_rate": 0.006826422381905789, "loss": 0.0, "num_input_tokens_seen": 20528576, "step": 36145 }, { "epoch": 634.212389380531, "grad_norm": 2.156571277112107e-08, "learning_rate": 0.006808865561311994, "loss": 0.0, "num_input_tokens_seen": 20531360, "step": 36150 }, { "epoch": 634.3008849557522, "grad_norm": 2.795634301833161e-08, "learning_rate": 0.00679133082259058, "loss": 0.0, "num_input_tokens_seen": 20534064, "step": 36155 }, { "epoch": 634.3893805309734, "grad_norm": 3.473655496577521e-08, "learning_rate": 0.00677381816844565, "loss": 0.0, "num_input_tokens_seen": 20537040, "step": 36160 }, { "epoch": 634.4778761061947, "grad_norm": 2.2194667437247517e-08, "learning_rate": 0.0067563276015778434, "loss": 0.0, "num_input_tokens_seen": 20540064, "step": 36165 }, { "epoch": 634.566371681416, "grad_norm": 4.151344157321546e-08, "learning_rate": 0.006738859124684437, "loss": 0.0, "num_input_tokens_seen": 20542720, "step": 36170 }, { "epoch": 634.6548672566372, "grad_norm": 9.32192918412511e-08, "learning_rate": 0.006721412740459259, "loss": 0.0, "num_input_tokens_seen": 20545760, "step": 36175 }, { "epoch": 634.7433628318585, "grad_norm": 4.1363350078427175e-08, "learning_rate": 0.006703988451592824, "loss": 0.0, "num_input_tokens_seen": 20548464, "step": 36180 }, { "epoch": 634.8318584070796, "grad_norm": 1.1977131286755593e-08, "learning_rate": 0.006686586260772114, "loss": 0.0, "num_input_tokens_seen": 20551200, "step": 36185 }, { "epoch": 634.9203539823009, "grad_norm": 4.330090774828932e-08, "learning_rate": 0.006669206170680819, "loss": 0.0, "num_input_tokens_seen": 20554720, "step": 36190 }, { "epoch": 635.0, "grad_norm": 1.3480260463438754e-07, "learning_rate": 0.0066518481839991095, "loss": 0.0, "num_input_tokens_seen": 20557256, "step": 36195 }, { "epoch": 635.0884955752213, "grad_norm": 8.302558285322448e-08, "learning_rate": 0.006634512303403861, "loss": 0.0, "num_input_tokens_seen": 20560472, "step": 36200 }, { "epoch": 635.0884955752213, "eval_loss": 0.8261749744415283, "eval_runtime": 0.9396, "eval_samples_per_second": 26.606, "eval_steps_per_second": 13.835, "num_input_tokens_seen": 20560472, "step": 36200 }, { "epoch": 635.1769911504425, "grad_norm": 5.6968847417238067e-08, "learning_rate": 0.0066171985315684355, "loss": 0.0, "num_input_tokens_seen": 20563496, "step": 36205 }, { "epoch": 635.2654867256637, "grad_norm": 2.320011027734381e-08, "learning_rate": 0.0065999068711628806, "loss": 0.0, "num_input_tokens_seen": 20566216, "step": 36210 }, { "epoch": 635.3539823008849, "grad_norm": 3.72940185400239e-08, "learning_rate": 0.0065826373248537295, "loss": 0.0, "num_input_tokens_seen": 20569064, "step": 36215 }, { "epoch": 635.4424778761062, "grad_norm": 5.4479524891348774e-08, "learning_rate": 0.006565389895304218, "loss": 0.0, "num_input_tokens_seen": 20571896, "step": 36220 }, { "epoch": 635.5309734513274, "grad_norm": 1.0694590457660524e-07, "learning_rate": 0.006548164585174104, "loss": 0.0, "num_input_tokens_seen": 20574440, "step": 36225 }, { "epoch": 635.6194690265487, "grad_norm": 4.813339771203573e-08, "learning_rate": 0.006530961397119728, "loss": 0.0, "num_input_tokens_seen": 20577096, "step": 36230 }, { "epoch": 635.70796460177, "grad_norm": 3.248405278100108e-08, "learning_rate": 0.00651378033379405, "loss": 0.0, "num_input_tokens_seen": 20579912, "step": 36235 }, { "epoch": 635.7964601769911, "grad_norm": 2.991304981492249e-08, "learning_rate": 0.006496621397846619, "loss": 0.0, "num_input_tokens_seen": 20582664, "step": 36240 }, { "epoch": 635.8849557522124, "grad_norm": 3.336197806902419e-08, "learning_rate": 0.006479484591923518, "loss": 0.0, "num_input_tokens_seen": 20585496, "step": 36245 }, { "epoch": 635.9734513274336, "grad_norm": 3.866063025270705e-08, "learning_rate": 0.006462369918667515, "loss": 0.0, "num_input_tokens_seen": 20588536, "step": 36250 }, { "epoch": 636.0530973451328, "grad_norm": 3.4095020140512133e-08, "learning_rate": 0.006445277380717851, "loss": 0.0, "num_input_tokens_seen": 20591024, "step": 36255 }, { "epoch": 636.141592920354, "grad_norm": 3.5733556558170676e-08, "learning_rate": 0.006428206980710466, "loss": 0.0, "num_input_tokens_seen": 20593424, "step": 36260 }, { "epoch": 636.2300884955753, "grad_norm": 5.913215161967855e-08, "learning_rate": 0.006411158721277788, "loss": 0.0, "num_input_tokens_seen": 20596960, "step": 36265 }, { "epoch": 636.3185840707964, "grad_norm": 6.6669386455942e-08, "learning_rate": 0.00639413260504888, "loss": 0.0, "num_input_tokens_seen": 20599664, "step": 36270 }, { "epoch": 636.4070796460177, "grad_norm": 5.163292726706459e-08, "learning_rate": 0.006377128634649376, "loss": 0.0, "num_input_tokens_seen": 20602688, "step": 36275 }, { "epoch": 636.4955752212389, "grad_norm": 7.067172447250414e-08, "learning_rate": 0.006360146812701528, "loss": 0.0, "num_input_tokens_seen": 20605920, "step": 36280 }, { "epoch": 636.5840707964602, "grad_norm": 3.2114883197209565e-08, "learning_rate": 0.006343187141824125, "loss": 0.0, "num_input_tokens_seen": 20608800, "step": 36285 }, { "epoch": 636.6725663716815, "grad_norm": 6.1544611185127e-08, "learning_rate": 0.00632624962463259, "loss": 0.0, "num_input_tokens_seen": 20611552, "step": 36290 }, { "epoch": 636.7610619469026, "grad_norm": 3.9872062984613876e-08, "learning_rate": 0.006309334263738853, "loss": 0.0, "num_input_tokens_seen": 20614256, "step": 36295 }, { "epoch": 636.8495575221239, "grad_norm": 4.618738103090436e-08, "learning_rate": 0.006292441061751508, "loss": 0.0, "num_input_tokens_seen": 20617344, "step": 36300 }, { "epoch": 636.9380530973451, "grad_norm": 1.934855120566681e-08, "learning_rate": 0.0062755700212757054, "loss": 0.0, "num_input_tokens_seen": 20619888, "step": 36305 }, { "epoch": 637.0176991150443, "grad_norm": 2.0250588761427935e-08, "learning_rate": 0.006258721144913148, "loss": 0.0, "num_input_tokens_seen": 20622528, "step": 36310 }, { "epoch": 637.1061946902655, "grad_norm": 2.6440872602506715e-08, "learning_rate": 0.0062418944352621575, "loss": 0.0, "num_input_tokens_seen": 20624848, "step": 36315 }, { "epoch": 637.1946902654868, "grad_norm": 3.4927612801993746e-08, "learning_rate": 0.0062250898949176405, "loss": 0.0, "num_input_tokens_seen": 20628256, "step": 36320 }, { "epoch": 637.2831858407079, "grad_norm": 5.3100112751280903e-08, "learning_rate": 0.006208307526471041, "loss": 0.0, "num_input_tokens_seen": 20631040, "step": 36325 }, { "epoch": 637.3716814159292, "grad_norm": 3.679674520640219e-08, "learning_rate": 0.006191547332510405, "loss": 0.0, "num_input_tokens_seen": 20634352, "step": 36330 }, { "epoch": 637.4601769911504, "grad_norm": 1.298807372762667e-08, "learning_rate": 0.006174809315620416, "loss": 0.0, "num_input_tokens_seen": 20637104, "step": 36335 }, { "epoch": 637.5486725663717, "grad_norm": 3.204112175581031e-08, "learning_rate": 0.00615809347838221, "loss": 0.0, "num_input_tokens_seen": 20640240, "step": 36340 }, { "epoch": 637.637168141593, "grad_norm": 4.9341309704686864e-08, "learning_rate": 0.006141399823373655, "loss": 0.0, "num_input_tokens_seen": 20643136, "step": 36345 }, { "epoch": 637.7256637168142, "grad_norm": 5.9515787853570146e-08, "learning_rate": 0.0061247283531690455, "loss": 0.0, "num_input_tokens_seen": 20646000, "step": 36350 }, { "epoch": 637.8141592920354, "grad_norm": 3.4425436723495295e-08, "learning_rate": 0.0061080790703393895, "loss": 0.0, "num_input_tokens_seen": 20648592, "step": 36355 }, { "epoch": 637.9026548672566, "grad_norm": 5.353059862045484e-08, "learning_rate": 0.006091451977452217, "loss": 0.0, "num_input_tokens_seen": 20651536, "step": 36360 }, { "epoch": 637.9911504424779, "grad_norm": 6.83821212987823e-08, "learning_rate": 0.00607484707707161, "loss": 0.0, "num_input_tokens_seen": 20654448, "step": 36365 }, { "epoch": 638.070796460177, "grad_norm": 2.4757961014643115e-08, "learning_rate": 0.006058264371758254, "loss": 0.0, "num_input_tokens_seen": 20657120, "step": 36370 }, { "epoch": 638.1592920353983, "grad_norm": 1.192285026263562e-08, "learning_rate": 0.00604170386406942, "loss": 0.0, "num_input_tokens_seen": 20659856, "step": 36375 }, { "epoch": 638.2477876106195, "grad_norm": 4.9512177469068774e-08, "learning_rate": 0.006025165556558931, "loss": 0.0, "num_input_tokens_seen": 20662240, "step": 36380 }, { "epoch": 638.3362831858407, "grad_norm": 6.501687010995738e-08, "learning_rate": 0.006008649451777248, "loss": 0.0, "num_input_tokens_seen": 20664880, "step": 36385 }, { "epoch": 638.4247787610619, "grad_norm": 4.853299984119985e-08, "learning_rate": 0.005992155552271283, "loss": 0.0, "num_input_tokens_seen": 20668016, "step": 36390 }, { "epoch": 638.5132743362832, "grad_norm": 5.491632038001626e-08, "learning_rate": 0.005975683860584685, "loss": 0.0, "num_input_tokens_seen": 20670928, "step": 36395 }, { "epoch": 638.6017699115044, "grad_norm": 6.531324459047028e-08, "learning_rate": 0.0059592343792575385, "loss": 0.0, "num_input_tokens_seen": 20673984, "step": 36400 }, { "epoch": 638.6017699115044, "eval_loss": 0.7735335826873779, "eval_runtime": 0.9708, "eval_samples_per_second": 25.752, "eval_steps_per_second": 13.391, "num_input_tokens_seen": 20673984, "step": 36400 }, { "epoch": 638.6902654867257, "grad_norm": 3.994686181840734e-08, "learning_rate": 0.0059428071108265975, "loss": 0.0, "num_input_tokens_seen": 20677056, "step": 36405 }, { "epoch": 638.7787610619469, "grad_norm": 2.5417181248599263e-08, "learning_rate": 0.005926402057825136, "loss": 0.0, "num_input_tokens_seen": 20680048, "step": 36410 }, { "epoch": 638.8672566371681, "grad_norm": 3.6849403528549374e-08, "learning_rate": 0.005910019222782997, "loss": 0.0, "num_input_tokens_seen": 20682864, "step": 36415 }, { "epoch": 638.9557522123894, "grad_norm": 5.7376698947564364e-08, "learning_rate": 0.005893658608226643, "loss": 0.0, "num_input_tokens_seen": 20685248, "step": 36420 }, { "epoch": 639.0353982300885, "grad_norm": 3.239420109935054e-08, "learning_rate": 0.0058773202166791045, "loss": 0.0, "num_input_tokens_seen": 20687624, "step": 36425 }, { "epoch": 639.1238938053098, "grad_norm": 2.6459797908273686e-08, "learning_rate": 0.005861004050659918, "loss": 0.0, "num_input_tokens_seen": 20689992, "step": 36430 }, { "epoch": 639.212389380531, "grad_norm": 3.983943841490145e-08, "learning_rate": 0.005844710112685286, "loss": 0.0, "num_input_tokens_seen": 20692520, "step": 36435 }, { "epoch": 639.3008849557522, "grad_norm": 3.250666225085297e-08, "learning_rate": 0.005828438405267933, "loss": 0.0, "num_input_tokens_seen": 20695176, "step": 36440 }, { "epoch": 639.3893805309734, "grad_norm": 2.6324466162463978e-08, "learning_rate": 0.00581218893091715, "loss": 0.0, "num_input_tokens_seen": 20698584, "step": 36445 }, { "epoch": 639.4778761061947, "grad_norm": 3.0896060820850835e-08, "learning_rate": 0.005795961692138801, "loss": 0.0, "num_input_tokens_seen": 20701416, "step": 36450 }, { "epoch": 639.566371681416, "grad_norm": 2.607143301247561e-08, "learning_rate": 0.00577975669143535, "loss": 0.0, "num_input_tokens_seen": 20704680, "step": 36455 }, { "epoch": 639.6548672566372, "grad_norm": 3.230451284252922e-08, "learning_rate": 0.005763573931305782, "loss": 0.0, "num_input_tokens_seen": 20707304, "step": 36460 }, { "epoch": 639.7433628318585, "grad_norm": 3.335070175580768e-08, "learning_rate": 0.005747413414245733, "loss": 0.0, "num_input_tokens_seen": 20710280, "step": 36465 }, { "epoch": 639.8318584070796, "grad_norm": 2.6609704661950673e-08, "learning_rate": 0.005731275142747294, "loss": 0.0, "num_input_tokens_seen": 20713256, "step": 36470 }, { "epoch": 639.9203539823009, "grad_norm": 2.7382389689023512e-08, "learning_rate": 0.005715159119299256, "loss": 0.0, "num_input_tokens_seen": 20716728, "step": 36475 }, { "epoch": 640.0, "grad_norm": 2.1588364518265735e-07, "learning_rate": 0.005699065346386867, "loss": 0.0, "num_input_tokens_seen": 20718920, "step": 36480 }, { "epoch": 640.0884955752213, "grad_norm": 2.6973028255383724e-08, "learning_rate": 0.0056829938264919885, "loss": 0.0, "num_input_tokens_seen": 20721464, "step": 36485 }, { "epoch": 640.1769911504425, "grad_norm": 2.9187951611220342e-08, "learning_rate": 0.005666944562093074, "loss": 0.0, "num_input_tokens_seen": 20724536, "step": 36490 }, { "epoch": 640.2654867256637, "grad_norm": 4.424877175779329e-08, "learning_rate": 0.005650917555665108, "loss": 0.0, "num_input_tokens_seen": 20727496, "step": 36495 }, { "epoch": 640.3539823008849, "grad_norm": 7.109500899105115e-08, "learning_rate": 0.005634912809679632, "loss": 0.0, "num_input_tokens_seen": 20730328, "step": 36500 }, { "epoch": 640.4424778761062, "grad_norm": 3.3947369360021185e-08, "learning_rate": 0.005618930326604854, "loss": 0.0, "num_input_tokens_seen": 20732840, "step": 36505 }, { "epoch": 640.5309734513274, "grad_norm": 3.100324263982657e-08, "learning_rate": 0.005602970108905386, "loss": 0.0, "num_input_tokens_seen": 20735528, "step": 36510 }, { "epoch": 640.6194690265487, "grad_norm": 2.5697666217183723e-08, "learning_rate": 0.005587032159042543, "loss": 0.0, "num_input_tokens_seen": 20738520, "step": 36515 }, { "epoch": 640.70796460177, "grad_norm": 3.9029778520216496e-08, "learning_rate": 0.005571116479474158, "loss": 0.0, "num_input_tokens_seen": 20741624, "step": 36520 }, { "epoch": 640.7964601769911, "grad_norm": 4.9979703931057884e-08, "learning_rate": 0.005555223072654619, "loss": 0.0, "num_input_tokens_seen": 20745176, "step": 36525 }, { "epoch": 640.8849557522124, "grad_norm": 4.266974329425466e-08, "learning_rate": 0.005539351941034881, "loss": 0.0, "num_input_tokens_seen": 20748024, "step": 36530 }, { "epoch": 640.9734513274336, "grad_norm": 2.7497469190507218e-08, "learning_rate": 0.0055235030870624865, "loss": 0.0, "num_input_tokens_seen": 20750584, "step": 36535 }, { "epoch": 641.0530973451328, "grad_norm": 7.629714104950835e-08, "learning_rate": 0.005507676513181514, "loss": 0.0, "num_input_tokens_seen": 20752816, "step": 36540 }, { "epoch": 641.141592920354, "grad_norm": 6.2240289366855e-08, "learning_rate": 0.005491872221832628, "loss": 0.0, "num_input_tokens_seen": 20755440, "step": 36545 }, { "epoch": 641.2300884955753, "grad_norm": 3.2048593112676826e-08, "learning_rate": 0.005476090215453061, "loss": 0.0, "num_input_tokens_seen": 20757968, "step": 36550 }, { "epoch": 641.3185840707964, "grad_norm": 4.120362717685566e-08, "learning_rate": 0.0054603304964765675, "loss": 0.0, "num_input_tokens_seen": 20761344, "step": 36555 }, { "epoch": 641.4070796460177, "grad_norm": 3.456584707350885e-08, "learning_rate": 0.005444593067333519, "loss": 0.0, "num_input_tokens_seen": 20763728, "step": 36560 }, { "epoch": 641.4955752212389, "grad_norm": 2.3443591956606724e-08, "learning_rate": 0.00542887793045081, "loss": 0.0, "num_input_tokens_seen": 20766512, "step": 36565 }, { "epoch": 641.5840707964602, "grad_norm": 3.226651301702077e-08, "learning_rate": 0.005413185088251932, "loss": 0.0, "num_input_tokens_seen": 20769168, "step": 36570 }, { "epoch": 641.6725663716815, "grad_norm": 4.404321884976525e-08, "learning_rate": 0.005397514543156884, "loss": 0.0, "num_input_tokens_seen": 20771456, "step": 36575 }, { "epoch": 641.7610619469026, "grad_norm": 5.61456481307232e-08, "learning_rate": 0.0053818662975822825, "loss": 0.0, "num_input_tokens_seen": 20775248, "step": 36580 }, { "epoch": 641.8495575221239, "grad_norm": 3.587053143405683e-08, "learning_rate": 0.005366240353941315, "loss": 0.0, "num_input_tokens_seen": 20778432, "step": 36585 }, { "epoch": 641.9380530973451, "grad_norm": 1.9218337143911413e-08, "learning_rate": 0.005350636714643636, "loss": 0.0, "num_input_tokens_seen": 20781216, "step": 36590 }, { "epoch": 642.0176991150443, "grad_norm": 3.6878716969113157e-08, "learning_rate": 0.005335055382095555, "loss": 0.0, "num_input_tokens_seen": 20783856, "step": 36595 }, { "epoch": 642.1061946902655, "grad_norm": 4.0810462564877525e-08, "learning_rate": 0.005319496358699915, "loss": 0.0, "num_input_tokens_seen": 20786240, "step": 36600 }, { "epoch": 642.1061946902655, "eval_loss": 0.8091742992401123, "eval_runtime": 0.9421, "eval_samples_per_second": 26.537, "eval_steps_per_second": 13.799, "num_input_tokens_seen": 20786240, "step": 36600 }, { "epoch": 642.1946902654868, "grad_norm": 1.1611691519419765e-07, "learning_rate": 0.005303959646856099, "loss": 0.0, "num_input_tokens_seen": 20789136, "step": 36605 }, { "epoch": 642.2831858407079, "grad_norm": 5.699756044919013e-08, "learning_rate": 0.005288445248960089, "loss": 0.0, "num_input_tokens_seen": 20792432, "step": 36610 }, { "epoch": 642.3716814159292, "grad_norm": 3.16632871033562e-08, "learning_rate": 0.005272953167404354, "loss": 0.0, "num_input_tokens_seen": 20795424, "step": 36615 }, { "epoch": 642.4601769911504, "grad_norm": 3.9078649649582076e-08, "learning_rate": 0.005257483404578017, "loss": 0.0, "num_input_tokens_seen": 20798112, "step": 36620 }, { "epoch": 642.5486725663717, "grad_norm": 4.0164874093306935e-08, "learning_rate": 0.0052420359628666865, "loss": 0.0, "num_input_tokens_seen": 20801360, "step": 36625 }, { "epoch": 642.637168141593, "grad_norm": 8.027679854194503e-08, "learning_rate": 0.00522661084465254, "loss": 0.0, "num_input_tokens_seen": 20804464, "step": 36630 }, { "epoch": 642.7256637168142, "grad_norm": 4.532380870614361e-08, "learning_rate": 0.005211208052314326, "loss": 0.0, "num_input_tokens_seen": 20807040, "step": 36635 }, { "epoch": 642.8141592920354, "grad_norm": 4.3886618783517406e-08, "learning_rate": 0.005195827588227391, "loss": 0.0, "num_input_tokens_seen": 20809696, "step": 36640 }, { "epoch": 642.9026548672566, "grad_norm": 2.873183291285386e-08, "learning_rate": 0.0051804694547635255, "loss": 0.0, "num_input_tokens_seen": 20812320, "step": 36645 }, { "epoch": 642.9911504424779, "grad_norm": 3.3174178071249116e-08, "learning_rate": 0.005165133654291232, "loss": 0.0, "num_input_tokens_seen": 20814832, "step": 36650 }, { "epoch": 643.070796460177, "grad_norm": 2.834717882649329e-08, "learning_rate": 0.005149820189175402, "loss": 0.0, "num_input_tokens_seen": 20817328, "step": 36655 }, { "epoch": 643.1592920353983, "grad_norm": 3.9583760269579216e-08, "learning_rate": 0.005134529061777598, "loss": 0.0, "num_input_tokens_seen": 20820272, "step": 36660 }, { "epoch": 643.2477876106195, "grad_norm": 4.233273642739732e-08, "learning_rate": 0.005119260274455933, "loss": 0.0, "num_input_tokens_seen": 20822992, "step": 36665 }, { "epoch": 643.3362831858407, "grad_norm": 1.375044789142521e-08, "learning_rate": 0.005104013829565007, "loss": 0.0, "num_input_tokens_seen": 20825984, "step": 36670 }, { "epoch": 643.4247787610619, "grad_norm": 2.013017841306919e-08, "learning_rate": 0.005088789729456006, "loss": 0.0, "num_input_tokens_seen": 20828704, "step": 36675 }, { "epoch": 643.5132743362832, "grad_norm": 8.595691269874806e-08, "learning_rate": 0.005073587976476735, "loss": 0.0, "num_input_tokens_seen": 20831696, "step": 36680 }, { "epoch": 643.6017699115044, "grad_norm": 3.481757104850658e-08, "learning_rate": 0.005058408572971418, "loss": 0.0, "num_input_tokens_seen": 20834800, "step": 36685 }, { "epoch": 643.6902654867257, "grad_norm": 1.311202879605844e-07, "learning_rate": 0.005043251521280983, "loss": 0.0, "num_input_tokens_seen": 20837744, "step": 36690 }, { "epoch": 643.7787610619469, "grad_norm": 4.308834178345933e-08, "learning_rate": 0.005028116823742795, "loss": 0.0, "num_input_tokens_seen": 20840480, "step": 36695 }, { "epoch": 643.8672566371681, "grad_norm": 1.9494461156455145e-08, "learning_rate": 0.005013004482690819, "loss": 0.0, "num_input_tokens_seen": 20843088, "step": 36700 }, { "epoch": 643.9557522123894, "grad_norm": 3.034211815133858e-08, "learning_rate": 0.0049979145004555746, "loss": 0.0, "num_input_tokens_seen": 20846304, "step": 36705 }, { "epoch": 644.0353982300885, "grad_norm": 2.8696103271386164e-08, "learning_rate": 0.004982846879364116, "loss": 0.0, "num_input_tokens_seen": 20848320, "step": 36710 }, { "epoch": 644.1238938053098, "grad_norm": 1.838600738324203e-08, "learning_rate": 0.0049678016217400535, "loss": 0.0, "num_input_tokens_seen": 20851008, "step": 36715 }, { "epoch": 644.212389380531, "grad_norm": 1.07322559728118e-08, "learning_rate": 0.004952778729903595, "loss": 0.0, "num_input_tokens_seen": 20853856, "step": 36720 }, { "epoch": 644.3008849557522, "grad_norm": 2.2175894898168735e-08, "learning_rate": 0.004937778206171422, "loss": 0.0, "num_input_tokens_seen": 20857056, "step": 36725 }, { "epoch": 644.3893805309734, "grad_norm": 2.479312932734956e-08, "learning_rate": 0.004922800052856835, "loss": 0.0, "num_input_tokens_seen": 20860064, "step": 36730 }, { "epoch": 644.4778761061947, "grad_norm": 4.576204304385101e-08, "learning_rate": 0.004907844272269602, "loss": 0.0, "num_input_tokens_seen": 20863216, "step": 36735 }, { "epoch": 644.566371681416, "grad_norm": 3.5262956998849404e-08, "learning_rate": 0.004892910866716144, "loss": 0.0, "num_input_tokens_seen": 20866288, "step": 36740 }, { "epoch": 644.6548672566372, "grad_norm": 1.998389365098774e-08, "learning_rate": 0.004877999838499369, "loss": 0.0, "num_input_tokens_seen": 20868816, "step": 36745 }, { "epoch": 644.7433628318585, "grad_norm": 1.929228261587923e-07, "learning_rate": 0.0048631111899187065, "loss": 0.0, "num_input_tokens_seen": 20871056, "step": 36750 }, { "epoch": 644.8318584070796, "grad_norm": 3.7842976752244795e-08, "learning_rate": 0.0048482449232702335, "loss": 0.0, "num_input_tokens_seen": 20874096, "step": 36755 }, { "epoch": 644.9203539823009, "grad_norm": 7.028447157608753e-08, "learning_rate": 0.004833401040846469, "loss": 0.0, "num_input_tokens_seen": 20877168, "step": 36760 }, { "epoch": 645.0, "grad_norm": 8.119040550980117e-09, "learning_rate": 0.004818579544936546, "loss": 0.0, "num_input_tokens_seen": 20879464, "step": 36765 }, { "epoch": 645.0884955752213, "grad_norm": 2.2904336205442632e-08, "learning_rate": 0.004803780437826121, "loss": 0.0, "num_input_tokens_seen": 20882296, "step": 36770 }, { "epoch": 645.1769911504425, "grad_norm": 4.063452507807597e-08, "learning_rate": 0.004789003721797402, "loss": 0.0, "num_input_tokens_seen": 20885384, "step": 36775 }, { "epoch": 645.2654867256637, "grad_norm": 4.368028427848003e-08, "learning_rate": 0.004774249399129132, "loss": 0.0, "num_input_tokens_seen": 20887736, "step": 36780 }, { "epoch": 645.3539823008849, "grad_norm": 3.828874994837861e-08, "learning_rate": 0.004759517472096642, "loss": 0.0, "num_input_tokens_seen": 20890680, "step": 36785 }, { "epoch": 645.4424778761062, "grad_norm": 3.7803143726478083e-08, "learning_rate": 0.004744807942971746, "loss": 0.0, "num_input_tokens_seen": 20893480, "step": 36790 }, { "epoch": 645.5309734513274, "grad_norm": 7.840308313689093e-08, "learning_rate": 0.004730120814022881, "loss": 0.0, "num_input_tokens_seen": 20896312, "step": 36795 }, { "epoch": 645.6194690265487, "grad_norm": 3.8408423108649004e-08, "learning_rate": 0.004715456087514935, "loss": 0.0, "num_input_tokens_seen": 20899128, "step": 36800 }, { "epoch": 645.6194690265487, "eval_loss": 0.8278312683105469, "eval_runtime": 0.95, "eval_samples_per_second": 26.317, "eval_steps_per_second": 13.685, "num_input_tokens_seen": 20899128, "step": 36800 }, { "epoch": 645.70796460177, "grad_norm": 3.22693232135407e-08, "learning_rate": 0.004700813765709432, "loss": 0.0, "num_input_tokens_seen": 20901880, "step": 36805 }, { "epoch": 645.7964601769911, "grad_norm": 3.666595560503083e-08, "learning_rate": 0.004686193850864401, "loss": 0.0, "num_input_tokens_seen": 20904840, "step": 36810 }, { "epoch": 645.8849557522124, "grad_norm": 3.6521640822684276e-08, "learning_rate": 0.004671596345234385, "loss": 0.0, "num_input_tokens_seen": 20907688, "step": 36815 }, { "epoch": 645.9734513274336, "grad_norm": 4.0125627265297226e-08, "learning_rate": 0.00465702125107052, "loss": 0.0, "num_input_tokens_seen": 20911096, "step": 36820 }, { "epoch": 646.0530973451328, "grad_norm": 3.7802809771392276e-08, "learning_rate": 0.004642468570620506, "loss": 0.0, "num_input_tokens_seen": 20913912, "step": 36825 }, { "epoch": 646.141592920354, "grad_norm": 3.740027665344314e-08, "learning_rate": 0.004627938306128482, "loss": 0.0, "num_input_tokens_seen": 20916776, "step": 36830 }, { "epoch": 646.2300884955753, "grad_norm": 3.477663312878576e-08, "learning_rate": 0.004613430459835255, "loss": 0.0, "num_input_tokens_seen": 20919736, "step": 36835 }, { "epoch": 646.3185840707964, "grad_norm": 3.290728400884291e-08, "learning_rate": 0.004598945033978085, "loss": 0.0, "num_input_tokens_seen": 20922872, "step": 36840 }, { "epoch": 646.4070796460177, "grad_norm": 6.337757696428525e-08, "learning_rate": 0.004584482030790804, "loss": 0.0, "num_input_tokens_seen": 20925352, "step": 36845 }, { "epoch": 646.4955752212389, "grad_norm": 2.1806918937272712e-08, "learning_rate": 0.004570041452503826, "loss": 0.0, "num_input_tokens_seen": 20928024, "step": 36850 }, { "epoch": 646.5840707964602, "grad_norm": 1.4695407557496765e-08, "learning_rate": 0.004555623301344003, "loss": 0.0, "num_input_tokens_seen": 20930792, "step": 36855 }, { "epoch": 646.6725663716815, "grad_norm": 4.75331312088656e-08, "learning_rate": 0.004541227579534857, "loss": 0.0, "num_input_tokens_seen": 20933800, "step": 36860 }, { "epoch": 646.7610619469026, "grad_norm": 5.157863824933884e-08, "learning_rate": 0.004526854289296378, "loss": 0.0, "num_input_tokens_seen": 20936744, "step": 36865 }, { "epoch": 646.8495575221239, "grad_norm": 1.0859145582742258e-08, "learning_rate": 0.004512503432845078, "loss": 0.0, "num_input_tokens_seen": 20939656, "step": 36870 }, { "epoch": 646.9380530973451, "grad_norm": 1.117792169935683e-08, "learning_rate": 0.004498175012394068, "loss": 0.0, "num_input_tokens_seen": 20942680, "step": 36875 }, { "epoch": 647.0176991150443, "grad_norm": 6.540041397329333e-08, "learning_rate": 0.004483869030152965, "loss": 0.0, "num_input_tokens_seen": 20944744, "step": 36880 }, { "epoch": 647.1061946902655, "grad_norm": 3.9540399399129456e-08, "learning_rate": 0.004469585488327904, "loss": 0.0, "num_input_tokens_seen": 20947528, "step": 36885 }, { "epoch": 647.1946902654868, "grad_norm": 3.592704445054551e-08, "learning_rate": 0.0044553243891216395, "loss": 0.0, "num_input_tokens_seen": 20950200, "step": 36890 }, { "epoch": 647.2831858407079, "grad_norm": 3.462697506506629e-08, "learning_rate": 0.004441085734733363, "loss": 0.0, "num_input_tokens_seen": 20952680, "step": 36895 }, { "epoch": 647.3716814159292, "grad_norm": 3.129603598495123e-08, "learning_rate": 0.004426869527358884, "loss": 0.0, "num_input_tokens_seen": 20955272, "step": 36900 }, { "epoch": 647.4601769911504, "grad_norm": 4.8557048160091654e-08, "learning_rate": 0.0044126757691905156, "loss": 0.0, "num_input_tokens_seen": 20958232, "step": 36905 }, { "epoch": 647.5486725663717, "grad_norm": 4.991055391201371e-08, "learning_rate": 0.004398504462417107, "loss": 0.0, "num_input_tokens_seen": 20961640, "step": 36910 }, { "epoch": 647.637168141593, "grad_norm": 4.405602282986365e-08, "learning_rate": 0.0043843556092240605, "loss": 0.0, "num_input_tokens_seen": 20964552, "step": 36915 }, { "epoch": 647.7256637168142, "grad_norm": 2.5479900855884807e-08, "learning_rate": 0.004370229211793281, "loss": 0.0, "num_input_tokens_seen": 20967304, "step": 36920 }, { "epoch": 647.8141592920354, "grad_norm": 6.200297519853848e-08, "learning_rate": 0.0043561252723032405, "loss": 0.0, "num_input_tokens_seen": 20970504, "step": 36925 }, { "epoch": 647.9026548672566, "grad_norm": 2.9372975163255433e-08, "learning_rate": 0.004342043792929001, "loss": 0.0, "num_input_tokens_seen": 20973240, "step": 36930 }, { "epoch": 647.9911504424779, "grad_norm": 4.313697488100843e-08, "learning_rate": 0.004327984775842025, "loss": 0.0, "num_input_tokens_seen": 20976248, "step": 36935 }, { "epoch": 648.070796460177, "grad_norm": 5.492332988410453e-08, "learning_rate": 0.004313948223210428, "loss": 0.0, "num_input_tokens_seen": 20978880, "step": 36940 }, { "epoch": 648.1592920353983, "grad_norm": 1.4846460061335165e-08, "learning_rate": 0.004299934137198846, "loss": 0.0, "num_input_tokens_seen": 20981456, "step": 36945 }, { "epoch": 648.2477876106195, "grad_norm": 2.093704054573209e-08, "learning_rate": 0.004285942519968383, "loss": 0.0, "num_input_tokens_seen": 20984208, "step": 36950 }, { "epoch": 648.3362831858407, "grad_norm": 3.413774507521339e-08, "learning_rate": 0.004271973373676746, "loss": 0.0, "num_input_tokens_seen": 20986896, "step": 36955 }, { "epoch": 648.4247787610619, "grad_norm": 5.6070089016202473e-08, "learning_rate": 0.004258026700478146, "loss": 0.0, "num_input_tokens_seen": 20989504, "step": 36960 }, { "epoch": 648.5132743362832, "grad_norm": 4.3967393281718614e-08, "learning_rate": 0.004244102502523328, "loss": 0.0, "num_input_tokens_seen": 20992400, "step": 36965 }, { "epoch": 648.6017699115044, "grad_norm": 4.196034453229913e-08, "learning_rate": 0.004230200781959592, "loss": 0.0, "num_input_tokens_seen": 20995376, "step": 36970 }, { "epoch": 648.6902654867257, "grad_norm": 7.181028394143141e-08, "learning_rate": 0.004216321540930756, "loss": 0.0, "num_input_tokens_seen": 20998352, "step": 36975 }, { "epoch": 648.7787610619469, "grad_norm": 5.33842872130208e-08, "learning_rate": 0.004202464781577175, "loss": 0.0, "num_input_tokens_seen": 21001280, "step": 36980 }, { "epoch": 648.8672566371681, "grad_norm": 6.009101838344577e-08, "learning_rate": 0.00418863050603574, "loss": 0.0, "num_input_tokens_seen": 21004672, "step": 36985 }, { "epoch": 648.9557522123894, "grad_norm": 8.178159305316512e-08, "learning_rate": 0.004174818716439843, "loss": 0.0, "num_input_tokens_seen": 21007280, "step": 36990 }, { "epoch": 649.0353982300885, "grad_norm": 5.676412229149719e-08, "learning_rate": 0.004161029414919464, "loss": 0.0, "num_input_tokens_seen": 21009448, "step": 36995 }, { "epoch": 649.1238938053098, "grad_norm": 3.796812464429422e-08, "learning_rate": 0.004147262603601071, "loss": 0.0, "num_input_tokens_seen": 21011928, "step": 37000 }, { "epoch": 649.1238938053098, "eval_loss": 0.8117527365684509, "eval_runtime": 0.9349, "eval_samples_per_second": 26.741, "eval_steps_per_second": 13.905, "num_input_tokens_seen": 21011928, "step": 37000 }, { "epoch": 649.212389380531, "grad_norm": 2.9113389032886516e-08, "learning_rate": 0.004133518284607679, "loss": 0.0, "num_input_tokens_seen": 21014648, "step": 37005 }, { "epoch": 649.3008849557522, "grad_norm": 6.281534581376036e-08, "learning_rate": 0.004119796460058861, "loss": 0.0, "num_input_tokens_seen": 21017384, "step": 37010 }, { "epoch": 649.3893805309734, "grad_norm": 3.338741194625072e-08, "learning_rate": 0.00410609713207064, "loss": 0.0, "num_input_tokens_seen": 21020280, "step": 37015 }, { "epoch": 649.4778761061947, "grad_norm": 3.0811055040658175e-08, "learning_rate": 0.004092420302755678, "loss": 0.0, "num_input_tokens_seen": 21023752, "step": 37020 }, { "epoch": 649.566371681416, "grad_norm": 1.9337896617344086e-08, "learning_rate": 0.004078765974223103, "loss": 0.0, "num_input_tokens_seen": 21026904, "step": 37025 }, { "epoch": 649.6548672566372, "grad_norm": 4.727112568048142e-08, "learning_rate": 0.004065134148578564, "loss": 0.0, "num_input_tokens_seen": 21029224, "step": 37030 }, { "epoch": 649.7433628318585, "grad_norm": 2.212022742753561e-08, "learning_rate": 0.004051524827924279, "loss": 0.0, "num_input_tokens_seen": 21032296, "step": 37035 }, { "epoch": 649.8318584070796, "grad_norm": 2.0680026580066624e-08, "learning_rate": 0.004037938014358955, "loss": 0.0, "num_input_tokens_seen": 21035096, "step": 37040 }, { "epoch": 649.9203539823009, "grad_norm": 3.090278966055848e-08, "learning_rate": 0.004024373709977863, "loss": 0.0, "num_input_tokens_seen": 21037944, "step": 37045 }, { "epoch": 650.0, "grad_norm": 2.7358961318668662e-08, "learning_rate": 0.004010831916872814, "loss": 0.0, "num_input_tokens_seen": 21040632, "step": 37050 }, { "epoch": 650.0884955752213, "grad_norm": 3.2930039139955625e-08, "learning_rate": 0.003997312637132089, "loss": 0.0, "num_input_tokens_seen": 21043736, "step": 37055 }, { "epoch": 650.1769911504425, "grad_norm": 2.5299767614228585e-08, "learning_rate": 0.003983815872840535, "loss": 0.0, "num_input_tokens_seen": 21046376, "step": 37060 }, { "epoch": 650.2654867256637, "grad_norm": 3.8657571366229604e-08, "learning_rate": 0.003970341626079521, "loss": 0.0, "num_input_tokens_seen": 21049368, "step": 37065 }, { "epoch": 650.3539823008849, "grad_norm": 7.487333419931019e-08, "learning_rate": 0.003956889898926952, "loss": 0.0, "num_input_tokens_seen": 21051832, "step": 37070 }, { "epoch": 650.4424778761062, "grad_norm": 5.407892444964091e-08, "learning_rate": 0.0039434606934572675, "loss": 0.0, "num_input_tokens_seen": 21055064, "step": 37075 }, { "epoch": 650.5309734513274, "grad_norm": 4.9115886469053294e-08, "learning_rate": 0.003930054011741396, "loss": 0.0, "num_input_tokens_seen": 21057800, "step": 37080 }, { "epoch": 650.6194690265487, "grad_norm": 5.768522370885876e-08, "learning_rate": 0.0039166698558468155, "loss": 0.0, "num_input_tokens_seen": 21061336, "step": 37085 }, { "epoch": 650.70796460177, "grad_norm": 4.118613006198757e-08, "learning_rate": 0.0039033082278375594, "loss": 0.0, "num_input_tokens_seen": 21064280, "step": 37090 }, { "epoch": 650.7964601769911, "grad_norm": 1.8944835034062635e-08, "learning_rate": 0.003889969129774112, "loss": 0.0, "num_input_tokens_seen": 21066888, "step": 37095 }, { "epoch": 650.8849557522124, "grad_norm": 6.035667610149176e-08, "learning_rate": 0.0038766525637135784, "loss": 0.0, "num_input_tokens_seen": 21069544, "step": 37100 }, { "epoch": 650.9734513274336, "grad_norm": 2.5994147279106983e-08, "learning_rate": 0.0038633585317095318, "loss": 0.0, "num_input_tokens_seen": 21072312, "step": 37105 }, { "epoch": 651.0530973451328, "grad_norm": 5.194322838519838e-08, "learning_rate": 0.00385008703581205, "loss": 0.0, "num_input_tokens_seen": 21074672, "step": 37110 }, { "epoch": 651.141592920354, "grad_norm": 6.940526731114005e-08, "learning_rate": 0.0038368380780677944, "loss": 0.0, "num_input_tokens_seen": 21077696, "step": 37115 }, { "epoch": 651.2300884955753, "grad_norm": 7.362491771800705e-08, "learning_rate": 0.003823611660519882, "loss": 0.0, "num_input_tokens_seen": 21080592, "step": 37120 }, { "epoch": 651.3185840707964, "grad_norm": 6.333379332090772e-08, "learning_rate": 0.0038104077852080475, "loss": 0.0, "num_input_tokens_seen": 21083920, "step": 37125 }, { "epoch": 651.4070796460177, "grad_norm": 2.09202468681724e-08, "learning_rate": 0.003797226454168462, "loss": 0.0, "num_input_tokens_seen": 21086736, "step": 37130 }, { "epoch": 651.4955752212389, "grad_norm": 2.522790154557697e-08, "learning_rate": 0.003784067669433849, "loss": 0.0, "num_input_tokens_seen": 21088992, "step": 37135 }, { "epoch": 651.5840707964602, "grad_norm": 5.34563646681363e-08, "learning_rate": 0.0037709314330334528, "loss": 0.0, "num_input_tokens_seen": 21092416, "step": 37140 }, { "epoch": 651.6725663716815, "grad_norm": 4.249406515555165e-08, "learning_rate": 0.003757817746993086, "loss": 0.0, "num_input_tokens_seen": 21095232, "step": 37145 }, { "epoch": 651.7610619469026, "grad_norm": 8.579137755759803e-08, "learning_rate": 0.0037447266133349977, "loss": 0.0, "num_input_tokens_seen": 21098064, "step": 37150 }, { "epoch": 651.8495575221239, "grad_norm": 4.245572426953004e-08, "learning_rate": 0.003731658034078039, "loss": 0.0, "num_input_tokens_seen": 21101136, "step": 37155 }, { "epoch": 651.9380530973451, "grad_norm": 3.861057606968643e-08, "learning_rate": 0.0037186120112375153, "loss": 0.0, "num_input_tokens_seen": 21104112, "step": 37160 }, { "epoch": 652.0176991150443, "grad_norm": 7.755475373016907e-09, "learning_rate": 0.003705588546825317, "loss": 0.0, "num_input_tokens_seen": 21106448, "step": 37165 }, { "epoch": 652.1061946902655, "grad_norm": 3.408525728332279e-08, "learning_rate": 0.0036925876428498205, "loss": 0.0, "num_input_tokens_seen": 21109408, "step": 37170 }, { "epoch": 652.1946902654868, "grad_norm": 2.893963468864058e-08, "learning_rate": 0.0036796093013159057, "loss": 0.0, "num_input_tokens_seen": 21112464, "step": 37175 }, { "epoch": 652.2831858407079, "grad_norm": 2.30171597337403e-08, "learning_rate": 0.0036666535242250217, "loss": 0.0, "num_input_tokens_seen": 21115408, "step": 37180 }, { "epoch": 652.3716814159292, "grad_norm": 4.65489051748591e-08, "learning_rate": 0.003653720313575104, "loss": 0.0, "num_input_tokens_seen": 21118480, "step": 37185 }, { "epoch": 652.4601769911504, "grad_norm": 7.365077436816136e-08, "learning_rate": 0.003640809671360623, "loss": 0.0, "num_input_tokens_seen": 21121104, "step": 37190 }, { "epoch": 652.5486725663717, "grad_norm": 6.492869175644955e-08, "learning_rate": 0.003627921599572553, "loss": 0.0, "num_input_tokens_seen": 21124112, "step": 37195 }, { "epoch": 652.637168141593, "grad_norm": 3.0036300557867435e-08, "learning_rate": 0.003615056100198405, "loss": 0.0, "num_input_tokens_seen": 21126880, "step": 37200 }, { "epoch": 652.637168141593, "eval_loss": 0.7886305451393127, "eval_runtime": 0.9345, "eval_samples_per_second": 26.753, "eval_steps_per_second": 13.912, "num_input_tokens_seen": 21126880, "step": 37200 }, { "epoch": 652.7256637168142, "grad_norm": 6.062035140530497e-08, "learning_rate": 0.003602213175222174, "loss": 0.0, "num_input_tokens_seen": 21130272, "step": 37205 }, { "epoch": 652.8141592920354, "grad_norm": 3.5634577955079294e-08, "learning_rate": 0.0035893928266244432, "loss": 0.0, "num_input_tokens_seen": 21132608, "step": 37210 }, { "epoch": 652.9026548672566, "grad_norm": 3.227031086794341e-08, "learning_rate": 0.003576595056382248, "loss": 0.0, "num_input_tokens_seen": 21135184, "step": 37215 }, { "epoch": 652.9911504424779, "grad_norm": 2.6578204526117588e-08, "learning_rate": 0.0035638198664691423, "loss": 0.0, "num_input_tokens_seen": 21138096, "step": 37220 }, { "epoch": 653.070796460177, "grad_norm": 4.208596138255416e-08, "learning_rate": 0.003551067258855267, "loss": 0.0, "num_input_tokens_seen": 21140472, "step": 37225 }, { "epoch": 653.1592920353983, "grad_norm": 3.33659144757803e-08, "learning_rate": 0.0035383372355071996, "loss": 0.0, "num_input_tokens_seen": 21142856, "step": 37230 }, { "epoch": 653.2477876106195, "grad_norm": 2.7888562570410613e-08, "learning_rate": 0.0035256297983881023, "loss": 0.0, "num_input_tokens_seen": 21145688, "step": 37235 }, { "epoch": 653.3362831858407, "grad_norm": 1.0680620476932745e-07, "learning_rate": 0.0035129449494575747, "loss": 0.0, "num_input_tokens_seen": 21148728, "step": 37240 }, { "epoch": 653.4247787610619, "grad_norm": 6.677143460365187e-08, "learning_rate": 0.0035002826906718187, "loss": 0.0, "num_input_tokens_seen": 21151800, "step": 37245 }, { "epoch": 653.5132743362832, "grad_norm": 2.2372571351070292e-08, "learning_rate": 0.003487643023983522, "loss": 0.0, "num_input_tokens_seen": 21154456, "step": 37250 }, { "epoch": 653.6017699115044, "grad_norm": 4.016085242142253e-08, "learning_rate": 0.003475025951341842, "loss": 0.0, "num_input_tokens_seen": 21157192, "step": 37255 }, { "epoch": 653.6902654867257, "grad_norm": 3.2407420746949356e-08, "learning_rate": 0.00346243147469249, "loss": 0.0, "num_input_tokens_seen": 21160504, "step": 37260 }, { "epoch": 653.7787610619469, "grad_norm": 5.3330690974462414e-08, "learning_rate": 0.0034498595959777446, "loss": 0.0, "num_input_tokens_seen": 21163080, "step": 37265 }, { "epoch": 653.8672566371681, "grad_norm": 3.686852423356868e-08, "learning_rate": 0.003437310317136305, "loss": 0.0, "num_input_tokens_seen": 21166392, "step": 37270 }, { "epoch": 653.9557522123894, "grad_norm": 4.860536506612334e-08, "learning_rate": 0.0034247836401034236, "loss": 0.0, "num_input_tokens_seen": 21169320, "step": 37275 }, { "epoch": 654.0353982300885, "grad_norm": 4.481107396259176e-08, "learning_rate": 0.003412279566810905, "loss": 0.0, "num_input_tokens_seen": 21171848, "step": 37280 }, { "epoch": 654.1238938053098, "grad_norm": 3.59937111227282e-08, "learning_rate": 0.00339979809918699, "loss": 0.0, "num_input_tokens_seen": 21174520, "step": 37285 }, { "epoch": 654.212389380531, "grad_norm": 3.788504443491547e-08, "learning_rate": 0.0033873392391565228, "loss": 0.0, "num_input_tokens_seen": 21177896, "step": 37290 }, { "epoch": 654.3008849557522, "grad_norm": 9.270372913761094e-08, "learning_rate": 0.003374902988640782, "loss": 0.0, "num_input_tokens_seen": 21181176, "step": 37295 }, { "epoch": 654.3893805309734, "grad_norm": 3.3911117469642704e-08, "learning_rate": 0.0033624893495576014, "loss": 0.0, "num_input_tokens_seen": 21184296, "step": 37300 }, { "epoch": 654.4778761061947, "grad_norm": 7.00971654055138e-08, "learning_rate": 0.0033500983238213323, "loss": 0.0, "num_input_tokens_seen": 21186808, "step": 37305 }, { "epoch": 654.566371681416, "grad_norm": 5.063611752120778e-08, "learning_rate": 0.0033377299133428126, "loss": 0.0, "num_input_tokens_seen": 21189352, "step": 37310 }, { "epoch": 654.6548672566372, "grad_norm": 3.709910600946387e-08, "learning_rate": 0.003325384120029434, "loss": 0.0, "num_input_tokens_seen": 21192088, "step": 37315 }, { "epoch": 654.7433628318585, "grad_norm": 4.739462511338388e-08, "learning_rate": 0.0033130609457850233, "loss": 0.0, "num_input_tokens_seen": 21195112, "step": 37320 }, { "epoch": 654.8318584070796, "grad_norm": 2.5356511557106387e-08, "learning_rate": 0.0033007603925100104, "loss": 0.0, "num_input_tokens_seen": 21197720, "step": 37325 }, { "epoch": 654.9203539823009, "grad_norm": 2.0548434065403853e-08, "learning_rate": 0.003288482462101294, "loss": 0.0, "num_input_tokens_seen": 21200888, "step": 37330 }, { "epoch": 655.0, "grad_norm": 8.300532527982796e-08, "learning_rate": 0.0032762271564522605, "loss": 0.0, "num_input_tokens_seen": 21202968, "step": 37335 }, { "epoch": 655.0884955752213, "grad_norm": 4.202845005352174e-08, "learning_rate": 0.003263994477452864, "loss": 0.0, "num_input_tokens_seen": 21206328, "step": 37340 }, { "epoch": 655.1769911504425, "grad_norm": 6.618953563020114e-08, "learning_rate": 0.0032517844269895125, "loss": 0.0, "num_input_tokens_seen": 21209128, "step": 37345 }, { "epoch": 655.2654867256637, "grad_norm": 4.0008632851140646e-08, "learning_rate": 0.0032395970069451496, "loss": 0.0, "num_input_tokens_seen": 21211752, "step": 37350 }, { "epoch": 655.3539823008849, "grad_norm": 4.323899460700886e-08, "learning_rate": 0.0032274322191992388, "loss": 0.0, "num_input_tokens_seen": 21214664, "step": 37355 }, { "epoch": 655.4424778761062, "grad_norm": 1.05073318934501e-07, "learning_rate": 0.0032152900656277294, "loss": 0.0, "num_input_tokens_seen": 21217400, "step": 37360 }, { "epoch": 655.5309734513274, "grad_norm": 4.386232532738177e-08, "learning_rate": 0.0032031705481030902, "loss": 0.0, "num_input_tokens_seen": 21219896, "step": 37365 }, { "epoch": 655.6194690265487, "grad_norm": 4.4575337199148635e-08, "learning_rate": 0.0031910736684943428, "loss": 0.0, "num_input_tokens_seen": 21223032, "step": 37370 }, { "epoch": 655.70796460177, "grad_norm": 3.5059535719028645e-08, "learning_rate": 0.0031789994286669453, "loss": 0.0, "num_input_tokens_seen": 21225592, "step": 37375 }, { "epoch": 655.7964601769911, "grad_norm": 1.4152097271846742e-08, "learning_rate": 0.003166947830482908, "loss": 0.0, "num_input_tokens_seen": 21228680, "step": 37380 }, { "epoch": 655.8849557522124, "grad_norm": 6.626687820698862e-08, "learning_rate": 0.003154918875800727, "loss": 0.0, "num_input_tokens_seen": 21231832, "step": 37385 }, { "epoch": 655.9734513274336, "grad_norm": 3.7577326139626166e-08, "learning_rate": 0.00314291256647542, "loss": 0.0, "num_input_tokens_seen": 21234840, "step": 37390 }, { "epoch": 656.0530973451328, "grad_norm": 7.368196008883388e-08, "learning_rate": 0.0031309289043585375, "loss": 0.0, "num_input_tokens_seen": 21237344, "step": 37395 }, { "epoch": 656.141592920354, "grad_norm": 2.6667446917372217e-08, "learning_rate": 0.003118967891298069, "loss": 0.0, "num_input_tokens_seen": 21239760, "step": 37400 }, { "epoch": 656.141592920354, "eval_loss": 0.7863349914550781, "eval_runtime": 0.9403, "eval_samples_per_second": 26.588, "eval_steps_per_second": 13.826, "num_input_tokens_seen": 21239760, "step": 37400 }, { "epoch": 656.2300884955753, "grad_norm": 3.121964553542966e-08, "learning_rate": 0.003107029529138572, "loss": 0.0, "num_input_tokens_seen": 21242448, "step": 37405 }, { "epoch": 656.3185840707964, "grad_norm": 9.19739875371306e-08, "learning_rate": 0.0030951138197211235, "loss": 0.0, "num_input_tokens_seen": 21245472, "step": 37410 }, { "epoch": 656.4070796460177, "grad_norm": 3.861311981268045e-08, "learning_rate": 0.0030832207648832377, "loss": 0.0, "num_input_tokens_seen": 21248432, "step": 37415 }, { "epoch": 656.4955752212389, "grad_norm": 3.2920649317702555e-08, "learning_rate": 0.0030713503664589635, "loss": 0.0, "num_input_tokens_seen": 21251152, "step": 37420 }, { "epoch": 656.5840707964602, "grad_norm": 2.0426202951284722e-08, "learning_rate": 0.0030595026262788872, "loss": 0.0, "num_input_tokens_seen": 21254528, "step": 37425 }, { "epoch": 656.6725663716815, "grad_norm": 3.511190982408152e-08, "learning_rate": 0.00304767754617008, "loss": 0.0, "num_input_tokens_seen": 21257216, "step": 37430 }, { "epoch": 656.7610619469026, "grad_norm": 2.987644265317613e-08, "learning_rate": 0.003035875127956117, "loss": 0.0, "num_input_tokens_seen": 21259872, "step": 37435 }, { "epoch": 656.8495575221239, "grad_norm": 4.80549395831531e-08, "learning_rate": 0.0030240953734570752, "loss": 0.0, "num_input_tokens_seen": 21262784, "step": 37440 }, { "epoch": 656.9380530973451, "grad_norm": 4.500115124983495e-08, "learning_rate": 0.003012338284489535, "loss": 0.0, "num_input_tokens_seen": 21265920, "step": 37445 }, { "epoch": 657.0176991150443, "grad_norm": 3.123362546375574e-08, "learning_rate": 0.0030006038628665964, "loss": 0.0, "num_input_tokens_seen": 21268192, "step": 37450 }, { "epoch": 657.1061946902655, "grad_norm": 3.082235267015676e-08, "learning_rate": 0.002988892110397845, "loss": 0.0, "num_input_tokens_seen": 21270656, "step": 37455 }, { "epoch": 657.1946902654868, "grad_norm": 9.903742181904818e-08, "learning_rate": 0.0029772030288894025, "loss": 0.0, "num_input_tokens_seen": 21274016, "step": 37460 }, { "epoch": 657.2831858407079, "grad_norm": 3.446801244422204e-08, "learning_rate": 0.0029655366201438438, "loss": 0.0, "num_input_tokens_seen": 21277312, "step": 37465 }, { "epoch": 657.3716814159292, "grad_norm": 1.30500424688762e-07, "learning_rate": 0.0029538928859602965, "loss": 0.0, "num_input_tokens_seen": 21280400, "step": 37470 }, { "epoch": 657.4601769911504, "grad_norm": 6.624212289807474e-08, "learning_rate": 0.002942271828134374, "loss": 0.0, "num_input_tokens_seen": 21283216, "step": 37475 }, { "epoch": 657.5486725663717, "grad_norm": 4.497356087540538e-08, "learning_rate": 0.00293067344845816, "loss": 0.0, "num_input_tokens_seen": 21285856, "step": 37480 }, { "epoch": 657.637168141593, "grad_norm": 6.281396025542563e-08, "learning_rate": 0.0029190977487202896, "loss": 0.0, "num_input_tokens_seen": 21288784, "step": 37485 }, { "epoch": 657.7256637168142, "grad_norm": 5.85421275900444e-08, "learning_rate": 0.0029075447307058853, "loss": 0.0, "num_input_tokens_seen": 21291312, "step": 37490 }, { "epoch": 657.8141592920354, "grad_norm": 2.2603668270448907e-08, "learning_rate": 0.0028960143961965722, "loss": 0.0, "num_input_tokens_seen": 21294640, "step": 37495 }, { "epoch": 657.9026548672566, "grad_norm": 3.44041026778541e-08, "learning_rate": 0.002884506746970461, "loss": 0.0, "num_input_tokens_seen": 21297344, "step": 37500 }, { "epoch": 657.9911504424779, "grad_norm": 5.6824134730959486e-08, "learning_rate": 0.0028730217848021654, "loss": 0.0, "num_input_tokens_seen": 21299936, "step": 37505 }, { "epoch": 658.070796460177, "grad_norm": 2.8667901830203846e-08, "learning_rate": 0.0028615595114628188, "loss": 0.0, "num_input_tokens_seen": 21302256, "step": 37510 }, { "epoch": 658.1592920353983, "grad_norm": 1.5941381548145728e-08, "learning_rate": 0.002850119928720074, "loss": 0.0, "num_input_tokens_seen": 21305648, "step": 37515 }, { "epoch": 658.2477876106195, "grad_norm": 2.1498847146972366e-08, "learning_rate": 0.0028387030383380195, "loss": 0.0, "num_input_tokens_seen": 21308576, "step": 37520 }, { "epoch": 658.3362831858407, "grad_norm": 5.689162208000198e-08, "learning_rate": 0.0028273088420772974, "loss": 0.0, "num_input_tokens_seen": 21311584, "step": 37525 }, { "epoch": 658.4247787610619, "grad_norm": 6.221060289135494e-08, "learning_rate": 0.002815937341695068, "loss": 0.0, "num_input_tokens_seen": 21314480, "step": 37530 }, { "epoch": 658.5132743362832, "grad_norm": 2.7359016385730683e-08, "learning_rate": 0.0028045885389448963, "loss": 0.0, "num_input_tokens_seen": 21317104, "step": 37535 }, { "epoch": 658.6017699115044, "grad_norm": 2.864973325245046e-08, "learning_rate": 0.002793262435576965, "loss": 0.0, "num_input_tokens_seen": 21319792, "step": 37540 }, { "epoch": 658.6902654867257, "grad_norm": 6.323876533542716e-08, "learning_rate": 0.0027819590333378772, "loss": 0.0, "num_input_tokens_seen": 21322816, "step": 37545 }, { "epoch": 658.7787610619469, "grad_norm": 9.240433485047106e-08, "learning_rate": 0.002770678333970755, "loss": 0.0, "num_input_tokens_seen": 21325536, "step": 37550 }, { "epoch": 658.8672566371681, "grad_norm": 2.3633747403550842e-08, "learning_rate": 0.0027594203392152573, "loss": 0.0, "num_input_tokens_seen": 21328272, "step": 37555 }, { "epoch": 658.9557522123894, "grad_norm": 5.0810282203883617e-08, "learning_rate": 0.002748185050807478, "loss": 0.0, "num_input_tokens_seen": 21331344, "step": 37560 }, { "epoch": 659.0353982300885, "grad_norm": 3.7327133384224e-08, "learning_rate": 0.002736972470480031, "loss": 0.0, "num_input_tokens_seen": 21333712, "step": 37565 }, { "epoch": 659.1238938053098, "grad_norm": 1.9719260890838086e-08, "learning_rate": 0.002725782599962068, "loss": 0.0, "num_input_tokens_seen": 21336400, "step": 37570 }, { "epoch": 659.212389380531, "grad_norm": 3.127897230115195e-08, "learning_rate": 0.0027146154409791734, "loss": 0.0, "num_input_tokens_seen": 21339440, "step": 37575 }, { "epoch": 659.3008849557522, "grad_norm": 1.7234295768275842e-08, "learning_rate": 0.002703470995253504, "loss": 0.0, "num_input_tokens_seen": 21342160, "step": 37580 }, { "epoch": 659.3893805309734, "grad_norm": 7.686649183824557e-08, "learning_rate": 0.0026923492645036184, "loss": 0.0, "num_input_tokens_seen": 21345168, "step": 37585 }, { "epoch": 659.4778761061947, "grad_norm": 8.048530020232647e-08, "learning_rate": 0.0026812502504446776, "loss": 0.0, "num_input_tokens_seen": 21347808, "step": 37590 }, { "epoch": 659.566371681416, "grad_norm": 2.920110375725926e-08, "learning_rate": 0.0026701739547882798, "loss": 0.0, "num_input_tokens_seen": 21350704, "step": 37595 }, { "epoch": 659.6548672566372, "grad_norm": 3.558504602096946e-08, "learning_rate": 0.0026591203792425077, "loss": 0.0, "num_input_tokens_seen": 21353776, "step": 37600 }, { "epoch": 659.6548672566372, "eval_loss": 0.7899188995361328, "eval_runtime": 0.9402, "eval_samples_per_second": 26.59, "eval_steps_per_second": 13.827, "num_input_tokens_seen": 21353776, "step": 37600 }, { "epoch": 659.7433628318585, "grad_norm": 3.45776527410635e-08, "learning_rate": 0.0026480895255119818, "loss": 0.0, "num_input_tokens_seen": 21356960, "step": 37605 }, { "epoch": 659.8318584070796, "grad_norm": 5.158298321816801e-08, "learning_rate": 0.002637081395297791, "loss": 0.0, "num_input_tokens_seen": 21359680, "step": 37610 }, { "epoch": 659.9203539823009, "grad_norm": 2.8147780994913774e-08, "learning_rate": 0.0026260959902975113, "loss": 0.0, "num_input_tokens_seen": 21362736, "step": 37615 }, { "epoch": 660.0, "grad_norm": 5.1334104966827e-08, "learning_rate": 0.00261513331220527, "loss": 0.0, "num_input_tokens_seen": 21365112, "step": 37620 }, { "epoch": 660.0884955752213, "grad_norm": 4.7880980957870634e-08, "learning_rate": 0.0026041933627116154, "loss": 0.0, "num_input_tokens_seen": 21368568, "step": 37625 }, { "epoch": 660.1769911504425, "grad_norm": 1.650272629660776e-08, "learning_rate": 0.0025932761435036476, "loss": 0.0, "num_input_tokens_seen": 21371352, "step": 37630 }, { "epoch": 660.2654867256637, "grad_norm": 2.599655068991069e-08, "learning_rate": 0.002582381656264904, "loss": 0.0, "num_input_tokens_seen": 21373912, "step": 37635 }, { "epoch": 660.3539823008849, "grad_norm": 1.4284664118235924e-08, "learning_rate": 0.0025715099026754895, "loss": 0.0, "num_input_tokens_seen": 21376952, "step": 37640 }, { "epoch": 660.4424778761062, "grad_norm": 3.1396012900586356e-08, "learning_rate": 0.002560660884411947, "loss": 0.0, "num_input_tokens_seen": 21379656, "step": 37645 }, { "epoch": 660.5309734513274, "grad_norm": 2.3053406295048262e-08, "learning_rate": 0.0025498346031473385, "loss": 0.0, "num_input_tokens_seen": 21381944, "step": 37650 }, { "epoch": 660.6194690265487, "grad_norm": 5.413679815546857e-08, "learning_rate": 0.0025390310605511945, "loss": 0.0, "num_input_tokens_seen": 21384792, "step": 37655 }, { "epoch": 660.70796460177, "grad_norm": 6.238354899323895e-08, "learning_rate": 0.0025282502582895995, "loss": 0.0, "num_input_tokens_seen": 21387288, "step": 37660 }, { "epoch": 660.7964601769911, "grad_norm": 8.262998107966268e-08, "learning_rate": 0.002517492198025023, "loss": 0.0, "num_input_tokens_seen": 21390072, "step": 37665 }, { "epoch": 660.8849557522124, "grad_norm": 2.4502419648797513e-08, "learning_rate": 0.0025067568814165554, "loss": 0.0, "num_input_tokens_seen": 21393304, "step": 37670 }, { "epoch": 660.9734513274336, "grad_norm": 1.6573912020589887e-08, "learning_rate": 0.0024960443101196884, "loss": 0.0, "num_input_tokens_seen": 21396600, "step": 37675 }, { "epoch": 661.0530973451328, "grad_norm": 2.2239980523863778e-08, "learning_rate": 0.002485354485786434, "loss": 0.0, "num_input_tokens_seen": 21398984, "step": 37680 }, { "epoch": 661.141592920354, "grad_norm": 2.081779015838947e-08, "learning_rate": 0.002474687410065307, "loss": 0.0, "num_input_tokens_seen": 21401896, "step": 37685 }, { "epoch": 661.2300884955753, "grad_norm": 4.423341692927352e-08, "learning_rate": 0.002464043084601308, "loss": 0.0, "num_input_tokens_seen": 21404488, "step": 37690 }, { "epoch": 661.3185840707964, "grad_norm": 3.7595281554558824e-08, "learning_rate": 0.0024534215110358915, "loss": 0.0, "num_input_tokens_seen": 21407368, "step": 37695 }, { "epoch": 661.4070796460177, "grad_norm": 3.756954569666959e-08, "learning_rate": 0.002442822691007096, "loss": 0.0, "num_input_tokens_seen": 21409768, "step": 37700 }, { "epoch": 661.4955752212389, "grad_norm": 2.200377302585821e-08, "learning_rate": 0.002432246626149348, "loss": 0.0, "num_input_tokens_seen": 21412952, "step": 37705 }, { "epoch": 661.5840707964602, "grad_norm": 2.1044506581802125e-08, "learning_rate": 0.002421693318093626, "loss": 0.0, "num_input_tokens_seen": 21415720, "step": 37710 }, { "epoch": 661.6725663716815, "grad_norm": 6.366853710915166e-08, "learning_rate": 0.0024111627684673784, "loss": 0.0, "num_input_tokens_seen": 21418968, "step": 37715 }, { "epoch": 661.7610619469026, "grad_norm": 4.862944891215193e-08, "learning_rate": 0.0024006549788945395, "loss": 0.0, "num_input_tokens_seen": 21421832, "step": 37720 }, { "epoch": 661.8495575221239, "grad_norm": 1.1695328083760614e-07, "learning_rate": 0.0023901699509955463, "loss": 0.0, "num_input_tokens_seen": 21424504, "step": 37725 }, { "epoch": 661.9380530973451, "grad_norm": 2.575555235750926e-08, "learning_rate": 0.0023797076863873554, "loss": 0.0, "num_input_tokens_seen": 21427624, "step": 37730 }, { "epoch": 662.0176991150443, "grad_norm": 3.465128273205664e-08, "learning_rate": 0.0023692681866833262, "loss": 0.0, "num_input_tokens_seen": 21430248, "step": 37735 }, { "epoch": 662.1061946902655, "grad_norm": 4.376929041427502e-08, "learning_rate": 0.0023588514534934046, "loss": 0.0, "num_input_tokens_seen": 21433352, "step": 37740 }, { "epoch": 662.1946902654868, "grad_norm": 3.0929502514709384e-08, "learning_rate": 0.002348457488423955, "loss": 0.0, "num_input_tokens_seen": 21436120, "step": 37745 }, { "epoch": 662.2831858407079, "grad_norm": 3.898851730355091e-08, "learning_rate": 0.0023380862930778624, "loss": 0.0, "num_input_tokens_seen": 21438856, "step": 37750 }, { "epoch": 662.3716814159292, "grad_norm": 1.7803250429437867e-08, "learning_rate": 0.0023277378690545135, "loss": 0.0, "num_input_tokens_seen": 21441576, "step": 37755 }, { "epoch": 662.4601769911504, "grad_norm": 3.583700447506999e-08, "learning_rate": 0.0023174122179497325, "loss": 0.0, "num_input_tokens_seen": 21444552, "step": 37760 }, { "epoch": 662.5486725663717, "grad_norm": 4.353854166083693e-08, "learning_rate": 0.0023071093413558784, "loss": 0.0, "num_input_tokens_seen": 21447208, "step": 37765 }, { "epoch": 662.637168141593, "grad_norm": 4.518368612593804e-08, "learning_rate": 0.002296829240861814, "loss": 0.0, "num_input_tokens_seen": 21450504, "step": 37770 }, { "epoch": 662.7256637168142, "grad_norm": 5.4693071405154114e-08, "learning_rate": 0.002286571918052821, "loss": 0.0, "num_input_tokens_seen": 21453704, "step": 37775 }, { "epoch": 662.8141592920354, "grad_norm": 2.2519618170235844e-08, "learning_rate": 0.0022763373745107174, "loss": 0.0, "num_input_tokens_seen": 21456856, "step": 37780 }, { "epoch": 662.9026548672566, "grad_norm": 4.508081019594101e-08, "learning_rate": 0.0022661256118138074, "loss": 0.0, "num_input_tokens_seen": 21459224, "step": 37785 }, { "epoch": 662.9911504424779, "grad_norm": 1.9781850824074354e-08, "learning_rate": 0.0022559366315368645, "loss": 0.0, "num_input_tokens_seen": 21462200, "step": 37790 }, { "epoch": 663.070796460177, "grad_norm": 2.007508648205203e-08, "learning_rate": 0.002245770435251182, "loss": 0.0, "num_input_tokens_seen": 21464776, "step": 37795 }, { "epoch": 663.1592920353983, "grad_norm": 4.4238692709086536e-08, "learning_rate": 0.002235627024524456, "loss": 0.0, "num_input_tokens_seen": 21467368, "step": 37800 }, { "epoch": 663.1592920353983, "eval_loss": 0.7847999930381775, "eval_runtime": 0.9529, "eval_samples_per_second": 26.235, "eval_steps_per_second": 13.642, "num_input_tokens_seen": 21467368, "step": 37800 }, { "epoch": 663.2477876106195, "grad_norm": 2.649870012305655e-08, "learning_rate": 0.0022255064009209847, "loss": 0.0, "num_input_tokens_seen": 21469960, "step": 37805 }, { "epoch": 663.3362831858407, "grad_norm": 1.9268748374656752e-08, "learning_rate": 0.0022154085660014864, "loss": 0.0, "num_input_tokens_seen": 21473000, "step": 37810 }, { "epoch": 663.4247787610619, "grad_norm": 2.5421618587984085e-08, "learning_rate": 0.0022053335213231494, "loss": 0.0, "num_input_tokens_seen": 21475912, "step": 37815 }, { "epoch": 663.5132743362832, "grad_norm": 4.5663337999712894e-08, "learning_rate": 0.002195281268439697, "loss": 0.0, "num_input_tokens_seen": 21479224, "step": 37820 }, { "epoch": 663.6017699115044, "grad_norm": 1.003513006025969e-07, "learning_rate": 0.002185251808901306, "loss": 0.0, "num_input_tokens_seen": 21482200, "step": 37825 }, { "epoch": 663.6902654867257, "grad_norm": 4.103519302134373e-08, "learning_rate": 0.0021752451442546227, "loss": 0.0, "num_input_tokens_seen": 21485160, "step": 37830 }, { "epoch": 663.7787610619469, "grad_norm": 2.9529447331810843e-08, "learning_rate": 0.0021652612760428456, "loss": 0.0, "num_input_tokens_seen": 21488104, "step": 37835 }, { "epoch": 663.8672566371681, "grad_norm": 3.1629589614112774e-08, "learning_rate": 0.0021553002058055603, "loss": 0.0, "num_input_tokens_seen": 21490952, "step": 37840 }, { "epoch": 663.9557522123894, "grad_norm": 5.90975872682975e-08, "learning_rate": 0.0021453619350789376, "loss": 0.0, "num_input_tokens_seen": 21493880, "step": 37845 }, { "epoch": 664.0353982300885, "grad_norm": 4.1690952912176726e-08, "learning_rate": 0.0021354464653955516, "loss": 0.0, "num_input_tokens_seen": 21496280, "step": 37850 }, { "epoch": 664.1238938053098, "grad_norm": 2.60836010568255e-08, "learning_rate": 0.002125553798284513, "loss": 0.0, "num_input_tokens_seen": 21499048, "step": 37855 }, { "epoch": 664.212389380531, "grad_norm": 2.418865641118373e-08, "learning_rate": 0.002115683935271384, "loss": 0.0, "num_input_tokens_seen": 21502184, "step": 37860 }, { "epoch": 664.3008849557522, "grad_norm": 1.6188881346579365e-08, "learning_rate": 0.0021058368778782144, "loss": 0.0, "num_input_tokens_seen": 21504856, "step": 37865 }, { "epoch": 664.3893805309734, "grad_norm": 2.4827597755461284e-08, "learning_rate": 0.002096012627623539, "loss": 0.0, "num_input_tokens_seen": 21508200, "step": 37870 }, { "epoch": 664.4778761061947, "grad_norm": 5.0405848384116325e-08, "learning_rate": 0.00208621118602243, "loss": 0.0, "num_input_tokens_seen": 21511096, "step": 37875 }, { "epoch": 664.566371681416, "grad_norm": 5.8483117015839525e-08, "learning_rate": 0.002076432554586327, "loss": 0.0, "num_input_tokens_seen": 21513864, "step": 37880 }, { "epoch": 664.6548672566372, "grad_norm": 3.786524160886984e-08, "learning_rate": 0.002066676734823258, "loss": 0.0, "num_input_tokens_seen": 21516584, "step": 37885 }, { "epoch": 664.7433628318585, "grad_norm": 1.0048010778973548e-07, "learning_rate": 0.0020569437282376866, "loss": 0.0, "num_input_tokens_seen": 21519432, "step": 37890 }, { "epoch": 664.8318584070796, "grad_norm": 4.706843625967849e-08, "learning_rate": 0.002047233536330545, "loss": 0.0, "num_input_tokens_seen": 21522168, "step": 37895 }, { "epoch": 664.9203539823009, "grad_norm": 8.064790080197781e-08, "learning_rate": 0.0020375461605993015, "loss": 0.0, "num_input_tokens_seen": 21525368, "step": 37900 }, { "epoch": 665.0, "grad_norm": 1.9485643321104362e-08, "learning_rate": 0.002027881602537845, "loss": 0.0, "num_input_tokens_seen": 21527784, "step": 37905 }, { "epoch": 665.0884955752213, "grad_norm": 2.4993514813331785e-08, "learning_rate": 0.002018239863636567, "loss": 0.0, "num_input_tokens_seen": 21530072, "step": 37910 }, { "epoch": 665.1769911504425, "grad_norm": 7.861197559577704e-08, "learning_rate": 0.002008620945382378, "loss": 0.0, "num_input_tokens_seen": 21532872, "step": 37915 }, { "epoch": 665.2654867256637, "grad_norm": 1.2556266426599905e-07, "learning_rate": 0.001999024849258607, "loss": 0.0, "num_input_tokens_seen": 21535672, "step": 37920 }, { "epoch": 665.3539823008849, "grad_norm": 5.137002645483335e-08, "learning_rate": 0.001989451576745105, "loss": 0.0, "num_input_tokens_seen": 21538840, "step": 37925 }, { "epoch": 665.4424778761062, "grad_norm": 3.264521808432619e-08, "learning_rate": 0.00197990112931819, "loss": 0.0, "num_input_tokens_seen": 21541464, "step": 37930 }, { "epoch": 665.5309734513274, "grad_norm": 6.84740228962255e-08, "learning_rate": 0.0019703735084506345, "loss": 0.0, "num_input_tokens_seen": 21544216, "step": 37935 }, { "epoch": 665.6194690265487, "grad_norm": 9.753097884868112e-08, "learning_rate": 0.001960868715611763, "loss": 0.0, "num_input_tokens_seen": 21547576, "step": 37940 }, { "epoch": 665.70796460177, "grad_norm": 2.3686125061317398e-08, "learning_rate": 0.0019513867522673034, "loss": 0.0, "num_input_tokens_seen": 21550392, "step": 37945 }, { "epoch": 665.7964601769911, "grad_norm": 3.4306431473396515e-08, "learning_rate": 0.001941927619879502, "loss": 0.0, "num_input_tokens_seen": 21553416, "step": 37950 }, { "epoch": 665.8849557522124, "grad_norm": 3.636456824551715e-08, "learning_rate": 0.0019324913199070758, "loss": 0.0, "num_input_tokens_seen": 21556792, "step": 37955 }, { "epoch": 665.9734513274336, "grad_norm": 4.344121151689251e-08, "learning_rate": 0.0019230778538052106, "loss": 0.0, "num_input_tokens_seen": 21559416, "step": 37960 }, { "epoch": 666.0530973451328, "grad_norm": 7.866967166592076e-08, "learning_rate": 0.0019136872230255952, "loss": 0.0, "num_input_tokens_seen": 21561768, "step": 37965 }, { "epoch": 666.141592920354, "grad_norm": 6.551740483473623e-08, "learning_rate": 0.0019043194290164045, "loss": 0.0, "num_input_tokens_seen": 21564568, "step": 37970 }, { "epoch": 666.2300884955753, "grad_norm": 2.868379844755964e-08, "learning_rate": 0.0018949744732222162, "loss": 0.0, "num_input_tokens_seen": 21567640, "step": 37975 }, { "epoch": 666.3185840707964, "grad_norm": 2.7418002090939808e-08, "learning_rate": 0.0018856523570841776, "loss": 0.0, "num_input_tokens_seen": 21570568, "step": 37980 }, { "epoch": 666.4070796460177, "grad_norm": 5.185851748024106e-08, "learning_rate": 0.0018763530820398555, "loss": 0.0, "num_input_tokens_seen": 21573176, "step": 37985 }, { "epoch": 666.4955752212389, "grad_norm": 6.124844986743483e-08, "learning_rate": 0.0018670766495233525, "loss": 0.0, "num_input_tokens_seen": 21576184, "step": 37990 }, { "epoch": 666.5840707964602, "grad_norm": 1.6464724694742472e-08, "learning_rate": 0.001857823060965158, "loss": 0.0, "num_input_tokens_seen": 21578840, "step": 37995 }, { "epoch": 666.6725663716815, "grad_norm": 3.086255162543239e-08, "learning_rate": 0.0018485923177923467, "loss": 0.0, "num_input_tokens_seen": 21581512, "step": 38000 }, { "epoch": 666.6725663716815, "eval_loss": 0.7813501954078674, "eval_runtime": 0.9546, "eval_samples_per_second": 26.189, "eval_steps_per_second": 13.618, "num_input_tokens_seen": 21581512, "step": 38000 }, { "epoch": 666.7610619469026, "grad_norm": 5.436567462879793e-08, "learning_rate": 0.001839384421428364, "loss": 0.0, "num_input_tokens_seen": 21584920, "step": 38005 }, { "epoch": 666.8495575221239, "grad_norm": 2.973313861787119e-08, "learning_rate": 0.0018301993732932065, "loss": 0.0, "num_input_tokens_seen": 21587512, "step": 38010 }, { "epoch": 666.9380530973451, "grad_norm": 5.375415312869336e-08, "learning_rate": 0.0018210371748033248, "loss": 0.0, "num_input_tokens_seen": 21590312, "step": 38015 }, { "epoch": 667.0176991150443, "grad_norm": 8.453049815670965e-08, "learning_rate": 0.0018118978273716556, "loss": 0.0, "num_input_tokens_seen": 21592400, "step": 38020 }, { "epoch": 667.1061946902655, "grad_norm": 1.632611379420723e-08, "learning_rate": 0.001802781332407588, "loss": 0.0, "num_input_tokens_seen": 21595792, "step": 38025 }, { "epoch": 667.1946902654868, "grad_norm": 6.664970442216145e-08, "learning_rate": 0.0017936876913169806, "loss": 0.0, "num_input_tokens_seen": 21598464, "step": 38030 }, { "epoch": 667.2831858407079, "grad_norm": 2.0106444509337962e-08, "learning_rate": 0.0017846169055022287, "loss": 0.0, "num_input_tokens_seen": 21601408, "step": 38035 }, { "epoch": 667.3716814159292, "grad_norm": 1.1351018258665135e-07, "learning_rate": 0.0017755689763621295, "loss": 0.0, "num_input_tokens_seen": 21604192, "step": 38040 }, { "epoch": 667.4601769911504, "grad_norm": 3.233967760252199e-08, "learning_rate": 0.0017665439052920173, "loss": 0.0, "num_input_tokens_seen": 21607360, "step": 38045 }, { "epoch": 667.5486725663717, "grad_norm": 4.887234084094416e-08, "learning_rate": 0.0017575416936836286, "loss": 0.0, "num_input_tokens_seen": 21610000, "step": 38050 }, { "epoch": 667.637168141593, "grad_norm": 3.4989188435474716e-08, "learning_rate": 0.0017485623429252528, "loss": 0.0, "num_input_tokens_seen": 21613296, "step": 38055 }, { "epoch": 667.7256637168142, "grad_norm": 3.648264268463208e-08, "learning_rate": 0.0017396058544016156, "loss": 0.0, "num_input_tokens_seen": 21615648, "step": 38060 }, { "epoch": 667.8141592920354, "grad_norm": 3.75050248635489e-08, "learning_rate": 0.0017306722294938958, "loss": 0.0, "num_input_tokens_seen": 21618352, "step": 38065 }, { "epoch": 667.9026548672566, "grad_norm": 1.761000945066371e-08, "learning_rate": 0.0017217614695798078, "loss": 0.0, "num_input_tokens_seen": 21621520, "step": 38070 }, { "epoch": 667.9911504424779, "grad_norm": 1.8809098278893543e-08, "learning_rate": 0.001712873576033469, "loss": 0.0, "num_input_tokens_seen": 21624368, "step": 38075 }, { "epoch": 668.070796460177, "grad_norm": 4.82365010157082e-08, "learning_rate": 0.0017040085502255163, "loss": 0.0, "num_input_tokens_seen": 21626816, "step": 38080 }, { "epoch": 668.1592920353983, "grad_norm": 6.67929143105539e-08, "learning_rate": 0.0016951663935230565, "loss": 0.0, "num_input_tokens_seen": 21629504, "step": 38085 }, { "epoch": 668.2477876106195, "grad_norm": 2.9938732382106537e-08, "learning_rate": 0.0016863471072896485, "loss": 0.0, "num_input_tokens_seen": 21632416, "step": 38090 }, { "epoch": 668.3362831858407, "grad_norm": 2.9025962078321754e-08, "learning_rate": 0.0016775506928853377, "loss": 0.0, "num_input_tokens_seen": 21635264, "step": 38095 }, { "epoch": 668.4247787610619, "grad_norm": 4.615922577499987e-08, "learning_rate": 0.001668777151666656, "loss": 0.0, "num_input_tokens_seen": 21638192, "step": 38100 }, { "epoch": 668.5132743362832, "grad_norm": 3.4986676666903804e-08, "learning_rate": 0.0016600264849865709, "loss": 0.0, "num_input_tokens_seen": 21641504, "step": 38105 }, { "epoch": 668.6017699115044, "grad_norm": 3.149193261720029e-08, "learning_rate": 0.0016512986941945695, "loss": 0.0, "num_input_tokens_seen": 21644672, "step": 38110 }, { "epoch": 668.6902654867257, "grad_norm": 3.8721751138837135e-08, "learning_rate": 0.0016425937806365753, "loss": 0.0, "num_input_tokens_seen": 21647200, "step": 38115 }, { "epoch": 668.7787610619469, "grad_norm": 3.9269011153919564e-08, "learning_rate": 0.0016339117456549979, "loss": 0.0, "num_input_tokens_seen": 21649808, "step": 38120 }, { "epoch": 668.8672566371681, "grad_norm": 3.5385632202178385e-08, "learning_rate": 0.0016252525905886995, "loss": 0.0, "num_input_tokens_seen": 21652784, "step": 38125 }, { "epoch": 668.9557522123894, "grad_norm": 3.6932092939423455e-08, "learning_rate": 0.0016166163167730617, "loss": 0.0, "num_input_tokens_seen": 21655456, "step": 38130 }, { "epoch": 669.0353982300885, "grad_norm": 1.7174599520330958e-08, "learning_rate": 0.0016080029255398864, "loss": 0.0, "num_input_tokens_seen": 21657464, "step": 38135 }, { "epoch": 669.1238938053098, "grad_norm": 2.0777921605485972e-08, "learning_rate": 0.0015994124182174606, "loss": 0.0, "num_input_tokens_seen": 21660184, "step": 38140 }, { "epoch": 669.212389380531, "grad_norm": 5.614894504901713e-08, "learning_rate": 0.001590844796130575, "loss": 0.0, "num_input_tokens_seen": 21662776, "step": 38145 }, { "epoch": 669.3008849557522, "grad_norm": 4.0771780618342746e-08, "learning_rate": 0.001582300060600439, "loss": 0.0, "num_input_tokens_seen": 21665384, "step": 38150 }, { "epoch": 669.3893805309734, "grad_norm": 3.5777517837232153e-08, "learning_rate": 0.0015737782129447652, "loss": 0.0, "num_input_tokens_seen": 21668760, "step": 38155 }, { "epoch": 669.4778761061947, "grad_norm": 4.314702906071943e-08, "learning_rate": 0.0015652792544777361, "loss": 0.0, "num_input_tokens_seen": 21671592, "step": 38160 }, { "epoch": 669.566371681416, "grad_norm": 3.5235938611322126e-08, "learning_rate": 0.0015568031865099863, "loss": 0.0, "num_input_tokens_seen": 21674664, "step": 38165 }, { "epoch": 669.6548672566372, "grad_norm": 2.869182402776005e-08, "learning_rate": 0.0015483500103486369, "loss": 0.0, "num_input_tokens_seen": 21677672, "step": 38170 }, { "epoch": 669.7433628318585, "grad_norm": 4.279128873463378e-08, "learning_rate": 0.0015399197272972787, "loss": 0.0, "num_input_tokens_seen": 21680728, "step": 38175 }, { "epoch": 669.8318584070796, "grad_norm": 2.407829491346547e-08, "learning_rate": 0.0015315123386559714, "loss": 0.0, "num_input_tokens_seen": 21683464, "step": 38180 }, { "epoch": 669.9203539823009, "grad_norm": 2.7190120377440508e-08, "learning_rate": 0.0015231278457212283, "loss": 0.0, "num_input_tokens_seen": 21686568, "step": 38185 }, { "epoch": 670.0, "grad_norm": 1.3715221314214432e-07, "learning_rate": 0.001514766249786048, "loss": 0.0, "num_input_tokens_seen": 21688728, "step": 38190 }, { "epoch": 670.0884955752213, "grad_norm": 5.9099864557765613e-08, "learning_rate": 0.0015064275521398994, "loss": 0.0, "num_input_tokens_seen": 21691496, "step": 38195 }, { "epoch": 670.1769911504425, "grad_norm": 3.1588239579605215e-08, "learning_rate": 0.0014981117540686872, "loss": 0.0, "num_input_tokens_seen": 21694376, "step": 38200 }, { "epoch": 670.1769911504425, "eval_loss": 0.7832437753677368, "eval_runtime": 0.9531, "eval_samples_per_second": 26.23, "eval_steps_per_second": 13.64, "num_input_tokens_seen": 21694376, "step": 38200 }, { "epoch": 670.2654867256637, "grad_norm": 3.499761191960715e-08, "learning_rate": 0.0014898188568548687, "loss": 0.0, "num_input_tokens_seen": 21697336, "step": 38205 }, { "epoch": 670.3539823008849, "grad_norm": 4.272216003187168e-08, "learning_rate": 0.0014815488617772542, "loss": 0.0, "num_input_tokens_seen": 21700440, "step": 38210 }, { "epoch": 670.4424778761062, "grad_norm": 2.989823855159557e-08, "learning_rate": 0.0014733017701112072, "loss": 0.0, "num_input_tokens_seen": 21703448, "step": 38215 }, { "epoch": 670.5309734513274, "grad_norm": 7.983527439137106e-08, "learning_rate": 0.0014650775831285435, "loss": 0.0, "num_input_tokens_seen": 21706408, "step": 38220 }, { "epoch": 670.6194690265487, "grad_norm": 5.3262475319115765e-08, "learning_rate": 0.001456876302097515, "loss": 0.0, "num_input_tokens_seen": 21709368, "step": 38225 }, { "epoch": 670.70796460177, "grad_norm": 3.7635768279642434e-08, "learning_rate": 0.0014486979282828604, "loss": 0.0, "num_input_tokens_seen": 21712104, "step": 38230 }, { "epoch": 670.7964601769911, "grad_norm": 3.2032481556143466e-08, "learning_rate": 0.001440542462945804, "loss": 0.0, "num_input_tokens_seen": 21715304, "step": 38235 }, { "epoch": 670.8849557522124, "grad_norm": 7.429822090898597e-08, "learning_rate": 0.0014324099073440232, "loss": 0.0, "num_input_tokens_seen": 21718184, "step": 38240 }, { "epoch": 670.9734513274336, "grad_norm": 7.701578397245612e-08, "learning_rate": 0.0014243002627316482, "loss": 0.0, "num_input_tokens_seen": 21720712, "step": 38245 }, { "epoch": 671.0530973451328, "grad_norm": 6.362460425179961e-08, "learning_rate": 0.0014162135303592781, "loss": 0.0, "num_input_tokens_seen": 21723472, "step": 38250 }, { "epoch": 671.141592920354, "grad_norm": 4.059573655013082e-08, "learning_rate": 0.001408149711474016, "loss": 0.0, "num_input_tokens_seen": 21726368, "step": 38255 }, { "epoch": 671.2300884955753, "grad_norm": 3.114764624001509e-08, "learning_rate": 0.0014001088073193834, "loss": 0.0, "num_input_tokens_seen": 21729168, "step": 38260 }, { "epoch": 671.3185840707964, "grad_norm": 6.347609371459839e-08, "learning_rate": 0.0013920908191354052, "loss": 0.0, "num_input_tokens_seen": 21731824, "step": 38265 }, { "epoch": 671.4070796460177, "grad_norm": 8.312285082467952e-09, "learning_rate": 0.001384095748158526, "loss": 0.0, "num_input_tokens_seen": 21734896, "step": 38270 }, { "epoch": 671.4955752212389, "grad_norm": 3.9911959959226806e-08, "learning_rate": 0.0013761235956217255, "loss": 0.0, "num_input_tokens_seen": 21737392, "step": 38275 }, { "epoch": 671.5840707964602, "grad_norm": 1.497691570762072e-08, "learning_rate": 0.0013681743627543873, "loss": 0.0, "num_input_tokens_seen": 21740080, "step": 38280 }, { "epoch": 671.6725663716815, "grad_norm": 4.503295514268757e-08, "learning_rate": 0.001360248050782381, "loss": 0.0, "num_input_tokens_seen": 21743056, "step": 38285 }, { "epoch": 671.7610619469026, "grad_norm": 5.029926342103863e-08, "learning_rate": 0.001352344660928062, "loss": 0.0, "num_input_tokens_seen": 21745856, "step": 38290 }, { "epoch": 671.8495575221239, "grad_norm": 1.0662675720141124e-07, "learning_rate": 0.0013444641944102052, "loss": 0.0, "num_input_tokens_seen": 21748784, "step": 38295 }, { "epoch": 671.9380530973451, "grad_norm": 4.2400344568704895e-08, "learning_rate": 0.0013366066524441056, "loss": 0.0, "num_input_tokens_seen": 21751360, "step": 38300 }, { "epoch": 672.0176991150443, "grad_norm": 5.79097800823547e-08, "learning_rate": 0.0013287720362414768, "loss": 0.0, "num_input_tokens_seen": 21754216, "step": 38305 }, { "epoch": 672.1061946902655, "grad_norm": 2.3854747510654306e-08, "learning_rate": 0.0013209603470105025, "loss": 0.0, "num_input_tokens_seen": 21757096, "step": 38310 }, { "epoch": 672.1946902654868, "grad_norm": 3.186459807125175e-08, "learning_rate": 0.0013131715859558857, "loss": 0.0, "num_input_tokens_seen": 21759656, "step": 38315 }, { "epoch": 672.2831858407079, "grad_norm": 4.6508581874604715e-08, "learning_rate": 0.001305405754278699, "loss": 0.0, "num_input_tokens_seen": 21762728, "step": 38320 }, { "epoch": 672.3716814159292, "grad_norm": 3.7767328819882096e-08, "learning_rate": 0.0012976628531765843, "loss": 0.0, "num_input_tokens_seen": 21765384, "step": 38325 }, { "epoch": 672.4601769911504, "grad_norm": 2.351332639705106e-08, "learning_rate": 0.0012899428838435533, "loss": 0.0, "num_input_tokens_seen": 21768072, "step": 38330 }, { "epoch": 672.5486725663717, "grad_norm": 3.540122506251464e-08, "learning_rate": 0.001282245847470137, "loss": 0.0, "num_input_tokens_seen": 21770776, "step": 38335 }, { "epoch": 672.637168141593, "grad_norm": 2.3661606007863156e-08, "learning_rate": 0.001274571745243319, "loss": 0.0, "num_input_tokens_seen": 21773688, "step": 38340 }, { "epoch": 672.7256637168142, "grad_norm": 3.064680953457355e-08, "learning_rate": 0.0012669205783465364, "loss": 0.0, "num_input_tokens_seen": 21776520, "step": 38345 }, { "epoch": 672.8141592920354, "grad_norm": 5.077304976452979e-08, "learning_rate": 0.001259292347959695, "loss": 0.0, "num_input_tokens_seen": 21779912, "step": 38350 }, { "epoch": 672.9026548672566, "grad_norm": 3.058561759416989e-08, "learning_rate": 0.0012516870552591707, "loss": 0.0, "num_input_tokens_seen": 21783064, "step": 38355 }, { "epoch": 672.9911504424779, "grad_norm": 4.562546607189688e-08, "learning_rate": 0.001244104701417792, "loss": 0.0, "num_input_tokens_seen": 21785752, "step": 38360 }, { "epoch": 673.070796460177, "grad_norm": 4.18063059726137e-08, "learning_rate": 0.0012365452876048565, "loss": 0.0, "num_input_tokens_seen": 21788280, "step": 38365 }, { "epoch": 673.1592920353983, "grad_norm": 5.460309893123849e-08, "learning_rate": 0.001229008814986099, "loss": 0.0, "num_input_tokens_seen": 21790888, "step": 38370 }, { "epoch": 673.2477876106195, "grad_norm": 6.222386161880422e-08, "learning_rate": 0.0012214952847237725, "loss": 0.0, "num_input_tokens_seen": 21793880, "step": 38375 }, { "epoch": 673.3362831858407, "grad_norm": 7.134742929792992e-08, "learning_rate": 0.0012140046979765339, "loss": 0.0, "num_input_tokens_seen": 21796712, "step": 38380 }, { "epoch": 673.4247787610619, "grad_norm": 3.7434595867580356e-08, "learning_rate": 0.0012065370558995258, "loss": 0.0, "num_input_tokens_seen": 21799816, "step": 38385 }, { "epoch": 673.5132743362832, "grad_norm": 3.837060447153817e-08, "learning_rate": 0.0011990923596443602, "loss": 0.0, "num_input_tokens_seen": 21802776, "step": 38390 }, { "epoch": 673.6017699115044, "grad_norm": 2.7394044366246817e-08, "learning_rate": 0.001191670610359119, "loss": 0.0, "num_input_tokens_seen": 21805768, "step": 38395 }, { "epoch": 673.6902654867257, "grad_norm": 3.340823440112217e-08, "learning_rate": 0.0011842718091882865, "loss": 0.0, "num_input_tokens_seen": 21808568, "step": 38400 }, { "epoch": 673.6902654867257, "eval_loss": 0.7953433990478516, "eval_runtime": 0.9416, "eval_samples_per_second": 26.552, "eval_steps_per_second": 13.807, "num_input_tokens_seen": 21808568, "step": 38400 }, { "epoch": 673.7787610619469, "grad_norm": 3.83823142158235e-08, "learning_rate": 0.0011768959572729, "loss": 0.0, "num_input_tokens_seen": 21811496, "step": 38405 }, { "epoch": 673.8672566371681, "grad_norm": 3.2094856550202167e-08, "learning_rate": 0.001169543055750366, "loss": 0.0, "num_input_tokens_seen": 21813784, "step": 38410 }, { "epoch": 673.9557522123894, "grad_norm": 3.650057323056899e-08, "learning_rate": 0.0011622131057546115, "loss": 0.0, "num_input_tokens_seen": 21817224, "step": 38415 }, { "epoch": 674.0353982300885, "grad_norm": 3.2848998188228506e-08, "learning_rate": 0.0011549061084160316, "loss": 0.0, "num_input_tokens_seen": 21819688, "step": 38420 }, { "epoch": 674.1238938053098, "grad_norm": 6.140194130921373e-08, "learning_rate": 0.0011476220648614088, "loss": 0.0, "num_input_tokens_seen": 21822312, "step": 38425 }, { "epoch": 674.212389380531, "grad_norm": 2.2812741917732637e-08, "learning_rate": 0.0011403609762140777, "loss": 0.0, "num_input_tokens_seen": 21825144, "step": 38430 }, { "epoch": 674.3008849557522, "grad_norm": 7.511926725101148e-08, "learning_rate": 0.0011331228435937756, "loss": 0.0, "num_input_tokens_seen": 21828008, "step": 38435 }, { "epoch": 674.3893805309734, "grad_norm": 1.225963899287308e-07, "learning_rate": 0.0011259076681166935, "loss": 0.0, "num_input_tokens_seen": 21831432, "step": 38440 }, { "epoch": 674.4778761061947, "grad_norm": 5.003910530376743e-08, "learning_rate": 0.0011187154508955244, "loss": 0.0, "num_input_tokens_seen": 21834472, "step": 38445 }, { "epoch": 674.566371681416, "grad_norm": 6.247152839478076e-08, "learning_rate": 0.001111546193039381, "loss": 0.0, "num_input_tokens_seen": 21836840, "step": 38450 }, { "epoch": 674.6548672566372, "grad_norm": 2.8499313131646886e-08, "learning_rate": 0.0011043998956538792, "loss": 0.0, "num_input_tokens_seen": 21839992, "step": 38455 }, { "epoch": 674.7433628318585, "grad_norm": 1.9929581540623076e-08, "learning_rate": 0.0010972765598410538, "loss": 0.0, "num_input_tokens_seen": 21842968, "step": 38460 }, { "epoch": 674.8318584070796, "grad_norm": 2.3524480141645654e-08, "learning_rate": 0.0010901761866993931, "loss": 0.0, "num_input_tokens_seen": 21845400, "step": 38465 }, { "epoch": 674.9203539823009, "grad_norm": 2.9118906397229694e-08, "learning_rate": 0.0010830987773238876, "loss": 0.0, "num_input_tokens_seen": 21848168, "step": 38470 }, { "epoch": 675.0, "grad_norm": 1.0766945024442975e-07, "learning_rate": 0.0010760443328059644, "loss": 0.0, "num_input_tokens_seen": 21850760, "step": 38475 }, { "epoch": 675.0884955752213, "grad_norm": 3.585607544209779e-08, "learning_rate": 0.001069012854233503, "loss": 0.0, "num_input_tokens_seen": 21853336, "step": 38480 }, { "epoch": 675.1769911504425, "grad_norm": 8.061828538075133e-08, "learning_rate": 0.0010620043426908365, "loss": 0.0, "num_input_tokens_seen": 21856296, "step": 38485 }, { "epoch": 675.2654867256637, "grad_norm": 6.746235214905028e-08, "learning_rate": 0.0010550187992587833, "loss": 0.0, "num_input_tokens_seen": 21858936, "step": 38490 }, { "epoch": 675.3539823008849, "grad_norm": 4.288154187293003e-08, "learning_rate": 0.0010480562250145653, "loss": 0.0, "num_input_tokens_seen": 21862184, "step": 38495 }, { "epoch": 675.4424778761062, "grad_norm": 3.8811350577816484e-08, "learning_rate": 0.0010411166210319567, "loss": 0.0, "num_input_tokens_seen": 21864792, "step": 38500 }, { "epoch": 675.5309734513274, "grad_norm": 5.484825038593044e-08, "learning_rate": 0.0010341999883810848, "loss": 0.0, "num_input_tokens_seen": 21867800, "step": 38505 }, { "epoch": 675.6194690265487, "grad_norm": 1.035331642640358e-08, "learning_rate": 0.0010273063281285965, "loss": 0.0, "num_input_tokens_seen": 21870968, "step": 38510 }, { "epoch": 675.70796460177, "grad_norm": 2.7131127566804025e-08, "learning_rate": 0.0010204356413375747, "loss": 0.0, "num_input_tokens_seen": 21873736, "step": 38515 }, { "epoch": 675.7964601769911, "grad_norm": 5.689189563895525e-08, "learning_rate": 0.001013587929067572, "loss": 0.0, "num_input_tokens_seen": 21876808, "step": 38520 }, { "epoch": 675.8849557522124, "grad_norm": 4.6168974421334497e-08, "learning_rate": 0.00100676319237461, "loss": 0.0, "num_input_tokens_seen": 21879944, "step": 38525 }, { "epoch": 675.9734513274336, "grad_norm": 3.850312779718479e-08, "learning_rate": 0.0009999614323110972, "loss": 0.0, "num_input_tokens_seen": 21882920, "step": 38530 }, { "epoch": 676.0530973451328, "grad_norm": 2.1913125536343614e-08, "learning_rate": 0.000993182649926011, "loss": 0.0, "num_input_tokens_seen": 21885056, "step": 38535 }, { "epoch": 676.141592920354, "grad_norm": 3.7638457683897286e-08, "learning_rate": 0.000986426846264682, "loss": 0.0, "num_input_tokens_seen": 21887664, "step": 38540 }, { "epoch": 676.2300884955753, "grad_norm": 5.807854108752508e-08, "learning_rate": 0.00097969402236896, "loss": 0.0, "num_input_tokens_seen": 21890864, "step": 38545 }, { "epoch": 676.3185840707964, "grad_norm": 4.466122049961996e-08, "learning_rate": 0.0009729841792771143, "loss": 0.0, "num_input_tokens_seen": 21893232, "step": 38550 }, { "epoch": 676.4070796460177, "grad_norm": 3.050596930620486e-08, "learning_rate": 0.0009662973180239176, "loss": 0.0, "num_input_tokens_seen": 21896128, "step": 38555 }, { "epoch": 676.4955752212389, "grad_norm": 3.402989889877972e-08, "learning_rate": 0.0009596334396405448, "loss": 0.0, "num_input_tokens_seen": 21898768, "step": 38560 }, { "epoch": 676.5840707964602, "grad_norm": 8.500694548274623e-08, "learning_rate": 0.0009529925451546406, "loss": 0.0, "num_input_tokens_seen": 21901824, "step": 38565 }, { "epoch": 676.6725663716815, "grad_norm": 3.272971937917646e-08, "learning_rate": 0.0009463746355903357, "loss": 0.0, "num_input_tokens_seen": 21904816, "step": 38570 }, { "epoch": 676.7610619469026, "grad_norm": 5.4881912348037076e-08, "learning_rate": 0.0009397797119681971, "loss": 0.0, "num_input_tokens_seen": 21908208, "step": 38575 }, { "epoch": 676.8495575221239, "grad_norm": 7.060258866431468e-08, "learning_rate": 0.0009332077753052281, "loss": 0.0, "num_input_tokens_seen": 21911136, "step": 38580 }, { "epoch": 676.9380530973451, "grad_norm": 4.4273139820916185e-08, "learning_rate": 0.0009266588266149011, "loss": 0.0, "num_input_tokens_seen": 21913952, "step": 38585 }, { "epoch": 677.0176991150443, "grad_norm": 3.794923131295036e-08, "learning_rate": 0.0009201328669071584, "loss": 0.0, "num_input_tokens_seen": 21916616, "step": 38590 }, { "epoch": 677.1061946902655, "grad_norm": 2.179042901673256e-08, "learning_rate": 0.0009136298971883949, "loss": 0.0, "num_input_tokens_seen": 21919464, "step": 38595 }, { "epoch": 677.1946902654868, "grad_norm": 6.344411929148919e-08, "learning_rate": 0.0009071499184614251, "loss": 0.0, "num_input_tokens_seen": 21922424, "step": 38600 }, { "epoch": 677.1946902654868, "eval_loss": 0.8041082620620728, "eval_runtime": 0.936, "eval_samples_per_second": 26.709, "eval_steps_per_second": 13.889, "num_input_tokens_seen": 21922424, "step": 38600 }, { "epoch": 677.2831858407079, "grad_norm": 2.624825157226951e-08, "learning_rate": 0.0009006929317255663, "loss": 0.0, "num_input_tokens_seen": 21925352, "step": 38605 }, { "epoch": 677.3716814159292, "grad_norm": 7.456537076677705e-08, "learning_rate": 0.0008942589379765387, "loss": 0.0, "num_input_tokens_seen": 21928232, "step": 38610 }, { "epoch": 677.4601769911504, "grad_norm": 1.7836745414001598e-08, "learning_rate": 0.0008878479382065817, "loss": 0.0, "num_input_tokens_seen": 21930936, "step": 38615 }, { "epoch": 677.5486725663717, "grad_norm": 3.617888921780832e-08, "learning_rate": 0.0008814599334043215, "loss": 0.0, "num_input_tokens_seen": 21933832, "step": 38620 }, { "epoch": 677.637168141593, "grad_norm": 3.4327193532135425e-08, "learning_rate": 0.0008750949245548866, "loss": 0.0, "num_input_tokens_seen": 21936584, "step": 38625 }, { "epoch": 677.7256637168142, "grad_norm": 9.644985965451269e-08, "learning_rate": 0.0008687529126398252, "loss": 0.0, "num_input_tokens_seen": 21938936, "step": 38630 }, { "epoch": 677.8141592920354, "grad_norm": 4.290393107453383e-08, "learning_rate": 0.0008624338986371715, "loss": 0.0, "num_input_tokens_seen": 21941928, "step": 38635 }, { "epoch": 677.9026548672566, "grad_norm": 3.877931931128842e-08, "learning_rate": 0.0008561378835213962, "loss": 0.0, "num_input_tokens_seen": 21944952, "step": 38640 }, { "epoch": 677.9911504424779, "grad_norm": 2.95348510093163e-08, "learning_rate": 0.0008498648682634058, "loss": 0.0, "num_input_tokens_seen": 21947768, "step": 38645 }, { "epoch": 678.070796460177, "grad_norm": 4.983577284178864e-08, "learning_rate": 0.0008436148538306099, "loss": 0.0, "num_input_tokens_seen": 21950296, "step": 38650 }, { "epoch": 678.1592920353983, "grad_norm": 5.8301171890207115e-08, "learning_rate": 0.0008373878411868041, "loss": 0.0, "num_input_tokens_seen": 21953080, "step": 38655 }, { "epoch": 678.2477876106195, "grad_norm": 2.352419592455135e-08, "learning_rate": 0.000831183831292287, "loss": 0.0, "num_input_tokens_seen": 21955736, "step": 38660 }, { "epoch": 678.3362831858407, "grad_norm": 3.525707015228363e-08, "learning_rate": 0.0008250028251037933, "loss": 0.0, "num_input_tokens_seen": 21958952, "step": 38665 }, { "epoch": 678.4247787610619, "grad_norm": 4.336259706860801e-08, "learning_rate": 0.0008188448235745271, "loss": 0.0, "num_input_tokens_seen": 21962056, "step": 38670 }, { "epoch": 678.5132743362832, "grad_norm": 4.345683635165187e-08, "learning_rate": 0.0008127098276541122, "loss": 0.0, "num_input_tokens_seen": 21964808, "step": 38675 }, { "epoch": 678.6017699115044, "grad_norm": 1.5270691378077572e-08, "learning_rate": 0.0008065978382886418, "loss": 0.0, "num_input_tokens_seen": 21967928, "step": 38680 }, { "epoch": 678.6902654867257, "grad_norm": 5.0121695238658504e-08, "learning_rate": 0.0008005088564206785, "loss": 0.0, "num_input_tokens_seen": 21970376, "step": 38685 }, { "epoch": 678.7787610619469, "grad_norm": 6.852297929071938e-08, "learning_rate": 0.0007944428829891881, "loss": 0.0, "num_input_tokens_seen": 21973208, "step": 38690 }, { "epoch": 678.8672566371681, "grad_norm": 3.20256212660297e-08, "learning_rate": 0.0007883999189296386, "loss": 0.0, "num_input_tokens_seen": 21976712, "step": 38695 }, { "epoch": 678.9557522123894, "grad_norm": 1.823904582920477e-08, "learning_rate": 0.0007823799651739515, "loss": 0.0, "num_input_tokens_seen": 21979784, "step": 38700 }, { "epoch": 679.0353982300885, "grad_norm": 2.4705203216512928e-08, "learning_rate": 0.0007763830226504509, "loss": 0.0, "num_input_tokens_seen": 21982024, "step": 38705 }, { "epoch": 679.1238938053098, "grad_norm": 3.205364507152808e-08, "learning_rate": 0.0007704090922839468, "loss": 0.0, "num_input_tokens_seen": 21984936, "step": 38710 }, { "epoch": 679.212389380531, "grad_norm": 8.22729901983621e-08, "learning_rate": 0.0007644581749957025, "loss": 0.0, "num_input_tokens_seen": 21988008, "step": 38715 }, { "epoch": 679.3008849557522, "grad_norm": 5.181451356861544e-08, "learning_rate": 0.000758530271703417, "loss": 0.0, "num_input_tokens_seen": 21990760, "step": 38720 }, { "epoch": 679.3893805309734, "grad_norm": 4.284996535375285e-08, "learning_rate": 0.0007526253833212426, "loss": 0.0, "num_input_tokens_seen": 21993464, "step": 38725 }, { "epoch": 679.4778761061947, "grad_norm": 5.3267630306663705e-08, "learning_rate": 0.0007467435107598008, "loss": 0.0, "num_input_tokens_seen": 21996376, "step": 38730 }, { "epoch": 679.566371681416, "grad_norm": 3.3001622767869776e-08, "learning_rate": 0.0007408846549261328, "loss": 0.0, "num_input_tokens_seen": 21998856, "step": 38735 }, { "epoch": 679.6548672566372, "grad_norm": 2.6317604095993374e-08, "learning_rate": 0.0007350488167237656, "loss": 0.0, "num_input_tokens_seen": 22001784, "step": 38740 }, { "epoch": 679.7433628318585, "grad_norm": 3.01145348657883e-08, "learning_rate": 0.0007292359970526629, "loss": 0.0, "num_input_tokens_seen": 22004616, "step": 38745 }, { "epoch": 679.8318584070796, "grad_norm": 2.8702325849394583e-08, "learning_rate": 0.0007234461968092076, "loss": 0.0, "num_input_tokens_seen": 22007400, "step": 38750 }, { "epoch": 679.9203539823009, "grad_norm": 5.4236743096680584e-08, "learning_rate": 0.0007176794168862854, "loss": 0.0, "num_input_tokens_seen": 22010792, "step": 38755 }, { "epoch": 680.0, "grad_norm": 5.833883420791608e-08, "learning_rate": 0.000711935658173185, "loss": 0.0, "num_input_tokens_seen": 22013496, "step": 38760 }, { "epoch": 680.0884955752213, "grad_norm": 3.7171886901887774e-08, "learning_rate": 0.0007062149215556812, "loss": 0.0, "num_input_tokens_seen": 22016200, "step": 38765 }, { "epoch": 680.1769911504425, "grad_norm": 2.7656378520646285e-08, "learning_rate": 0.0007005172079159849, "loss": 0.0, "num_input_tokens_seen": 22019496, "step": 38770 }, { "epoch": 680.2654867256637, "grad_norm": 7.105413857289022e-08, "learning_rate": 0.0006948425181327267, "loss": 0.0, "num_input_tokens_seen": 22022168, "step": 38775 }, { "epoch": 680.3539823008849, "grad_norm": 2.7950550318678324e-08, "learning_rate": 0.000689190853081073, "loss": 0.0, "num_input_tokens_seen": 22025416, "step": 38780 }, { "epoch": 680.4424778761062, "grad_norm": 3.921374869264582e-08, "learning_rate": 0.000683562213632527, "loss": 0.0, "num_input_tokens_seen": 22028696, "step": 38785 }, { "epoch": 680.5309734513274, "grad_norm": 5.5112046482008736e-08, "learning_rate": 0.0006779566006551108, "loss": 0.0, "num_input_tokens_seen": 22031016, "step": 38790 }, { "epoch": 680.6194690265487, "grad_norm": 4.7365080746430976e-08, "learning_rate": 0.0006723740150132995, "loss": 0.0, "num_input_tokens_seen": 22033752, "step": 38795 }, { "epoch": 680.70796460177, "grad_norm": 4.727286295747035e-08, "learning_rate": 0.0006668144575679713, "loss": 0.0, "num_input_tokens_seen": 22036600, "step": 38800 }, { "epoch": 680.70796460177, "eval_loss": 0.8143786191940308, "eval_runtime": 0.9416, "eval_samples_per_second": 26.552, "eval_steps_per_second": 13.807, "num_input_tokens_seen": 22036600, "step": 38800 }, { "epoch": 680.7964601769911, "grad_norm": 4.839253264776744e-08, "learning_rate": 0.0006612779291765069, "loss": 0.0, "num_input_tokens_seen": 22039480, "step": 38805 }, { "epoch": 680.8849557522124, "grad_norm": 3.82449947267105e-08, "learning_rate": 0.0006557644306926736, "loss": 0.0, "num_input_tokens_seen": 22042696, "step": 38810 }, { "epoch": 680.9734513274336, "grad_norm": 4.112534313094329e-08, "learning_rate": 0.0006502739629667575, "loss": 0.0, "num_input_tokens_seen": 22045272, "step": 38815 }, { "epoch": 681.0530973451328, "grad_norm": 4.6685702415061314e-08, "learning_rate": 0.0006448065268454317, "loss": 0.0, "num_input_tokens_seen": 22047824, "step": 38820 }, { "epoch": 681.141592920354, "grad_norm": 2.5546706083900972e-08, "learning_rate": 0.0006393621231718549, "loss": 0.0, "num_input_tokens_seen": 22051280, "step": 38825 }, { "epoch": 681.2300884955753, "grad_norm": 3.749482146986338e-08, "learning_rate": 0.0006339407527856389, "loss": 0.0, "num_input_tokens_seen": 22054256, "step": 38830 }, { "epoch": 681.3185840707964, "grad_norm": 3.546698934542292e-08, "learning_rate": 0.0006285424165227982, "loss": 0.0, "num_input_tokens_seen": 22056848, "step": 38835 }, { "epoch": 681.4070796460177, "grad_norm": 3.837685724761286e-08, "learning_rate": 0.0006231671152158169, "loss": 0.0, "num_input_tokens_seen": 22059824, "step": 38840 }, { "epoch": 681.4955752212389, "grad_norm": 3.1770746033998876e-08, "learning_rate": 0.0006178148496936819, "loss": 0.0, "num_input_tokens_seen": 22062544, "step": 38845 }, { "epoch": 681.5840707964602, "grad_norm": 5.629536659057521e-08, "learning_rate": 0.000612485620781733, "loss": 0.0, "num_input_tokens_seen": 22065504, "step": 38850 }, { "epoch": 681.6725663716815, "grad_norm": 3.8152784043177235e-08, "learning_rate": 0.0006071794293018296, "loss": 0.0, "num_input_tokens_seen": 22068304, "step": 38855 }, { "epoch": 681.7610619469026, "grad_norm": 2.201691984282661e-08, "learning_rate": 0.0006018962760722501, "loss": 0.0, "num_input_tokens_seen": 22070832, "step": 38860 }, { "epoch": 681.8495575221239, "grad_norm": 2.9852547100972515e-08, "learning_rate": 0.0005966361619077098, "loss": 0.0, "num_input_tokens_seen": 22073968, "step": 38865 }, { "epoch": 681.9380530973451, "grad_norm": 3.642498924705251e-08, "learning_rate": 0.000591399087619393, "loss": 0.0, "num_input_tokens_seen": 22076576, "step": 38870 }, { "epoch": 682.0176991150443, "grad_norm": 8.753843872000289e-08, "learning_rate": 0.0005861850540149371, "loss": 0.0, "num_input_tokens_seen": 22078808, "step": 38875 }, { "epoch": 682.1061946902655, "grad_norm": 2.790616626668907e-08, "learning_rate": 0.0005809940618983822, "loss": 0.0, "num_input_tokens_seen": 22082264, "step": 38880 }, { "epoch": 682.1946902654868, "grad_norm": 3.7128252472484746e-08, "learning_rate": 0.0005758261120702712, "loss": 0.0, "num_input_tokens_seen": 22085080, "step": 38885 }, { "epoch": 682.2831858407079, "grad_norm": 5.128852720304167e-08, "learning_rate": 0.0005706812053275501, "loss": 0.0, "num_input_tokens_seen": 22088376, "step": 38890 }, { "epoch": 682.3716814159292, "grad_norm": 4.9408686919605316e-08, "learning_rate": 0.0005655593424636173, "loss": 0.0, "num_input_tokens_seen": 22091160, "step": 38895 }, { "epoch": 682.4601769911504, "grad_norm": 3.523823721707231e-08, "learning_rate": 0.0005604605242683746, "loss": 0.0, "num_input_tokens_seen": 22094216, "step": 38900 }, { "epoch": 682.5486725663717, "grad_norm": 2.7729063489800865e-08, "learning_rate": 0.0005553847515280596, "loss": 0.0, "num_input_tokens_seen": 22097736, "step": 38905 }, { "epoch": 682.637168141593, "grad_norm": 5.659629920273801e-08, "learning_rate": 0.0005503320250254795, "loss": 0.0, "num_input_tokens_seen": 22100328, "step": 38910 }, { "epoch": 682.7256637168142, "grad_norm": 6.466908786251224e-08, "learning_rate": 0.0005453023455397943, "loss": 0.0, "num_input_tokens_seen": 22103096, "step": 38915 }, { "epoch": 682.8141592920354, "grad_norm": 1.208355815407458e-08, "learning_rate": 0.0005402957138466502, "loss": 0.0, "num_input_tokens_seen": 22105624, "step": 38920 }, { "epoch": 682.9026548672566, "grad_norm": 4.38168044070153e-08, "learning_rate": 0.0005353121307181463, "loss": 0.0, "num_input_tokens_seen": 22108104, "step": 38925 }, { "epoch": 682.9911504424779, "grad_norm": 3.073882481885448e-08, "learning_rate": 0.0005303515969227845, "loss": 0.0, "num_input_tokens_seen": 22110808, "step": 38930 }, { "epoch": 683.070796460177, "grad_norm": 3.6609819176192104e-08, "learning_rate": 0.0005254141132255862, "loss": 0.0, "num_input_tokens_seen": 22112896, "step": 38935 }, { "epoch": 683.1592920353983, "grad_norm": 5.318355889016857e-08, "learning_rate": 0.0005204996803879258, "loss": 0.0, "num_input_tokens_seen": 22116272, "step": 38940 }, { "epoch": 683.2477876106195, "grad_norm": 1.9793343852825274e-08, "learning_rate": 0.0005156082991676969, "loss": 0.0, "num_input_tokens_seen": 22119456, "step": 38945 }, { "epoch": 683.3362831858407, "grad_norm": 2.009167054950467e-08, "learning_rate": 0.0005107399703192127, "loss": 0.0, "num_input_tokens_seen": 22122544, "step": 38950 }, { "epoch": 683.4247787610619, "grad_norm": 4.4817646482897544e-08, "learning_rate": 0.0005058946945932063, "loss": 0.0, "num_input_tokens_seen": 22125728, "step": 38955 }, { "epoch": 683.5132743362832, "grad_norm": 3.899962308651084e-08, "learning_rate": 0.0005010724727369131, "loss": 0.0, "num_input_tokens_seen": 22128864, "step": 38960 }, { "epoch": 683.6017699115044, "grad_norm": 7.598786311291406e-08, "learning_rate": 0.000496273305493955, "loss": 0.0, "num_input_tokens_seen": 22131408, "step": 38965 }, { "epoch": 683.6902654867257, "grad_norm": 3.3749458339116245e-08, "learning_rate": 0.0004914971936044399, "loss": 0.0, "num_input_tokens_seen": 22133920, "step": 38970 }, { "epoch": 683.7787610619469, "grad_norm": 3.862858477532427e-08, "learning_rate": 0.00048674413780491196, "loss": 0.0, "num_input_tokens_seen": 22136384, "step": 38975 }, { "epoch": 683.8672566371681, "grad_norm": 3.1215524387562255e-08, "learning_rate": 0.0004820141388283183, "loss": 0.0, "num_input_tokens_seen": 22139024, "step": 38980 }, { "epoch": 683.9557522123894, "grad_norm": 4.2310009717994035e-08, "learning_rate": 0.00047730719740410874, "loss": 0.0, "num_input_tokens_seen": 22142064, "step": 38985 }, { "epoch": 684.0353982300885, "grad_norm": 3.1045278348074135e-08, "learning_rate": 0.00047262331425816927, "loss": 0.0, "num_input_tokens_seen": 22144496, "step": 38990 }, { "epoch": 684.1238938053098, "grad_norm": 6.161729970699525e-08, "learning_rate": 0.00046796249011277213, "loss": 0.0, "num_input_tokens_seen": 22147328, "step": 38995 }, { "epoch": 684.212389380531, "grad_norm": 3.166390172282263e-08, "learning_rate": 0.00046332472568669236, "loss": 0.0, "num_input_tokens_seen": 22150992, "step": 39000 }, { "epoch": 684.212389380531, "eval_loss": 0.7954248189926147, "eval_runtime": 0.9462, "eval_samples_per_second": 26.423, "eval_steps_per_second": 13.74, "num_input_tokens_seen": 22150992, "step": 39000 }, { "epoch": 684.3008849557522, "grad_norm": 2.5294500716199764e-08, "learning_rate": 0.0004587100216951578, "loss": 0.0, "num_input_tokens_seen": 22153408, "step": 39005 }, { "epoch": 684.3893805309734, "grad_norm": 2.2927425291641157e-08, "learning_rate": 0.00045411837884978265, "loss": 0.0, "num_input_tokens_seen": 22156784, "step": 39010 }, { "epoch": 684.4778761061947, "grad_norm": 4.89205653764202e-08, "learning_rate": 0.00044954979785865045, "loss": 0.0, "num_input_tokens_seen": 22159968, "step": 39015 }, { "epoch": 684.566371681416, "grad_norm": 1.7083859660260714e-08, "learning_rate": 0.00044500427942631426, "loss": 0.0, "num_input_tokens_seen": 22162464, "step": 39020 }, { "epoch": 684.6548672566372, "grad_norm": 7.462561057991479e-08, "learning_rate": 0.0004404818242537467, "loss": 0.0, "num_input_tokens_seen": 22164880, "step": 39025 }, { "epoch": 684.7433628318585, "grad_norm": 2.689155920165831e-08, "learning_rate": 0.00043598243303837324, "loss": 0.0, "num_input_tokens_seen": 22167280, "step": 39030 }, { "epoch": 684.8318584070796, "grad_norm": 1.694288975784275e-08, "learning_rate": 0.00043150610647403885, "loss": 0.0, "num_input_tokens_seen": 22170144, "step": 39035 }, { "epoch": 684.9203539823009, "grad_norm": 4.301513811810764e-08, "learning_rate": 0.00042705284525104134, "loss": 0.0, "num_input_tokens_seen": 22172912, "step": 39040 }, { "epoch": 685.0, "grad_norm": 1.0134002081940707e-07, "learning_rate": 0.0004226226500561647, "loss": 0.0, "num_input_tokens_seen": 22175264, "step": 39045 }, { "epoch": 685.0884955752213, "grad_norm": 2.2385096443144903e-08, "learning_rate": 0.0004182155215725791, "loss": 0.0, "num_input_tokens_seen": 22178128, "step": 39050 }, { "epoch": 685.1769911504425, "grad_norm": 1.546244909889083e-08, "learning_rate": 0.00041383146047992424, "loss": 0.0, "num_input_tokens_seen": 22180960, "step": 39055 }, { "epoch": 685.2654867256637, "grad_norm": 3.231144063420288e-08, "learning_rate": 0.00040947046745427597, "loss": 0.0, "num_input_tokens_seen": 22183760, "step": 39060 }, { "epoch": 685.3539823008849, "grad_norm": 6.349777237346643e-08, "learning_rate": 0.00040513254316814625, "loss": 0.0, "num_input_tokens_seen": 22186400, "step": 39065 }, { "epoch": 685.4424778761062, "grad_norm": 7.876209195956108e-08, "learning_rate": 0.0004008176882905168, "loss": 0.0, "num_input_tokens_seen": 22189040, "step": 39070 }, { "epoch": 685.5309734513274, "grad_norm": 7.956150938071005e-08, "learning_rate": 0.00039652590348677184, "loss": 0.0, "num_input_tokens_seen": 22191728, "step": 39075 }, { "epoch": 685.6194690265487, "grad_norm": 2.8812442209869005e-08, "learning_rate": 0.00039225718941878206, "loss": 0.0, "num_input_tokens_seen": 22194880, "step": 39080 }, { "epoch": 685.70796460177, "grad_norm": 3.713010698902508e-08, "learning_rate": 0.00038801154674480417, "loss": 0.0, "num_input_tokens_seen": 22198032, "step": 39085 }, { "epoch": 685.7964601769911, "grad_norm": 1.9561419151159498e-08, "learning_rate": 0.00038378897611959784, "loss": 0.0, "num_input_tokens_seen": 22201184, "step": 39090 }, { "epoch": 685.8849557522124, "grad_norm": 1.0735693933838775e-07, "learning_rate": 0.00037958947819430875, "loss": 0.0, "num_input_tokens_seen": 22204144, "step": 39095 }, { "epoch": 685.9734513274336, "grad_norm": 3.138675808145308e-08, "learning_rate": 0.0003754130536165856, "loss": 0.0, "num_input_tokens_seen": 22206912, "step": 39100 }, { "epoch": 686.0530973451328, "grad_norm": 3.4077654476050157e-08, "learning_rate": 0.0003712597030304632, "loss": 0.0, "num_input_tokens_seen": 22209152, "step": 39105 }, { "epoch": 686.141592920354, "grad_norm": 6.462130386353238e-08, "learning_rate": 0.00036712942707646247, "loss": 0.0, "num_input_tokens_seen": 22211904, "step": 39110 }, { "epoch": 686.2300884955753, "grad_norm": 2.8266196494541873e-08, "learning_rate": 0.00036302222639149063, "loss": 0.0, "num_input_tokens_seen": 22214864, "step": 39115 }, { "epoch": 686.3185840707964, "grad_norm": 3.488814215302227e-08, "learning_rate": 0.000358938101608941, "loss": 0.0, "num_input_tokens_seen": 22217424, "step": 39120 }, { "epoch": 686.4070796460177, "grad_norm": 6.724427237259079e-08, "learning_rate": 0.0003548770533586598, "loss": 0.0, "num_input_tokens_seen": 22219936, "step": 39125 }, { "epoch": 686.4955752212389, "grad_norm": 3.0814636176046406e-08, "learning_rate": 0.0003508390822668961, "loss": 0.0, "num_input_tokens_seen": 22222432, "step": 39130 }, { "epoch": 686.5840707964602, "grad_norm": 3.6699528749295496e-08, "learning_rate": 0.00034682418895633503, "loss": 0.0, "num_input_tokens_seen": 22225456, "step": 39135 }, { "epoch": 686.6725663716815, "grad_norm": 3.6772746625501895e-08, "learning_rate": 0.0003428323740461647, "loss": 0.0, "num_input_tokens_seen": 22228304, "step": 39140 }, { "epoch": 686.7610619469026, "grad_norm": 5.0563148334958896e-08, "learning_rate": 0.00033886363815194276, "loss": 0.0, "num_input_tokens_seen": 22231824, "step": 39145 }, { "epoch": 686.8495575221239, "grad_norm": 2.0795443589349816e-08, "learning_rate": 0.0003349179818857129, "loss": 0.0, "num_input_tokens_seen": 22234688, "step": 39150 }, { "epoch": 686.9380530973451, "grad_norm": 3.447995666761017e-08, "learning_rate": 0.0003309954058559383, "loss": 0.0, "num_input_tokens_seen": 22237632, "step": 39155 }, { "epoch": 687.0176991150443, "grad_norm": 3.10585903662286e-08, "learning_rate": 0.0003270959106675186, "loss": 0.0, "num_input_tokens_seen": 22240208, "step": 39160 }, { "epoch": 687.1061946902655, "grad_norm": 4.719969126654178e-08, "learning_rate": 0.0003232194969218227, "loss": 0.0, "num_input_tokens_seen": 22243472, "step": 39165 }, { "epoch": 687.1946902654868, "grad_norm": 4.73492569597056e-08, "learning_rate": 0.00031936616521663905, "loss": 0.0, "num_input_tokens_seen": 22246384, "step": 39170 }, { "epoch": 687.2831858407079, "grad_norm": 7.638634968998304e-08, "learning_rate": 0.00031553591614619236, "loss": 0.0, "num_input_tokens_seen": 22249520, "step": 39175 }, { "epoch": 687.3716814159292, "grad_norm": 5.606282016401565e-08, "learning_rate": 0.00031172875030117676, "loss": 0.0, "num_input_tokens_seen": 22252336, "step": 39180 }, { "epoch": 687.4601769911504, "grad_norm": 6.103462624196254e-08, "learning_rate": 0.0003079446682686726, "loss": 0.0, "num_input_tokens_seen": 22254912, "step": 39185 }, { "epoch": 687.5486725663717, "grad_norm": 9.037844961312658e-08, "learning_rate": 0.0003041836706322465, "loss": 0.0, "num_input_tokens_seen": 22258080, "step": 39190 }, { "epoch": 687.637168141593, "grad_norm": 3.013387228634201e-08, "learning_rate": 0.0003004457579719011, "loss": 0.0, "num_input_tokens_seen": 22261072, "step": 39195 }, { "epoch": 687.7256637168142, "grad_norm": 3.391904002114643e-08, "learning_rate": 0.00029673093086405867, "loss": 0.0, "num_input_tokens_seen": 22263616, "step": 39200 }, { "epoch": 687.7256637168142, "eval_loss": 0.7884461879730225, "eval_runtime": 0.9674, "eval_samples_per_second": 25.844, "eval_steps_per_second": 13.439, "num_input_tokens_seen": 22263616, "step": 39200 }, { "epoch": 687.8141592920354, "grad_norm": 4.0504449572154044e-08, "learning_rate": 0.00029303918988159426, "loss": 0.0, "num_input_tokens_seen": 22266288, "step": 39205 }, { "epoch": 687.9026548672566, "grad_norm": 5.4145072425626495e-08, "learning_rate": 0.0002893705355938192, "loss": 0.0, "num_input_tokens_seen": 22269216, "step": 39210 }, { "epoch": 687.9911504424779, "grad_norm": 2.4260192077463216e-08, "learning_rate": 0.0002857249685664975, "loss": 0.0, "num_input_tokens_seen": 22271984, "step": 39215 }, { "epoch": 688.070796460177, "grad_norm": 1.9482490287714427e-08, "learning_rate": 0.0002821024893618129, "loss": 0.0, "num_input_tokens_seen": 22274032, "step": 39220 }, { "epoch": 688.1592920353983, "grad_norm": 4.3404824623394234e-08, "learning_rate": 0.0002785030985383852, "loss": 0.0, "num_input_tokens_seen": 22276752, "step": 39225 }, { "epoch": 688.2477876106195, "grad_norm": 4.691457178296332e-08, "learning_rate": 0.00027492679665130356, "loss": 0.0, "num_input_tokens_seen": 22279616, "step": 39230 }, { "epoch": 688.3362831858407, "grad_norm": 4.8156756804473844e-08, "learning_rate": 0.000271373584252077, "loss": 0.0, "num_input_tokens_seen": 22282272, "step": 39235 }, { "epoch": 688.4247787610619, "grad_norm": 3.7486842074940796e-08, "learning_rate": 0.00026784346188865046, "loss": 0.0, "num_input_tokens_seen": 22285376, "step": 39240 }, { "epoch": 688.5132743362832, "grad_norm": 2.3325609888047438e-08, "learning_rate": 0.0002643364301054218, "loss": 0.0, "num_input_tokens_seen": 22288400, "step": 39245 }, { "epoch": 688.6017699115044, "grad_norm": 3.916523994007548e-08, "learning_rate": 0.0002608524894431918, "loss": 0.0, "num_input_tokens_seen": 22291408, "step": 39250 }, { "epoch": 688.6902654867257, "grad_norm": 2.3816271621512897e-08, "learning_rate": 0.000257391640439264, "loss": 0.0, "num_input_tokens_seen": 22294608, "step": 39255 }, { "epoch": 688.7787610619469, "grad_norm": 5.47835696806942e-08, "learning_rate": 0.00025395388362732806, "loss": 0.0, "num_input_tokens_seen": 22297488, "step": 39260 }, { "epoch": 688.8672566371681, "grad_norm": 5.003320779906062e-08, "learning_rate": 0.00025053921953751, "loss": 0.0, "num_input_tokens_seen": 22300544, "step": 39265 }, { "epoch": 688.9557522123894, "grad_norm": 4.44091661222501e-08, "learning_rate": 0.00024714764869643855, "loss": 0.0, "num_input_tokens_seen": 22303408, "step": 39270 }, { "epoch": 689.0353982300885, "grad_norm": 5.192984886548402e-08, "learning_rate": 0.0002437791716270954, "loss": 0.0, "num_input_tokens_seen": 22305656, "step": 39275 }, { "epoch": 689.1238938053098, "grad_norm": 2.8609550284386387e-08, "learning_rate": 0.00024043378884896493, "loss": 0.0, "num_input_tokens_seen": 22308152, "step": 39280 }, { "epoch": 689.212389380531, "grad_norm": 2.953695421581415e-08, "learning_rate": 0.00023711150087793453, "loss": 0.0, "num_input_tokens_seen": 22311112, "step": 39285 }, { "epoch": 689.3008849557522, "grad_norm": 2.3997474229986437e-08, "learning_rate": 0.000233812308226361, "loss": 0.0, "num_input_tokens_seen": 22313784, "step": 39290 }, { "epoch": 689.3893805309734, "grad_norm": 7.912215238548015e-08, "learning_rate": 0.00023053621140300406, "loss": 0.0, "num_input_tokens_seen": 22316552, "step": 39295 }, { "epoch": 689.4778761061947, "grad_norm": 6.392016160816638e-08, "learning_rate": 0.00022728321091307623, "loss": 0.0, "num_input_tokens_seen": 22319544, "step": 39300 }, { "epoch": 689.566371681416, "grad_norm": 2.7768567889552287e-08, "learning_rate": 0.0002240533072582429, "loss": 0.0, "num_input_tokens_seen": 22322152, "step": 39305 }, { "epoch": 689.6548672566372, "grad_norm": 1.0118569804262734e-07, "learning_rate": 0.00022084650093658897, "loss": 0.0, "num_input_tokens_seen": 22325224, "step": 39310 }, { "epoch": 689.7433628318585, "grad_norm": 3.061665054815421e-08, "learning_rate": 0.0002176627924426522, "loss": 0.0, "num_input_tokens_seen": 22328312, "step": 39315 }, { "epoch": 689.8318584070796, "grad_norm": 3.447567564762721e-08, "learning_rate": 0.0002145021822673898, "loss": 0.0, "num_input_tokens_seen": 22330952, "step": 39320 }, { "epoch": 689.9203539823009, "grad_norm": 5.8727916751877274e-08, "learning_rate": 0.00021136467089822862, "loss": 0.0, "num_input_tokens_seen": 22334248, "step": 39325 }, { "epoch": 690.0, "grad_norm": 4.292676791806116e-08, "learning_rate": 0.00020825025881898162, "loss": 0.0, "num_input_tokens_seen": 22336792, "step": 39330 }, { "epoch": 690.0884955752213, "grad_norm": 2.8850090316723254e-08, "learning_rate": 0.0002051589465099479, "loss": 0.0, "num_input_tokens_seen": 22339912, "step": 39335 }, { "epoch": 690.1769911504425, "grad_norm": 4.51353443509106e-08, "learning_rate": 0.0002020907344478462, "loss": 0.0, "num_input_tokens_seen": 22342344, "step": 39340 }, { "epoch": 690.2654867256637, "grad_norm": 4.7052793661350734e-08, "learning_rate": 0.0001990456231058313, "loss": 0.0, "num_input_tokens_seen": 22345512, "step": 39345 }, { "epoch": 690.3539823008849, "grad_norm": 2.159246115240876e-08, "learning_rate": 0.00019602361295349423, "loss": 0.0, "num_input_tokens_seen": 22348264, "step": 39350 }, { "epoch": 690.4424778761062, "grad_norm": 6.89950780952131e-08, "learning_rate": 0.0001930247044568789, "loss": 0.0, "num_input_tokens_seen": 22351208, "step": 39355 }, { "epoch": 690.5309734513274, "grad_norm": 5.8653483847592724e-08, "learning_rate": 0.00019004889807843205, "loss": 0.0, "num_input_tokens_seen": 22354824, "step": 39360 }, { "epoch": 690.6194690265487, "grad_norm": 3.791862823732117e-08, "learning_rate": 0.00018709619427708656, "loss": 0.0, "num_input_tokens_seen": 22357864, "step": 39365 }, { "epoch": 690.70796460177, "grad_norm": 5.385053825079922e-08, "learning_rate": 0.00018416659350817822, "loss": 0.0, "num_input_tokens_seen": 22360520, "step": 39370 }, { "epoch": 690.7964601769911, "grad_norm": 1.9329338130091855e-08, "learning_rate": 0.00018126009622346229, "loss": 0.0, "num_input_tokens_seen": 22363416, "step": 39375 }, { "epoch": 690.8849557522124, "grad_norm": 7.318239170217566e-08, "learning_rate": 0.00017837670287119687, "loss": 0.0, "num_input_tokens_seen": 22366376, "step": 39380 }, { "epoch": 690.9734513274336, "grad_norm": 5.104568501224094e-08, "learning_rate": 0.00017551641389602633, "loss": 0.0, "num_input_tokens_seen": 22368984, "step": 39385 }, { "epoch": 691.0530973451328, "grad_norm": 2.625835193725834e-08, "learning_rate": 0.00017267922973903115, "loss": 0.0, "num_input_tokens_seen": 22371184, "step": 39390 }, { "epoch": 691.141592920354, "grad_norm": 8.548322938395359e-08, "learning_rate": 0.00016986515083774467, "loss": 0.0, "num_input_tokens_seen": 22374272, "step": 39395 }, { "epoch": 691.2300884955753, "grad_norm": 1.1660207022146096e-08, "learning_rate": 0.00016707417762611975, "loss": 0.0, "num_input_tokens_seen": 22377936, "step": 39400 }, { "epoch": 691.2300884955753, "eval_loss": 0.7761130928993225, "eval_runtime": 0.9458, "eval_samples_per_second": 26.434, "eval_steps_per_second": 13.745, "num_input_tokens_seen": 22377936, "step": 39400 }, { "epoch": 691.3185840707964, "grad_norm": 2.7841338123835158e-08, "learning_rate": 0.00016430631053459543, "loss": 0.0, "num_input_tokens_seen": 22380656, "step": 39405 }, { "epoch": 691.4070796460177, "grad_norm": 1.5615123416523602e-08, "learning_rate": 0.0001615615499899803, "loss": 0.0, "num_input_tokens_seen": 22383424, "step": 39410 }, { "epoch": 691.4955752212389, "grad_norm": 3.784317215149713e-08, "learning_rate": 0.00015883989641556905, "loss": 0.0, "num_input_tokens_seen": 22385840, "step": 39415 }, { "epoch": 691.5840707964602, "grad_norm": 1.4327982356121538e-08, "learning_rate": 0.00015614135023105934, "loss": 0.0, "num_input_tokens_seen": 22388720, "step": 39420 }, { "epoch": 691.6725663716815, "grad_norm": 2.5693637439871964e-08, "learning_rate": 0.00015346591185261827, "loss": 0.0, "num_input_tokens_seen": 22391520, "step": 39425 }, { "epoch": 691.7610619469026, "grad_norm": 4.7748908826861225e-08, "learning_rate": 0.00015081358169281576, "loss": 0.0, "num_input_tokens_seen": 22394432, "step": 39430 }, { "epoch": 691.8495575221239, "grad_norm": 4.116213858651463e-08, "learning_rate": 0.00014818436016069135, "loss": 0.0, "num_input_tokens_seen": 22397264, "step": 39435 }, { "epoch": 691.9380530973451, "grad_norm": 1.687740436295826e-08, "learning_rate": 0.00014557824766168735, "loss": 0.0, "num_input_tokens_seen": 22400176, "step": 39440 }, { "epoch": 692.0176991150443, "grad_norm": 1.637687851996361e-08, "learning_rate": 0.00014299524459769896, "loss": 0.0, "num_input_tokens_seen": 22402368, "step": 39445 }, { "epoch": 692.1061946902655, "grad_norm": 3.1421546253795896e-08, "learning_rate": 0.0001404353513670742, "loss": 0.0, "num_input_tokens_seen": 22404960, "step": 39450 }, { "epoch": 692.1946902654868, "grad_norm": 6.436471977622205e-08, "learning_rate": 0.0001378985683645806, "loss": 0.0, "num_input_tokens_seen": 22407568, "step": 39455 }, { "epoch": 692.2831858407079, "grad_norm": 4.29671160873113e-08, "learning_rate": 0.0001353848959813886, "loss": 0.0, "num_input_tokens_seen": 22410464, "step": 39460 }, { "epoch": 692.3716814159292, "grad_norm": 5.950649750730008e-08, "learning_rate": 0.00013289433460517142, "loss": 0.0, "num_input_tokens_seen": 22413088, "step": 39465 }, { "epoch": 692.4601769911504, "grad_norm": 4.947712994862741e-08, "learning_rate": 0.00013042688462000518, "loss": 0.0, "num_input_tokens_seen": 22416144, "step": 39470 }, { "epoch": 692.5486725663717, "grad_norm": 4.763738559177e-08, "learning_rate": 0.0001279825464063855, "loss": 0.0, "num_input_tokens_seen": 22418752, "step": 39475 }, { "epoch": 692.637168141593, "grad_norm": 5.572302796963413e-08, "learning_rate": 0.00012556132034126087, "loss": 0.0, "num_input_tokens_seen": 22422064, "step": 39480 }, { "epoch": 692.7256637168142, "grad_norm": 3.2798055826788186e-08, "learning_rate": 0.0001231632067980326, "loss": 0.0, "num_input_tokens_seen": 22425712, "step": 39485 }, { "epoch": 692.8141592920354, "grad_norm": 7.21892945421132e-08, "learning_rate": 0.00012078820614650486, "loss": 0.0, "num_input_tokens_seen": 22428624, "step": 39490 }, { "epoch": 692.9026548672566, "grad_norm": 5.744853481814971e-08, "learning_rate": 0.00011843631875291804, "loss": 0.0, "num_input_tokens_seen": 22431344, "step": 39495 }, { "epoch": 692.9911504424779, "grad_norm": 1.4601060804864119e-08, "learning_rate": 0.00011610754497999863, "loss": 0.0, "num_input_tokens_seen": 22434000, "step": 39500 }, { "epoch": 693.070796460177, "grad_norm": 6.641523242478797e-08, "learning_rate": 0.0001138018851868594, "loss": 0.0, "num_input_tokens_seen": 22436352, "step": 39505 }, { "epoch": 693.1592920353983, "grad_norm": 3.7329954238884966e-08, "learning_rate": 0.0001115193397290326, "loss": 0.0, "num_input_tokens_seen": 22439056, "step": 39510 }, { "epoch": 693.2477876106195, "grad_norm": 4.250429341823292e-08, "learning_rate": 0.00010925990895856996, "loss": 0.0, "num_input_tokens_seen": 22442384, "step": 39515 }, { "epoch": 693.3362831858407, "grad_norm": 4.654124197145393e-08, "learning_rate": 0.00010702359322385946, "loss": 0.0, "num_input_tokens_seen": 22445760, "step": 39520 }, { "epoch": 693.4247787610619, "grad_norm": 3.140776172472215e-08, "learning_rate": 0.00010481039286977523, "loss": 0.0, "num_input_tokens_seen": 22448672, "step": 39525 }, { "epoch": 693.5132743362832, "grad_norm": 2.029189261065767e-08, "learning_rate": 0.00010262030823764423, "loss": 0.0, "num_input_tokens_seen": 22451120, "step": 39530 }, { "epoch": 693.6017699115044, "grad_norm": 1.1787520293182752e-08, "learning_rate": 0.00010045333966517966, "loss": 0.0, "num_input_tokens_seen": 22453728, "step": 39535 }, { "epoch": 693.6902654867257, "grad_norm": 2.5953648119525496e-08, "learning_rate": 9.83094874865642e-05, "loss": 0.0, "num_input_tokens_seen": 22456832, "step": 39540 }, { "epoch": 693.7787610619469, "grad_norm": 2.941468224548771e-08, "learning_rate": 9.618875203241672e-05, "loss": 0.0, "num_input_tokens_seen": 22459456, "step": 39545 }, { "epoch": 693.8672566371681, "grad_norm": 8.18252701151323e-08, "learning_rate": 9.409113362977561e-05, "loss": 0.0, "num_input_tokens_seen": 22462416, "step": 39550 }, { "epoch": 693.9557522123894, "grad_norm": 3.658012559526469e-08, "learning_rate": 9.20166326020988e-05, "loss": 0.0, "num_input_tokens_seen": 22465168, "step": 39555 }, { "epoch": 694.0353982300885, "grad_norm": 7.01834395044898e-08, "learning_rate": 8.996524926933035e-05, "loss": 0.0, "num_input_tokens_seen": 22467592, "step": 39560 }, { "epoch": 694.1238938053098, "grad_norm": 5.982777651070137e-08, "learning_rate": 8.793698394781723e-05, "loss": 0.0, "num_input_tokens_seen": 22470280, "step": 39565 }, { "epoch": 694.212389380531, "grad_norm": 6.003021724154678e-08, "learning_rate": 8.593183695030926e-05, "loss": 0.0, "num_input_tokens_seen": 22473576, "step": 39570 }, { "epoch": 694.3008849557522, "grad_norm": 5.730890606514549e-08, "learning_rate": 8.39498085860757e-05, "loss": 0.0, "num_input_tokens_seen": 22475960, "step": 39575 }, { "epoch": 694.3893805309734, "grad_norm": 2.559386480527337e-08, "learning_rate": 8.199089916072211e-05, "loss": 0.0, "num_input_tokens_seen": 22478872, "step": 39580 }, { "epoch": 694.4778761061947, "grad_norm": 2.241480778764071e-08, "learning_rate": 8.005510897637346e-05, "loss": 0.0, "num_input_tokens_seen": 22481368, "step": 39585 }, { "epoch": 694.566371681416, "grad_norm": 2.5472360221101553e-08, "learning_rate": 7.8142438331541e-05, "loss": 0.0, "num_input_tokens_seen": 22484472, "step": 39590 }, { "epoch": 694.6548672566372, "grad_norm": 3.077883903301881e-08, "learning_rate": 7.625288752117209e-05, "loss": 0.0, "num_input_tokens_seen": 22487672, "step": 39595 }, { "epoch": 694.7433628318585, "grad_norm": 3.762900746551168e-08, "learning_rate": 7.4386456836667e-05, "loss": 0.0, "num_input_tokens_seen": 22490328, "step": 39600 }, { "epoch": 694.7433628318585, "eval_loss": 0.7776755690574646, "eval_runtime": 0.9339, "eval_samples_per_second": 26.77, "eval_steps_per_second": 13.92, "num_input_tokens_seen": 22490328, "step": 39600 }, { "epoch": 694.8318584070796, "grad_norm": 5.624158916361921e-08, "learning_rate": 7.254314656586214e-05, "loss": 0.0, "num_input_tokens_seen": 22493160, "step": 39605 }, { "epoch": 694.9203539823009, "grad_norm": 5.875026332091693e-08, "learning_rate": 7.07229569929968e-05, "loss": 0.0, "num_input_tokens_seen": 22496040, "step": 39610 }, { "epoch": 695.0, "grad_norm": 4.4388656306182384e-07, "learning_rate": 6.892588839879643e-05, "loss": 0.0, "num_input_tokens_seen": 22498816, "step": 39615 }, { "epoch": 695.0884955752213, "grad_norm": 1.5649906259795898e-08, "learning_rate": 6.71519410603727e-05, "loss": 0.0, "num_input_tokens_seen": 22501760, "step": 39620 }, { "epoch": 695.1769911504425, "grad_norm": 2.513178465335386e-08, "learning_rate": 6.540111525129011e-05, "loss": 0.0, "num_input_tokens_seen": 22504800, "step": 39625 }, { "epoch": 695.2654867256637, "grad_norm": 3.471575027447216e-08, "learning_rate": 6.367341124154934e-05, "loss": 0.0, "num_input_tokens_seen": 22507888, "step": 39630 }, { "epoch": 695.3539823008849, "grad_norm": 7.431927429024654e-08, "learning_rate": 6.19688292975873e-05, "loss": 0.0, "num_input_tokens_seen": 22510160, "step": 39635 }, { "epoch": 695.4424778761062, "grad_norm": 1.5086335736214096e-08, "learning_rate": 6.0287369682260336e-05, "loss": 0.0, "num_input_tokens_seen": 22512816, "step": 39640 }, { "epoch": 695.5309734513274, "grad_norm": 3.8050337991535343e-08, "learning_rate": 5.8629032654894384e-05, "loss": 0.0, "num_input_tokens_seen": 22515760, "step": 39645 }, { "epoch": 695.6194690265487, "grad_norm": 5.582302975426501e-08, "learning_rate": 5.699381847120155e-05, "loss": 0.0, "num_input_tokens_seen": 22518624, "step": 39650 }, { "epoch": 695.70796460177, "grad_norm": 5.59450157311403e-08, "learning_rate": 5.5381727383380094e-05, "loss": 0.0, "num_input_tokens_seen": 22521344, "step": 39655 }, { "epoch": 695.7964601769911, "grad_norm": 1.4608381171399287e-08, "learning_rate": 5.379275964001451e-05, "loss": 0.0, "num_input_tokens_seen": 22524096, "step": 39660 }, { "epoch": 695.8849557522124, "grad_norm": 2.8321748501980437e-08, "learning_rate": 5.222691548614211e-05, "loss": 0.0, "num_input_tokens_seen": 22527280, "step": 39665 }, { "epoch": 695.9734513274336, "grad_norm": 3.973908846433005e-08, "learning_rate": 5.068419516323641e-05, "loss": 0.0, "num_input_tokens_seen": 22530672, "step": 39670 }, { "epoch": 696.0530973451328, "grad_norm": 8.909488968811274e-08, "learning_rate": 4.91645989092071e-05, "loss": 0.0, "num_input_tokens_seen": 22532944, "step": 39675 }, { "epoch": 696.141592920354, "grad_norm": 1.9300210851724842e-07, "learning_rate": 4.7668126958400056e-05, "loss": 0.0, "num_input_tokens_seen": 22536400, "step": 39680 }, { "epoch": 696.2300884955753, "grad_norm": 3.585479646517342e-08, "learning_rate": 4.619477954159734e-05, "loss": 0.0, "num_input_tokens_seen": 22538688, "step": 39685 }, { "epoch": 696.3185840707964, "grad_norm": 3.232300471722738e-08, "learning_rate": 4.4744556885983884e-05, "loss": 0.0, "num_input_tokens_seen": 22542336, "step": 39690 }, { "epoch": 696.4070796460177, "grad_norm": 1.0651888260326814e-07, "learning_rate": 4.331745921523078e-05, "loss": 0.0, "num_input_tokens_seen": 22545184, "step": 39695 }, { "epoch": 696.4955752212389, "grad_norm": 1.773559610285247e-08, "learning_rate": 4.191348674937867e-05, "loss": 0.0, "num_input_tokens_seen": 22547920, "step": 39700 }, { "epoch": 696.5840707964602, "grad_norm": 2.0159424352073074e-08, "learning_rate": 4.0532639704971006e-05, "loss": 0.0, "num_input_tokens_seen": 22550592, "step": 39705 }, { "epoch": 696.6725663716815, "grad_norm": 3.4930959458279176e-08, "learning_rate": 3.917491829493747e-05, "loss": 0.0, "num_input_tokens_seen": 22553216, "step": 39710 }, { "epoch": 696.7610619469026, "grad_norm": 3.904590784031825e-08, "learning_rate": 3.78403227286439e-05, "loss": 0.0, "num_input_tokens_seen": 22556144, "step": 39715 }, { "epoch": 696.8495575221239, "grad_norm": 3.688143479507744e-08, "learning_rate": 3.652885321192567e-05, "loss": 0.0, "num_input_tokens_seen": 22559264, "step": 39720 }, { "epoch": 696.9380530973451, "grad_norm": 3.0796755368101e-08, "learning_rate": 3.524050994702099e-05, "loss": 0.0, "num_input_tokens_seen": 22562016, "step": 39725 }, { "epoch": 697.0176991150443, "grad_norm": 3.0661539085485856e-08, "learning_rate": 3.3975293132604276e-05, "loss": 0.0, "num_input_tokens_seen": 22564280, "step": 39730 }, { "epoch": 697.1061946902655, "grad_norm": 2.907765761506198e-08, "learning_rate": 3.2733202963786125e-05, "loss": 0.0, "num_input_tokens_seen": 22567400, "step": 39735 }, { "epoch": 697.1946902654868, "grad_norm": 1.3817262001225572e-08, "learning_rate": 3.15142396321133e-05, "loss": 0.0, "num_input_tokens_seen": 22570520, "step": 39740 }, { "epoch": 697.2831858407079, "grad_norm": 3.027406236810748e-08, "learning_rate": 3.0318403325552132e-05, "loss": 0.0, "num_input_tokens_seen": 22573336, "step": 39745 }, { "epoch": 697.3716814159292, "grad_norm": 4.938921804864549e-08, "learning_rate": 2.914569422855506e-05, "loss": 0.0, "num_input_tokens_seen": 22576152, "step": 39750 }, { "epoch": 697.4601769911504, "grad_norm": 3.282753979760855e-08, "learning_rate": 2.7996112521927462e-05, "loss": 0.0, "num_input_tokens_seen": 22578920, "step": 39755 }, { "epoch": 697.5486725663717, "grad_norm": 5.4546340777505975e-08, "learning_rate": 2.68696583829775e-05, "loss": 0.0, "num_input_tokens_seen": 22581352, "step": 39760 }, { "epoch": 697.637168141593, "grad_norm": 1.259524662344802e-08, "learning_rate": 2.576633198539957e-05, "loss": 0.0, "num_input_tokens_seen": 22584152, "step": 39765 }, { "epoch": 697.7256637168142, "grad_norm": 5.1487862862131806e-08, "learning_rate": 2.46861334993409e-05, "loss": 0.0, "num_input_tokens_seen": 22587384, "step": 39770 }, { "epoch": 697.8141592920354, "grad_norm": 2.9510447419056618e-08, "learning_rate": 2.3629063091384903e-05, "loss": 0.0, "num_input_tokens_seen": 22590296, "step": 39775 }, { "epoch": 697.9026548672566, "grad_norm": 4.303086598156369e-08, "learning_rate": 2.2595120924567834e-05, "loss": 0.0, "num_input_tokens_seen": 22593032, "step": 39780 }, { "epoch": 697.9911504424779, "grad_norm": 1.0244991521801694e-08, "learning_rate": 2.158430715829551e-05, "loss": 0.0, "num_input_tokens_seen": 22596136, "step": 39785 }, { "epoch": 698.070796460177, "grad_norm": 1.7763627013778205e-08, "learning_rate": 2.059662194849321e-05, "loss": 0.0, "num_input_tokens_seen": 22598352, "step": 39790 }, { "epoch": 698.1592920353983, "grad_norm": 9.447489901504014e-08, "learning_rate": 1.9632065447422463e-05, "loss": 0.0, "num_input_tokens_seen": 22601024, "step": 39795 }, { "epoch": 698.2477876106195, "grad_norm": 2.7160782067880973e-08, "learning_rate": 1.8690637803880916e-05, "loss": 0.0, "num_input_tokens_seen": 22604096, "step": 39800 }, { "epoch": 698.2477876106195, "eval_loss": 0.794979989528656, "eval_runtime": 0.9438, "eval_samples_per_second": 26.488, "eval_steps_per_second": 13.774, "num_input_tokens_seen": 22604096, "step": 39800 }, { "epoch": 698.3362831858407, "grad_norm": 2.6614801029722912e-08, "learning_rate": 1.7772339163019123e-05, "loss": 0.0, "num_input_tokens_seen": 22607040, "step": 39805 }, { "epoch": 698.4247787610619, "grad_norm": 3.505069301468211e-08, "learning_rate": 1.6877169666457138e-05, "loss": 0.0, "num_input_tokens_seen": 22609568, "step": 39810 }, { "epoch": 698.5132743362832, "grad_norm": 3.931443259830303e-08, "learning_rate": 1.6005129452234532e-05, "loss": 0.0, "num_input_tokens_seen": 22612320, "step": 39815 }, { "epoch": 698.6017699115044, "grad_norm": 4.5311438157114026e-08, "learning_rate": 1.5156218654843733e-05, "loss": 0.0, "num_input_tokens_seen": 22615328, "step": 39820 }, { "epoch": 698.6902654867257, "grad_norm": 4.4661319975602964e-08, "learning_rate": 1.4330437405196683e-05, "loss": 0.0, "num_input_tokens_seen": 22618784, "step": 39825 }, { "epoch": 698.7787610619469, "grad_norm": 1.9648030757934976e-08, "learning_rate": 1.352778583062486e-05, "loss": 0.0, "num_input_tokens_seen": 22621376, "step": 39830 }, { "epoch": 698.8672566371681, "grad_norm": 2.96684827816307e-08, "learning_rate": 1.2748264054929237e-05, "loss": 0.0, "num_input_tokens_seen": 22623936, "step": 39835 }, { "epoch": 698.9557522123894, "grad_norm": 5.997301855131809e-08, "learning_rate": 1.1991872198297004e-05, "loss": 0.0, "num_input_tokens_seen": 22627120, "step": 39840 }, { "epoch": 699.0353982300885, "grad_norm": 5.395724755885567e-08, "learning_rate": 1.1258610377384847e-05, "loss": 0.0, "num_input_tokens_seen": 22629960, "step": 39845 }, { "epoch": 699.1238938053098, "grad_norm": 4.8975341115919946e-08, "learning_rate": 1.0548478705268982e-05, "loss": 0.0, "num_input_tokens_seen": 22632840, "step": 39850 }, { "epoch": 699.212389380531, "grad_norm": 3.48762050350615e-08, "learning_rate": 9.86147729147846e-06, "loss": 0.0, "num_input_tokens_seen": 22635656, "step": 39855 }, { "epoch": 699.3008849557522, "grad_norm": 4.512328999339843e-08, "learning_rate": 9.197606241928557e-06, "loss": 0.0, "num_input_tokens_seen": 22638632, "step": 39860 }, { "epoch": 699.3893805309734, "grad_norm": 4.064805736447852e-08, "learning_rate": 8.556865659004042e-06, "loss": 0.0, "num_input_tokens_seen": 22641400, "step": 39865 }, { "epoch": 699.4778761061947, "grad_norm": 1.6698278315629977e-08, "learning_rate": 7.939255641525867e-06, "loss": 0.0, "num_input_tokens_seen": 22644264, "step": 39870 }, { "epoch": 699.566371681416, "grad_norm": 5.893304333426386e-08, "learning_rate": 7.344776284751164e-06, "loss": 0.0, "num_input_tokens_seen": 22647128, "step": 39875 }, { "epoch": 699.6548672566372, "grad_norm": 3.594860586986215e-08, "learning_rate": 6.773427680323296e-06, "loss": 0.0, "num_input_tokens_seen": 22649928, "step": 39880 }, { "epoch": 699.7433628318585, "grad_norm": 4.4241421193191854e-08, "learning_rate": 6.225209916355112e-06, "loss": 0.0, "num_input_tokens_seen": 22652840, "step": 39885 }, { "epoch": 699.8318584070796, "grad_norm": 3.267879478130453e-08, "learning_rate": 5.7001230774123e-06, "loss": 0.0, "num_input_tokens_seen": 22655384, "step": 39890 }, { "epoch": 699.9203539823009, "grad_norm": 2.6052697776890454e-08, "learning_rate": 5.198167244446772e-06, "loss": 0.0, "num_input_tokens_seen": 22658232, "step": 39895 }, { "epoch": 700.0, "grad_norm": 1.6486517040448234e-07, "learning_rate": 4.71934249487993e-06, "loss": 0.0, "num_input_tokens_seen": 22660488, "step": 39900 }, { "epoch": 700.0884955752213, "grad_norm": 4.879915138644719e-08, "learning_rate": 4.2636489025527075e-06, "loss": 0.0, "num_input_tokens_seen": 22663336, "step": 39905 }, { "epoch": 700.1769911504425, "grad_norm": 3.912854396048715e-08, "learning_rate": 3.831086537742223e-06, "loss": 0.0, "num_input_tokens_seen": 22666120, "step": 39910 }, { "epoch": 700.2654867256637, "grad_norm": 2.035062074412508e-08, "learning_rate": 3.4216554671451236e-06, "loss": 0.0, "num_input_tokens_seen": 22668648, "step": 39915 }, { "epoch": 700.3539823008849, "grad_norm": 1.4439789808307069e-08, "learning_rate": 3.035355753894242e-06, "loss": 0.0, "num_input_tokens_seen": 22671464, "step": 39920 }, { "epoch": 700.4424778761062, "grad_norm": 3.807232928920712e-08, "learning_rate": 2.6721874575752477e-06, "loss": 0.0, "num_input_tokens_seen": 22674360, "step": 39925 }, { "epoch": 700.5309734513274, "grad_norm": 6.602136437550143e-08, "learning_rate": 2.3321506341933418e-06, "loss": 0.0, "num_input_tokens_seen": 22677432, "step": 39930 }, { "epoch": 700.6194690265487, "grad_norm": 7.97234704918992e-08, "learning_rate": 2.0152453361732546e-06, "loss": 0.0, "num_input_tokens_seen": 22680296, "step": 39935 }, { "epoch": 700.70796460177, "grad_norm": 2.34460344472609e-08, "learning_rate": 1.7214716123925554e-06, "loss": 0.0, "num_input_tokens_seen": 22683304, "step": 39940 }, { "epoch": 700.7964601769911, "grad_norm": 6.456873791194084e-08, "learning_rate": 1.4508295081649968e-06, "loss": 0.0, "num_input_tokens_seen": 22686520, "step": 39945 }, { "epoch": 700.8849557522124, "grad_norm": 4.315196022730561e-08, "learning_rate": 1.2033190652238623e-06, "loss": 0.0, "num_input_tokens_seen": 22689240, "step": 39950 }, { "epoch": 700.9734513274336, "grad_norm": 7.595829032425172e-08, "learning_rate": 9.78940321721966e-07, "loss": 0.0, "num_input_tokens_seen": 22692696, "step": 39955 }, { "epoch": 701.0530973451328, "grad_norm": 1.8965266690429416e-08, "learning_rate": 7.776933122816132e-07, "loss": 0.0, "num_input_tokens_seen": 22694888, "step": 39960 }, { "epoch": 701.141592920354, "grad_norm": 4.067050696221486e-08, "learning_rate": 5.99578067927986e-07, "loss": 0.0, "num_input_tokens_seen": 22698120, "step": 39965 }, { "epoch": 701.2300884955753, "grad_norm": 3.855930685858766e-08, "learning_rate": 4.445946161224512e-07, "loss": 0.0, "num_input_tokens_seen": 22701000, "step": 39970 }, { "epoch": 701.3185840707964, "grad_norm": 4.8956064091498774e-08, "learning_rate": 3.127429807792126e-07, "loss": 0.0, "num_input_tokens_seen": 22704392, "step": 39975 }, { "epoch": 701.4070796460177, "grad_norm": 3.980955654014906e-08, "learning_rate": 2.040231822320049e-07, "loss": 0.0, "num_input_tokens_seen": 22707096, "step": 39980 }, { "epoch": 701.4955752212389, "grad_norm": 3.9464016055035245e-08, "learning_rate": 1.1843523723409354e-07, "loss": 0.0, "num_input_tokens_seen": 22710184, "step": 39985 }, { "epoch": 701.5840707964602, "grad_norm": 1.7197296031667975e-08, "learning_rate": 5.597915897492811e-08, "loss": 0.0, "num_input_tokens_seen": 22713240, "step": 39990 }, { "epoch": 701.6725663716815, "grad_norm": 1.9337814194386738e-07, "learning_rate": 1.6654957113448885e-08, "loss": 0.0, "num_input_tokens_seen": 22715928, "step": 39995 }, { "epoch": 701.7610619469026, "grad_norm": 2.723468561782738e-08, "learning_rate": 4.626377114735902e-10, "loss": 0.0, "num_input_tokens_seen": 22718312, "step": 40000 }, { "epoch": 701.7610619469026, "eval_loss": 0.8050469756126404, "eval_runtime": 0.9464, "eval_samples_per_second": 26.417, "eval_steps_per_second": 13.737, "num_input_tokens_seen": 22718312, "step": 40000 }, { "epoch": 701.7610619469026, "num_input_tokens_seen": 22718312, "step": 40000, "total_flos": 9.51301231531438e+16, "train_loss": 0.015288212282552349, "train_runtime": 8564.9201, "train_samples_per_second": 18.681, "train_steps_per_second": 4.67 } ], "logging_steps": 5, "max_steps": 40000, "num_input_tokens_seen": 22718312, "num_train_epochs": 715, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.51301231531438e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }